diff --git a/Dockerfile b/Dockerfile index 291841700ef122ce781ac74d35a5aebea85f02d3..fccebec9a3d131913615cfba6836a91d5e84976b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM golang:1.13.11-buster as build +FROM ubuntu:18.04 as build RUN useradd -d /home/hwMindX -u 9000 -m -s /bin/bash hwMindX && \ useradd -d /home/HwHiAiUser -u 1000 -m -s /bin/bash HwHiAiUser && \ @@ -6,18 +6,9 @@ RUN useradd -d /home/hwMindX -u 9000 -m -s /bin/bash hwMindX && \ ENV USE_ASCEND_DOCKER true -ENV GOPATH /usr/app/ - -ENV GO111MODULE off - -ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH - ENV LD_LIBRARY_PATH /usr/local/Ascend/driver/lib64/driver:/usr/local/Ascend/driver/lib64/common ENV LD_LIBRARY_PATH $LD_LIBRARY_PATH:/usr/local/Ascend/driver/lib64/ -RUN mkdir -p /usr/app/src/ascend-device-plugin - -COPY . /usr/app/src/Ascend-device-plugin +COPY ./output/ascendplugin /usr/local/bin -WORKDIR /usr/app/src/Ascend-device-plugin \ No newline at end of file diff --git a/README.md b/README.md index ff637014494bc0761a7a8dd8d36c5a57687b2559..e0f66102c672a57fa3ebb0aaf521cdefe7291942 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,8 @@ -# Ascend Device Plugin +# Ascend Device Plugin.en - [Ascend Device Plugin](#ascend-device-plugin.md) - [Description](#description.md) - [Compiling the Ascend Device Plugin](#compiling-the-ascend-device-plugin.md) - - [Quickly Compiling the Ascend Device Plugin](#quickly-compiling-the-ascend-device-plugin.md) - - [Compiling the Ascend Device Plugin](#compiling-the-ascend-device-plugin-0.md) - - [Creating DaemonSet.](#creating-daemonset.md) + - [Creating DaemonSet](#creating-daemonset.md) - [Creating a Service Container](#creating-a-service-container.md) - [Environment Dependencies](#environment-dependencies.md) - [Directory Structure](#directory-structure.md) @@ -15,7 +13,7 @@ - **[Compiling the Ascend Device Plugin](#compiling-the-ascend-device-plugin.md)** -- **[Creating DaemonSet.](#creating-daemonset.md)** +- **[Creating 
DaemonSet](#creating-daemonset.md)** - **[Creating a Service Container](#creating-a-service-container.md)** @@ -30,105 +28,9 @@ The device management plug-in provides the following functions:

Compiling the Ascend Device Plugin

-- **[Quickly Compiling the Ascend Device Plugin](#quickly-compiling-the-ascend-device-plugin.md)** -You can modify the configuration parameters during compilation by running a shell script. You only need to modify the parameters in the script to quickly complete the compilation. -- **[Compiling the Ascend Device Plugin](#compiling-the-ascend-device-plugin-0.md)** - - -

Quickly Compiling the Ascend Device Plugin

- -You can modify the configuration parameters during compilation by running a shell script. You only need to modify the parameters in the script to quickly complete the compilation. - -## Procedure - -1. Run the following command to install the latest pkg-config tool: - - **apt-get install -y pkg-config** - -2. Run the following commands to set environment variables: - - **export GO111MODULE=on** - - **export GOPROXY=**_Proxy address_ - - **export GONOSUMDB=\*** - - >![](figures/icon-note.gif) **NOTE:** - >Use the actual GOPROXY proxy address. You can run the **go mod download** command in the **ascend-device-plugin** directory to check the address. - -3. Create and execute the shell file in **./build/**. - - ``` - #!/bin/bash - ASCEND_TYPE=910 #Select 310 or 910 based on the processor model. - ASCNED_INSTALL_PATH=/usr/local/Ascend #Driver installation path. Change it as required. - USE_ASCEND_DOCKER=false #whether to use Ascend Docker. - - - CUR_DIR=$(dirname $(readlink -f $0)) - TOP_DIR=$(realpath ${CUR_DIR}/..) 
- LD_LIBRARY_PATH_PARA1=${ASCNED_INSTALL_PATH}/driver/lib64/driver - LD_LIBRARY_PATH_PARA2=${ASCNED_INSTALL_PATH}/driver/lib64 - TYPE=Ascend910 - PKG_PATH=${TOP_DIR}/src/plugin/config/config_910 - PKG_PATH_STRING=\$\{TOP_DIR\}/src/plugin/config/config_910 - LIBDRIVER="driver/lib64/driver" - if [ ${ASCNED_TYPE} == "310" ]; then - TYPE=Ascend310 - LD_LIBRARY_PATH_PARA1=${ASCNED_INSTALL_PATH}/driver/lib64 - PKG_PATH=${TOP_DIR}/src/plugin/config/config_310 - PKG_PATH_STRING=\$\{TOP_DIR\}/src/plugin/config/config_310 - LIBDRIVER="/driver/lib64" - sed -i "s#ascendplugin --useAscendDocker=\${USE_ASCEND_DOCKER}#ascendplugin --mode=ascend310 --useAscendDocker=${USE_ASCEND_DOCKER}#g" ${TOP_DIR}/ascendplugin.yaml - fi - sed -i "s/Ascend[0-9]\{3\}/${TYPE}/g" ${TOP_DIR}/ascendplugin.yaml - sed -i "s#ath: /usr/local/Ascend/driver#ath: ${ASCNED_INSTALL_PATH}/driver#g" ${TOP_DIR}/ascendplugin.yaml - sed -i "/^ENV LD_LIBRARY_PATH /c ENV LD_LIBRARY_PATH ${LD_LIBRARY_PATH_PARA1}:${LD_LIBRARY_PATH_PARA2}/common" ${TOP_DIR}/Dockerfile - sed -i "/^ENV USE_ASCEND_DOCKER /c ENV USE_ASCEND_DOCKER ${USE_ASCEND_DOCKER}" ${TOP_DIR}/Dockerfile - sed -i "/^libdriver=/c libdriver=$\{prefix\}/${LIBDRIVER}" ${PKG_PATH}/ascend_device_plugin.pc - sed -i "/^prefix=/c prefix=${ASCNED_INSTALL_PATH}" ${PKG_PATH}/ascend_device_plugin.pc - sed -i "/^CONFIGDIR=/c CONFIGDIR=${PKG_PATH_STRING}" ${CUR_DIR}/build_in_docker.sh - ``` - -4. Run the following commands to generate a binary file and image file \(use the actual script name\): - - Select **build\_910.sh** for Ascend 910 and select **build\_310.sh** for Ascend 310. - - **cd** _/home/test/_ascend-device-plugin**/build**/ - - **chmod +x** _build\_XXX.sh_ - - **dos2unix** _build\_XXX.sh_ - - **./**_build\_XXX.sh_ **dockerimages** - -5. Run the following command to view the generated software package: - - **ll** _/home/test/_ascend-device-plugin**/output** - - The software package name for the x86 environment and that for the ARM environment are different. 
The following uses the ARM environment as an example. - - >![](figures/icon-note.gif) **NOTE:** - >- **Ascend-K8sDevicePlugin-**_xxx_**-arm64-Docker.tar.gz**: K8s device plugin image. - >- **Ascend-K8sDevicePlugin-**_xxx_**-arm64-Linux.tar.gz**: binary installation package of the K8s device plugin. - - ``` - drwxr-xr-x 2 root root 4096 Jun 8 18:42 ./ - drwxr-xr-x 9 root root 4096 Jun 8 17:12 ../ - -rw-r--r-- 1 root root 29584705 Jun 9 10:37 Ascend-K8sDevicePlugin-xxx-arm64-Docker.tar.gz - -rwxr-xr-x 1 root root 6721073 Jun 9 16:20 Ascend-K8sDevicePlugin-xxx-arm64-Linux.tar.gz - ``` - - -

Compiling the Ascend Device Plugin

- ## Procedure -1. Run the following command to install the latest pkg-config tool: - - **apt-get install -y pkg-config** - -2. Run the following commands to set environment variables: +1. Set environment variables. **export GO111MODULE=on** @@ -137,9 +39,10 @@ You can modify the configuration parameters during compilation by running a shel **export GONOSUMDB=\*** >![](figures/icon-note.gif) **NOTE:** - >Use the actual GOPROXY proxy address. You can run the **go mod download** command in the **ascend-device-plugin** directory to check the address. + >- Use the actual GOPROXY proxy address. You can run the **go mod download** command in the **ascend-device-plugin** directory to check the address. + >- If no error information is displayed, the proxy is set successfully. -3. Go to the **ascend-device-plugin** directory and run the following command to modify the YAML file: +2. Go to the **ascend-device-plugin** directory and run the following command to modify the YAML file: - Common YAML file **vi ascendplugin.yaml** @@ -166,7 +69,7 @@ You can modify the configuration parameters during compilation by running a shel tolerations: - key: CriticalAddonsOnly operator: Exists - - key: huawei.com/Ascend910 #Resource name. Set the value based on the chip type. + - key: huawei.com/Ascend910 #Resource name. Set the value based on the processor type. operator: Exists effect: NoSchedule - key: "ascendplugin" @@ -175,9 +78,9 @@ You can modify the configuration parameters during compilation by running a shel effect: NoSchedule priorityClassName: "system-node-critical" nodeSelector: - accelerator: huawei-Ascend910 #Set the label name based on the chip type. + accelerator: huawei-Ascend910 #Set the label name based on the processor type. containers: - - image: ascend-device-plugin:v1.0.1 #Image name and version, which must be the same as the settings in build_common.sh. 
+ - image: ascend-device-plugin:v1.0.1 #Image name and version name: device-plugin-01 resources: requests: @@ -187,7 +90,7 @@ You can modify the configuration parameters during compilation by running a shel memory: 500Mi cpu: 500m command: [ "/bin/bash", "-c", "--"] - args: [ "./build/build_in_docker.sh;ascendplugin --useAscendDocker=${USE_ASCEND_DOCKER}" ] #Add --mode=ascend310 if Ascend310 is used. + args: [ "ascendplugin --useAscendDocker=${USE_ASCEND_DOCKER}" ] securityContext: privileged: true imagePullPolicy: Never @@ -211,7 +114,7 @@ You can modify the configuration parameters during compilation by running a shel ``` - - The YAML file used by MindX DL + - YAML file of MindX DL **ascendplugin-volcano.yaml** @@ -261,7 +164,7 @@ You can modify the configuration parameters during compilation by running a shel nodeSelector: accelerator: huawei-Ascend910 containers: - - image: ascend-k8sdeviceplugin:V20.1.0 #Image name and version, which must be the same as the settings in build_common.sh. + - image: ascend-k8sdeviceplugin:v0.0.1 #Image name and version name: device-plugin-01 resources: requests: @@ -271,7 +174,7 @@ You can modify the configuration parameters during compilation by running a shel memory: 500Mi cpu: 500m command: [ "/bin/bash", "-c", "--"] - args: [ "./build/build_in_docker.sh;ascendplugin --useAscendDocker=${USE_ASCEND_DOCKER} --volcanoType=true" ] #Add --mode=ascend310 if Ascend310 is used. + args: [ "ascendplugin --useAscendDocker=${USE_ASCEND_DOCKER} --volcanoType=true" ] securityContext: privileged: true imagePullPolicy: Never @@ -300,71 +203,36 @@ You can modify the configuration parameters during compilation by running a shel ``` -4. Run the following command to edit the **Dockerfile** file and change the image name and version to the obtained values: - **vi **_/home/test/_ascend-device-plugin**/Dockerfile** +3. 
Run the following command to edit the **Dockerfile** file and change the image name and version to the obtained values: + + **vi** _/home/test/_ascend-device-plugin**/Dockerfile** ``` - #Select the basic image with go compilation. You can run the docker images command to query the basic image. - FROM golang:1.13.11-buster as build - + #Select the basic image as required. You can run the docker images command to query the basic image. + FROM ubuntu:18.04 as build #Specify whether to use Ascend Docker. The default value is true. Change it to false. - ENV USE_ASCEND_DOCKER false + ENV USE_ASCEND_DOCKER true - ENV GOPATH /usr/app/ - - ENV GO111MODULE off - - ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH - #Directory where libdrvdsmi_host.so is located. The directories of Ascend 310 and Ascend 910 are different. ENV LD_LIBRARY_PATH /usr/local/Ascend/driver/lib64/driver:/usr/local/Ascend/driver/lib64/common - RUN mkdir -p /usr/app/src/ascend-device-plugin + ENV LD_LIBRARY_PATH $LD_LIBRARY_PATH:/usr/local/Ascend/driver/lib64/ - COPY . /usr/app/src/Ascend-device-plugin + COPY ./output/ascendplugin /usr/local/bin/ - WORKDIR /usr/app/src/Ascend-device-plugin ``` -5. Go to the directory where the **ascend\_device\_plugin.pc** file is stored and run the following command to check whether the following paths are correct. - - - Ascend 310 path: **ascend-device-plugin/src/plugin/config/config\_310** - - Ascend 910 path: **ascend-device-plugin/src/plugin/config/config\_910** - - **vi ascend\_device\_plugin.pc** - - ``` - #Package Information for pkg-config - #Set the value to the actual driver installation directory. - prefix=/usr/local/Ascend - #Change the value to the actual DSMI dynamic library address. - libdriver=${prefix}/driver/lib64 - #Directory of the DSMI driver header file dsmi_common_interface.h. 
- includedir=${prefix}/driver/kernel/inc/driver/ - Name: ascend_docker_plugin - Description: Ascend device plugin - Version: 0.0.1 - Libs: -L${libdriver}/ -ldrvdsmi_host - Cflags: -I${includedir} - ``` - - >![](figures/icon-note.gif) **NOTE:** - >You can change the value of **docker\_images\_name** in **build\_common.sh** in the **build** directory to change the plugin image name. Ensure that the value is the same as the setting in **ascendplugin.yaml**. - -6. Run the following commands to generate a binary file and image file \(use the actual script name\): - - Select **build\_910.sh** for Ascend 910 and select **build\_310.sh** for Ascend 310. +4. Run the following commands to generate a binary file and image file \(use the actual script name\): **cd** _/home/test/_ascend-device-plugin**/build**/ - **chmod +x** _build\_XXX.sh_ - - **dos2unix** _build\_XXX.sh_ + **chmod +x build.sh** - **./**_build\_XXX.sh_ **dockerimages** + **dos2unix build.sh** + **./build.sh dockerimages** -1. Run the following command to view the generated software package: +5. Run the following command to view the generated software package: **ll** _/home/test/_ascend-device-plugin**/output** @@ -378,11 +246,11 @@ You can modify the configuration parameters during compilation by running a shel drwxr-xr-x 2 root root 4096 Jun 8 18:42 ./ drwxr-xr-x 9 root root 4096 Jun 8 17:12 ../ -rw-r--r-- 1 root root 29584705 Jun 9 10:37 Ascend-K8sDevicePlugin-xxx-arm64-Docker.tar.gz - -rwxr-xr-x 1 root root 6721073 Jun 9 16:20 Ascend-K8sDevicePlugin-xxx-arm64-Linux.tar.gz + -rw-r--r-- 1 root root 6721073 Jun 9 16:20 Ascend-K8sDevicePlugin-xxx-arm64-Linux.tar.gz ``` -

Creating DaemonSet.

+

Creating DaemonSet

## Procedure @@ -400,18 +268,18 @@ You can modify the configuration parameters during compilation by running a shel **cd** _/home/test/_**ascend-device-plugin/output** - **docker load < **_Ascend-K8sDevicePlugin-xxx-arm64-Docker.tar.gz_ + **docker load** **-i** _Ascend-K8sDevicePlugin-xxx-arm64-Docker.tar.gz_ 3. Run the following command to label the node with Ascend 910 or Ascend 310: - **kubectl label nodes **_localhost.localdomain_** accelerator=**_huawei-Ascend910_ + **kubectl label nodes** _localhost.localdomain_ **accelerator=**_huawei-Ascend910_ **localhost.localdomain** is the name of the node with Ascend 910 or Ascend 310. You can run the **kubectl get node** command to view the node name. - The label name must be the same as the name specified by **nodeSelector** in [3](#compiling-the-ascend-device-plugin-0.md#en-us_topic_0252775101_li8538035183714). + The label name must be the same as the **nodeSelector** label name in the YAML file in "Compiling the Ascend Device Plugin." >![](figures/icon-note.gif) **NOTE:** - >You can perform [2](#en-us_topic_0269670254_li1372334715567) to [3](#en-us_topic_0269670254_li26268471380) to add new nodes to the cluster. + >If the K8s plugin needs to be deployed on a new node, perform [2](#en-us_topic_0269670254_li1372334715567) to [3](#en-us_topic_0269670254_li26268471380). 4. Run the following commands to deploy DaemonSet: @@ -422,8 +290,7 @@ You can modify the configuration parameters during compilation by running a shel >![](figures/icon-note.gif) **NOTE:** >To view the node deployment information, you need to wait for several minutes after the deployment is complete. - -1. Run the following command to view the node device deployment information: +5. Run the following command to view the node device deployment information: **kubectl describe node** @@ -453,7 +320,7 @@ You can modify the configuration parameters during compilation by running a shel 1. 
Go to the **ascend-device-plugin** directory and run the following command to edit the pod configuration file: - **cd**_ /home/test/_**ascend-device-plugin** + **cd** _/home/test/_**ascend-device-plugin** **vi ascend.yaml** @@ -494,8 +361,7 @@ You can modify the configuration parameters during compilation by running a shel path: /usr/local/Ascend/driver #Change the value based on the actual driver path. ``` - -1. Run the following command to create a pod: +2. Run the following command to create a pod: **kubectl apply -f ascend.yaml** @@ -503,10 +369,9 @@ You can modify the configuration parameters during compilation by running a shel >To delete the pod, run the following command: >**kubectl delete -f** **ascend.yaml** +3. Run the following commands to access the pod and view the allocation information: -1. Run the following commands to access the pod and view the allocation information: - - **kubectl exec -it **_Pod name_** bash** + **kubectl exec -it** _Pod name_ **bash** The pod name is the one configured in [1](#en-us_topic_0269670251_en-us_topic_0249483204_li104071617503). @@ -521,48 +386,49 @@ You can modify the configuration parameters during compilation by running a shel

Environment Dependencies

-**Table 1** Environment dependencies +**Table 1** Environment Dependencies -

Check Item

+ - - - - - - - - - - - - - - - @@ -572,23 +438,12 @@ You can modify the configuration parameters during compilation by running a shel ``` ├── build # Compilation scripts -│ ├── build_310.sh -│ ├── build_910.sh -│ ├── build_common.sh -│ ├── build_in_docker.sh -│ ├── build.sh -│ ├── deploy.sh -│ └── sample_check.sh +│ └── build.sh ├── output # Compilation result directory. ├── src # Source code directory. │ └── plugin │ │ ├── cmd/ascendplugin -│ │ │ └── ascend_plugin.go -│ │ ├── config -│ │ │ ├── config_310 -│ │ │ │ └── ascend_device_plugin.pc -│ │ │ └── config_910 -│ │ │ └── ascend_device_plugin.pc +│ │ │ └── ascend_plugin.go │ │ └── pkg/npu/huawei ├── test # Test directory. ├── Dockerfile # Image file. @@ -607,19 +462,26 @@ You can modify the configuration parameters during compilation by running a shel

Version Updates

-

Check Item

Requirement

+

Requirement

dos2unix

+

dos2unix

Run the dos2unix --version command to check that the software has been installed. There is no requirement on the version.

+

Run the dos2unix --version command to check that the software has been installed. There is no requirement on the version.

Driver version of the RUN package

+

Driver version of the RUN package

Go to the directory of the driver (for example, /usr/local/Ascend/driver) and run the cat version.info command to confirm that the driver version is 1.73 or later.

+

Go to the directory of the driver (for example, /usr/local/Ascend/driver) and run the cat version.info command to confirm that the driver version is 1.73 or later.

Go language environment

+

Go language environment

Run the go version command to check that the version is 1.14.3 or later.

+

Run the go version command to confirm that the version is 1.14.3 or later.

gcc version

+

gcc version

Run the gcc --version command to check that the version is 7.3.0 or later.

+

Run the gcc --version command to confirm that the version is 7.3.0 or later.

Kubernetes version

+

Kubernetes version

Run the kubectl version command to check that the version is 1.13.0 or later.

+

Version 1.17.x is required. The latest bugfix release is recommended.

+

You can run the kubectl version command to view the version.

Docker environment

+

Docker environment

Run the docker info command to check that Docker has been installed.

+

Run the docker info command to confirm that Docker has been installed.

root user permission

+

root user permission

Check that the root user permission of the BMS is available.

+

Confirm that you have root user permissions on the bare metal server (BMS).

Version

+ - - - + + + + - - diff --git a/README.zh.md b/README.zh.md index 94259dda69870ff23391390f56dcf49afb968f7c..1aec1669b29b407687e070f3689f198d34146b08 100644 --- a/README.zh.md +++ b/README.zh.md @@ -2,8 +2,6 @@ - [Ascend Device Plugin](#Ascend-Device-Plugin.md) - [组件介绍](#组件介绍.md) - [编译Ascend Device Plugin](#编译Ascend-Device-Plugin.md) - - [快速编译Ascend Device Plugin](#快速编译Ascend-Device-Plugin.md) - - [编译Ascend Device Plugin](#编译Ascend-Device-Plugin-0.md) - [创建DaemonSet](#创建DaemonSet.md) - [创建业务容器](#创建业务容器.md) - [环境依赖](#环境依赖.md) @@ -30,105 +28,9 @@

编译Ascend Device Plugin

-- **[快速编译Ascend Device Plugin](#快速编译Ascend-Device-Plugin.md)** -将修改编译过程中的配置参数通过执行一个shell脚本来完成,用户只需要修改脚本中的参数,就能快速完成编译。 -- **[编译Ascend Device Plugin](#编译Ascend-Device-Plugin-0.md)** - - -

快速编译Ascend Device Plugin

- -将修改编译过程中的配置参数通过执行一个shell脚本来完成,用户只需要修改脚本中的参数,就能快速完成编译。 - -## 操作步骤 - -1. 执行以下命令安装最新版本的pkg-config。 - - **apt-get install -y pkg-config** - -2. 执行以下命令,设置环境变量。 - - **export GO111MODULE=on** - - **export GOPROXY=**_代理地址_ - - **export GONOSUMDB=\*** - - >![](figures/icon-note.gif) **说明:** - >GOPROXY代理地址请根据实际选择,可通过在ascend-device-plugin目录下执行**go mod download**命令进行检查。 - -3. 在“./build/”中创建并执行shell文件。 - - ``` - #!/bin/bash - ASCNED_TYPE=910 #根据芯片类型选择310或910。 - ASCNED_INSTALL_PATH=/usr/local/Ascend #驱动安装路径,根据实际修改。 - USE_ASCEND_DOCKER=false #是否使用昇腾Docker。 - - - CUR_DIR=$(dirname $(readlink -f $0)) - TOP_DIR=$(realpath ${CUR_DIR}/..) - LD_LIBRARY_PATH_PARA1=${ASCNED_INSTALL_PATH}/driver/lib64/driver - LD_LIBRARY_PATH_PARA2=${ASCNED_INSTALL_PATH}/driver/lib64 - TYPE=Ascend910 - PKG_PATH=${TOP_DIR}/src/plugin/config/config_910 - PKG_PATH_STRING=\$\{TOP_DIR\}/src/plugin/config/config_910 - LIBDRIVER="driver/lib64/driver" - if [ ${ASCNED_TYPE} == "310" ]; then - TYPE=Ascend310 - LD_LIBRARY_PATH_PARA1=${ASCNED_INSTALL_PATH}/driver/lib64 - PKG_PATH=${TOP_DIR}/src/plugin/config/config_310 - PKG_PATH_STRING=\$\{TOP_DIR\}/src/plugin/config/config_310 - LIBDRIVER="/driver/lib64" - sed -i "s#ascendplugin --useAscendDocker=\${USE_ASCEND_DOCKER}#ascendplugin --mode=ascend310 --useAscendDocker=${USE_ASCEND_DOCKER}#g" ${TOP_DIR}/ascendplugin.yaml - fi - sed -i "s/Ascend[0-9]\{3\}/${TYPE}/g" ${TOP_DIR}/ascendplugin.yaml - sed -i "s#ath: /usr/local/Ascend/driver#ath: ${ASCNED_INSTALL_PATH}/driver#g" ${TOP_DIR}/ascendplugin.yaml - sed -i "/^ENV LD_LIBRARY_PATH /c ENV LD_LIBRARY_PATH ${LD_LIBRARY_PATH_PARA1}:${LD_LIBRARY_PATH_PARA2}/common" ${TOP_DIR}/Dockerfile - sed -i "/^ENV USE_ASCEND_DOCKER /c ENV USE_ASCEND_DOCKER ${USE_ASCEND_DOCKER}" ${TOP_DIR}/Dockerfile - sed -i "/^libdriver=/c libdriver=$\{prefix\}/${LIBDRIVER}" ${PKG_PATH}/ascend_device_plugin.pc - sed -i "/^prefix=/c prefix=${ASCNED_INSTALL_PATH}" ${PKG_PATH}/ascend_device_plugin.pc - sed -i "/^CONFIGDIR=/c 
CONFIGDIR=${PKG_PATH_STRING}" ${CUR_DIR}/build_in_docker.sh - ``` - -4. 执行以下命令,根据实际选择执行的脚本,生成二进制和镜像文件。 - - Ascend 910请选择build\_910.sh,Ascend 310请选择build\_310.sh。 - - **cd** _/home/test/_ascend-device-plugin**/build**/ - - **chmod +x** _build\_XXX.sh_ - - **dos2unix** _build\_XXX.sh_ - - **./**_build\_XXX.sh_ **dockerimages** - -5. 执行以下命令,查看生成的软件包。 - - **ll** _/home/test/_ascend-device-plugin**/output** - - x86和ARM生成的软件包名不同,以下示例为ARM环境: - - >![](figures/icon-note.gif) **说明:** - >- **Ascend-K8sDevicePlugin-**_xxx_**-arm64-Docker.tar.gz**:K8s设备插件镜像。 - >- **Ascend-K8sDevicePlugin-**_xxx_**-arm64-Linux.tar.gz**:K8s设备插件二进制安装包。 - - ``` - drwxr-xr-x 2 root root 4096 Jun 8 18:42 ./ - drwxr-xr-x 9 root root 4096 Jun 8 17:12 ../ - -rw-r--r-- 1 root root 29584705 Jun 9 10:37 Ascend-K8sDevicePlugin-xxx-arm64-Docker.tar.gz - -rwxr-xr-x 1 root root 6721073 Jun 9 16:20 Ascend-K8sDevicePlugin-xxx-arm64-Linux.tar.gz - ``` - - -

编译Ascend Device Plugin

- ## 操作步骤 -1. 执行以下命令安装最新版本的pkg-config。 - - **apt-get install -y pkg-config** - -2. 执行以下命令,设置环境变量。 +1. 执行以下命令,设置环境变量。 **export GO111MODULE=on** @@ -137,9 +39,10 @@ **export GONOSUMDB=\*** >![](figures/icon-note.gif) **说明:** - >GOPROXY代理地址请根据实际选择,可通过在ascend-device-plugin目录下执行**go mod download**命令进行检查。 + >- GOPROXY代理地址请根据实际选择,可通过在ascend-device-plugin目录下执行**go mod download**命令进行检查。 + >- 若无返回错误信息,则表示代理设置成功。 -3. 进入ascend-device-plugin目录,执行以下命令,修改yaml文件。 +2. 进入ascend-device-plugin目录,执行以下命令,修改yaml文件。 - 通用yaml文件。 **vi ascendplugin.yaml** @@ -177,7 +80,7 @@ nodeSelector: accelerator: huawei-Ascend910 #根据芯片类型设置标签名称。 containers: - - image: ascend-device-plugin:v1.0.1 #镜像名称及版本号,需要和build_common.sh中保持一致。 + - image: ascend-device-plugin:v1.0.1 #镜像名称及版本号 name: device-plugin-01 resources: requests: @@ -187,7 +90,7 @@ memory: 500Mi cpu: 500m command: [ "/bin/bash", "-c", "--"] - args: [ "./build/build_in_docker.sh;ascendplugin --useAscendDocker=${USE_ASCEND_DOCKER}" ] #使用Ascend310,则需要增加--mode=ascend310 + args: [ "ascendplugin --useAscendDocker=${USE_ASCEND_DOCKER}" ] securityContext: privileged: true imagePullPolicy: Never @@ -211,7 +114,7 @@ ``` - - Atlas深度学习组件使用yaml文件。 + - MindX DL使用yaml文件。 **ascendplugin-volcano.yaml** @@ -261,7 +164,7 @@ nodeSelector: accelerator: huawei-Ascend910 containers: - - image: ascend-k8sdeviceplugin:V20.1.0 #镜像名称及版本号,需要和build_common.sh中保持一致。 + - image: ascend-k8sdeviceplugin:v0.0.1 #镜像名称及版本号。 name: device-plugin-01 resources: requests: @@ -271,7 +174,7 @@ memory: 500Mi cpu: 500m command: [ "/bin/bash", "-c", "--"] - args: [ "./build/build_in_docker.sh;ascendplugin --useAscendDocker=${USE_ASCEND_DOCKER} --volcanoType=true" ] #使用Ascend310,则需要增加--mode=ascend310 + args: [ "ascendplugin --useAscendDocker=${USE_ASCEND_DOCKER} --volcanoType=true" ] securityContext: privileged: true imagePullPolicy: Never @@ -300,71 +203,36 @@ ``` -4. 执行以下命令,编辑Dockerfile文件,将镜像修改为查询的镜像名及版本号。 - **vi **_/home/test/_ascend-device-plugin**/Dockerfile** +3. 
执行以下命令,编辑Dockerfile文件,将镜像修改为查询的镜像名及版本号。 + + **vi** _/home/test/_ascend-device-plugin**/Dockerfile** ``` - #用户根据实际选择需要使用的带Go编译的基础镜像,可通过docker images命令查询。 - FROM golang:1.13.11-buster as build - + #用户根据实际选择基础镜像,可通过docker images命令查询。 + FROM ubuntu:18.04 as build #是否使用昇腾Docker,默认为true,请修改为false。 - ENV USE_ASCEND_DOCKER false + ENV USE_ASCEND_DOCKER true - ENV GOPATH /usr/app/ - - ENV GO111MODULE off - - ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH - #libdrvdsmi_host.so所在目录,Ascend 310和Ascend 910目录不同。 ENV LD_LIBRARY_PATH /usr/local/Ascend/driver/lib64/driver:/usr/local/Ascend/driver/lib64/common - RUN mkdir -p /usr/app/src/ascend-device-plugin + ENV LD_LIBRARY_PATH $LD_LIBRARY_PATH:/usr/local/Ascend/driver/lib64/ - COPY . /usr/app/src/Ascend-device-plugin + COPY ./output/ascendplugin /usr/local/bin/ - WORKDIR /usr/app/src/Ascend-device-plugin ``` -5. 进入ascend\_device\_plugin.pc文件所在目录,执行以下命令,查看以下路径是否正确,根据实际修改。 - - - Ascend 310目录:ascend-device-plugin/src/plugin/config/config\_310 - - Ascend 910目录:ascend-device-plugin/src/plugin/config/config\_910 - - **vi ascend\_device\_plugin.pc** - - ``` - #Package Information for pkg-config - #驱动安装目录,根据实际填写。 - prefix=/usr/local/Ascend - #dsmi动态库地址,根据实际修改。 - libdriver=${prefix}/driver/lib64 - #dsmi驱动头文件dsmi_common_interface.h所在目录。 - includedir=${prefix}/driver/kernel/inc/driver/ - Name: ascend_docker_plugin - Description: Ascend device plugin - Version: 0.0.1 - Libs: -L${libdriver}/ -ldrvdsmi_host - Cflags: -I${includedir} - ``` - - >![](figures/icon-note.gif) **说明:** - >支持修改插件镜像的名称,build目录下build\_common.sh中修改“docker\_images\_name”即可,需要和ascendplugin.yaml中保持一致。 - -6. 执行以下命令,根据实际选择执行的脚本,生成二进制和镜像文件。 - - Ascend 910请选择build\_910.sh,Ascend 310请选择build\_310.sh。 +4. 执行以下命令,根据实际选择执行的脚本,生成二进制和镜像文件。 **cd** _/home/test/_ascend-device-plugin**/build**/ - **chmod +x** _build\_XXX.sh_ - - **dos2unix** _build\_XXX.sh_ + **chmod +x build.sh** - **./**_build\_XXX.sh_ **dockerimages** + **dos2unix build.sh** + **./build.sh dockerimages** -1. 
执行以下命令,查看生成的软件包。 +5. 执行以下命令,查看生成的软件包。 **ll** _/home/test/_ascend-device-plugin**/output** @@ -378,7 +246,7 @@ drwxr-xr-x 2 root root 4096 Jun 8 18:42 ./ drwxr-xr-x 9 root root 4096 Jun 8 17:12 ../ -rw-r--r-- 1 root root 29584705 Jun 9 10:37 Ascend-K8sDevicePlugin-xxx-arm64-Docker.tar.gz - -rwxr-xr-x 1 root root 6721073 Jun 9 16:20 Ascend-K8sDevicePlugin-xxx-arm64-Linux.tar.gz + -rw-r--r-- 1 root root 6721073 Jun 9 16:20 Ascend-K8sDevicePlugin-xxx-arm64-Linux.tar.gz ``` @@ -400,18 +268,18 @@ **cd** _/home/test/_**ascend-device-plugin/output** - **docker load < **_Ascend-K8sDevicePlugin-xxx-arm64-Docker.tar.gz_ + **docker load** **-i** _Ascend-K8sDevicePlugin-xxx-arm64-Docker.tar.gz_ 3. 执行如下命令,给带有Ascend 910(或Ascend 310)的节点打标签。 - **kubectl label nodes **_localhost.localdomain_** accelerator=**_huawei-Ascend910_ + **kubectl label nodes** _localhost.localdomain_ **accelerator=**_huawei-Ascend910_ localhost.localdomain为有Ascend 910(或Ascend 310)的节点名称,可通过**kubectl get node**命令查看。 - 标签名称需要和[3](#编译Ascend-Device-Plugin-0.md#zh-cn_topic_0252775101_li8538035183714)中的nodeSelector标签名称保持一致。 + 标签名称需要和“编译Ascend Device Plugin”章节中yaml文件里的nodeSelector标签名称保持一致。 >![](figures/icon-note.gif) **说明:** - >如需扩容集群节点,请参考[2](#zh-cn_topic_0269670254_li1372334715567)\~[3](#zh-cn_topic_0269670254_li26268471380)操作将新节点加入集群。 + >如有新节点需要部署K8s插件,请执行[2](#zh-cn_topic_0269670254_li1372334715567)\~[3](#zh-cn_topic_0269670254_li26268471380)。 4. 执行以下命令,部署DaemonSet。 @@ -422,8 +290,7 @@ >![](figures/icon-note.gif) **说明:** >部署完成后需要等待几分钟,才能看到节点设备部署信息。 - -1. 执行如下命令,查看节点设备部署信息。 +5. 执行如下命令,查看节点设备部署信息。 **kubectl describe node** @@ -453,7 +320,7 @@ 1. 进入ascend-device-plugin目录,执行如下命令编辑Pod的配置文件,根据文件模板编写配置文件。 - **cd**_ /home/test/_**ascend-device-plugin** + **cd** _/home/test/_**ascend-device-plugin** **vi ascend.yaml** @@ -494,8 +361,7 @@ path: /usr/local/Ascend/driver #根据Driver实际所在路径修改。 ``` - -1. 执行如下命令,创建Pod。 +2. 
执行如下命令,创建Pod。 **kubectl apply -f ascend.yaml** @@ -503,10 +369,9 @@ >如需删除请执行以下命令: >**kubectl delete -f** **ascend.yaml** +3. 分别执行以下命令,进入Pod查看分配信息。 -1. 分别执行以下命令,进入Pod查看分配信息。 - - **kubectl exec -it **_pod名称_** bash** + **kubectl exec -it** _pod名称_ **bash** Pod名称为[1](#zh-cn_topic_0269670251_zh-cn_topic_0249483204_li104071617503)中配置的Pod名称。 @@ -524,45 +389,46 @@ **表 1** 环境依赖 -

Version

Date

+

Date

Description

+

Description

V20.1.0

+

v20.2.0

+

2021-01-08

+

Optimized the description in "Creating DaemonSet."

+

v20.2.0

2020-09-30

+

2020-11-18

This issue is the first official release.

+

This is the first official release.

检查项

+ - - - - - - - - - - - - - - - @@ -571,24 +437,13 @@

目录结构

``` -├── build # 编译脚本 -│ ├── build_310.sh -│ ├── build_910.sh -│ ├── build_common.sh -│ ├── build_in_docker.sh -│ ├── build.sh -│ ├── deploy.sh -│ └── sample_check.sh +├── build # 编译脚本 +│ └── build.sh ├── output # 编译结果目录 ├── src # 源代码目录 │ └── plugin │ │ ├── cmd/ascendplugin -│ │ │ └── ascend_plugin.go -│ │ ├── config -│ │ │ ├── config_310 -│ │ │ │ └── ascend_device_plugin.pc -│ │ │ └── config_910 -│ │ │ └── ascend_device_plugin.pc +│ │ │ └── ascend_plugin.go │ │ └── pkg/npu/huawei ├── test # 测试目录 ├── Dockerfile # 镜像文件 @@ -607,19 +462,26 @@

版本更新记录

-

检查项

要求

+

要求

dos2unix

+

dos2unix

已安装(无版本要求),执行dos2unix --version命令查看。

+

已安装(无版本要求),执行dos2unix --version命令查看。

run包的驱动版本

+

run包的驱动版本

大于等于1.73,进入驱动所在路径(如“/usr/local/Ascend/driver”),执行cat version.info命令查看。

+

大于等于1.73,进入驱动所在路径(如“/usr/local/Ascend/driver”),执行cat version.info命令查看。

Go语言环境版本

+

Go语言环境版本

大于等于1.14.3,执行go version命令查看。

+

大于等于1.14.3,执行go version命令查看。

gcc版本

+

gcc版本

大于等于7.3.0,执行gcc --version命令查看。

+

大于等于7.3.0,执行gcc --version命令查看。

Kubernetes版本

+

Kubernetes版本

大于等于1.13.0,执行kubectl version命令查看。

+

1.17.x,建议选择最新的bugfix版本。

+

执行kubectl version命令查看。

Docker环境

+

Docker环境

已安装Docker,执行docker info命令查看。

+

已安装Docker,执行docker info命令查看。

root用户

+

root用户

裸机拥有root用户权限。

+

裸机拥有root用户权限。

版本

+ - - - + + + + - - diff --git a/ascendplugin-310.yaml b/ascendplugin-310.yaml index e41e66087b90c87376d912bab14aea37a2612558..7e63e52e70dfc4d30c1dba925e35fcc4c1fb9f68 100644 --- a/ascendplugin-310.yaml +++ b/ascendplugin-310.yaml @@ -30,7 +30,7 @@ spec: nodeSelector: accelerator: huawei-Ascend310 containers: - - image: ascend-k8sdeviceplugin:V20.1.0 + - image: ascend-k8sdeviceplugin:v20.2.0 name: device-plugin-01 resources: requests: @@ -40,7 +40,7 @@ spec: memory: 500Mi cpu: 500m command: [ "/bin/bash", "-c", "--"] - args: [ "umask 0027;./build/build_in_docker.sh;ascendplugin --useAscendDocker=${USE_ASCEND_DOCKER}" ] + args: [ "umask 0027;ascendplugin --useAscendDocker=${USE_ASCEND_DOCKER}" ] securityContext: privileged: true imagePullPolicy: Never @@ -51,8 +51,6 @@ spec: mountPath: /usr/local/Ascend/driver - name: log-path mountPath: /var/log/devicePlugin - - name: timezone - mountPath: /etc/timezone - name: localtime mountPath: /etc/localtime volumes: @@ -65,9 +63,6 @@ spec: - name: log-path hostPath: path: /var/log/devicePlugin - - name: timezone - hostPath: - path: /etc/timezone - name: localtime hostPath: path: /etc/localtime \ No newline at end of file diff --git a/ascendplugin-volcano.yaml b/ascendplugin-volcano.yaml index dd96fffe25f315abf45199ed7373d327056e3451..6a19edc6a4b8eb8f2ee01c28ef63cecc16f36514 100644 --- a/ascendplugin-volcano.yaml +++ b/ascendplugin-volcano.yaml @@ -43,7 +43,7 @@ spec: nodeSelector: accelerator: huawei-Ascend910 containers: - - image: ascend-k8sdeviceplugin:V20.1.0 + - image: ascend-k8sdeviceplugin:v20.2.0 name: device-plugin-01 resources: requests: @@ -53,7 +53,7 @@ spec: memory: 500Mi cpu: 500m command: [ "/bin/bash", "-c", "--"] - args: [ "umask 0027;./build/build_in_docker.sh;ascendplugin --useAscendDocker=${USE_ASCEND_DOCKER} --volcanoType=true" ] + args: [ "umask 0027;ascendplugin --useAscendDocker=${USE_ASCEND_DOCKER} --volcanoType=true" ] securityContext: privileged: true imagePullPolicy: Never @@ -64,8 +64,6 @@ spec: 
mountPath: /usr/local/Ascend/driver - name: log-path mountPath: /var/log/devicePlugin - - name: timezone - mountPath: /etc/timezone - name: localtime mountPath: /etc/localtime env: @@ -83,9 +81,6 @@ spec: - name: log-path hostPath: path: /var/log/devicePlugin - - name: timezone - hostPath: - path: /etc/timezone - name: localtime hostPath: path: /etc/localtime diff --git a/ascendplugin.yaml b/ascendplugin.yaml index 019a671622a4971c433d293dc6f825929af2da15..8c258f90e8a7c1e2c97c40e30aee8b3226f69e4e 100644 --- a/ascendplugin.yaml +++ b/ascendplugin.yaml @@ -30,7 +30,7 @@ spec: nodeSelector: accelerator: huawei-Ascend910 containers: - - image: ascend-k8sdeviceplugin:V20.1.0 + - image: ascend-k8sdeviceplugin:v20.2.0 name: device-plugin-01 resources: requests: @@ -40,7 +40,7 @@ spec: memory: 500Mi cpu: 500m command: [ "/bin/bash", "-c", "--"] - args: [ "umask 0027;./build/build_in_docker.sh;ascendplugin --useAscendDocker=${USE_ASCEND_DOCKER}" ] + args: [ "umask 0027;ascendplugin --useAscendDocker=${USE_ASCEND_DOCKER}" ] securityContext: privileged: true imagePullPolicy: Never diff --git a/build/build.sh b/build/build.sh index 6dc8202f49a8d8c8647567cd47c232b2a56d1635..fca067d9934d44bc18b1b2ad8c46f922db7619d0 100644 --- a/build/build.sh +++ b/build/build.sh @@ -1,13 +1,78 @@ #!/bin/bash -CUR_DIR=$(dirname $(readlink -f $0)) -TOP_DIR=$(realpath ${CUR_DIR}/..) -set -x -dos2unix build_310.sh -chmod 550 build_310.sh -bash -x ${CUR_DIR}/build_310.sh ci -rm -rf ${TOP_DIR}/output/* -cd ${TOP_DIR} -tar -zcvf ascend-device-plugin.tar.gz ./build ./output ./src \ - ./ascend.yaml ./ascendplugin.yaml ./docker_run.sh ./Dockerfile ./go.mod \ - ./README.zh.md ./'Open Source Software Notice.md' ./LICENSE -mv ascend-device-plugin.tar.gz ${TOP_DIR}/output/ \ No newline at end of file +# Perform build k8s-device-plugin +# Copyright @ Huawei Technologies CO., Ltd. 2020-2021. All rights reserved +set -e +CUR_DIR=$(dirname $(readlink -f "$0")) +TOP_DIR=$(realpath "${CUR_DIR}"/..) 
+ +build_version="v20.2.0" +output_name="ascendplugin" +deploy_name="deploy.sh" +docker_images_name="ascend-k8sdeviceplugin:v20.2.0" +ostype=$(arch) +if [ "${ostype}" = "aarch64" ]; then + ostype="arm64" +else + ostype="amd64" +fi +tar_name="Ascend-K8sDevicePlugin-${build_version}-${ostype}-Linux.tar.gz" +docker_zip_name="Ascend-K8sDevicePlugin-${build_version}-${ostype}-Docker.tar.gz" +docker_type=nodocker +if [ "$1" == "dockerimages" ] || [ "$2" == "dockerimages" ]; then + DOCKER_TYPE=dockerimages +fi + +function clear_env() { + rm -rf ${TOP_DIR}/output/* +} + +function build_plugin() { + cd ${TOP_DIR}/src/plugin/cmd/ascendplugin + go build -ldflags "-X main.BuildName=${output_name} \ + -X main.BuildVersion=${build_version} \ + -buildid none \ + -s \ + -extldflags=-Wl,-z,relro,-z,now,-z,noexecstack" \ + -o "${output_name}" \ + -trimpath + + ls ${output_name} + if [ $? -ne 0 ]; then + echo "fail to find ascendplugin" + exit 1 + fi +} + +function mv_file() { + mv ${TOP_DIR}/src/plugin/cmd/ascendplugin/${output_name} ${TOP_DIR}/output/ + cp ${TOP_DIR}/other/${deploy_name} ${TOP_DIR}/output/ +} + +function zip_file(){ + cd ${TOP_DIR}/output + tar -zcvf ${tar_name} ${output_name} ${deploy_name} + if [ $? == 0 ]; then + echo "build device plugin success" + fi + rm -f ${output_name} ${deploy_name} +} + +function build_docker_images() +{ + cd ${TOP_DIR} + docker rmi ${docker_images_name} || true + docker build -t ${docker_images_name} . 
+ docker save ${docker_images_name} | gzip > ${docker_zip_name} + mv ${docker_zip_name} ./output/ +} + +function main() { + clear_env + build_plugin + mv_file + if [ "${DOCKER_TYPE}" == "dockerimages" ]; then + build_docker_images + fi + zip_file +} +main \ No newline at end of file diff --git a/build/build_310.sh b/build/build_310.sh deleted file mode 100644 index 8b3c89f5d668497941484fb2d880201cd72c6d57..0000000000000000000000000000000000000000 --- a/build/build_310.sh +++ /dev/null @@ -1,51 +0,0 @@ -#!/bin/bash - -CUR_DIR=$(dirname $(readlink -f $0)) -TOP_DIR=$(realpath ${CUR_DIR}/..) -DOWN_DRIVER_FILE="platform/Tuscany" -DRIVER_FILE="310driver" -CONFIGDIR=${TOP_DIR}/src/plugin/config/config_310 -SODIR=/usr/local/Ascend/driver/lib64 -BUILD_TYPE=build -DOCKER_TYPE=nodockerimages -if [ "$1" == "ci" ] || [ "$2" == "ci" ]; then - export GO111MODULE="on" - export GOPROXY="http://mirrors.tools.huawei.com/goproxy/" - export GONOSUMDB="*" - BUILD_TYPE=ci - SODIR=${TOP_DIR}/${DRIVER_FILE}/driver/lib64/ -fi - -if [ "$1" == "dockerimages" ] || [ "$2" == "dockerimages" ]; then - DOCKER_TYPE=dockerimages -fi - -chmod 550 build_common.sh -dos2unix build_common.sh -source build_common.sh - -function make_lib() { - ls ${TOP_DIR}/${DOWN_DRIVER_FILE} - plateform=$(arch) - chmod 550 ${TOP_DIR}/${DOWN_DRIVER_FILE}/Ascend310-driver-*.${plateform}.run - - ${TOP_DIR}/${DOWN_DRIVER_FILE}/Ascend310-driver-*${osname}*.${plateform}*.run \ - --noexec --extract=${TOP_DIR}/${DRIVER_FILE} - sed -i "/^prefix=/c prefix=${TOP_DIR}/${DRIVER_FILE}" ${CONFIGDIR}/${PC_File} -} - -function main() { - clear_env - if [ "${BUILD_TYPE}" = "ci" ]; then - make_lib - fi - build_plugin - mv_file - if [ "${DOCKER_TYPE}" == "dockerimages" ]; then - dos2unix ${CUR_DIR}/build_in_docker.sh - chmod 550 ${CUR_DIR}/build_in_docker.sh - build_docker_images - fi - zip_file -} -main diff --git a/build/build_910.sh b/build/build_910.sh deleted file mode 100644 index 
e0909b07a30d70b054570c78ad4693145f4bb990..0000000000000000000000000000000000000000 --- a/build/build_910.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/bin/bash -CUR_DIR=$(dirname $(readlink -f $0)) -TOP_DIR=$(realpath ${CUR_DIR}/..) -DOWN_DRIVER_FILE="platform/Tuscany" -DRIVER_FILE="910driver" -CONFIGDIR=${TOP_DIR}/src/plugin/config/config_910 -SODIR=/usr/local/Ascend/driver/lib64/driver -BUILD_TYPE=build -DOCKER_TYPE=nodocker -if [ "$1" == "ci" ] || [ "$2" == "ci" ]; then - export GO111MODULE="on" - export GOPROXY="http://mirrors.tools.huawei.com/goproxy/" - export GONOSUMDB="*" - BUILD_TYPE=ci - SODIR=${TOP_DIR}/${DRIVER_FILE}/driver/lib64/driver -fi -if [ "$1" == "dockerimages" ] || [ "$2" == "dockerimages" ]; then - DOCKER_TYPE=dockerimages -fi -chmod 550 build_common.sh -dos2unix build_common.sh -source build_common.sh - -function make_lib() { - ls ${TOP_DIR}/${DOWN_DRIVER_FILE} - plateform=$(arch) - chmod 550 ${TOP_DIR}/${DOWN_DRIVER_FILE}/Ascend910-driver-*.${plateform}.run - - ${TOP_DIR}/${DOWN_DRIVER_FILE}/Ascend910-driver-*${osname}*.${plateform}*.run \ - --noexec --extract=${TOP_DIR}/${DRIVER_FILE} - sed -i "/^prefix=/c prefix=${TOP_DIR}/${DRIVER_FILE}" ${CONFIGDIR}/${PC_File} -} -function main() { - clear_env - if [ "${BUILD_TYPE}" = "ci" ]; then - make_lib - fi - build_plugin - mv_file - if [ "${DOCKER_TYPE}" == "dockerimages" ]; then - dos2unix ${CUR_DIR}/build_in_docker.sh - chmod 550 ${CUR_DIR}/build_in_docker.sh - build_docker_images - fi - zip_file -} -main diff --git a/build/build_common.sh b/build/build_common.sh deleted file mode 100644 index eb2af52fadce13e50bc8458d6a5fd5aedeac9df5..0000000000000000000000000000000000000000 --- a/build/build_common.sh +++ /dev/null @@ -1,136 +0,0 @@ -#!/bin/bash - -build_version="V20.1.0" -build_time=$(date +'%Y-%m-%d_%T') -OUTPUT_NAME="ascendplugin" -DEPLOYNAME="deploy.sh" -DOCKER_FILE_NAME="Dockerfile" -PC_File="ascend_device_plugin.pc" -docker_images_name="ascend-k8sdeviceplugin:V20.1.0" - -osname=$(grep -i ^id= 
/etc/os-release| cut -d"=" -f2 | sed 's/"//g'); -ostype=$(arch) -if [ "${ostype}" = "aarch64" ]; then - ostype="arm64" -else - ostype="x86" -fi -PKGNAME="Ascend-k8s_device_plugin-${build_version}-${ostype}-linux.run" -TARNAME="Ascend-K8sDevicePlugin-${build_version}-${ostype}-Linux.tar.gz" -docker_zip_name="Ascend-K8sDevicePlugin-${build_version}-${ostype}-Docker.tar.gz" -# export so library path -export LD_LIBRARY_PATH=${SODIR}:${LD_LIBRARY_PATH} -export PKG_CONFIG_PATH=${CONFIGDIR}:$PKG_CONFIG_PATH - - -function clear_env() { - rm -rf ${TOP_DIR}/output/* - rm -rf ~/.cache/go-build - if [ ! -d "${TOP_DIR}/makerunout" ]; then - mkdir -p ${TOP_DIR}/makerunout - chmod 750 ${TOP_DIR}/makerunout - fi -} - - - -function build_plugin() { - - rm -rf /tmp/gobuildplguin - mkdir -p /tmp/gobuildplguin - chmod 750 /tmp/gobuildplguin - cd ${TOP_DIR}/src/plugin/cmd/ascendplugin - go build -ldflags "-X main.BuildName=${OUTPUT_NAME} \ - -X main.BuildVersion=${build_version} \ - -buildid none \ - -s \ - -tmpdir /tmp/gobuildplguin" \ - -o ${OUTPUT_NAME} \ - -trimpath - - ls ${OUTPUT_NAME} - if [ $? -ne 0 ]; then - echo "fail to find ascendplugin" - exit 1 - fi -} - -function mv_file() { - - mv ${TOP_DIR}/src/plugin/cmd/ascendplugin/${OUTPUT_NAME} ${TOP_DIR}/output - dos2unix ${TOP_DIR}/build/${DEPLOYNAME} - chmod 550 ${TOP_DIR}/build/${DEPLOYNAME} - cp ${TOP_DIR}/build/${DEPLOYNAME} ${TOP_DIR}/output - -} - -function copy2runpackage() { - mv ${TOP_DIR}/src/plugin/cmd/ascendplugin/${OUTPUT_NAME} ${TOP_DIR}/makerunout - cp ${TOP_DIR}/build/${DEPLOYNAME} ${TOP_DIR}/makerunout/ - if [ ! -d "${TOP_DIR}/makerunout/script" ]; then - mkdir -p ${TOP_DIR}/makerunout/script - chmod 750 ${TOP_DIR}/makerunout/script - fi - chmod 550 ${TOP_DIR}/build/script/uninstall.sh - cp ${TOP_DIR}/build/script/uninstall.sh ${TOP_DIR}/makerunout/script/ -} - -function zip_file(){ - cd ${TOP_DIR}/output - tar -zcvf ${TARNAME} ${OUTPUT_NAME} ${DEPLOYNAME} - if [ $? 
== 0 ]; then - echo "build device plugin success" - fi - rm -f ${OUTPUT_NAME} ${DEPLOYNAME} -} - -function make_run_package() { - chmod 550 ${CUR_DIR}/script/makepackgeinstall.sh - dos2unix ${CUR_DIR}/script/makepackgeinstall.sh - cp ${CUR_DIR}/script/makepackgeinstall.sh ${TOP_DIR}/makerunout - dirname="${ostype}-$(get_os_name)$(get_os_version)" - if [ ! -d "${TOP_DIR}/output/${dirname}" ]; then - mkdir -p "${TOP_DIR}/output/${dirname}" - chmod 750 ${TOP_DIR}/output/${dirname} - fi - if [ -d "${TOP_DIR}/tools/makeself-release-2.4.0" ]; then - rm -rf ${TOP_DIR}/tools/makeself-release-2.4.0 - fi - cd ${TOP_DIR}/tools || retrun - unzip makeself-release-2.4.0.zip - cd ${TOP_DIR}/tools/makeself-release-2.4.0 || return - cp makeself.sh ${CUR_DIR}/script - cp makeself-header.sh ${CUR_DIR}/script - cd ${CUR_DIR}/script || retrun - patch -p0 < mkselfmodify.patch - - ./makeself.sh --nomd5 --nocrc --header ./makeself-header.sh --help-header \ - ./help.info ../../makerunout "${PKGNAME}" ascendplugin ./makepackgeinstall.sh - mv ${PKGNAME} ${TOP_DIR}/output/${dirname} - rm -rf ${TOP_DIR}/makerunout - rm -f ${TOP_DIR}/output/${OUTPUT_NAME} ${TOP_DIR}/output/${DEPLOYNAME} -} -function build_docker_images() -{ - cd ${TOP_DIR} - docker rmi ${docker_images_name} - docker build -t ${docker_images_name} . 
- docker save ${docker_images_name} | gzip > ${docker_zip_name} - mv ${docker_zip_name} ./output/ -} - -function get_os_name() { - lsb_release -i | awk '{print $3}' | tr 'A-Z' 'a-z' -} - -function get_os_version() { - local os_name=$(get_os_name) - declare -A os_version=(["ubuntu"]="18.04" ["centos"]="7.6" ["euleros"]="2.8" ["debian"]="9.9") - for key in "${!os_version[@]}"; do - if [ $key == $os_name ]; then - echo "${os_version[$key]}" - return 0 - fi - done - exit 1 -} \ No newline at end of file diff --git a/build/build_in_docker.sh b/build/build_in_docker.sh deleted file mode 100644 index 1c9a7c3fb7a5b70e06dcf3cc345e5270a9660425..0000000000000000000000000000000000000000 --- a/build/build_in_docker.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash -CUR_DIR=$(dirname $(readlink -f $0)) -TOP_DIR=$(realpath ${CUR_DIR}/..) - -OUTPUT_NAME="ascendplugin" - -function main() { - cp ${TOP_DIR}/output/${OUTPUT_NAME} /usr/local/bin/ -} -main diff --git a/build/deploy.sh b/other/deploy.sh similarity index 100% rename from build/deploy.sh rename to other/deploy.sh diff --git a/build/sample_check.sh b/other/sample_check.sh similarity index 100% rename from build/sample_check.sh rename to other/sample_check.sh diff --git a/src/plugin/config/config_310/ascend_device_plugin.pc b/src/plugin/config/config_310/ascend_device_plugin.pc deleted file mode 100644 index 8aa8402e377ba37cb17b666ba2847ce78c1886ff..0000000000000000000000000000000000000000 --- a/src/plugin/config/config_310/ascend_device_plugin.pc +++ /dev/null @@ -1,9 +0,0 @@ -#Package Information for pkg-config -prefix=/usr/local/Ascend -libdriver=${prefix}/driver/lib64 -includedir=${prefix}/driver/kernel/inc/driver/ -Name: ascend_docker_plugin -Description: Ascend device plugin -Version: 0.0.1 -Libs: -L${libdriver}/ -ldrvdsmi_host -Cflags: -I${includedir} diff --git a/src/plugin/config/config_910/ascend_device_plugin.pc b/src/plugin/config/config_910/ascend_device_plugin.pc deleted file mode 100644 index 
6e8791c59747c610a01d52b27ea1e11db33b27d2..0000000000000000000000000000000000000000 --- a/src/plugin/config/config_910/ascend_device_plugin.pc +++ /dev/null @@ -1,9 +0,0 @@ -#Package Information for pkg-config -prefix=/usr/local/Ascend -libdriver=${prefix}/driver/lib64/driver -includedir=${prefix}/driver/kernel/inc/driver/ -Name: ascend_docker_plugin -Description: Ascend device plugin -Version: 0.0.1 -Libs: -L${libdriver}/ -ldrvdsmi_host -Cflags: -I${includedir} diff --git a/src/plugin/pkg/npu/huawei/constants.go b/src/plugin/pkg/npu/huawei/constants.go index 5375a4e51d3ebd904a4f596af0bec2783cc9815d..a8a7d1a1697aa5bcfb5adf5b5ae3c9c2ff2c9c4f 100644 --- a/src/plugin/pkg/npu/huawei/constants.go +++ b/src/plugin/pkg/npu/huawei/constants.go @@ -55,7 +55,7 @@ const ( fileMaxSize = 1000 // each log file size maxBackups = 20 // max backup maxAge = 28 // the log file last time - podDeviceKey = "atlas.kubectl.kubernetes.io/ascend-910-configuration" // config map name + podDeviceKey = "ascend.kubectl.kubernetes.io/ascend-910-configuration" // config map name ascendVisibleDevicesEnv = "ASCEND_VISIBLE_DEVICES" // visible env logChmod = 0640 diff --git a/src/plugin/pkg/npu/huawei/dsmi.go b/src/plugin/pkg/npu/huawei/dsmi.go index 357b4111c1cfc00241840b9e045be0aed6bd7fa7..79a41dd8284b96d63f5670586ffef86f41317aa6 100644 --- a/src/plugin/pkg/npu/huawei/dsmi.go +++ b/src/plugin/pkg/npu/huawei/dsmi.go @@ -16,8 +16,137 @@ package huawei -// #cgo pkg-config: ascend_device_plugin -// #include "dsmi_common_interface.h" +// #cgo LDFLAGS: -ldl +/* +#include +#include +#include + +#include "dsmi_common_interface.h" + +// dsmiHandle is the handle for dynamically loaded libdrvdsmi_host.so +void *dsmiHandle; +#define SO_NOT_FOUND -99999 +#define FUNCTION_NOT_FOUND -99998 +#define SUCCESS 0 +#define ERROR_UNKNOWN -99997 +#define CALL_FUNC(func_name,...) 
\ + if(func_name##_func == NULL){ \ + return FUNCTION_NOT_FOUND; \ + } \ + return func_name##_func(__VA_ARGS__); \ + +int (*dsmi_get_device_count_func)(int *device_count); +int dsmi_get_device_count(int *device_count){ + CALL_FUNC(dsmi_get_device_count,device_count) +} + +int (*dsmi_list_device_func)(int device_id_list[], int count); +int dsmi_list_device(int device_id_list[], int count){ + CALL_FUNC(dsmi_list_device,device_id_list,count) +} + +int (*dsmi_get_device_health_func)(int device_id, unsigned int *phealth); +int dsmi_get_device_health(int device_id, unsigned int *phealth){ + CALL_FUNC(dsmi_get_device_health,device_id,phealth) +} + +int (*dsmi_get_device_utilization_rate_func)(int device_id, int device_type, unsigned int *putilization_rate); +int dsmi_get_device_utilization_rate(int device_id,int device_type, unsigned int *putilization_rate){ + CALL_FUNC(dsmi_get_device_utilization_rate,device_id, device_type,putilization_rate) +} + +int (*dsmi_get_phyid_from_logicid_func)(unsigned int logicid, unsigned int *phyid); +int dsmi_get_phyid_from_logicid(unsigned int logicid, unsigned int *phyid){ + CALL_FUNC(dsmi_get_phyid_from_logicid,logicid,phyid) +} + +int (*dsmi_get_logicid_from_phyid_func)(unsigned int phyid, unsigned int *logicid); +int dsmi_get_logicid_from_phyid(unsigned int phyid, unsigned int *logicid){ + CALL_FUNC(dsmi_get_logicid_from_phyid,phyid,logicid) +} + +int (*dsmi_get_device_temperature_func)(int device_id, int *ptemperature); +int dsmi_get_device_temperature(int device_id, int *ptemperature){ + CALL_FUNC(dsmi_get_device_temperature,device_id,ptemperature) +} + +int (*dsmi_get_device_voltage_func)(int device_id, unsigned int *pvoltage); +int dsmi_get_device_voltage(int device_id, unsigned int *pvoltage){ + CALL_FUNC(dsmi_get_device_voltage,device_id,pvoltage) +} + +int (*dsmi_get_device_power_info_func)(int device_id, struct dsmi_power_info_stru *pdevice_power_info); +int dsmi_get_device_power_info(int device_id, struct dsmi_power_info_stru 
*pdevice_power_info){ + CALL_FUNC(dsmi_get_device_power_info,device_id,pdevice_power_info) +} + +int (*dsmi_get_device_frequency_func)(int device_id, int device_type,unsigned int *pfrequency); +int dsmi_get_device_frequency(int device_id, int device_type,unsigned int *pfrequency){ + CALL_FUNC(dsmi_get_device_frequency,device_id,device_type,pfrequency) +} + +int (*dsmi_get_hbm_info_func)(int device_id, struct dsmi_hbm_info_stru *pdevice_hbm_info); +int dsmi_get_hbm_info(int device_id, struct dsmi_hbm_info_stru *pdevice_hbm_info){ + CALL_FUNC(dsmi_get_hbm_info,device_id,pdevice_hbm_info) +} + +int (*dsmi_get_memory_info_func)(int device_id, struct dsmi_memory_info_stru *pdevice_memory_info); +int dsmi_get_memory_info(int device_id, struct dsmi_memory_info_stru *pdevice_memory_info){ + CALL_FUNC(dsmi_get_memory_info,device_id,pdevice_memory_info) +} + +int (*dsmi_get_device_errorcode_func)(int device_id, int *errorcount,unsigned int *perrorcode); +int dsmi_get_device_errorcode(int device_id, int *errorcount,unsigned int *perrorcode){ + CALL_FUNC(dsmi_get_device_errorcode,device_id,errorcount,perrorcode) +} + +int (*dsmi_get_chip_info_func)(int device_id, struct dsmi_chip_info_stru *chip_info); +int dsmi_get_chip_info(int device_id, struct dsmi_chip_info_stru *chip_info){ + CALL_FUNC(dsmi_get_chip_info,device_id,chip_info) +} + +int (*dsmi_get_device_ip_address_func)(int device_id, int port_type, int port_id, ip_addr_t *ip_address, ip_addr_t *mask_address); +int dsmi_get_device_ip_address(int device_id, int port_type, int port_id, ip_addr_t *ip_address, ip_addr_t *mask_address){ + CALL_FUNC(dsmi_get_device_ip_address,device_id,port_type,port_id,ip_address,mask_address) +} + +// load .so files and functions +int dsmiInit_dl(void){ + dsmiHandle = dlopen("libdrvdsmi_host.so",RTLD_LAZY); + if (dsmiHandle == NULL){ + dsmiHandle = dlopen("libdrvdsmi.so",RTLD_LAZY); + } + if (dsmiHandle == NULL){ + return SO_NOT_FOUND; + } + + dsmi_list_device_func = 
dlsym(dsmiHandle,"dsmi_list_device"); + + dsmi_get_device_count_func = dlsym(dsmiHandle,"dsmi_get_device_count"); + + dsmi_get_device_health_func = dlsym(dsmiHandle,"dsmi_get_device_health"); + + dsmi_get_phyid_from_logicid_func = dlsym(dsmiHandle,"dsmi_get_phyid_from_logicid"); + + dsmi_get_logicid_from_phyid_func = dlsym(dsmiHandle,"dsmi_get_logicid_from_phyid"); + + dsmi_get_device_errorcode_func = dlsym(dsmiHandle,"dsmi_get_device_errorcode"); + + dsmi_get_chip_info_func = dlsym(dsmiHandle,"dsmi_get_chip_info"); + + dsmi_get_device_ip_address_func = dlsym(dsmiHandle,"dsmi_get_device_ip_address"); + + return SUCCESS; +} + +int dsmiShutDown(void){ + if (dsmiHandle == NULL){ + return SUCCESS; + } + return (dlclose(dsmiHandle) ? ERROR_UNKNOWN : SUCCESS); +} +*/ import "C" import ( "fmt" @@ -43,7 +172,6 @@ type ChipInfo struct { // DeviceMgrInterface interface for dsmi type DeviceMgrInterface interface { - EnableContainerService() error GetDeviceCount() (int32, error) GetDeviceList(*[hiAIMaxDeviceNum]uint32) (int32, error) GetDeviceHealth(int32) (uint32, error) @@ -51,25 +179,21 @@ type DeviceMgrInterface interface { GetLogicID(uint32) (uint32, error) GetChipInfo(int32) (*ChipInfo, error) GetDeviceIP(logicID int32) (string, error) + ShutDown() } // DeviceManager struct definition type DeviceManager struct{} +func init() { + C.dsmiInit_dl() +} + // NewDeviceManager new DeviceManager instance func NewDeviceManager() *DeviceManager { return &DeviceManager{} } -// EnableContainerService enable container service -func (d *DeviceManager) EnableContainerService() error { - err := C.dsmi_enable_container_service() - if err != 0 { - return fmt.Errorf("enable container service faild , error code: %d", int32(err)) - } - return nil -} - // GetDeviceCount get ascend910 device quantity func (d *DeviceManager) GetDeviceCount() (int32, error) { var count C.int @@ -170,7 +294,11 @@ func convertToCharArr(charArr []rune, cgoArr [maxChipName]C.uchar) []rune { } // GetDeviceIP get 
deviceIP -func (d *DeviceManager) GetDeviceIP(logicID int32) (string, error) { +func (d *DeviceManager) GetDeviceIP(phyID int32) (string, error) { + logicID, err := d.GetLogicID(uint32(phyID)) + if err != nil { + return ERROR, fmt.Errorf("transfor phyID %d to logicID failed, error code : %v", phyID, err) + } var portType C.int = 1 var portID C.int var ipAddress [hiAIMaxDeviceNum]C.ip_addr_t @@ -178,10 +306,10 @@ func (d *DeviceManager) GetDeviceIP(logicID int32) (string, error) { var retIPAddress string var ipString [4]uint8 - err := C.dsmi_get_device_ip_address(C.int(logicID), portType, portID, &ipAddress[C.int(logicID)], + retCode := C.dsmi_get_device_ip_address(C.int(logicID), portType, portID, &ipAddress[C.int(logicID)], &maskAddress[C.int(logicID)]) - if err != 0 { - return ERROR, fmt.Errorf("getDevice IP address failed, error code: %d", int32(err)) + if retCode != 0 { + return ERROR, fmt.Errorf("getDevice IP address failed, error code: %d", int32(retCode)) } unionPara := ipAddress[C.int(logicID)].u_addr @@ -192,3 +320,8 @@ func (d *DeviceManager) GetDeviceIP(logicID int32) (string, error) { retIPAddress = fmt.Sprintf("%d.%d.%d.%d", ipString[0], ipString[1], ipString[2], ipString[3]) return retIPAddress, nil } + +// ShutDown clean the dynamically loaded resource +func (d *DeviceManager) ShutDown() { + C.dsmiShutDown() +} diff --git a/src/plugin/pkg/npu/huawei/dsmi_common_interface.h b/src/plugin/pkg/npu/huawei/dsmi_common_interface.h new file mode 100644 index 0000000000000000000000000000000000000000..a1940f26fa297f2881563ffdc22b737ee6eabec9 --- /dev/null +++ b/src/plugin/pkg/npu/huawei/dsmi_common_interface.h @@ -0,0 +1,267 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2012-2019. All rights reserved. 
+ * Description: + * Author: huawei + * Create: 2019-10-15 + */ +#ifndef __DSMI_COMMON_INTERFACE_H__ +#define __DSMI_COMMON_INTERFACE_H__ +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum rdfx_detect_result { + RDFX_DETECT_OK = 0, + RDFX_DETECT_SOCK_FAIL = 1, + RDFX_DETECT_RECV_TIMEOUT = 2, + RDFX_DETECT_UNREACH = 3, + RDFX_DETECT_TIME_EXCEEDED = 4, + RDFX_DETECT_FAULT = 5, + RDFX_DETECT_INIT = 6, + RDFX_DETECT_MAX +} DSMI_NET_HEALTH_STATUS; + +struct dsmi_power_info_stru { + unsigned short power; +}; +struct dsmi_memory_info_stru { + unsigned long memory_size; + unsigned int freq; + unsigned int utiliza; +}; + +struct dsmi_hbm_info_stru { + unsigned long memory_size; /**< HBM total size, KB */ + unsigned int freq; /**< HBM freq, MHZ */ + unsigned long memory_usage; /**< HBM memory_usage, KB */ + int temp; /**< HBM temperature */ + unsigned int bandwith_util_rate; +}; + +#define MAX_CHIP_NAME 32 +#define MAX_DEVICE_COUNT 64 + +struct dsmi_chip_info_stru { + unsigned char chip_type[MAX_CHIP_NAME]; + unsigned char chip_name[MAX_CHIP_NAME]; + unsigned char chip_ver[MAX_CHIP_NAME]; +}; + +#define DSMI_VNIC_PORT 0 +#define DSMI_ROCE_PORT 1 + +enum ip_addr_type { + IPADDR_TYPE_V4 = 0U, /**< IPv4 */ + IPADDR_TYPE_V6 = 1U, /**< IPv6 */ + IPADDR_TYPE_ANY = 2U +}; + +#define DSMI_ARRAY_IPV4_NUM 4 +#define DSMI_ARRAY_IPV6_NUM 16 + +typedef struct ip_addr { + union { + unsigned char ip6[DSMI_ARRAY_IPV6_NUM]; + unsigned char ip4[DSMI_ARRAY_IPV4_NUM]; + } u_addr; + enum ip_addr_type ip_type; +} ip_addr_t; + +/** +* @ingroup driver +* @brief Get the number of devices +* @attention NULL +* @param [out] device_count The space requested by the user is used to store the number of returned devices +* @return 0 for success, others for fail +* @note Support:Ascend310,Ascend910 +*/ +int dsmi_get_device_count(int *device_count); + +/** +* @ingroup driver +* @brief Get the id of all devices +* @attention NULL +* @param [out] device_id_list[] The space requested by the user is used 
to store the id of all returned devices +* @param [in] count Number of equipment +* @return 0 for success, others for fail +* @note Support:Ascend310,Ascend910 +*/ +int dsmi_list_device(int device_id_list[], int count); + + + +/** +* @ingroup driver +* @brief Convert the logical ID of the device to a physical ID +* @attention NULL +* @param [in] logicid logic id +* @param [out] phyid physic id +* @return 0 for success, others for fail +* @note Support:Ascend310,Ascend910 +*/ +int dsmi_get_phyid_from_logicid(unsigned int logicid, unsigned int *phyid); + +/** +* @ingroup driver +* @brief Convert the physical ID of the device to a logical ID +* @attention NULL +* @param [in] phyid physical id +* @param [out] logicid logic id +* @return 0 for success, others for fail +* @note Support:Ascend310,Ascend910 +*/ +int dsmi_get_logicid_from_phyid(unsigned int phyid, unsigned int *logicid); + +/** +* @ingroup driver +* @brief Query the overall health status of the device, support AI Server +* @attention NULL +* @param [in] device_id The device id +* @param [out] phealth The pointer of the overall health status of the device only represents this component, + and does not include other components that have a logical relationship with this component. +* @return 0 for success, others for fail +* @note Support:Ascend310,Ascend910 +*/ +int dsmi_get_device_health(int device_id, unsigned int *phealth); + +/** +* @ingroup driver +* @brief Query device fault code +* @attention NULL +* @param [in] device_id The device id. 
+* @param [out] errorcount Number of error codes, count:0~128 +* @param [out] perrorcode error codes +* @return 0 for success, others for fail +* @note Support:Ascend310,Ascend910 +*/ +int dsmi_get_device_errorcode(int device_id, int *errorcount, unsigned int *perrorcode); + +/** +* @ingroup driver +* @brief Query the temperature of the ICE SOC of Ascend AI processor +* @attention NULL +* @param [in] device_id The device id +* @param [out] ptemperature The temperature of the HiSilicon SOC of the Shengteng AI processor: unit Celsius, + the accuracy is 1 degree Celsius, and the decimal point is rounded. 16-bit signed type, + little endian. The value returned by the device is the actual temperature. +* @return 0 for success, others for fail +* @note Support:Ascend310,Ascend910 +*/ +int dsmi_get_device_temperature(int device_id, int *ptemperature); + +/** +* @ingroup driver +* @brief Query device power consumption +* @attention NULL +* @param [in] device_id The device id +* @param [out] pdevice_power_info Device power consumption: unit is W, accuracy is 0.1W. 
16-bit unsigned short type, + little endian +* @return 0 for success, others for fail +* @note Support:Ascend310,Ascend910 +*/ +int dsmi_get_device_power_info(int device_id, struct dsmi_power_info_stru *pdevice_power_info); + + +/** +* @ingroup driver +* @brief Query the voltage of Sheng AI SOC of ascend AI processor +* @attention NULL +* @param [in] device_id The device id +* @param [out] pvoltage The voltage of the HiSilicon SOC of the Shengteng AI processor: the unit is V, + and the accuracy is 0.01V +* @return 0 for success, others for fail +* @note Support:Ascend310,Ascend910 +*/ +int dsmi_get_device_voltage(int device_id, unsigned int *pvoltage); + +/** +* @ingroup driver +* @brief Get the occupancy rate of the HiSilicon SOC of the Ascension AI processor +* @attention NULL +* @param [in] device_id The device id +* @param [in] device_type device_type +* @param [out] putilization_rate Utilization rate of HiSilicon SOC of ascend AI processor, unit:% +* @return 0 for success, others for fail +* @note Support:Ascend310,Ascend910 +*/ +int dsmi_get_device_utilization_rate(int device_id, int device_type, unsigned int *putilization_rate); + +/** +* @ingroup driver +* @brief Get the frequency of the HiSilicon SOC of the Ascension AI processor +* @attention NULL +* @param [in] device_id The device id +* @param [in] device_type device_type +* @param [out] pfrequency Frequency, unit MHZ +* @return 0 for success, others for fail +* @note Support:Ascend310,Ascend910 +*/ +int dsmi_get_device_frequency(int device_id, int device_type, unsigned int *pfrequency); + +/** +* @ingroup driver +* @brief Get memory information +* @attention NULL +* @param [in] device_id The device id +* @param [out] pdevice_memory_info Return memory information +* @return 0 for success, others for fail +* @note Support:Ascend310,Ascend910 +*/ +int dsmi_get_memory_info(int device_id, struct dsmi_memory_info_stru *pdevice_memory_info); + + +/** +* @ingroup driver +* @brief get the ip address and mask 
address. +* @attention NULL +* @param [in] device_id The device id +* @param [in] port_type Specify the network port type +* @param [in] port_id Specify the network port number, reserved field +* @param [out] ip_address return ip address info +* @param [out] mask_address return mask address info +* @return 0 for success, others for fail +* @note Support:Ascend310,Ascend910 +*/ +int dsmi_get_device_ip_address(int device_id, int port_type, int port_id, ip_addr_t *ip_address, + ip_addr_t *mask_address); + +/** +* @ingroup driver +* @brief Relevant information about the HiSilicon SOC of the AI ??processor, including chip_type, chip_name, + chip_ver version number +* @attention NULL +* @param [in] device_id The device id +* @param [out] chip_info Get the relevant information of ascend AI processor Hisilicon SOC +* @return 0 for success, others for fail +* @note Support:Ascend310,Ascend910 +*/ +int dsmi_get_chip_info(int device_id, struct dsmi_chip_info_stru *chip_info); + + +/** +* @ingroup driver +* @brief Query the frequency, capacity and utilization information of hbm +* @attention NULL +* @param [in] device_id The device id +* @param [out] pdevice_hbm_info return hbm infomation +* @return 0 for success, others for fail +* @note Support:Ascend910 +*/ +int dsmi_get_hbm_info(int device_id, struct dsmi_hbm_info_stru *pdevice_hbm_info); + + +/** +* @ingroup driver +* @brief Query the connectivity status of the RoCE network card's IP address +* @attention NULL +* @param [in] device_id The device id +* @param [out] presult return the result wants to query +* @return 0 for success, others for fail +* @note Support:Ascend910 +*/ +int dsmi_get_network_health(int device_id, DSMI_NET_HEALTH_STATUS *presult); + +#ifdef __cplusplus +} +#endif +#endif \ No newline at end of file diff --git a/src/plugin/pkg/npu/huawei/manager.go b/src/plugin/pkg/npu/huawei/manager.go index 4247090d912f301e946ac6875719ea2a010f7571..54db686a45f7836a4728ebd636522039debc0b1d 100644 --- 
a/src/plugin/pkg/npu/huawei/manager.go +++ b/src/plugin/pkg/npu/huawei/manager.go @@ -78,13 +78,7 @@ func NewHwDevManager(mode, dlogPath string) *HwDevManager { // GetNPUs get npu types func (hdm *HwDevManager) GetNPUs() error { - // start dsmi in contaioner - err := hdm.dmgr.EnableContainerService() - if err != nil { - logger.Error("enable container Service failed. error", zap.String("error", err.Error())) - } - - err = hdm.setRunMode() + err := hdm.setRunMode() if err != nil { logger.Error("err to set Run mode ", zap.Error(err)) return err @@ -205,6 +199,7 @@ func (hdm *HwDevManager) signalWatch(watcher *fsnotify.Watcher, sigs chan os.Sig default: logger.Info("Received signal, shutting down.", zap.String("signal", s.String())) hps.Stop() + hdm.dmgr.ShutDown() os.Exit(0) } } @@ -223,7 +218,7 @@ func (hdm *HwDevManager) setRunMode() error { return nil } devNum, err := hdm.dmgr.GetDeviceCount() - if err != nil && devNum == 0 { + if err != nil || devNum == 0 { return err } chipinfo, err := hdm.dmgr.GetChipInfo(0)

版本

发布日期

+

发布日期

修改说明

+

修改说明

V20.1.0

+

v20.2.0

+

2021-01-08

+

优化“创建DaemonSet”描述。

+

v20.2.0

2020-09-30

+

2020-11-18

第一次正式发布。

+

第一次正式发布。