[关闭]
@yangwenbo 2023-02-10T17:29:54.000000Z 字数 11351 阅读 237

清华大学-FIB实验室

多版本cuda环境管理

1、anaconda安装

1.1 下载 Anaconda 脚本

  1. #wget下载 Anaconda 安装脚本
  2. root@bc23574385ad:~# wget -P ./ https://mirrors.tuna.tsinghua.edu.cn/anaconda/archive/Anaconda3-2020.02-Linux-x86_64.sh
  3. #检查包的完整性
  4. root@bc23574385ad:~# sha256sum Anaconda3-2020.02-Linux-x86_64.sh
  5. 2b9f088b2022edb474915d9f69a803d6449d5fdb4c303041f60ac4aefcc208bb Anaconda3-2020.02-Linux-x86_64.sh

1.2 安装Anaconda

  1. #运行脚本启动安装进程
  2. root@bc23574385ad:~# bash Anaconda3-2020.02-Linux-x86_64.sh
  3. #你应该能看到下面的输出:
  4. Welcome to Anaconda3 2020.02
  5. In order to continue the installation process, please review the license
  6. agreement.
  7. Please, press ENTER to continue
  8. >>>
  9. #按ENTER继续,然后继续按ENTER向下滚动阅读协议。阅读完协议后,会询问你是否接受协议条款:
  10. Do you accept the license terms? [yes|no]
  11. [no] >>> yes
  12. #输入yes接受协议,随后会提示你选择安装路径:
  13. Anaconda3 will now be installed into this location:
  14. /root/anaconda3 #这里是默认路径
  15. - Press ENTER to confirm the location
  16. - Press CTRL-C to abort the installation
  17. - Or specify a different location below
  18. [/root/anaconda3] >>> /usr/local/anaconda3 #可以在这里自定义新的安装路径
  19. #安装过程将会花费一些时间,并且一旦完成,脚本将会问你是否想要运行conda init。输入yes。
  20. installation finished.
  21. Do you wish the installer to initialize Anaconda3
  22. by running conda init? [yes|no]
  23. [no] >>> yes
  24. #这会把命令行工具conda添加到系统的PATH环境变量中。
  1. #想要激活 Anaconda,你可以关闭并且重新打开你的 shell 或者在当前 shell 会话中输入下面的命令,来重新加载PATH环境变量:
  2. root@bc23574385ad:~# source ~/.bashrc
  3. #查看安装的版本
  4. (base) root@bc23574385ad:~# conda -V
  5. conda 4.8.2

1.3 添加清华源

  1. #添加清华源
  2. (base) root@bc23574385ad:~# conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/
  3. (base) root@bc23574385ad:~# conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main/
  4. (base) root@bc23574385ad:~# conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge/
  5. (base) root@bc23574385ad:~# conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/bioconda/
  6. (base) root@bc23574385ad:~# conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/r/
  7. (base) root@bc23574385ad:~# conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/pro/

2、完善python-3.7环境

名称 版本
cuda 10.2
cudnn 7.6
pytorch 1.6
tensorflow 2.3.0

2.1 创建python-3.7环境

  1. #创建python-3.7环境
  2. (base) root@bc23574385ad:~# conda create --name python-3.7 python=3.7
  3. (base) root@bc23574385ad:~# conda activate python-3.7
  1. #安装常用的Python包
  2. (python-3.7) root@bc23574385ad:~# pip install -i https://pypi.tuna.tsinghua.edu.cn/simple ipython pandas pillow matplotlib setproctitle networkx scikit-learn scipy tqdm GPUtil jupyterlab notebook h5py statsmodels

2.2 安装CUDA

  1. #下载cuda包
  2. (python-3.7) root@bc23574385ad:~# wget https://developer.download.nvidia.com/compute/cuda/10.2/Prod/local_installers/cuda_10.2.89_440.33.01_linux.run
  3. #安装cuda
  4. (python-3.7) root@bc23574385ad:~# sh cuda_10.2.89_440.33.01_linux.run --silent --toolkit --samples --librarypath=/usr/local/cuda-10.2
  1. #设置软链接
  2. (python-3.7) root@bc23574385ad:~# ln -s /usr/local/cuda-10.2/bin/nvcc /usr/bin/nvcc-python-3.7
  3. (python-3.7) root@bc23574385ad:~# which nvcc-python-3.7
  4. /usr/bin/nvcc-python-3.7
  5. #检查安装的版本
  6. (python-3.7) root@bc23574385ad:~# nvcc-python-3.7 -V
  7. nvcc-python-3: NVIDIA (R) Cuda compiler driver
  8. Copyright (c) 2005-2019 NVIDIA Corporation
  9. Built on Wed_Oct_23_19:24:38_PDT_2019
  10. Cuda compilation tools, release 10.2, V10.2.89
  1. #测试 CUDA Toolkit 以验证是否安装成功
  2. #Result = PASS则安装成功
  3. (python-3.7) root@bc23574385ad:~# cd /usr/local/cuda-10.2/extras/demo_suite/
  4. (python-3.7) root@bc23574385ad:/usr/local/cuda-10.2/extras/demo_suite# ./deviceQuery
  5. ......
  6. deviceQuery, CUDA Driver = CUDART, CUDA Driver Version = 11.6, CUDA Runtime Version = 10.2, NumDevs = 4, Device0 = NVIDIA GeForce RTX 2080 Ti, Device1 = NVIDIA GeForce RTX 2080 Ti, Device2 = NVIDIA GeForce RTX 2080 Ti, Device3 = NVIDIA GeForce RTX 2080 Ti
  7. Result = PASS

2.3 安装CUDNN

  1. #准备cudnn包(需提前从NVIDIA官网下载并上传到当前目录,这里仅确认文件已存在)
  2. (python-3.7) root@bc23574385ad:~# ll -d cudnn-10.2-linux-x64-v7.6.5.32.tgz
  3. -rw-r--r-- 1 root root 548210361 Mar 31 13:53 cudnn-10.2-linux-x64-v7.6.5.32.tgz
  4. #解压缩
  5. (python-3.7) root@bc23574385ad:~# tar xf cudnn-10.2-linux-x64-v7.6.5.32.tgz
  6. (python-3.7) root@bc23574385ad:~# ll -d cuda
  7. drwxr-xr-x 4 root root 4096 Mar 31 14:03 cuda/
  1. #把相应的文件,复制到指定目录即可
  2. (python-3.7) root@bc23574385ad:~# cp cuda/include/cudnn* /usr/local/cuda-10.2/include/
  3. (python-3.7) root@bc23574385ad:~# cp cuda/lib64/libcudnn* /usr/local/cuda-10.2/lib64/
  4. #添加权限
  5. (python-3.7) root@bc23574385ad:~# chmod a+r /usr/local/cuda-10.2/include/cudnn* /usr/local/cuda-10.2/lib64/libcudnn*

2.4 安装tensorflow

  1. #安装依赖包
  2. (python-3.7) root@bc23574385ad:~# pip install -i https://pypi.tuna.tsinghua.edu.cn/simple --user pytest-cov==2.0 pytest-filter-subpackage==0.1
  3. #安装指定版本的tensorflow
  4. (python-3.7) root@bc23574385ad:~# pip install -i https://pypi.tuna.tsinghua.edu.cn/simple tensorflow==2.3.0
  1. #验证tensorflow是否安装成功
  2. (python-3.7) root@bc23574385ad:~# pip show tensorflow
  3. Name: tensorflow
  4. Version: 2.3.0
  5. Summary: TensorFlow is an open source machine learning framework for everyone.
  6. Home-page: https://www.tensorflow.org/
  7. Author: Google Inc.
  8. Author-email: packages@tensorflow.org
  9. License: Apache 2.0
  10. Location: /usr/local/anaconda3/envs/python-3.7/lib/python3.7/site-packages
  11. Requires: absl-py, astunparse, gast, google-pasta, grpcio, h5py, keras-preprocessing, numpy, opt-einsum, protobuf, scipy, six, tensorboard, tensorflow-estimator, termcolor, wheel, wrapt
  12. Required-by:

2.5 安装pytorch

  1. #下载对应cuda版本编译的安装包
  2. (python-3.7) root@bc23574385ad:~# wget -P ./ https://download.pytorch.org/whl/cu102/torch-1.6.0-cp37-cp37m-linux_x86_64.whl
  3. #安装下载好的安装包
  4. (python-3.7) root@bc23574385ad:~# pip install -i https://pypi.tuna.tsinghua.edu.cn/simple torch-1.6.0-cp37-cp37m-linux_x86_64.whl
  1. #验证pytorch是否安装成功,这里可以写一个小脚本验证下,如没有报错就是安装成功了
  2. (python-3.7) root@bc23574385ad:~# vim pytorch.py
  3. (python-3.7) root@bc23574385ad:~# cat pytorch.py
  4. from __future__ import print_function
  5. import torch
  6. x = torch.rand(5, 3)
  7. print(x)
  8. (python-3.7) root@bc23574385ad:~# python3.7 pytorch.py
  9. tensor([[0.6295, 0.4860, 0.4348],
  10. [0.2331, 0.1373, 0.6409],
  11. [0.8252, 0.2289, 0.3068],
  12. [0.2569, 0.0396, 0.2084],
  13. [0.3917, 0.4409, 0.2219]])

2.6 多版本cuda环境管理

  1. #进入环境生效脚本(注意:下面的 ~/anaconda3 应替换为实际安装路径,本文 1.2 节自定义的安装路径为 /usr/local/anaconda3)
  2. (python-3.7) root@bc23574385ad:~# mkdir -p ~/anaconda3/envs/python-3.7/etc/conda/activate.d
  3. (python-3.7) root@bc23574385ad:~# vim ~/anaconda3/envs/python-3.7/etc/conda/activate.d/activate.sh
  4. (python-3.7) root@bc23574385ad:~# cat ~/anaconda3/envs/python-3.7/etc/conda/activate.d/activate.sh
  5. #!/bin/sh
  6. ORIGINAL_LD_LIBRARY_PATH=$LD_LIBRARY_PATH
  7. export LD_LIBRARY_PATH=/usr/local/cuda-10.2/lib64:/usr/local/cuda-10.2/extras/CUPTI/lib64:/lib/nccl/cuda-10.2:$LD_LIBRARY_PATH
  8. (python-3.7) root@bc23574385ad:~# chmod +x ~/anaconda3/envs/python-3.7/etc/conda/activate.d/activate.sh
  1. #退出环境生效脚本
  2. (python-3.7) root@bc23574385ad:~# mkdir -p ~/anaconda3/envs/python-3.7/etc/conda/deactivate.d
  3. (python-3.7) root@bc23574385ad:~# vim ~/anaconda3/envs/python-3.7/etc/conda/deactivate.d/deactivate.sh
  4. (python-3.7) root@bc23574385ad:~# cat ~/anaconda3/envs/python-3.7/etc/conda/deactivate.d/deactivate.sh
  5. #!/bin/sh
  6. export LD_LIBRARY_PATH=$ORIGINAL_LD_LIBRARY_PATH
  7. unset ORIGINAL_LD_LIBRARY_PATH
  8. (python-3.7) root@bc23574385ad:~# chmod +x ~/anaconda3/envs/python-3.7/etc/conda/deactivate.d/deactivate.sh

3、完善python-3.8环境

名称 版本
cuda 11.0
cudnn 8.0
pytorch 1.7
tensorflow 2.4.0

3.1 创建python-3.8环境

  1. #创建python-3.8环境
  2. (python-3.7) root@bc23574385ad:~# conda create --name python-3.8 python=3.8
  3. (python-3.7) root@bc23574385ad:~# conda activate python-3.8
  1. #安装常用的Python包
  2. (python-3.8) root@bc23574385ad:~# pip install -i https://pypi.tuna.tsinghua.edu.cn/simple ipython pandas pillow matplotlib setproctitle networkx scikit-learn scipy tqdm GPUtil jupyterlab notebook h5py statsmodels

3.2 安装CUDA

  1. #下载cuda包
  2. (python-3.8) root@bc23574385ad:~# wget http://developer.download.nvidia.com/compute/cuda/11.0.2/local_installers/cuda_11.0.2_450.51.05_linux.run
  3. #安装cuda
  4. (python-3.8) root@bc23574385ad:~# sh cuda_11.0.2_450.51.05_linux.run --silent --toolkit --samples --librarypath=/usr/local/cuda-11.0
  1. #设置软链接
  2. (python-3.8) root@bc23574385ad:~# ln -s /usr/local/cuda-11.0/bin/nvcc /usr/bin/nvcc-python-3.8
  3. (python-3.8) root@bc23574385ad:~# which nvcc-python-3.8
  4. /usr/bin/nvcc-python-3.8
  5. #检查安装的版本
  6. (python-3.8) root@bc23574385ad:~# nvcc-python-3.8 -V
  7. nvcc-python-3: NVIDIA (R) Cuda compiler driver
  8. Copyright (c) 2005-2020 NVIDIA Corporation
  9. Built on Thu_Jun_11_22:26:38_PDT_2020
  10. Cuda compilation tools, release 11.0, V11.0.194
  11. Build cuda_11.0_bu.TC445_37.28540450_0
  1. #测试 CUDA Toolkit 以验证是否安装成功
  2. #Result = PASS则安装成功
  3. (python-3.8) root@bc23574385ad:~# cd /usr/local/cuda-11.0/extras/demo_suite/
  4. (python-3.8) root@bc23574385ad:/usr/local/cuda-11.0/extras/demo_suite# ./deviceQuery
  5. ......
  6. deviceQuery, CUDA Driver = CUDART, CUDA Driver Version = 11.6, CUDA Runtime Version = 11.0, NumDevs = 4, Device0 = NVIDIA GeForce RTX 2080 Ti, Device1 = NVIDIA GeForce RTX 2080 Ti, Device2 = NVIDIA GeForce RTX 2080 Ti, Device3 = NVIDIA GeForce RTX 2080 Ti
  7. Result = PASS

3.3 安装CUDNN

  1. #准备cudnn包(需提前从NVIDIA官网下载并上传到当前目录,这里仅确认文件已存在)
  2. (python-3.8) root@bc23574385ad:~# ll -d cudnn-11.0-linux-x64-v8.0.1.13.tgz
  3. -rw-r--r-- 1 root root 1142456047 Mar 31 14:30 cudnn-11.0-linux-x64-v8.0.1.13.tgz
  4. #解压缩
  5. (python-3.8) root@bc23574385ad:~# tar xf cudnn-11.0-linux-x64-v8.0.1.13.tgz
  6. (python-3.8) root@bc23574385ad:~# ll -d cuda
  7. drwxr-xr-x 4 root root 4096 Mar 31 14:38 cuda/
  1. #把相应的文件,复制到指定目录即可
  2. (python-3.8) root@bc23574385ad:~# cp cuda/include/cudnn* /usr/local/cuda-11.0/include/
  3. (python-3.8) root@bc23574385ad:~# cp cuda/lib64/libcudnn* /usr/local/cuda-11.0/lib64/
  4. #添加权限
  5. (python-3.8) root@bc23574385ad:~# chmod a+r /usr/local/cuda-11.0/include/cudnn* /usr/local/cuda-11.0/lib64/libcudnn*

3.4 安装tensorflow

  1. #安装指定版本的tensorflow
  2. (python-3.8) root@bc23574385ad:~# pip install -i https://pypi.tuna.tsinghua.edu.cn/simple tensorflow==2.4.0
  1. #验证tensorflow是否安装成功
  2. (python-3.8) root@bc23574385ad:~# pip show tensorflow
  3. Name: tensorflow
  4. Version: 2.4.0
  5. Summary: TensorFlow is an open source machine learning framework for everyone.
  6. Home-page: https://www.tensorflow.org/
  7. Author: Google Inc.
  8. Author-email: packages@tensorflow.org
  9. License: Apache 2.0
  10. Location: /usr/local/anaconda3/envs/python-3.8/lib/python3.8/site-packages
  11. Requires: absl-py, astunparse, flatbuffers, gast, google-pasta, grpcio, h5py, keras-preprocessing, numpy, opt-einsum, protobuf, six, tensorboard, tensorflow-estimator, termcolor, typing-extensions, wheel, wrapt
  12. Required-by:

3.5 安装pytorch

  1. #下载对应cuda版本编译的安装包
  2. (python-3.8) root@bc23574385ad:~# wget -P ./ https://download.pytorch.org/whl/cu110/torch-1.7.1%2Bcu110-cp38-cp38-linux_x86_64.whl
  3. #安装下载好的安装包
  4. (python-3.8) root@bc23574385ad:~# pip install -i https://pypi.tuna.tsinghua.edu.cn/simple torch-1.7.1+cu110-cp38-cp38-linux_x86_64.whl
  1. #验证pytorch是否安装成功,这里可以写一个小脚本验证下,如没有报错就是安装成功了
  2. (python-3.8) root@bc23574385ad:~# vim pytorch.py
  3. (python-3.8) root@bc23574385ad:~# cat pytorch.py
  4. from __future__ import print_function
  5. import torch
  6. x = torch.rand(5, 3)
  7. print(x)
  8. (python-3.8) root@bc23574385ad:~# python3.8 pytorch.py
  9. tensor([[0.4627, 0.5238, 0.0711],
  10. [0.2442, 0.7200, 0.0021],
  11. [0.3826, 0.1364, 0.1059],
  12. [0.2161, 0.9110, 0.2768],
  13. [0.1932, 0.7716, 0.2172]])

3.6 多版本cuda环境管理

  1. #进入环境生效脚本(注意:下面的 ~/anaconda3 应替换为实际安装路径,本文 1.2 节自定义的安装路径为 /usr/local/anaconda3)
  2. (python-3.8) root@bc23574385ad:~# mkdir -p ~/anaconda3/envs/python-3.8/etc/conda/activate.d
  3. (python-3.8) root@bc23574385ad:~# vim ~/anaconda3/envs/python-3.8/etc/conda/activate.d/activate.sh
  4. (python-3.8) root@bc23574385ad:~# cat ~/anaconda3/envs/python-3.8/etc/conda/activate.d/activate.sh
  5. #!/bin/sh
  6. ORIGINAL_LD_LIBRARY_PATH=$LD_LIBRARY_PATH
  7. export LD_LIBRARY_PATH=/usr/local/cuda-11.0/lib64:/usr/local/cuda-11.0/extras/CUPTI/lib64:/lib/nccl/cuda-11.0:$LD_LIBRARY_PATH
  8. (python-3.8) root@bc23574385ad:~# chmod +x ~/anaconda3/envs/python-3.8/etc/conda/activate.d/activate.sh
  1. #退出环境生效脚本
  2. (python-3.8) root@bc23574385ad:~# mkdir -p ~/anaconda3/envs/python-3.8/etc/conda/deactivate.d
  3. (python-3.8) root@bc23574385ad:~# vim ~/anaconda3/envs/python-3.8/etc/conda/deactivate.d/deactivate.sh
  4. (python-3.8) root@bc23574385ad:~# cat ~/anaconda3/envs/python-3.8/etc/conda/deactivate.d/deactivate.sh
  5. #!/bin/sh
  6. export LD_LIBRARY_PATH=$ORIGINAL_LD_LIBRARY_PATH
  7. unset ORIGINAL_LD_LIBRARY_PATH
  8. (python-3.8) root@bc23574385ad:~# chmod +x ~/anaconda3/envs/python-3.8/etc/conda/deactivate.d/deactivate.sh
添加新批注
在作者公开此批注前,只有你和作者可见。
回复批注