@CrazyHenry · 2018-04-19

faiss tutorial follow-up: the Makefile



Official tutorial: https://github.com/facebookresearch/faiss/wiki

Compiling all the tutorials

In the directory

/home/users/yingmin.li/proj/faiss_proj/faiss/tutorial/cpp

run:

```bash
make all    # build all 5 tutorials
make cpu    # build 1, 2 and 3
make gpu    # build 4 and 5
make clean  # remove the binaries
```

To build only one of them, e.g. 4-GPU:

```bash
make 4-GPU  # note: no .cpp suffix
```
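
Once built, the tutorial binaries can be run directly from the same directory; the `-Wl,-rpath=../..` flag in the Makefile lets them locate `libfaiss.so` relative to the working directory, so no `LD_LIBRARY_PATH` is needed as long as you launch them from `tutorial/cpp`. A minimal sketch of a CPU-only session:

```bash
cd /home/users/yingmin.li/proj/faiss_proj/faiss/tutorial/cpp
make cpu      # builds 1-Flat, 2-IVFFlat and 3-IVFPQ (and libfaiss.so if missing)
./1-Flat      # brute-force IndexFlatL2 demo on random vectors
./2-IVFFlat   # inverted-file variant of the same search
```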

The Makefile (I understood about 80% of it):

```makefile
MAKEFILE_INC=../../makefile.inc

-include $(MAKEFILE_INC)

NVCCLDFLAGS = -Xcompiler \"-Wl,-rpath=../../:../../gpu/\" \
 -L../.. -L../../gpu -lfaiss -lgpufaiss

LDFLAGS = -L../.. -Wl,-rpath=../.. -lfaiss

# add new programs here
all: cpu gpu                     # what `make all` builds
cpu: 1-Flat 2-IVFFlat 3-IVFPQ    # note how this feeds into all: cpu gpu
gpu: 4-GPU 5-Multiple-GPUs       # note how this feeds into all: cpu gpu

1-Flat: 1-Flat.cpp ../../libfaiss.$(SHAREDEXT)
	$(CXX) -o $@ $(CXXFLAGS) $< -I../../../ $(LDFLAGS)

2-IVFFlat: 2-IVFFlat.cpp ../../libfaiss.$(SHAREDEXT)
	$(CXX) -o $@ $(CXXFLAGS) $< -I../../../ $(LDFLAGS)

3-IVFPQ: 3-IVFPQ.cpp ../../libfaiss.$(SHAREDEXT)
	$(CXX) -o $@ $(CXXFLAGS) $< -I../../../ $(LDFLAGS)

# the GPU-style rule below could also be used to build the CPU programs
4-GPU: 4-GPU.cpp ../../libfaiss.$(SHAREDEXT) ../../gpu/libgpufaiss.$(SHAREDEXT)
	$(NVCC) $(NVCCFLAGS) -o $@ $< $(NVCCLDFLAGS) -I../../../

5-Multiple-GPUs: 5-Multiple-GPUs.cpp ../../libfaiss.$(SHAREDEXT) \
	../../gpu/libgpufaiss.$(SHAREDEXT)
	$(NVCC) $(NVCCFLAGS) -o $@ $< $(NVCCLDFLAGS) -I../../../

# add new programs here
../../libfaiss.$(SHAREDEXT):
	cd ../../ && make libfaiss.$(SHAREDEXT)

../../gpu/libgpufaiss.$(SHAREDEXT):
	cd ../../gpu/ && make libgpufaiss.$(SHAREDEXT)

clean:
	rm -f 1-Flat 2-IVFFlat 3-IVFPQ 4-GPU 5-Multiple-GPUs  # remove all executables
```
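
The two `# add new programs here` markers show where your own code would hook in. In the recipes, `$@` expands to the target name (the executable) and `$<` to the first prerequisite (the .cpp file). As a minimal sketch, a hypothetical `6-Custom.cpp` dropped next to the tutorials (the name is my own example, not part of the repo) could be added by mirroring the 1-Flat rule:

```makefile
# build 6-Custom.cpp against the CPU library, same pattern as 1-Flat
6-Custom: 6-Custom.cpp ../../libfaiss.$(SHAREDEXT)
	$(CXX) -o $@ $(CXXFLAGS) $< -I../../../ $(LDFLAGS)

# optionally make it part of `make cpu` / `make all`
cpu: 1-Flat 2-IVFFlat 3-IVFPQ 6-Custom
```

Remember to list the new binary in the `clean` rule as well.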

A look at makefile.inc

```makefile
# Copyright (c) 2015-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD+Patents license found in the
# LICENSE file in the root directory of this source tree.

# -*- makefile -*-
# tested on CentOS 7, Ubuntu 16 and Ubuntu 14, see below to adjust flags to distribution.

CC=gcc
# CXX is g++ by default
CXX=g++
CFLAGS=-fPIC -m64 -Wall -g -O3 -mavx -msse4 -mpopcnt -fopenmp -Wno-sign-compare -fopenmp
# C++11 is required
CXXFLAGS=$(CFLAGS) -std=c++11
LDFLAGS=-g -fPIC -fopenmp

# common linux flags
# the shared libraries are plain .so files
SHAREDEXT=so
SHAREDFLAGS=-shared
FAISSSHAREDFLAGS=-shared

##########################################################################
# Uncomment one of the 4 BLAS/Lapack implementation options
# below. They are sorted from fastest to slowest (in our
# experiments).
##########################################################################
#
# 1. Intel MKL
#
# This is the fastest BLAS implementation we tested. Unfortunately it
# is not open-source and determining the correct linking flags is a
# nightmare. See
#
# https://software.intel.com/en-us/articles/intel-mkl-link-line-advisor
#
# The latest tested version is MKL 2017.0.098 (2017 Initial Release) and can
# be downloaded here:
#
# https://registrationcenter.intel.com/en/forms/?productid=2558&licensetype=2
#
# The following settings work if MKL is installed in its default folder:
#
# MKLROOT=/opt/intel/compilers_and_libraries/linux/mkl/
#
# BLASLDFLAGS=-Wl,--no-as-needed -L$(MKLROOT)/lib/intel64 -lmkl_intel_ilp64 \
# -lmkl_core -lmkl_gnu_thread -ldl -lpthread
#
# BLASCFLAGS=-DFINTEGER=long
#
# You may have to set LD_LIBRARY_PATH=$MKLROOT/lib/intel64 at runtime.
# If at runtime you get the error:
#   Intel MKL FATAL ERROR: Cannot load libmkl_avx2.so or libmkl_def.so.
# you may also set
#   LD_PRELOAD=$MKLROOT/lib/intel64/libmkl_core.so:$MKLROOT/lib/intel64/libmkl_sequential.so
# at runtime.
#
# 2. OpenBLAS (the default here; about 30% slower than MKL)
#
# The library contains both BLAS and Lapack. About 30% slower than MKL. Please see
# https://github.com/facebookresearch/faiss/wiki/Troubleshooting#slow-brute-force-search-with-openblas
# to fix performance problems with OpenBLAS
BLASCFLAGS=-DFINTEGER=int
# This is for CentOS (the default on our machines):
BLASLDFLAGS?=/usr/lib64/libopenblas.so.0
# for Ubuntu 16:
# sudo apt-get install libopenblas-dev python-numpy python-dev
# BLASLDFLAGS?=/usr/lib/libopenblas.so.0
# for Ubuntu 14:
# sudo apt-get install libopenblas-dev liblapack3 python-numpy python-dev
# BLASLDFLAGS?=/usr/lib/libopenblas.so.0 /usr/lib/lapack/liblapack.so.3.0
#
# 3. Atlas
#
# Automatically tuned linear algebra package. As the name indicates,
# it is tuned automatically for a given architecture, and in Linux
# distributions the architecture is typically indicated by the
# directory name, e.g. atlas-sse3 = optimized for SSE3 architecture.
#
# BLASCFLAGS=-DFINTEGER=int
# BLASLDFLAGS=/usr/lib64/atlas-sse3/libptf77blas.so.3 /usr/lib64/atlas-sse3/liblapack.so
#
# 4. reference implementation
#
# This is just a compiled version of the reference BLAS
# implementation, which is not optimized at all.
#
# BLASCFLAGS=-DFINTEGER=int
# BLASLDFLAGS=/usr/lib64/libblas.so.3 /usr/lib64/liblapack.so.3.2
#
##########################################################################
# SWIG and Python flags (not needed for my use case)
##########################################################################
# SWIG executable. This should be at least version 3.x
SWIGEXEC=swig
# The Python include directories for a given python executable can
# typically be found with
#
# python -c "import distutils.sysconfig; print distutils.sysconfig.get_python_inc()"
# python -c "import numpy ; print numpy.get_include()"
#
# or, for Python 3, with
#
# python3 -c "import distutils.sysconfig; print(distutils.sysconfig.get_python_inc())"
# python3 -c "import numpy ; print(numpy.get_include())"
#
PYTHONCFLAGS=-I/usr/include/python2.7/ -I/usr/lib64/python2.7/site-packages/numpy/core/include/

###########################################################################
# Cuda GPU flags
###########################################################################
# root of the cuda 8 installation (we use cuda-8.0)
CUDAROOT=/usr/local/cuda-8.0/
CUDACFLAGS=-I$(CUDAROOT)/include
NVCC=$(CUDAROOT)/bin/nvcc
# The compute_35 line below can be removed, since our GPU
# (GeForce GTX TITAN X, compute capability 5.2) is newer than that.
NVCCFLAGS= $(CUDAFLAGS) \
   -I $(CUDAROOT)/targets/x86_64-linux/include/ \
   -Xcompiler -fPIC \
   -Xcudafe --diag_suppress=unrecognized_attribute \
   -gencode arch=compute_35,code="compute_35" \
   -gencode arch=compute_52,code="compute_52" \
   -gencode arch=compute_60,code="compute_60" \
   --std c++11 -lineinfo \
   -ccbin $(CXX) -DFAISS_USE_FLOAT16
# GeForce GTX TITAN X (compute capability 5.2) is the GPU in our machines.
#
# Writing  $(NVCC) $(NVCCFLAGS) -o $@ $< $(NVCCLDFLAGS) -I../../../  in the
# tutorial Makefile therefore expands to:
#   $(CUDAROOT)/bin/nvcc $(CUDAFLAGS) \
#     -I $(CUDAROOT)/targets/x86_64-linux/include/ \
#     -Xcompiler -fPIC \
#     -Xcudafe --diag_suppress=unrecognized_attribute \
#     -gencode arch=compute_35,code="compute_35" \
#     -gencode arch=compute_52,code="compute_52" \
#     -gencode arch=compute_60,code="compute_60" \
#     --std c++11 -lineinfo \
#     -ccbin $(CXX) -DFAISS_USE_FLOAT16 -o $@ $< $(NVCCLDFLAGS) -I../../../
# NVCCLDFLAGS is defined in the tutorial Makefile, not here:
#   NVCCLDFLAGS = -Xcompiler \"-Wl,-rpath=../../:../../gpu/\" \
#                 -L../.. -L../../gpu -lfaiss -lgpufaiss
#
# BLAS LD flags for nvcc (used to generate an executable)
# if BLASLDFLAGS contains several flags, each one may
# need to be prepended with -Xlinker
BLASLDFLAGSNVCC=-Xlinker $(BLASLDFLAGS)
# Same, but to generate a .so
BLASLDFLAGSSONVCC=-Xlinker $(BLASLDFLAGS)
```
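
The only parts of makefile.inc that typically need touching are the BLAS section and the CUDA section. For example, to build on Ubuntu 16 instead of CentOS, the file's own comments say to swap the BLASLDFLAGS line; a minimal sketch of that edit (staying on OpenBLAS):

```makefile
# CentOS default (comment this out on Ubuntu):
# BLASLDFLAGS?=/usr/lib64/libopenblas.so.0

# Ubuntu 16, after `sudo apt-get install libopenblas-dev python-numpy python-dev`:
BLASLDFLAGS?=/usr/lib/libopenblas.so.0
```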

Conclusions:

GPU model:

[image: GPU model — GeForce GTX TITAN X]

Looking it up on NVIDIA's website gives its compute capability:

[image: compute capability table — GTX TITAN X is 5.2]

1. After switching to `-gencode arch=compute_52,code="compute_52"`, which matches the GTX TITAN X, the binaries are built for the card's actual compute capability, so the GPU is used to its full potential. (A quick way to check the compute capability from the shell is sketched after this section.)

2. The full compile command for the GPU tutorials ends up being:

```bash
/usr/local/cuda-8.0//bin/nvcc -I /usr/local/cuda-8.0//targets/x86_64-linux/include/ -Xcompiler -fPIC -Xcudafe --diag_suppress=unrecognized_attribute -gencode arch=compute_52,code="compute_52" -gencode arch=compute_60,code="compute_60" --std c++11 -lineinfo -ccbin g++ -DFAISS_USE_FLOAT16 -o 5-Multiple-GPUs 5-Multiple-GPUs.cpp -Xcompiler \"-Wl,-rpath=../../:../../gpu/\" -L../.. -L../../gpu -lfaiss -lgpufaiss -I../../../
```

[image]
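
To double-check the GPU model and its compute capability without looking it up on the website, a shell sketch (assuming the CUDA 8.0 samples are installed under the toolkit directory; paths may differ on your machine):

```bash
# list the GPUs the driver sees, e.g. "GPU 0: GeForce GTX TITAN X (...)"
nvidia-smi -L

# the deviceQuery sample prints the compute capability directly,
# e.g. "CUDA Capability Major/Minor version number:    5.2"
cd /usr/local/cuda-8.0/samples/1_Utilities/deviceQuery
make
./deviceQuery
```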
