[关闭]
@CrazyHenry 2018-04-23T17:47:17.000000Z 字数 5063 阅读 1367

1-Flat.cpp

hhhhfaiss


方式 测试算法 d nb xb nq xq time
无k-means,brute-force L2 distance,CPU KNN(k=4) 64 100000 100000xd 10000 10000xd 2345ms
无k-means,brute-force L2 distance,CPU KNN(k=4) 256 100000 100000xd 10000 10000xd 6606ms(跟d呈线性关系?)
use k means,无PQ, CPU, nlist == 100, nprobe == 1(有错误) KNN(k=4) 256 100000 100000xd 10000 10000xd 145ms
use k means,无PQ, CPU, nlist == 100, nprobe == 11(正确结果) KNN(k=4) 256 100000 100000xd 10000 10000xd 1999ms
use k means,无PQ, CPU, nlist == 100, nprobe == 21(正确结果) KNN(k=4) 256 100000 100000xd 10000 10000xd 4825ms
use k means,无PQ, CPU, nlist == 100, nprobe == 31(正确结果) KNN(k=4) 256 100000 100000xd 10000 10000xd 6828ms

接口:

  1. faiss::IndexFlatL2 index(d); //类IndexFlatL2构造函数
  2. index.is_trained //a boolean that indicates whether training is required
  3. index.ntotal //ntotal, the number of indexed vectors(特征向量的个数,nb)
  4. index.add(nb, xb); // add vectors to the index(database的vector矩阵xb和矩阵行数nb)
  5. index.search(nq, xq, k, D, I);//第一个参数本来应该是nq,第二个参数本来应该是xq,表示在database matrix中查找与k个与xq batches最近的特征向量

计时模块:

  1. /*
  2. * Author: yingmin.li
  3. *
  4. * Date: 2018/4/21
  5. *
  6. * Detail: add a timespan part for demo1
  7. *
  8. */
  9. #include <iostream>
  10. #include <string>
  11. #include <vector>
  12. #include <algorithm>
  13. #include <cstdio>
  14. #include <cstdlib>
  15. #include <chrono>
  16. #include <faiss/IndexFlat.h>
  17. //using std::
  18. using std::endl;
  19. using std::cout;
  20. //note: header file do not use using
  21. int main() {
  22. int d = 64; // dimension
  23. int nb = 100000; // database size
  24. int nq = 10000; // nb of queries
  25. float *xb = new float[d * nb]; //database matrix
  26. float *xq = new float[d * nq]; //query matrix
  27. for(int i = 0; i < nb; i++) {
  28. for(int j = 0; j < d; j++)
  29. xb[d * i + j] = drand48();
  30. xb[d * i] += i / 1000.; //just for fun!
  31. }
  32. for(int i = 0; i < nq; i++) {
  33. for(int j = 0; j < d; j++)
  34. xq[d * i + j] = drand48();
  35. xq[d * i] += i / 1000.;
  36. }
  37. faiss::IndexFlatL2 index(d); // call constructor
  38. printf("is_trained = %s\n", index.is_trained ? "true" : "false"); //是否需要训练阶段
  39. index.add(nb, xb); // add vectors to the index,将database矩阵xb和database中特征向量的个数nb,给index对象
  40. printf("ntotal = %ld\n", index.ntotal); //返回database中特征向量的个数nb
  41. int k = 4; //KNN algorithm
  42. { // search xq
  43. long *I = new long[k * nq];
  44. float *D = new float[k * nq];
  45. auto begin_time = std::chrono::steady_clock::now();
  46. for(int i = 0; i < 10; ++i)
  47. {
  48. index.search(nq, xq, k, D, I);
  49. }
  50. auto end_time = std::chrono::steady_clock::now();
  51. auto time_span = std::chrono::duration_cast<std::chrono::duration<long,std::ratio<1,1000>>>(end_time - begin_time);
  52. cout << endl << endl
  53. << "avg_time(10 times avg) for:" << endl
  54. << " -vec dimension d = " << d << endl
  55. << " -database vec numbers nb = " << nb <<endl
  56. << " -query vec batchings nq = " << nq << endl
  57. << " -KNN algorithm's k = " << k << endl
  58. << " is " << time_span.count() / 10 << " milliseconds" << endl;
  59. delete [] I;
  60. delete [] D;
  61. }
  62. delete [] xb;
  63. delete [] xq;
  64. return 0;
  65. }

image_1cbjiahm3cln19t1v191lrr1h5626.png-149.4kB

1.查询结果:

nq:输入待查特征向量的个数(行数),batchings

xq:待查数据矩阵 nq*d

xb:database的数据矩阵 nb*d

nb:database的特征向量的个数(行数)

int d = 64;                            // dimension
int nb = 100000;                       // database size
int nq = 10000;                        // nb of queries

353898594303223829.jpg-59.7kB

2.数据分析:

输入待查矩阵xq = nq * d:32-bit floating point matrices, d = 64(特征向量的维度,可以修改)

输入数据库矩阵:xb = nb * d:32-bit floating point matrices, d = 64(特征向量的维度,可以修改)

3.实际运算:

  1. The output of the actual search is similar to
  2. ID号:
  3. [[ 381 207 210 477]
  4. [ 526 911 142 72]
  5. [ 838 527 1290 425]
  6. [ 196 184 164 359]
  7. [ 526 377 120 425]]
  8. ID号:
  9. [[ 9900 10500 9309 9831]
  10. [11055 10895 10812 11321]
  11. [11353 11103 10164 9787]
  12. [10571 10664 10632 9638]
  13. [ 9628 9554 10036 9582]]
  14. Because of the value added to the first component of the vectors, the dataset is smeared along the first axis in d-dim space. So the neighbors of the first few vectors are around the beginning of the dataset, and the ones of the vectors around ~10000 are also around index 10000 in the dataset.

4.运行:

image_1cbj5fhksjgb14dt1a8ruh0q25l.png-89.8kB

代码:

  1. /**
  2. * Copyright (c) 2015-present, Facebook, Inc.
  3. * All rights reserved.
  4. *
  5. * This source code is licensed under the BSD+Patents license found in the
  6. * LICENSE file in the root directory of this source tree.
  7. */
  8. #include <cstdio>
  9. #include <cstdlib>
  10. #include <faiss/IndexFlat.h>
  11. int main() {
  12. int d = 64; // dimension
  13. int nb = 100000; // database size
  14. int nq = 10000; // nb of queries
  15. float *xb = new float[d * nb]; //database matrix
  16. float *xq = new float[d * nq]; //query matrix
  17. for(int i = 0; i < nb; i++) {
  18. for(int j = 0; j < d; j++)
  19. xb[d * i + j] = drand48();
  20. xb[d * i] += i / 1000.; //just for fun!
  21. }
  22. for(int i = 0; i < nq; i++) {
  23. for(int j = 0; j < d; j++)
  24. xq[d * i + j] = drand48();
  25. xq[d * i] += i / 1000.;
  26. }
  27. faiss::IndexFlatL2 index(d); // call constructor
  28. printf("is_trained = %s\n", index.is_trained ? "true" : "false"); //是否需要训练阶段
  29. index.add(nb, xb); // add vectors to the index,将database矩阵xb和database中特征向量的个数nb,给index对象
  30. printf("ntotal = %ld\n", index.ntotal); //返回database中特征向量的个数nb
  31. int k = 4; //KNN algorithm
  32. { // sanity check: search 5 first vectors of xb,database矩阵xb
  33. long *I = new long[k * 5]; //记录ID 号
  34. float *D = new float[k * 5]; //记录实际距离
  35. index.search(5, xb, k, D, I); //第一个参数本来应该是nq,第二个参数本来应该是xq,表示在database matrix中查找与k个与xq batches最近的特征向量
  36. //现在相当于是在database矩阵中查找k个与xb自身5个batches最近的特征向量
  37. // print results
  38. printf("I=\n");
  39. for(int i = 0; i < 5; i++) {
  40. for(int j = 0; j < k; j++)
  41. printf("%5ld ", I[i * k + j]);
  42. printf("\n");
  43. }
  44. printf("D=\n");
  45. for(int i = 0; i < 5; i++) {
  46. for(int j = 0; j < k; j++)
  47. printf("%7g ", D[i * k + j]);
  48. printf("\n");
  49. }
  50. delete [] I;
  51. delete [] D;
  52. }
  53. { // search xq
  54. long *I = new long[k * nq];
  55. float *D = new float[k * nq];
  56. index.search(nq, xq, k, D, I);
  57. // print results
  58. printf("I (5 first results)=\n"); //只输出5行最开始的数据
  59. for(int i = 0; i < 5; i++) {
  60. for(int j = 0; j < k; j++)
  61. printf("%5ld ", I[i * k + j]);
  62. printf("\n");
  63. }
  64. printf("I (5 last results)=\n"); //只输出5个最后的数据
  65. for(int i = nq - 5; i < nq; i++) {
  66. for(int j = 0; j < k; j++)
  67. printf("%5ld ", I[i * k + j]);
  68. printf("\n");
  69. }
  70. printf("D(5 first results)=\n");
  71. for(int i = 0; i < 5; i++) {
  72. for(int j = 0; j < k; j++)
  73. printf("%7g ", D[i * k + j]);
  74. printf("\n");
  75. }
  76. delete [] I;
  77. delete [] D;
  78. }
  79. delete [] xb;
  80. delete [] xq;
  81. return 0;
  82. }
添加新批注
在作者公开此批注前,只有你和作者可见。
回复批注