[关闭]
@cleardusk 2015-12-04T22:07:23.000000Z 字数 3213 阅读 1405

2015.12.4 相关代码

GjzCVCode


下载 ImageNet 数据库的 shell 脚本

  #!/bin/bash
  #
  # Download the ImageNet synset tarball for every wnid listed in a file.
  #
  # Usage: <this script> WNID_LIST_FILE
  #
  # WNID_LIST_FILE contains one wnid per line. For a list named NAME.EXT:
  #   - tarballs are written to   downloaded/NAME/<wnid>.tar
  #   - every wnid whose download succeeded is appended to
  #     downloaded_wnid/NAME_down.txt, so a re-run skips it.
  #
  # Optional environment:
  #   IMAGENET_USERNAME    value of the "username" URL parameter (default: username)
  #   IMAGENET_ACCESSKEY   value of the "accesskey" URL parameter (default: key)
  #
  # This script does not need down_wnid.sh; that script is only a convenience
  # for downloading a single wnid.
  #
  # Exit status: 0 if every pending wnid was downloaded, 1 if at least one
  # download (or a setup step) failed, 2 on a usage error.

  set -u

  if [[ $# -lt 1 || -z "$1" ]]; then
    printf 'Usage: %s WNID_LIST_FILE\n' "${0##*/}" >&2
    exit 2
  fi

  readonly DIR_DOWNLOADED="downloaded"
  readonly DIR_DOWNLOADED_WNID="downloaded_wnid"
  # wnid path and file name, namely full file path
  readonly WNID_PATH_NAME=$1
  # suffix for the log of downloaded wnids
  readonly SUFFIX="down"
  readonly IMAGENET_USER="${IMAGENET_USERNAME:-username}"
  readonly IMAGENET_KEY="${IMAGENET_ACCESSKEY:-key}"

  if [[ ! -r "$WNID_PATH_NAME" ]]; then
    printf 'error: cannot read wnid list: %s\n' "$WNID_PATH_NAME" >&2
    exit 1
  fi

  # Strip the directory FIRST and the extension second. Doing it the other way
  # round lets a dot inside the directory part (e.g. "./wnids") eat the name.
  list_base="${WNID_PATH_NAME##*/}"
  DIR_WNID="${list_base%.*}"
  if [[ -z "$DIR_WNID" ]]; then
    # Name such as ".wnids": nothing is left after removing the "extension".
    DIR_WNID="$list_base"
  fi

  # The log name drops one further extension, as the original naming did.
  full_filename_wnid="${DIR_DOWNLOADED_WNID}/${DIR_WNID%.*}_${SUFFIX}.txt"

  mkdir -p -- "${DIR_DOWNLOADED}/${DIR_WNID}" "$DIR_DOWNLOADED_WNID" || {
    printf 'error: cannot create output directories\n' >&2
    exit 1
  }

  status=0
  # "|| [[ -n ... ]]" keeps the last line when the file lacks a trailing newline.
  while IFS= read -r wnid || [[ -n "$wnid" ]]; do
    wnid="${wnid%$'\r'}"          # tolerate CRLF line endings
    [[ -n "$wnid" ]] || continue  # skip blank lines

    # Whole-line, fixed-string match: a wnid must not match as a substring or
    # be interpreted as a regular expression.
    if [[ -f "$full_filename_wnid" ]] \
      && grep -qxF -- "$wnid" "$full_filename_wnid"; then
      continue
    fi

    full_filename_tar="${DIR_DOWNLOADED}/${DIR_WNID}/${wnid}.tar"
    url="http://image-net.org/download/synset?wnid=${wnid}&username=${IMAGENET_USER}&accesskey=${IMAGENET_KEY}&release=latest&src=stanford"

    # -c resumes a partial tarball left by an interrupted run; -4 forces IPv4.
    if wget -c -4 "$url" -O "$full_filename_tar"; then
      printf '%s\n' "$wnid" >> "$full_filename_wnid"
    else
      printf 'warning: download failed for wnid %s\n' "$wnid" >&2
      status=1
    fi
  done < "$WNID_PATH_NAME"

  exit "$status"

打码器（人工标注验证码数字的 MATLAB 脚本）

  % Interactive tool for labeling the digits of check-code images.
  %% version 0.01 @gjz
  %
  % Each round: load a random <n>.jpg from DATABASE_SRC, show its inverted
  % binary version, ask the operator to type the check code, cut the image
  % into fixed-width crops at the columns in `divide`, and write each crop to
  % DATABASE_DST as <typed character>_d<random number>.bmp.
  % The loop ends after object_num check codes have been labeled; a round
  % with invalid input is repeated with a new image and does not count.

  % some initial values; the random number decides the output file's name
  RAND_NUM_MIN = 1e8;
  RAND_NUM_MAX = 1e9 - 1;
  NUM_OF_FILES = 1e5;
  CHECK_CODE_DIGIT_NUM = 4;
  IMAGE_FORMAT_LABEL = '.bmp';
  DATABASE_SRC = '../database/check_code_hundred_thousand/';
  DATABASE_DST = '../database/check_code_digits_labels/';

  % First column of every crop plus one past the last crop (crop width 46).
  % Equivalent to linspace(10, 10 + 4*46, 5).
  divide = [10, 56, 102, 148, 194]; % 46

  % the main loop
  tic;
  object_num = 100;
  current_num = 0;   % number of check codes labeled so far
  while current_num < object_num
      % read random image file
      img_order = randi([1, NUM_OF_FILES]);
      filename = strcat(DATABASE_SRC, num2str(img_order), '.jpg');
      img = imread(filename);

      % binary and reverse
      bw = ~im2bw(img);

      % show the image for humans to label
      imshow(bw);

      % divide the check code image; one binary crop per character
      img_crops = false([length(divide) - 1, size(bw, 1), divide(2) - divide(1)]);
      for i = 1:length(divide)-1
          l = divide(i);
          r = divide(i+1)-1;
          img_crops(i, :, :) = bw(:, l:r);
      end

      % input check code by hand
      % sprintf is used on purpose: strcat removes the trailing whitespace of
      % char inputs, which dropped the space after the colon in the prompt.
      check_code_str = input(sprintf('#%d check code: ', current_num + 1), 's');
      check_code_str(ismember(check_code_str, ' ')) = ''; % remove all white space

      % Require exactly one character per crop; longer input used to be
      % truncated silently and shorter input used up one of the rounds.
      if length(check_code_str) ~= CHECK_CODE_DIGIT_NUM
          disp('invalid input');
          continue
      end

      % one random number per character keeps the file names distinct
      suffix = randi([RAND_NUM_MIN, RAND_NUM_MAX], 1, CHECK_CODE_DIGIT_NUM);
      suffix_str = cell(1, CHECK_CODE_DIGIT_NUM);
      check_code_digits_labels_filename = cell(1, CHECK_CODE_DIGIT_NUM);
      for i = 1:CHECK_CODE_DIGIT_NUM
          suffix_str{i} = strcat('_d', num2str(suffix(i)), IMAGE_FORMAT_LABEL);
          % file name = typed character followed by the random suffix
          check_code_digits_labels_filename{i} = strcat(check_code_str(i), suffix_str{i});
          % save the crop under that name
          img_digit = squeeze(img_crops(i,:,:));
          imwrite(img_digit, strcat(DATABASE_DST, check_code_digits_labels_filename{i}));
      end

      % count the round only after its crops were written
      current_num = current_num + 1;
  end
  toc;

识别部分

  % Recognize the check code of one randomly chosen image and time the run.
  tic
  DATABASE_SRC = '../database/check_code10000/';

  % pick a random image number and load the matching file
  img_order = randi([1 10000]);
  filename = sprintf('%d.jpg', img_order);
  img = imread([DATABASE_SRC, filename]);

  % preprocessing step (binarize and invert, according to the original note)
  img_br = ocr_preprocess(img);

  % segmentation into one crop per character
  img_crops = ocr_segmention(img_br);

  % classify each crop and append its character to the result string
  verify_code = '';
  for i = 1:4
      img_digit = squeeze(img_crops(i, :, :));
      verify_code(end + 1) = num2str(ocr_classify(img_digit));
  end
  toc
添加新批注
在作者公开此批注前,只有你和作者可见。
回复批注