@cleardusk
2015-12-04T22:07:23.000000Z
字数 3213
阅读 1405
GjzCVCode
#!/bin/bash
#
# Download the ImageNet synset tarball for every wnid listed in a file.
#
# Usage: <this script> WNID_LIST_FILE
#
#   WNID_LIST_FILE  text file containing one wnid per line (a missing
#                   trailing newline on the last line is tolerated).
#
# Outputs:
#   downloaded/<list>/<wnid>.tar        one tarball per wnid, where <list> is
#                                       the list file's base name without its
#                                       last extension
#   downloaded_wnid/<list'>_down.txt    log of wnids whose download succeeded,
#                                       where <list'> is <list> with one more
#                                       extension stripped
#
# A wnid already present in the log is skipped, so the script can be re-run
# to resume an interrupted batch.
#
# Exit status: 0 if every pending wnid was downloaded, 1 if any download
# failed or the list cannot be read, 2 on a usage error.
#
# This script is self-contained; it does not call down_wnid.sh (which remains
# useful for fetching a single wnid).

set -u

readonly DIR_DOWNLOADED="downloaded"
readonly DIR_DOWNLOADED_WNID="downloaded_wnid"
# Suffix appended to the name of the "already downloaded" log file.
readonly SUFFIX="down"

if [[ $# -lt 1 || -z "${1:-}" ]]; then
  printf 'Usage: %s WNID_LIST_FILE\n' "${0##*/}" >&2
  exit 2
fi

# Full path of the wnid list file.
WNID_PATH_NAME=$1
if [[ ! -r "$WNID_PATH_NAME" ]]; then
  printf 'error: cannot read wnid list: %s\n' "$WNID_PATH_NAME" >&2
  exit 1
fi

# Strip the directory first and the extension second. Doing it the other way
# round lets a dot in a directory component (including a leading "./") eat
# the file name when the file itself has no extension.
DIR_WNID="${WNID_PATH_NAME##*/}"
DIR_WNID="${DIR_WNID%.*}"

# The log name drops one more extension than the directory name does; this is
# kept so that logs written by earlier runs are still found.
full_filename_wnid="${DIR_DOWNLOADED_WNID}/${DIR_WNID%.*}_${SUFFIX}.txt"

mkdir -p -- "$DIR_DOWNLOADED/$DIR_WNID" "$DIR_DOWNLOADED_WNID" || exit 1

failed=0
while IFS='' read -r wnid || [[ -n "$wnid" ]]; do
  # A blank line would otherwise produce a request with an empty wnid.
  [[ -n "$wnid" ]] || continue

  # Exact, fixed-string, whole-line match: the wnid is data, not a regex.
  if [[ -f "$full_filename_wnid" ]] \
    && grep -qxF -- "$wnid" "$full_filename_wnid"; then
    continue
  fi

  full_filename_tar="${DIR_DOWNLOADED}/${DIR_WNID}/${wnid}.tar"

  # NOTE(review): the username/accesskey values in the URL look like
  # placeholders -- confirm they are substituted before real use.
  # -c resumes a partially downloaded tarball; -4 forces IPv4.
  if wget -c -4 "http://image-net.org/download/synset?wnid=$wnid&username=username&accesskey=key&release=latest&src=stanford" -O "${full_filename_tar}"; then
    # Record the wnid only after wget reports success.
    printf '%s\n' "$wnid" >> "$full_filename_wnid"
  else
    printf 'warning: download failed for wnid %s\n' "$wnid" >&2
    failed=$((failed + 1))
  fi
done < "$WNID_PATH_NAME"

if ((failed > 0)); then
  printf 'error: %d download(s) failed\n' "$failed" >&2
  exit 1
fi
% Interactive tool for hand-labeling the digits of check-code images.
%% version 0.01 @gjz
%
% Each iteration picks a random source image, shows its inverted binary form,
% asks the operator to type the check code, and writes one cropped image per
% character into DATABASE_DST. Each output file is named
% '<typed character>_d<random number><IMAGE_FORMAT_LABEL>'.

% Initial values. The random number only serves to make output file names
% distinct.
RAND_NUM_MIN = 1e8;
RAND_NUM_MAX = 1e9 - 1;
NUM_OF_FILES = 1e5;
CHECK_CODE_DIGIT_NUM = 4;
IMAGE_FORMAT_LABEL = '.bmp';
DATABASE_SRC = '../database/check_code_hundred_thousand/';
DATABASE_DST = '../database/check_code_digits_labels/';

% Main loop
tic;
object_num = 100;
current_num = 0;
while current_num < object_num
    % The counter advances before validation, so a rejected entry still
    % consumes one of the object_num attempts.
    current_num = current_num + 1;

    % Read a random image file
    img_order = randi([1, NUM_OF_FILES]);
    filename = strcat(DATABASE_SRC, ...
        num2str(img_order), '.jpg');
    img = imread(filename);

    % Binarize and invert
    bw = im2bw(img);
    bw = 1 - bw;

    % Show the image for the operator to label
    imshow(bw);

    % Split the check-code image into per-character crops.
    % Column boundaries are spaced 46 pixels apart, starting at column 10:
    % start_interval = 10;
    % step_interval = 46;
    % divide = linspace(start_interval, start_interval+4.*step_interval, 5);
    divide = [10, 56, 102, 148, 194]; % 46
    % Storage for the binary crops, one 100x46 slice per character
    img_crops = false([CHECK_CODE_DIGIT_NUM, 100, 46]);
    for i = 1:length(divide)-1
        l = divide(i);
        r = divide(i+1) - 1;
        img_crops(i, :, :) = bw(:, l:r);
    end

    % Read the check code typed by hand. sprintf is used instead of strcat
    % because strcat drops trailing whitespace from character-array inputs,
    % which removed the space after the colon in the prompt.
    check_code_str = input(sprintf('#%d check code: ', current_num), 's');
    check_code_str(ismember(check_code_str, ' ')) = ''; % remove all spaces

    % Require exactly one character per crop. A longer entry used to be
    % accepted and silently truncated to its first characters.
    if length(check_code_str) ~= CHECK_CODE_DIGIT_NUM
        disp('invalid input');
        continue
    end

    % One random number per character for the file names
    suffix = randi([RAND_NUM_MIN, RAND_NUM_MAX], 1, CHECK_CODE_DIGIT_NUM);

    % Build each file name and save the matching crop
    for i = 1:CHECK_CODE_DIGIT_NUM
        label_filename = strcat(check_code_str(i), ...
            '_d', num2str(suffix(i)), IMAGE_FORMAT_LABEL);
        img_digit = squeeze(img_crops(i, :, :));
        imwrite(img_digit, strcat(DATABASE_DST, label_filename));
    end
end
toc;
% Recognize the check code of one randomly chosen image and time the run.
tic
DATABASE_SRC = '../database/check_code10000/';
num_digits = 4;

% Pick a random image and load it
img_order = randi([1 10000]);
filename = [num2str(img_order), '.jpg'];
img = imread([DATABASE_SRC, filename]);

% Preprocess (binarize and invert), then segment into per-character crops
img_br = ocr_preprocess(img);
img_crops = ocr_segmention(img_br);

% Classify each crop and collect the results as characters
verify_code = '';
for k = 1:num_digits
    verify_code(k) = num2str(ocr_classify(squeeze(img_crops(k, :, :))));
end
toc