[关闭]
@wanghuijiao 2021-10-26T09:41:16.000000Z 字数 3296 阅读 613

离线半自动标注流程

技术文档


前言

流程说明

  1. 用Yolov4在COCO上的预训练模型对测试集生成json格式的标签文件
  2. 用/ssd01/wanghuijiao/dataset/Summary/yolor_json2yolo.py脚本将json文件转换为yolo格式。该脚本既可以在已有的yolo标签文件末尾追加内容,也可以在原始yolo标签文件不存在时生成新的yolo标签文件。

操作

  1. COCO上的yolov4预训练模型,见56服务器上/ssd01/wanghuijiao/pose_detector02/yolov4.weights,需要自行准备/path/to/your_test_data.data文件,此文件格式为:

    1. classes = 80 # (yolov4.weights 模型对应检测类别)
    2. train = /ssd01/wanghuijiao/dataset/crowdhuman/crowdhuman_person_head/train.txt # 训练集路径(存放图片绝对路径,且图片和txt标签文件在同一文件夹)
    3. valid = /ssd01/wanghuijiao/dataset/crowdhuman/crowdhuman_person_head/val.txt # 测试集路径(存放图片绝对路径,且图片和txt标签文件在同一文件夹)
    4. names = /ssd01/wanghuijiao/dataset/Obj365/coco_classes.txt # yolov4.weights 模型对应检测类别名称,COCO就直接用这个路径吧
    5. backup = yolov4_human_head_detector_cfg/backup/ # 训练时权重文件输出路径,测试时这个不用管

    用以下命令对测试集生成json结果,结果默认为/ssd01/wanghuijiao/pose_detector02/results/coco_results.json:

    1. cd /ssd01/wanghuijiao/pose_detector02
    2. ./darknet detector valid \
    3. /path/to/your_test_data.data \
    4. /ssd01/wanghuijiao/pose_detector02/cfg_init/yolov4.cfg \
    5. /ssd01/wanghuijiao/pose_detector02/yolov4.weights
  2. 将上述测试集结果/ssd01/wanghuijiao/pose_detector02/results/coco_results.json转换并与原本的标签文件合并。/ssd01/wanghuijiao/dataset/Summary/yolor_json2yolo.py脚本内容请根据需求自行更改。

    1. import json
    2. import os
    3. import cv2
    4. from cv2 import data
    5. import ast
    6. import tqdm
    7. import argparse
    def convert(size, box):
        """Convert an absolute top-left box to a normalised centre box.

        Args:
            size: Indexable pair ``(image_width, image_height)`` in pixels.
            box: Indexable ``(x, y, w, h)`` in absolute pixel coordinates,
                where ``(x, y)`` is the box origin that ``w`` and ``h``
                extend from.

        Returns:
            Tuple ``(x_center, y_center, w, h)`` with every value divided by
            the image width (x, w) or the image height (y, h).

        Raises:
            ZeroDivisionError: If either entry of ``size`` is zero.
        """
        # Reciprocals are computed once and multiplied in, exactly as the
        # original listing did, so results stay bit-identical.
        scale_x = 1. / (size[0])
        scale_y = 1. / (size[1])

        box_w = box[2]
        box_h = box[3]

        # Shift the origin by half the extent to obtain the box centre.
        center_x = box[0] + box_w / 2.0
        center_y = box[1] + box_h / 2.0

        return (center_x * scale_x, center_y * scale_y,
                box_w * scale_x, box_h * scale_y)
    # Convert a detector JSON results file into YOLO-format label files.
    # For every detection above the score threshold whose category is in
    # `category_ids`, one line is appended to <label_path_dir>/<image_id>.txt
    # (the file is created when it does not exist yet).

    parser = argparse.ArgumentParser()
    parser.add_argument('--json_path', type=str, default='./v1.0/coco_head_results.json',
                        help="path to the detector JSON results file to convert")
    parser.add_argument('--label_path_dir', type=str, default='',
                        help="directory in which YOLO label files are created or appended to")
    arg = parser.parse_args()

    json_path = arg.json_path
    threshold = 0.5  # detections with score <= threshold are dropped
    # Original label directory; adjust as needed. It is not read by this script.
    label_path_src = '/ssd01/wanghuijiao/dataset/human_head_vehicle_RGB/Open_imagesV6/v2.0/labels'
    # Directory receiving the new / extended label files; adjust as needed,
    # e.g. '/ssd01/wanghuijiao/dataset/human_head_vehicle_RGB/Open_imagesV6/v2.0/labels'
    label_path_dir = arg.label_path_dir
    img_path = '/ssd01/wanghuijiao/dataset/Open_imagesV6/person_car/images'

    # crowdhuman head detector
    # category_ids = {'0': 'head'} # '1': human
    # COCO 80 yolov4-official
    # Filters which JSON categories are kept: the JSON may hold results for
    # all 80 classes while only ids 1, 3, 6, 7, 8 are wanted.
    category_ids = {1: "person", 3: "car", 6: "bus", 7: "train", 8: "truck"}
    # 0:person, 1:car, 2:head

    # An empty label_path_dir means "current directory"; nothing to create then.
    if label_path_dir:
        os.makedirs(label_path_dir, exist_ok=True)

    # Read the file exactly once. The original called f.readlines() a second
    # time inside the loop, which always returned an empty list.
    with open(json_path, 'r') as f:
        lines = f.readlines()

    # (width, height) per image id, so each image is decoded at most once
    # instead of once per detection.
    image_sizes = {}

    # The file imports the tqdm *module*, so the progress bar is tqdm.tqdm.
    for line in tqdm.tqdm(lines):
        # One record per line, optionally followed by a comma. Only the comma
        # is stripped, so a last record without one keeps its closing brace.
        line = line.strip().rstrip(',')
        if not line.startswith('{'):
            # Skips the enclosing '[' / ']' lines and blank lines.
            continue

        data = ast.literal_eval(line)
        if data['score'] <= threshold:
            continue
        print(data['bbox'])

        image_id = data['image_id']
        category_id = data['category_id']
        if category_id not in category_ids:
            continue

        bbox = [float(v) for v in data['bbox'][:4]]

        size = image_sizes.get(image_id)
        if size is None:
            img_file = os.path.join(img_path, f'{image_id}.jpg')
            img = cv2.imread(img_file)
            if img is None:
                # cv2.imread signals an unreadable / missing file with None.
                print(f'skip image_id {image_id}: cannot read {img_file}')
                continue
            size = [img.shape[1], img.shape[0]]  # [width, height]
            image_sizes[image_id] = size

        head_box = convert(size, bbox)
        # The leading '1' is the class id written for every kept detection;
        # change it to suit the target label scheme.
        content = str('1') + ' ' + str(head_box[0]) + ' ' + str(head_box[1]) + ' ' + \
            str(head_box[2]) + ' ' + str(head_box[3]) + '\n'

        label_file_dir = os.path.join(label_path_dir, f'{image_id}.txt')
        # Separate handle name so the JSON file handle `f` is not shadowed.
        with open(label_file_dir, 'a') as label_f:
            label_f.write(content)
添加新批注
在作者公开此批注前,只有你和作者可见。
回复批注