diff --git a/.gitignore b/.gitignore
index 27b1e2b..424a237 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,6 @@
+*.pyc
 .idea/
 venv/
 main/input/people_walking_mp4
 main/output
-object_detection/models
-*.pyc
\ No newline at end of file
+object_detection/models/*
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..124d98f
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "main/DaSiamRPN"]
+	path = main/DaSiamRPN
+	url = https://github.com/foolwood/DaSiamRPN/
diff --git a/README.md b/README.md
index 1c45845..1c79382 100644
--- a/README.md
+++ b/README.md
@@ -1,24 +1,5 @@
 # OpenLabeling: open-source image and video labeler
-
-# New features:
-- Auto labeling by using combination between Tracker and Deep Learning Object Detection Model
-
-# How to use
-- Download one or some deep learning models from https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md
-  and put it into `object_detection` directory. The outline of `object_detection` looks like that:
-  + `tf_object_detection.py`
-  + `utils.py`
-  + `ssdlite_mobilenet_v2_coco_2018_05_09` . Download by clicking this link http://download.tensorflow.org/models/object_detection/ssdlite_mobilenet_v2_coco_2018_05_09.tar.gz
-  + Or `mask_rcnn_inception_v2_coco_2018_01_28`. Download by clicking this link http://download.tensorflow.org/models/object_detection/mask_rcnn_inception_v2_coco_2018_01_28.tar.gz
-
-  **Note**: Default model used in `main_auto.py` is `mask_rcnn_inception_v2_coco_2018_01_28`. We can
-  set `graph_model_path` in file `main_auto.py` to change the pretrain model
-- Using `main_auto.py` to automatically label data first
-
-# Plan
-- Combine with state of the art deep learning tracker
-
 [![GitHub stars](https://img.shields.io/github/stars/Cartucho/OpenLabeling.svg?style=social&label=Stars)](https://github.com/Cartucho/OpenLabeling)
 
 Image labeling in multiple annotation formats:
@@ -31,6 +12,8 @@ Image labeling in multiple annotation formats:
 
 ## Latest Features
 
+- Jun 2019: automatic labeling with a Deep Learning Object Detection Model
+- May 2019: [ECCV2018] Distractor-aware Siamese Networks for Visual Object Tracking
 - Jan 2019: easy and quick bounding-boxe's resizing!
 - Jan 2019: video object tracking with OpenCV trackers!
 - TODO: Label photos via Google drive to allow "team online labeling".
@@ -92,6 +75,22 @@ Step by step:
 
 5. You can find the annotations in the folder **output/**
 
+#### How to use the deep learning feature
+
+- Download one or more deep learning models from https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md
+  and put them into the `object_detection/models` directory (you need to create the `models` folder yourself). The outline of `object_detection` then looks like this:
+  + `tf_object_detection.py`
+  + `utils.py`
+  + `models/ssdlite_mobilenet_v2_coco_2018_05_09`
+
+  For example, download the pre-trained model from http://download.tensorflow.org/models/object_detection/ssdlite_mobilenet_v2_coco_2018_05_09.tar.gz and make sure to extract it into `object_detection/models`.
+
+  **Note**: the default model used in `main_auto.py` is `ssdlite_mobilenet_v2_coco_2018_05_09`. You can
+  set `graph_model_path` in `main_auto.py` to switch to another pre-trained model.
+- Use `main_auto.py` to automatically label the data first
+
+ TODO: explain how the user can
+
 ### GUI usage
 
 Keyboard, press:
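Since the README section above leaves the download step to the reader, here is a minimal sketch of it in Python (not part of the patch; it assumes only the standard library and reuses the model URL and the `object_detection/models` layout described above):

```python
# Fetch and unpack the default detection model referenced in the README.
import tarfile
import urllib.request
from pathlib import Path

MODEL_URL = ('http://download.tensorflow.org/models/object_detection/'
             'ssdlite_mobilenet_v2_coco_2018_05_09.tar.gz')
models_dir = Path('object_detection') / 'models'
models_dir.mkdir(parents=True, exist_ok=True)  # create `models` if missing

archive = models_dir / 'ssdlite_mobilenet_v2_coco_2018_05_09.tar.gz'
urllib.request.urlretrieve(MODEL_URL, archive)  # download the tarball
with tarfile.open(archive) as tar:
    tar.extractall(models_dir)  # yields models/ssdlite_mobilenet_v2_coco_2018_05_09/
```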
diff --git a/main/DaSiamRPN b/main/DaSiamRPN
new file mode 160000
index 0000000..167d0db
--- /dev/null
+++ b/main/DaSiamRPN
@@ -0,0 +1 @@
+Subproject commit 167d0dbb38162cf34c74d51c540a492faf3d6e01
diff --git a/main/dasiamrpn.py b/main/dasiamrpn.py
new file mode 100644
index 0000000..96fded6
--- /dev/null
+++ b/main/dasiamrpn.py
@@ -0,0 +1,102 @@
+"""
+Author : Will Stone
+Date   : 190407
+Desc   : Wrapper class for the DaSiamRPN tracking method. This class has the
+         methods required to interface with the tracking class implemented
+         in main.py within the OpenLabeling package.
+"""
+import torch
+import numpy as np
+import sys
+from os.path import realpath, dirname, join, exists
+try:
+    from DaSiamRPN.code.run_SiamRPN import SiamRPN_init, SiamRPN_track
+except ImportError:
+    # check if the user has downloaded the submodules
+    if not exists(join('DaSiamRPN', 'code', 'net.py')):
+        print('Error: DaSiamRPN files not found. Please run the following command:')
+        print('\tgit submodule update --init')
+        exit()
+    else:
+        # if python 3
+        if sys.version_info >= (3, 0):
+            sys.path.append(realpath(join('DaSiamRPN', 'code')))
+        else:
+            # check if __init__.py files exist (otherwise create them)
+            path_temp = join('DaSiamRPN', 'code', '__init__.py')
+            if not exists(path_temp):
+                open(path_temp, 'w').close()
+            path_temp = join('DaSiamRPN', '__init__.py')
+            if not exists(path_temp):
+                open(path_temp, 'w').close()
+        # try to import again
+        from DaSiamRPN.code.run_SiamRPN import SiamRPN_init, SiamRPN_track
+from DaSiamRPN.code.net import SiamRPNvot
+from DaSiamRPN.code.utils import get_axis_aligned_bbox, cxy_wh_2_rect
+
+
+class dasiamrpn(object):
+    """
+    Wrapper class for incorporating DaSiamRPN into OpenLabeling
+    (https://github.com/foolwood/DaSiamRPN,
+    https://github.com/Cartucho/OpenLabeling)
+    """
+    def __init__(self):
+        self.net = SiamRPNvot()
+        # check if SiamRPNVOT.model was already downloaded (otherwise ask the user to download it)
+        model_path = join(realpath(dirname(__file__)), 'DaSiamRPN', 'code', 'SiamRPNVOT.model')
+        print(model_path)
+        if not exists(model_path):
+            print('\nError: model not found. Please download the pre-trained model and copy it to the directory \'DaSiamRPN/code/\'\n')
+            print('\tdownload link: https://drive.google.com/file/d/1-vNVZxfbIplXHrqMHiJJYWXYWsOIvGsf/view')
+            exit()
+        self.net.load_state_dict(torch.load(model_path))
+        self.net.eval().cuda()
+
+    def init(self, init_frame, initial_bbox):
+        """
+        Initialize the DaSiamRPN tracker with the initial frame and bounding box.
+        """
+        target_pos, target_sz = self.bbox_to_pos(initial_bbox)
+        self.state = SiamRPN_init(
+            init_frame, target_pos, target_sz, self.net)
+
+    def update(self, next_image):
+        """
+        Update the bounding box position and size on next_image. Always returns
+        True for success because tracking is terminated based on the number of
+        frames predicted in OpenLabeling, not on feedback from the tracking
+        algorithm (unlike the OpenCV tracking algorithms).
+        """
+        self.state = SiamRPN_track(self.state, next_image)
+        target_pos = self.state["target_pos"]
+        target_sz = self.state["target_sz"]
+        bbox = self.pos_to_bbox(target_pos, target_sz)
+
+        return True, bbox
+
+    def bbox_to_pos(self, initial_bbox):
+        """
+        Convert a bounding box from a (xmin, ymin, width, height) tuple to a
+        pair of arrays holding the (x, y) coordinates of the box center and
+        its (width, height), respectively.
+        """
+        xmin, ymin, w, h = initial_bbox
+        cx = int(xmin + w/2)
+        cy = int(ymin + h/2)
+        target_pos = np.array([cx, cy])
+        target_sz = np.array([w, h])
+
+        return target_pos, target_sz
+
+    def pos_to_bbox(self, target_pos, target_sz):
+        """
+        Invert the conversion done in bbox_to_pos.
+        """
+        w = target_sz[0]
+        h = target_sz[1]
+        xmin = int(target_pos[0] - w/2)
+        ymin = int(target_pos[1] - h/2)
+
+        return xmin, ymin, w, h
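For orientation, a minimal sketch of how `main.py` drives this wrapper (the frame paths and the initial box are hypothetical; it assumes the `DaSiamRPN` submodule is checked out, `SiamRPNVOT.model` is in `DaSiamRPN/code/`, a CUDA-capable GPU is available, and the snippet runs from the `main/` directory so the relative imports resolve):

```python
import cv2
from dasiamrpn import dasiamrpn

tracker = dasiamrpn()                          # loads SiamRPNVOT.model onto the GPU
init_frame = cv2.imread('input/frame_000.jpg')
tracker.init(init_frame, (50, 80, 120, 160))   # bbox as (xmin, ymin, width, height)

next_frame = cv2.imread('input/frame_001.jpg')
success, bbox = tracker.update(next_frame)     # success is always True, see docstring
print(success, bbox)
```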
+ """ + self.state = SiamRPN_track(self.state, next_image) + target_pos = self.state["target_pos"] + target_sz = self.state["target_sz"] + bbox = self.pos_to_bbox(target_pos, target_sz) + + return True, bbox + + def bbox_to_pos(self, initial_bbox): + """ + Convert bounding box format from a tuple format containing + xmin, ymin, width, and height to a tuple of two arrays which contain + the x and y coordinates of the center of the box and its width and + height respectively. + """ + xmin, ymin, w, h = initial_bbox + cx = int(xmin + w/2) + cy = int(ymin + h/2) + target_pos = np.array([cx, cy]) + target_sz = np.array([w, h]) + + return target_pos, target_sz + + def pos_to_bbox(self, target_pos, target_sz): + """ + Invert the bounding box format produced in the above conversion + function. + """ + w = target_sz[0] + h = target_sz[1] + xmin = int(target_pos[0] - w/2) + ymin = int(target_pos[1] - h/2) + + return xmin, ymin, w, h diff --git a/main/main.py b/main/main.py index f14b064..79c8767 100755 --- a/main/main.py +++ b/main/main.py @@ -12,6 +12,7 @@ from lxml import etree import xml.etree.cElementTree as ET +from dasiamrpn import dasiamrpn DELAY = 20 # keyboard delay (in milliseconds) WITH_QT = False @@ -28,6 +29,8 @@ parser.add_argument('-i', '--input_dir', default='input', type=str, help='Path to input directory') parser.add_argument('-o', '--output_dir', default='output', type=str, help='Path to output directory') parser.add_argument('-t', '--thickness', default='1', type=int, help='Bounding box and cross line thickness') +parser.add_argument('--tracker', default='KCF', type=str, help='Type of tracker being used') +parser.add_argument('-n', '--n_frames', default='50', type=int, help='number of frames to track object for') args = parser.parse_args() class_index = 0 @@ -35,11 +38,13 @@ img = None img_objects = [] -INPUT_DIR = args.input_dir +INPUT_DIR = args.input_dir OUTPUT_DIR = args.output_dir +N_FRAMES = args.n_frames +TRACKER_TYPE = args.tracker -WINDOW_NAME = 'OpenLabeling' -TRACKBAR_IMG = 'Image' +WINDOW_NAME = 'OpenLabeling' +TRACKBAR_IMG = 'Image' TRACKBAR_CLASS = 'Class' annotation_formats = {'PASCAL_VOC' : '.xml', 'YOLO_darknet' : '.txt'} @@ -569,7 +574,7 @@ def mouse_listener(event, x, y, flags, param): point_1 = (x, y) else: # minimal size for bounding box to avoid errors - threshold = 10 + threshold = 5 if abs(x - point_1[0]) > threshold or abs(y - point_1[1]) > threshold: # second click point_2 = (x, y) @@ -790,7 +795,7 @@ class LabelTracker(): # TODO: press ESC to stop the tracking process def __init__(self, tracker_type, init_frame, next_frame_path_list): - tracker_types = ['CSRT', 'KCF','MOSSE', 'MIL', 'BOOSTING', 'MEDIANFLOW', 'TLD', 'GOTURN'] + tracker_types = ['CSRT', 'KCF','MOSSE', 'MIL', 'BOOSTING', 'MEDIANFLOW', 'TLD', 'GOTURN', 'DASIAMRPN'] ''' Recomended tracker_type: KCF -> KCF is usually very good (minimum OpenCV 3.1.0) CSRT -> More accurate than KCF but slightly slower (minimum OpenCV 3.4.2) @@ -809,27 +814,36 @@ def __init__(self, tracker_type, init_frame, next_frame_path_list): def call_tracker_constructor(self, tracker_type): - # -- TODO: remove this if I assume OpenCV version > 3.4.0 - if int(self.major_ver == 3) and int(self.minor_ver) < 3: - tracker = cv2.Tracker_create(tracker_type) - # -- + if tracker_type == 'DASIAMRPN': + tracker = dasiamrpn() else: - if tracker_type == 'CSRT': - tracker = cv2.TrackerCSRT_create() - elif tracker_type == 'KCF': - tracker = cv2.TrackerKCF_create() - elif tracker_type == 'MOSSE': - tracker = cv2.TrackerMOSSE_create() - 
diff --git a/requirements.txt b/requirements.txt
index 9733348..5a8b324 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
 lxml==4.3.0
-numpy==1.16.0
+numpy==1.22.0
 opencv-contrib-python==3.4.5.20
 tqdm==4.29.1
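Note that `main/dasiamrpn.py` imports `torch`, which `requirements.txt` does not list, so PyTorch has to be installed separately. A quick sanity check of the pinned environment might look like this (a sketch, not part of the patch):

```python
# Verify the pinned versions and the contrib build of OpenCV.
import numpy
import cv2

print('numpy ', numpy.__version__)   # expect 1.22.0
print('opencv', cv2.__version__)     # expect 3.4.5
# The tracker constructors used in main.py ship only with the contrib build:
assert hasattr(cv2, 'TrackerKCF_create'), 'install opencv-contrib-python'
```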