diff --git a/.gitignore b/.gitignore
new file mode 100644
index 000000000..0d38c0934
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+*.bak
+*.h5
+__pycache__/*
diff --git a/README.md b/README.md
index d06b7f498..ee076b301 100644
--- a/README.md
+++ b/README.md
@@ -11,6 +11,51 @@ This repo contains the implementation of YOLOv2 in Keras with Tensorflow backend
 - [ ] Multiscale training
 - [ ] mAP Evaluation
 
+
+## Kabrau modifications:
+
+### Evaluation
+
+Evaluates the validation and test sets, reporting per-class AP, mAP, FPS, and a PR-curve image.
+- Important: clone https://github.com/kabrau/mean_average_precision next to this repo (the code imports it via `../mean_average_precision`)
+
+`python evaluate.py -c config.json -w /path/to/best_weights.h5 -i /path/validation_images/ -a /path/validation_pascal_xml -j /path/test_images/ -t /path/test_pascal_xml`
+
+Example
+
+`python evaluate.py -c E:/rodney/configs/keras-yolo2/config.Inception3.json -w E:/rodney/weights/Inception3_V2c.h5 -i E:/rodney/DSDFinal/images/ -a E:/rodney/DSDFinal/split/validation/ -j E:/rodney/DSDFinal/images/ -t E:/rodney/DSDFinal/split/test/`
+
+Example result
+
+```
+Valid Evaluate
+ascending_stair 0.8334
+descending_stair 0.6084
+door 0.8277
+elevator_door 0.9268
+Valid mAP: 0.7991
+
+Total Images: 546
+Elapsed Time: 11.0589
+Avg Image Time: 0.0203
+FPS: 49
+
+Test Evaluate
+ascending_stair 0.7365
+descending_stair 0.6934
+door 0.7482
+elevator_door 0.9024
+Test mAP: 0.7701
+
+Total Images: 535
+Elapsed Time: 9.8113
+Avg Image Time: 0.0183
+Std Deviation: 0.0059
+FPS: 54
+```
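+
+The timing lines above are derived from the per-image prediction times collected
+in `frontend_evaluate.py`; a minimal sketch of the arithmetic (the times below
+are made up for illustration):
+
+```python
+import statistics
+
+time_history = [0.020, 0.018, 0.019, 0.021]   # hypothetical per-image seconds
+
+print('Elapsed Time:', sum(time_history))
+print('Avg Image Time:', statistics.mean(time_history))
+print('Std Deviation:', statistics.stdev(time_history))
+print('FPS:', int(1 / statistics.mean(time_history)))
+```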
+
+
+
 ## Some example applications (click for videos):
 
 ### Raccon detection
diff --git a/evaluate.py b/evaluate.py
new file mode 100644
index 000000000..fe3129fb2
--- /dev/null
+++ b/evaluate.py
@@ -0,0 +1,112 @@
+#! /usr/bin/env python
+
+import argparse
+import os
+import numpy as np
+from preprocessing import parse_annotation
+from frontend_evaluate import YOLO
+import json
+
+os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
+os.environ["CUDA_VISIBLE_DEVICES"]="0"
+
+argparser = argparse.ArgumentParser(
+    description='Evaluate a trained YOLO_v2 model on validation and test sets')
+
+argparser.add_argument(
+    '-c',
+    '--conf',
+    help='path to configuration file')
+
+argparser.add_argument(
+    '-w',
+    '--weights',
+    help='path to pretrained weights')
+
+argparser.add_argument(
+    '-i',
+    '--inputVal',
+    help='validation image folder')
+
+argparser.add_argument(
+    '-a',
+    '--annotVal',
+    help='validation annotation folder')
+
+argparser.add_argument(
+    '-j',
+    '--inputTest',
+    help='test image folder')
+
+argparser.add_argument(
+    '-t',
+    '--annotTest',
+    help='test annotation folder')
+
+def _main_(args):
+    config_path = args.conf
+
+    with open(config_path) as config_buffer:
+        config = json.loads(config_buffer.read())
+
+    ###############################
+    #   Parse the annotations
+    ###############################
+
+    valid_imgs, valid_labels = parse_annotation(args.annotVal,
+                                                args.inputVal,
+                                                config['model']['labels'])
+
+    test_imgs, test_labels = parse_annotation(args.annotTest,
+                                              args.inputTest,
+                                              config['model']['labels'])
+
+    if len(config['model']['labels']) > 0:
+        overlap_labels = set(config['model']['labels']).intersection(set(valid_labels.keys()))
+
+        print('Seen labels:\t', valid_labels)
+        print('Given labels:\t', config['model']['labels'])
+        print('Overlap labels:\t', overlap_labels)
+
+        if len(overlap_labels) < len(config['model']['labels']):
+            print('Some labels have no annotations! Please revise the list of labels in the config.json file!')
+            return
+    else:
+        print('No labels are provided. Evaluating on all seen labels.')
+        config['model']['labels'] = list(valid_labels.keys())
+
+    ###############################
+    #   Construct the model
+    ###############################
+
+    yolo = YOLO(backend             = config['model']['backend'],
+                input_size          = config['model']['input_size'],
+                labels              = config['model']['labels'],
+                max_box_per_image   = config['model']['max_box_per_image'],
+                anchors             = config['model']['anchors'])
+
+    ###############################
+    #   Load the trained weights
+    ###############################
+
+    print("Loading pre-trained weights in", args.weights)
+    yolo.load_weights(args.weights)
+
+    ###############################
+    #   Start the evaluation process
+    ###############################
+
+    yolo.eval(valid_imgs         = valid_imgs,
+              test_imgs          = test_imgs,
+              learning_rate      = config['train']['learning_rate'],
+              batch_size         = config['train']['batch_size'],
+              object_scale       = config['train']['object_scale'],
+              no_object_scale    = config['train']['no_object_scale'],
+              coord_scale        = config['train']['coord_scale'],
+              class_scale        = config['train']['class_scale'],
+              debug              = config['train']['debug'])
+
+if __name__ == '__main__':
+    args = argparser.parse_args()
+    _main_(args)
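For reference, a minimal sketch of the `config.json` fields that `evaluate.py`
actually reads; all values here are illustrative (the labels follow the README
example, the anchors are the upstream keras-yolo2 defaults), and the full schema
follows the upstream config:

```json
{
    "model": {
        "backend":           "Inception3",
        "input_size":        416,
        "labels":            ["ascending_stair", "descending_stair", "door", "elevator_door"],
        "max_box_per_image": 10,
        "anchors":           [0.57273, 0.677385, 1.87446, 2.06253, 3.33843,
                              5.47434, 7.88282, 3.52778, 9.77052, 9.16828]
    },
    "train": {
        "learning_rate":   1e-4,
        "batch_size":      16,
        "object_scale":    5.0,
        "no_object_scale": 1.0,
        "coord_scale":     1.0,
        "class_scale":     1.0,
        "debug":           false
    }
}
```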
diff --git a/frontend.py b/frontend.py
index 85047174c..909e7024a 100644
--- a/frontend.py
+++ b/frontend.py
@@ -315,7 +315,7 @@ def train(self, train_imgs,     # the list of images to train the model
                                      save_best_only=True,
                                      mode='min',
                                      period=1)
-        tensorboard = TensorBoard(log_dir=os.path.expanduser('~/logs/'),
+        tensorboard = TensorBoard(log_dir=os.path.expanduser('./logs/'),
                                   histogram_freq=0,
                                   #write_batch_performance=True,
                                   write_graph=True,
diff --git a/frontend_evaluate.py b/frontend_evaluate.py
new file mode 100644
index 000000000..3cd139d6d
--- /dev/null
+++ b/frontend_evaluate.py
@@ -0,0 +1,531 @@
+import os
+import sys
+
+sys.path.insert(0, '../mean_average_precision')
+from mean_average_precision.detection_map import DetectionMAP
+import matplotlib.pyplot as plt
+
+from keras.models import Model
+from keras.layers import Reshape, Activation, Conv2D, Input, MaxPooling2D, BatchNormalization, Flatten, Dense, Lambda
+from keras.layers.advanced_activations import LeakyReLU
+import tensorflow as tf
+import numpy as np
+import cv2
+from utils import decode_netout, compute_overlap, compute_ap
+from keras.applications.mobilenet import MobileNet
+from keras.layers.merge import concatenate
+from keras.optimizers import SGD, Adam, RMSprop
+from preprocessing import BatchGenerator
+from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
+from backend import TinyYoloFeature, FullYoloFeature, MobileNetFeature, SqueezeNetFeature, Inception3Feature, VGG16Feature, ResNet50Feature
+import time
+
+import statistics
+
+class YOLO(object):
+    def __init__(self, backend,
+                       input_size,
+                       labels,
+                       max_box_per_image,
+                       anchors):
+
+        self.input_size = input_size
+
+        self.labels   = list(labels)
+        self.nb_class = len(self.labels)
+        self.nb_box   = len(anchors)//2
+        self.class_wt = np.ones(self.nb_class, dtype='float32')
+        self.anchors  = anchors
+
+        self.max_box_per_image = max_box_per_image
+
+        ##########################
+        # Make the model
+        ##########################
+
+        # make the feature extractor layers
+        input_image     = Input(shape=(self.input_size, self.input_size, 3))
+        self.true_boxes = Input(shape=(1, 1, 1, max_box_per_image, 4))
+
+        if backend == 'Inception3':
+            self.feature_extractor = Inception3Feature(self.input_size)
+        elif backend == 'SqueezeNet':
+            self.feature_extractor = SqueezeNetFeature(self.input_size)
+        elif backend == 'MobileNet':
+            self.feature_extractor = MobileNetFeature(self.input_size)
+        elif backend == 'Full Yolo':
+            self.feature_extractor = FullYoloFeature(self.input_size)
+        elif backend == 'Tiny Yolo':
+            self.feature_extractor = TinyYoloFeature(self.input_size)
+        elif backend == 'VGG16':
+            self.feature_extractor = VGG16Feature(self.input_size)
+        elif backend == 'ResNet50':
+            self.feature_extractor = ResNet50Feature(self.input_size)
+        else:
+            raise Exception('Architecture not supported! Only support Full Yolo, Tiny Yolo, MobileNet, SqueezeNet, VGG16, ResNet50, and Inception3 at the moment!')
+
+        print(self.feature_extractor.get_output_shape())
+        self.grid_h, self.grid_w = self.feature_extractor.get_output_shape()
+        features = self.feature_extractor.extract(input_image)
+
+        # make the object detection layer
+        output = Conv2D(self.nb_box * (4 + 1 + self.nb_class),
+                        (1,1), strides=(1,1),
+                        padding='same',
+                        name='DetectionLayer',
+                        kernel_initializer='lecun_normal')(features)
+        output = Reshape((self.grid_h, self.grid_w, self.nb_box, 4 + 1 + self.nb_class))(output)
+        output = Lambda(lambda args: args[0])([output, self.true_boxes])
+
+        self.model = Model([input_image, self.true_boxes], output)
+
+        # initialize the weights of the detection layer
+        layer = self.model.layers[-4]
+        weights = layer.get_weights()
+
+        new_kernel = np.random.normal(size=weights[0].shape)/(self.grid_h*self.grid_w)
+        new_bias   = np.random.normal(size=weights[1].shape)/(self.grid_h*self.grid_w)
+
+        layer.set_weights([new_kernel, new_bias])
+
+        # print a summary of the whole model
+        self.model.summary()
+
+    def custom_loss(self, y_true, y_pred):
+        mask_shape = tf.shape(y_true)[:4]
+
+        cell_x = tf.to_float(tf.reshape(tf.tile(tf.range(self.grid_w), [self.grid_h]), (1, self.grid_h, self.grid_w, 1, 1)))
+        cell_y = tf.transpose(cell_x, (0,2,1,3,4))
+
+        cell_grid = tf.tile(tf.concat([cell_x,cell_y], -1), [self.batch_size, 1, 1, self.nb_box, 1])
+
+        coord_mask = tf.zeros(mask_shape)
+        conf_mask  = tf.zeros(mask_shape)
+        class_mask = tf.zeros(mask_shape)
+
+        seen = tf.Variable(0.)
+        total_recall = tf.Variable(0.)
+
+        """
+        Adjust prediction
+        """
+        ### adjust x and y
+        pred_box_xy = tf.sigmoid(y_pred[..., :2]) + cell_grid
+
+        ### adjust w and h
+        pred_box_wh = tf.exp(y_pred[..., 2:4]) * np.reshape(self.anchors, [1,1,1,self.nb_box,2])
+
+        ### adjust confidence
+        pred_box_conf = tf.sigmoid(y_pred[..., 4])
+
+        ### adjust class probabilities
+        pred_box_class = y_pred[..., 5:]
+
+        """
+        Adjust ground truth
+        """
+        ### adjust x and y
+        true_box_xy = y_true[..., 0:2] # relative position to the containing cell
+
+        ### adjust w and h
+        true_box_wh = y_true[..., 2:4] # number of cells across, horizontally and vertically
+
+        ### adjust confidence
+        true_wh_half = true_box_wh / 2.
+        true_mins    = true_box_xy - true_wh_half
+        true_maxes   = true_box_xy + true_wh_half
+
+        pred_wh_half = pred_box_wh / 2.
+        pred_mins    = pred_box_xy - pred_wh_half
+        pred_maxes   = pred_box_xy + pred_wh_half
+
+        intersect_mins  = tf.maximum(pred_mins,  true_mins)
+        intersect_maxes = tf.minimum(pred_maxes, true_maxes)
+        intersect_wh    = tf.maximum(intersect_maxes - intersect_mins, 0.)
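+        # note: the tf.maximum(..., 0.) above clamps the per-axis overlap, so
+        # disjoint boxes contribute zero intersection area instead of a negative one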
+        intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]
+
+        true_areas = true_box_wh[..., 0] * true_box_wh[..., 1]
+        pred_areas = pred_box_wh[..., 0] * pred_box_wh[..., 1]
+
+        union_areas = pred_areas + true_areas - intersect_areas
+        iou_scores  = tf.truediv(intersect_areas, union_areas)
+
+        true_box_conf = iou_scores * y_true[..., 4]
+
+        ### adjust class probabilities
+        true_box_class = tf.argmax(y_true[..., 5:], -1)
+
+        """
+        Determine the masks
+        """
+        ### coordinate mask: simply the position of the ground truth boxes (the predictors)
+        coord_mask = tf.expand_dims(y_true[..., 4], axis=-1) * self.coord_scale
+
+        ### confidence mask: penalize predictors + penalize boxes with low IOU
+        # penalize the confidence of the boxes, which have IOU with some ground truth box < 0.6
+        true_xy = self.true_boxes[..., 0:2]
+        true_wh = self.true_boxes[..., 2:4]
+
+        true_wh_half = true_wh / 2.
+        true_mins    = true_xy - true_wh_half
+        true_maxes   = true_xy + true_wh_half
+
+        pred_xy = tf.expand_dims(pred_box_xy, 4)
+        pred_wh = tf.expand_dims(pred_box_wh, 4)
+
+        pred_wh_half = pred_wh / 2.
+        pred_mins    = pred_xy - pred_wh_half
+        pred_maxes   = pred_xy + pred_wh_half
+
+        intersect_mins  = tf.maximum(pred_mins,  true_mins)
+        intersect_maxes = tf.minimum(pred_maxes, true_maxes)
+        intersect_wh    = tf.maximum(intersect_maxes - intersect_mins, 0.)
+        intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]
+
+        true_areas = true_wh[..., 0] * true_wh[..., 1]
+        pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
+
+        union_areas = pred_areas + true_areas - intersect_areas
+        iou_scores  = tf.truediv(intersect_areas, union_areas)
+
+        best_ious = tf.reduce_max(iou_scores, axis=4)
+        conf_mask = conf_mask + tf.to_float(best_ious < 0.6) * (1 - y_true[..., 4]) * self.no_object_scale
+
+        # penalize the confidence of the boxes, which are responsible for corresponding ground truth box
+        conf_mask = conf_mask + y_true[..., 4] * self.object_scale
+
+        ### class mask: simply the position of the ground truth boxes (the predictors)
+        class_mask = y_true[..., 4] * tf.gather(self.class_wt, true_box_class) * self.class_scale
+
+        """
+        Warm-up training
+        """
+        no_boxes_mask = tf.to_float(coord_mask < self.coord_scale/2.)
+        seen = tf.assign_add(seen, 1.)
+
+        true_box_xy, true_box_wh, coord_mask = tf.cond(tf.less(seen, self.warmup_batches+1),
+                              lambda: [true_box_xy + (0.5 + cell_grid) * no_boxes_mask,
+                                       true_box_wh + tf.ones_like(true_box_wh) * \
+                                       np.reshape(self.anchors, [1,1,1,self.nb_box,2]) * \
+                                       no_boxes_mask,
+                                       tf.ones_like(coord_mask)],
+                              lambda: [true_box_xy,
+                                       true_box_wh,
+                                       coord_mask])
+
+        """
+        Finalize the loss
+        """
+        nb_coord_box = tf.reduce_sum(tf.to_float(coord_mask > 0.0))
+        nb_conf_box  = tf.reduce_sum(tf.to_float(conf_mask  > 0.0))
+        nb_class_box = tf.reduce_sum(tf.to_float(class_mask > 0.0))
+
+        loss_xy    = tf.reduce_sum(tf.square(true_box_xy-pred_box_xy)     * coord_mask) / (nb_coord_box + 1e-6) / 2.
+        loss_wh    = tf.reduce_sum(tf.square(true_box_wh-pred_box_wh)     * coord_mask) / (nb_coord_box + 1e-6) / 2.
+        loss_conf  = tf.reduce_sum(tf.square(true_box_conf-pred_box_conf) * conf_mask)  / (nb_conf_box  + 1e-6) / 2.
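+        # each term above is a masked sum of squared errors, averaged over the
+        # number of contributing cells; the 1e-6 guards against empty masks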
+        loss_class = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class)
+        loss_class = tf.reduce_sum(loss_class * class_mask) / (nb_class_box + 1e-6)
+
+        loss = tf.cond(tf.less(seen, self.warmup_batches+1),
+                      lambda: loss_xy + loss_wh + loss_conf + loss_class + 10,
+                      lambda: loss_xy + loss_wh + loss_conf + loss_class)
+
+        if self.debug:
+            nb_true_box = tf.reduce_sum(y_true[..., 4])
+            nb_pred_box = tf.reduce_sum(tf.to_float(true_box_conf > 0.5) * tf.to_float(pred_box_conf > 0.3))
+
+            current_recall = nb_pred_box/(nb_true_box + 1e-6)
+            total_recall = tf.assign_add(total_recall, current_recall)
+
+            loss = tf.Print(loss, [loss_xy], message='Loss XY \t', summarize=1000)
+            loss = tf.Print(loss, [loss_wh], message='Loss WH \t', summarize=1000)
+            loss = tf.Print(loss, [loss_conf], message='Loss Conf \t', summarize=1000)
+            loss = tf.Print(loss, [loss_class], message='Loss Class \t', summarize=1000)
+            loss = tf.Print(loss, [loss], message='Total Loss \t', summarize=1000)
+            loss = tf.Print(loss, [current_recall], message='Current Recall \t', summarize=1000)
+            loss = tf.Print(loss, [total_recall/seen], message='Average Recall \t', summarize=1000)
+
+        return loss
+
+    def load_weights(self, weight_path):
+        self.model.load_weights(weight_path)
+
+    def eval(self, valid_imgs,      # the list of images used for the validation evaluation
+                   test_imgs,       # the list of images used for the test evaluation
+                   learning_rate,   # the learning rate
+                   batch_size,      # the size of the batch
+                   object_scale,
+                   no_object_scale,
+                   coord_scale,
+                   class_scale,
+                   debug=False):
+
+        self.batch_size = batch_size
+
+        self.object_scale    = object_scale
+        self.no_object_scale = no_object_scale
+        self.coord_scale     = coord_scale
+        self.class_scale     = class_scale
+
+        self.debug = debug
+
+        ############################################
+        # Make validation and test generators
+        ############################################
+
+        generator_config = {
+            'IMAGE_H'         : self.input_size,
+            'IMAGE_W'         : self.input_size,
+            'GRID_H'          : self.grid_h,
+            'GRID_W'          : self.grid_w,
+            'BOX'             : self.nb_box,
+            'LABELS'          : self.labels,
+            'CLASS'           : len(self.labels),
+            'ANCHORS'         : self.anchors,
+            'BATCH_SIZE'      : self.batch_size,
+            'TRUE_BOX_BUFFER' : self.max_box_per_image,
+        }
+
+        valid_generator = BatchGenerator(valid_imgs,
+                                         generator_config,
+                                         norm=self.feature_extractor.normalize,
+                                         jitter=False)
+
+        test_generator = BatchGenerator(test_imgs,
+                                        generator_config,
+                                        norm=self.feature_extractor.normalize,
+                                        jitter=False)
+
+        self.warmup_batches = 0
+
+        ############################################
+        # Compile the model
+        ############################################
+
+        optimizer = Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
+        self.model.compile(loss=self.custom_loss, optimizer=optimizer)
+
+        ############################################
+        # Make a few callbacks (unused for evaluation)
+        ############################################
+
+        # early_stop = EarlyStopping(monitor='val_loss',
+        #                    min_delta=0.0001,
+        #                    patience=5,
+        #                    mode='min',
+        #                    verbose=1)
+        # checkpoint = ModelCheckpoint(saved_weights_name,
+        #                      monitor='val_loss',
+        #                      verbose=1,
+        #                      save_best_only=True,
+        #                      mode='min',
+        #                      period=1)
+        # tensorboard = TensorBoard(log_dir=os.path.expanduser('./logs/'),
+        #                   histogram_freq=0,
+        #                   #write_batch_performance=True,
+        #                   write_graph=True,
+        #                   write_images=False)
+
+        ############################################
+        # Training is skipped here; only evaluation runs
+        ############################################
+
+        # self.model.fit_generator(generator        = train_generator,
+        #                          steps_per_epoch  = len(train_generator) * train_times,
+        #                          epochs           = warmup_epochs + nb_epochs,
+        #                          verbose          = 2 if debug else 1,
+        #                          validation_data  = valid_generator,
+        #                          validation_steps = len(valid_generator) * valid_times,
+        #                          callbacks        = [early_stop, checkpoint, tensorboard],
+        #                          workers          = 3,
+        #                          max_queue_size   = 8)
+
+        ############################################
+        # Compute mAP on the validation set
+        ############################################
+        print()
+        print('Valid Evaluate')
+        average_precisions, timeHistory = self.evaluate(valid_generator)
+
+        # print evaluation
+        for label, average_precision in average_precisions.items():
+            print(self.labels[label], '{:.4f}'.format(average_precision))
+        print('Valid mAP: {:.4f}'.format(sum(average_precisions.values()) / len(average_precisions)))
+
+        print()
+        print('Total Images: ', len(timeHistory))
+        print('Elapsed Time: {:.4f}'.format(sum(timeHistory)))
+        print('Avg Image Time: {:.4f}'.format(sum(timeHistory)/len(timeHistory)))
+        print('FPS: ', int(1/(sum(timeHistory)/len(timeHistory))))
+
+        print()
+        print('Test Evaluate')
+        average_precisions, timeHistory = self.evaluate(test_generator)
+
+        # print evaluation
+        for label, average_precision in average_precisions.items():
+            print(self.labels[label], '{:.4f}'.format(average_precision))
+        print('Test mAP: {:.4f}'.format(sum(average_precisions.values()) / len(average_precisions)))
+
+        # drop the first timing: it includes one-off model warm-up cost
+        timeHistory = timeHistory[1:]
+
+        print()
+        print('Total Images: ', len(timeHistory))
+        print('Elapsed Time: {:.4f}'.format(sum(timeHistory)))
+        print('Avg Image Time: {:.4f}'.format(statistics.mean(timeHistory)))
+        print('Std Deviation: {:.4f}'.format(statistics.stdev(timeHistory)))
+        print('FPS: ', int(1/statistics.mean(timeHistory)))
+
+        #print(timeHistory)
+
+    def evaluate(self,
+                 generator,
+                 iou_threshold=0.3,
+                 score_threshold=0.3,
+                 max_detections=100,
+                 save_path=None):
+        """ Evaluate a given dataset using a given model.
+            code originally from https://github.com/fizyr/keras-retinanet
+
+            # Arguments
+                generator       : The generator that represents the dataset to evaluate.
+                model           : The model to evaluate.
+                iou_threshold   : The threshold used to consider when a detection is positive or negative.
+                score_threshold : The score confidence threshold to use for detections.
+                max_detections  : The maximum number of detections to use per image.
+                save_path       : The path to save images with visualized detections to.
+            # Returns
+                A dict mapping class indices to average precision scores, plus the
+                list of per-image prediction times.
+ """ + # gather all detections and annotations + all_detections = [[None for i in range(generator.num_classes())] for j in range(generator.size())] + all_annotations = [[None for i in range(generator.num_classes())] for j in range(generator.size())] + + mAP = DetectionMAP(4,100,0.3) # Initialise metric + + timeHistory = [] + for i in range(generator.size()): + raw_image = generator.load_image(i) + raw_height, raw_width, raw_channels = raw_image.shape + + # make the boxes and the labels + start = time.time() + pred_boxes = self.predict(raw_image) + timeHistory.append(time.time() - start) + + score = np.array([box.score for box in pred_boxes]) + pred_labels = np.array([box.label for box in pred_boxes]) + + if len(pred_boxes) > 0: + pred_boxes = np.array([[box.xmin*raw_width, box.ymin*raw_height, box.xmax*raw_width, box.ymax*raw_height, box.score] for box in pred_boxes]) + else: + pred_boxes = np.array([[]]) + + # sort the boxes and the labels according to scores + score_sort = np.argsort(-score) + pred_labels = pred_labels[score_sort] + pred_boxes = pred_boxes[score_sort] + + # copy detections to all_detections + for label in range(generator.num_classes()): + all_detections[i][label] = pred_boxes[pred_labels == label, :] + + annotations = generator.load_annotation(i) + + # copy detections to all_annotations + for label in range(generator.num_classes()): + all_annotations[i][label] = annotations[annotations[:, 4] == label, :4].copy() + + + # #=================================================== + # # INICIO PR + + # print(pred_boxes[:,0:4]) + # print(pred_labels) + # print(score) + # print(annotations[:,0:4]) + # print(annotations[:,4]) + + mAP.evaluate(pred_boxes[:,0:4], pred_labels, score, annotations[:,0:4], annotations[:,4]) + + # # FINAL PR + # #=================================================== + + + # compute mAP by comparing all detections and all annotations + average_precisions = {} + + for label in range(generator.num_classes()): + false_positives = np.zeros((0,)) + true_positives = np.zeros((0,)) + scores = np.zeros((0,)) + num_annotations = 0.0 + + for i in range(generator.size()): + detections = all_detections[i][label] + annotations = all_annotations[i][label] + num_annotations += annotations.shape[0] + detected_annotations = [] + + for d in detections: + scores = np.append(scores, d[4]) + + if annotations.shape[0] == 0: + false_positives = np.append(false_positives, 1) + true_positives = np.append(true_positives, 0) + continue + + overlaps = compute_overlap(np.expand_dims(d, axis=0), annotations) + assigned_annotation = np.argmax(overlaps, axis=1) + max_overlap = overlaps[0, assigned_annotation] + + if max_overlap >= iou_threshold and assigned_annotation not in detected_annotations: + false_positives = np.append(false_positives, 0) + true_positives = np.append(true_positives, 1) + detected_annotations.append(assigned_annotation) + else: + false_positives = np.append(false_positives, 1) + true_positives = np.append(true_positives, 0) + + # no annotations -> AP for this class is 0 (is this correct?) 
diff --git a/predict.py b/predict.py
index 544e29ea6..10c84147c 100644
--- a/predict.py
+++ b/predict.py
@@ -9,6 +9,7 @@
 from utils import draw_boxes
 from frontend import YOLO
 import json
+import xml.etree.ElementTree as ET
 
 os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
 os.environ["CUDA_VISIBLE_DEVICES"]="0"
@@ -31,6 +32,12 @@
     '--input',
     help='path to an image or an video (mp4 format)')
 
+argparser.add_argument(
+    '-a',
+    '--annotFile',
+    help='annotation file (Pascal VOC XML)')
+
+
 def _main_(args):
     config_path = args.conf
     weights_path = args.weights
@@ -68,15 +75,18 @@ def _main_(args):
         frame_w = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))
 
         video_writer = cv2.VideoWriter(video_out,
-                               cv2.VideoWriter_fourcc(*'MPEG'),
-                               50.0,
-                               (frame_w, frame_h))
+                               cv2.VideoWriter_fourcc(*'mp4v'),
+                               30.0,
+                               (frame_h, frame_w), True) # size swapped: the video is rotated 90 degrees below
 
         for i in tqdm(range(nb_frames)):
             _, image = video_reader.read()
+            image = np.rot90(image,3)
+            image = image.copy() # np.rot90 returns a non-contiguous view; copy before OpenCV drawing
 
             boxes = yolo.predict(image)
-            image = draw_boxes(image, boxes, config['model']['labels'])
+            #image = draw_boxes(image, boxes, config['model']['labels'], 20, 3.5, -90)
+            image = draw_boxes(image, boxes, config['model']['labels'], 2, 1.1, -30)
 
             video_writer.write(np.uint8(image))
 
@@ -84,8 +94,36 @@ def _main_(args):
         video_writer.release()
     else:
         image = cv2.imread(image_path)
+
+        if args.annotFile is not None:
+            boxes_ann = []
+            tree = ET.parse(args.annotFile)
+            for elem in tree.iter():
+                if 'object' in elem.tag or 'part' in elem.tag:
+                    obj = {}
+
+                    for attr in list(elem):
+                        if 'name' in attr.tag:
+                            obj['name'] = attr.text
+                            boxes_ann.append(obj) # obj keeps filling in below through this reference
+
+                        if 'bndbox' in attr.tag:
+                            for dim in list(attr):
+                                if 'xmin' in dim.tag:
+                                    obj['xmin'] = int(round(float(dim.text)))
+                                if 'ymin' in dim.tag:
+                                    obj['ymin'] = int(round(float(dim.text)))
+                                if 'xmax' in dim.tag:
+                                    obj['xmax'] = int(round(float(dim.text)))
+                                if 'ymax' in dim.tag:
+                                    obj['ymax'] = int(round(float(dim.text)))
+
+            # draw the ground-truth boxes before overlaying the predictions
+            for box in boxes_ann:
+                cv2.rectangle(image, (box['xmin'],box['ymin']), (box['xmax'],box['ymax']), (255,0,0), 30)
+
         boxes = yolo.predict(image)
-        image = draw_boxes(image, boxes, config['model']['labels'])
+        image = draw_boxes(image, boxes, config['model']['labels'], 30, 4.5, 35)
 
         print(len(boxes), 'boxes are found')
diff --git a/utils.py b/utils.py
index 81116aa71..cd7759901 100644
--- a/utils.py
+++ b/utils.py
@@ -4,6 +4,7 @@
 import tensorflow as tf
 import copy
 import cv2
+from scipy.special import expit
 
 class BoundBox:
     def __init__(self, xmin, ymin, xmax, ymax, c = None, classes = None):
@@ -55,23 +56,45 @@ def bbox_iou(box1, box2):
 
     return float(intersect) / union
 
-def draw_boxes(image, boxes, labels):
+def draw_boxes(image, boxes, labels, thickness=3, fonte=None, position=13):
     image_h, image_w, _ = image.shape
 
+    colors = []
+    colors.append((0,0,255))
+    colors.append((0,255,0))
+    colors.append((255,0,0))
+    colors.append((0,255,255))
+    colors.append((255,0,255))
+    colors.append((255,255,0))
+
     for box in boxes:
+        # if box.get_label()==1:
+        #     continue
+
+        cor = colors[box.get_label()]
+
         xmin = int(box.xmin*image_w)
         ymin = int(box.ymin*image_h)
         xmax = int(box.xmax*image_w)
         ymax = int(box.ymax*image_h)
 
-        cv2.rectangle(image, (xmin,ymin), (xmax,ymax), (0,255,0), 3)
+        if fonte is None:
+            fonte = int(1e-3 * image_h)
+
+        cv2.rectangle(image, (xmin,ymin), (xmax,ymax), cor, thickness)
         cv2.putText(image,
-                    labels[box.get_label()] + ' ' + str(box.get_score()),
-                    (xmin, ymin - 13),
+                    labels[box.get_label()],
+                    (xmin, ymin - position),
                     cv2.FONT_HERSHEY_SIMPLEX,
-                    1e-3 * image_h,
-                    (0,255,0), 2)
-
+                    fonte,
+                    cor, int(thickness))
+        cv2.putText(image,
+                    str(box.get_score()),
+                    (xmin, ymin - (position*2)),
+                    cv2.FONT_HERSHEY_SIMPLEX,
+                    fonte,
+                    cor, int(thickness))
+
     return image
 
 def decode_netout(netout, anchors, nb_class, obj_threshold=0.3, nms_threshold=0.3):
@@ -195,7 +218,8 @@ def _interval_overlap(interval_a, interval_b):
             return min(x2,x4) - x3
 
 def _sigmoid(x):
-    return 1. / (1. + np.exp(-x))
+    return expit(x)
+    #return 1. / (1. + np.exp(-x))
 
 def _softmax(x, axis=-1, t=-100.):
     x = x - np.max(x)
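A note on the `_sigmoid` change above: `scipy.special.expit` evaluates the
logistic function without the overflow warnings that the naive formula triggers
for large-magnitude inputs. A quick sketch of the difference:

```python
import numpy as np
from scipy.special import expit

x = np.array([-1000.0, 0.0, 1000.0])
print(expit(x))                 # [0.  0.5 1. ] with no warnings
print(1. / (1. + np.exp(-x)))   # same values, but np.exp overflows and warns at x = -1000
```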