Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion inference/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def inference(args):
batch_size=batch_size,
pose_model_path=pose_model_path)
else:
raise ValueError("device should be 'x86' or 'jetson-tx2' but you provided {0}".format(device))
raise ValueError("device should be 'x86' or 'jetson' but you provided {0}".format(device))
video_uri = args.input_video
if not os.path.isfile(video_uri):
raise FileNotFoundError('video file does not exist under: {}'.format(video_uri))
Expand Down
27 changes: 11 additions & 16 deletions models/jetson_pose_estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
from adaptive_object_detection.detectors.jetson_detector import JetsonDetector
from .base_pose_estimator import BasePoseEstimator
from tools.convert_results_format import prepare_detection_results
from tools.bbox import box_to_center_scale, center_scale_to_box
from tools.bbox import vectorized_boxes_to_centers_scales, vectorized_centers_scales_to_boxes
from tools.pose_nms import pose_nms
from tools.transformations import get_affine_transform, get_max_pred
from tools.transformations import get_affine_transform, vectorized_affine_transform, get_max_pred
import numpy as np
import cv2

Expand Down Expand Up @@ -136,6 +136,7 @@ def transform_detections(self, image, dets):
dets = dets[dets[:, 0] == 0]
boxes = dets[:, 1:5]
scores = dets[:, 5:6]
rot = 0
ids = np.zeros(scores.shape)
inps = np.zeros([boxes.shape[0], int(input_size[0]), int(input_size[1]), 3])
cropped_boxes = np.zeros([boxes.shape[0], 4])
Expand All @@ -144,22 +145,16 @@ def transform_detections(self, image, dets):
image[..., 1] = image[..., 1] - 0.457
image[..., 2] = image[..., 2] - 0.480
aspect_ratio = input_size[1] / input_size[0]
for i, box in enumerate(boxes):
inps[i], cropped_box = self.transform_single_detection(image, box, input_size, aspect_ratio)
cropped_boxes[i] = np.float32(cropped_box)
centers, scales = vectorized_boxes_to_centers_scales(boxes, aspect_ratio)
cropped_boxes = vectorized_centers_scales_to_boxes(centers, scales)
dst, src = vectorized_affine_transform(centers, scales, rot, input_size)

for i, itm in enumerate(dst):
trans = cv2.getAffineTransform(np.float32(src[i,:,:]), np.float32(itm))
inps[i] = cv2.warpAffine(image, trans, (int(input_size[1]), int(input_size[0])), flags=cv2.INTER_LINEAR)

return inps, cropped_boxes, boxes, scores, ids

@staticmethod
def transform_single_detection(image, bbox, input_size, aspect_ratio):
xmin, ymin, xmax, ymax = bbox
center, scale = box_to_center_scale(
xmin, ymin, xmax - xmin, ymax - ymin, aspect_ratio)

trans = get_affine_transform(center, scale, 0, [input_size[1], input_size[0]])
img = cv2.warpAffine(image, trans, (int(input_size[1]), int(input_size[0])), flags=cv2.INTER_LINEAR)
bbox = center_scale_to_box(center, scale)
return img, bbox

def heatmap_to_coord(self, hms, bbox, hms_flip=None, **kwargs):
if hms_flip is not None:
hms = (hms + hms_flip) / 2
Expand Down
24 changes: 9 additions & 15 deletions models/x86_pose_estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
import cv2
from adaptive_object_detection.detectors.x86_detector import X86Detector
from tools.convert_results_format import prepare_detection_results
from tools.bbox import box_to_center_scale, center_scale_to_box
from tools.bbox import vectorized_boxes_to_centers_scales, vectorized_centers_scales_to_boxes
from tools.transformations import get_affine_transform, im_to_tensor
from tools.pose_nms import pose_nms
from tools.transformations import get_affine_transform, get_max_pred
from tools.transformations import get_affine_transform, get_max_pred, vectorized_affine_transform


class X86PoseEstimator(BasePoseEstimator):
Expand Down Expand Up @@ -104,6 +104,7 @@ def transform_detections(self, image, dets):
dets = dets[dets[:, 0] == 0]
boxes = dets[:, 1:5]
scores = dets[:, 5:6]
rot = 0
ids = np.zeros(scores.shape)
inps = np.zeros([boxes.shape[0], int(input_size[0]), int(input_size[1]), 3])
cropped_boxes = np.zeros([boxes.shape[0], 4])
Expand All @@ -112,22 +113,15 @@ def transform_detections(self, image, dets):
image[..., 1] = image[..., 1] - 0.457
image[..., 2] = image[..., 2] - 0.480
aspect_ratio = input_size[1] / input_size[0]
for i, box in enumerate(boxes):
inps[i], cropped_box = self.transform_single_detection(image, box, input_size, aspect_ratio)
cropped_boxes[i] = np.float32(cropped_box)
centers, scales = vectorized_boxes_to_centers_scales(boxes, aspect_ratio)
cropped_boxes = vectorized_centers_scales_to_boxes(centers, scales)
dst, src = vectorized_affine_transform(centers, scales, rot, input_size)
for i, itm in enumerate(dst):
trans = cv2.getAffineTransform(np.float32(src[i,:,:]), np.float32(itm))
inps[i] = cv2.warpAffine(image, trans, (int(input_size[1]), int(input_size[0])), flags=cv2.INTER_LINEAR)
inps = im_to_tensor(inps)
return inps, cropped_boxes, boxes, scores, ids

@staticmethod
def transform_single_detection(image, bbox, input_size, aspect_ratio):
xmin, ymin, xmax, ymax = bbox
center, scale = box_to_center_scale(
xmin, ymin, xmax - xmin, ymax - ymin, aspect_ratio)
trans = get_affine_transform(center, scale, 0, [input_size[1], input_size[0]])
img = cv2.warpAffine(image, trans, (int(input_size[1]), int(input_size[0])), flags=cv2.INTER_LINEAR)
bbox = center_scale_to_box(center, scale)
return img, bbox

def heatmap_to_coord(self, hms, bbox, hms_flip=None, **kwargs):
if hms_flip is not None:
hms = (hms + hms_flip) / 2
Expand Down
27 changes: 27 additions & 0 deletions tools/bbox.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,33 @@
import numpy as np


def vectorized_boxes_to_centers_scales(boxes, aspect_ratio=1.0, scale_mult=1.25):
    """Vectorized conversion of (N, 4) xyxy boxes to centers and scales.

    Parameters
    ----------
    boxes : np.ndarray
        Array of shape (N, 4) holding [xmin, ymin, xmax, ymax] rows.
    aspect_ratio : float
        Target width/height ratio; each box's short side is grown so the
        resulting scale matches this ratio.
    scale_mult : float
        Extra enlargement factor applied to every valid box.

    Returns
    -------
    (centers, scales) : tuple of np.ndarray
        Both of shape (N, 2), dtype float32.
    """
    w = boxes[:, 2] - boxes[:, 0]
    h = boxes[:, 3] - boxes[:, 1]

    centers = np.zeros((boxes.shape[0], 2), dtype=np.float32)
    centers[:, 0] = boxes[:, 0] + w * 0.5
    centers[:, 1] = boxes[:, 1] + h * 0.5

    # Grow the short side so every box matches the requested aspect ratio.
    # NOTE: the second mask is intentionally evaluated against the
    # already-updated h, mirroring the if/elif of the scalar
    # box_to_center_scale this function vectorizes.
    wide = w > aspect_ratio * h
    h[wide] = w[wide] / aspect_ratio
    tall = w < aspect_ratio * h
    w[tall] = h[tall] * aspect_ratio

    scales = np.zeros((boxes.shape[0], 2), dtype=np.float32)
    scales[:, 0] = w
    scales[:, 1] = h
    # A center x of -1 marks an invalid detection; only real boxes get
    # enlarged by scale_mult.
    valid = centers[:, 0] != -1
    scales[valid, :] = scales[valid, :] * scale_mult
    return centers, scales

def vectorized_centers_scales_to_boxes(centers, scales):
    """Rebuild (N, 4) xyxy boxes from per-row centers and scales.

    Vectorized counterpart of center_scale_to_box: each output row is
    [xmin, ymin, xmax, ymax] for the corresponding (center, scale) pair.
    """
    mins = centers - scales * 0.5
    maxs = mins + scales
    return np.concatenate((mins, maxs), axis=1)


def box_to_center_scale(x, y, w, h, aspect_ratio=1.0, scale_mult=1.25):
"""Convert box coordinates to center and scale.
adapted from https://github.com/Microsoft/human-pose-estimation.pytorch
Expand Down
39 changes: 39 additions & 0 deletions tools/transformations.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,45 @@
import cv2
import tensorflow as tf

def vectorized_affine_transform(centers, scales, rot, output_size, shift=None):
    """Vectorized counterpart of get_affine_transform.

    For every (center, scale) pair, builds the three source points and
    three destination points that cv2.getAffineTransform needs to map a
    (possibly rotated) crop onto the network input.

    Parameters
    ----------
    centers : np.ndarray of shape (N, 2)
        Crop centers in image coordinates.
    scales : np.ndarray of shape (N, 2)
        Crop width/height per box.
    rot : float
        Rotation in degrees (0 in the current callers).
    output_size : sequence
        Network input size; indexed as (output_size[0]=height,
        output_size[1]=width) — assumed (h, w), matching the callers.
    shift : np.ndarray of shape (N, 2), optional
        Shift of the source points expressed as a fraction of scale.
        Defaults to no shift, preserving the previous behavior.

    Returns
    -------
    (dst, src) : tuple of np.ndarray
        Each of shape (N, 3, 2), dtype float32.
    """
    n = scales.shape[0]
    src_w = scales[:, 0]
    dst_h = output_size[0]
    dst_w = output_size[1]

    if shift is None:
        shift = np.zeros(scales.shape, dtype=np.float32)

    rot_rad = np.pi * rot / 180
    # Direction of the second (top-middle) point, rotated by rot_rad.
    src_points = np.zeros(scales.shape, dtype=np.float32)
    src_points[:, 1] = src_w * -0.5
    src_dir = vectorized_get_dir(src_points, scales, rot_rad)
    dst_dir = np.zeros(scales.shape, dtype=np.float32)
    dst_dir[:, 1] = dst_w * -0.5

    src = np.zeros((n, 3, 2), dtype=np.float32)
    dst = np.zeros((n, 3, 2), dtype=np.float32)
    src[:, 0, :] = centers + scales * shift
    src[:, 1, :] = centers + src_dir + scales * shift
    dst[:, 0, :] = [dst_w * 0.5, dst_h * 0.5]
    dst[:, 1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
    # The third point completes the triangle perpendicular to the first two.
    src[:, 2, :] = vectorized_get_3d_point(src[:, 0, :], src[:, 1, :])
    dst[:, 2, :] = vectorized_get_3d_point(dst[:, 0, :], dst[:, 1, :])

    return dst, src

def vectorized_get_3d_point(a, b):
    """Return, per row, the point at b plus the 90-degree-rotated vector
    (a - b) — i.e. the third corner of the affine-transform triangle."""
    delta = a - b
    rotated = np.empty_like(delta)
    rotated[:, 0] = -delta[:, 1]
    rotated[:, 1] = delta[:, 0]
    return b + rotated

def vectorized_get_dir(src_points, scales, rot_rad):
    """Rotate every row of src_points by rot_rad radians (vectorized).

    `scales` only supplies the shape of the float32 output; the rotation
    itself is the standard 2-D rotation matrix applied row-wise.
    """
    sin_r = np.sin(rot_rad)
    cos_r = np.cos(rot_rad)
    x = src_points[:, 0]
    y = src_points[:, 1]
    rotated = np.zeros(scales.shape, dtype=np.float32)
    rotated[:, 0] = x * cos_r - y * sin_r
    rotated[:, 1] = x * sin_r + y * cos_r
    return rotated

def get_affine_transform(center,
scale,
Expand Down