Skip to content

Speed Estimation #1343

@likith1908

Description

@likith1908

Search before asking

  • I have searched the Supervision issues and found no similar feature requests.

Question

Resource used: I followed the steps shown in this YouTube video: https://www.youtube.com/watch?v=uWP6UjDeZvY

I am detecting tokens on a treadmill and want to estimate their speed, but I always get a speed of 0.0 km/h, since the belt moves very slowly. My source and target regions are given below:

SOURCE = np.array([[0, 0], [2868, 0], [2868, 1104], [0, 1104]])

TARGET_WIDTH = 0.758825
TARGET_HEIGHT = 0.29210

TARGET = np.array([
    [0, 0],
    [TARGET_WIDTH, 0],
    [TARGET_WIDTH, TARGET_HEIGHT],
    [0, TARGET_HEIGHT]
])

Video

link : https://drive.google.com/file/d/1z5QKJ9g7mCbz14N9NDoeso5YVskJwlig/view?usp=sharing

Since my video has a resolution of 2868 × 1104,
I have measured the corresponding real-life dimensions as follows.

My code

import argparse
import supervision as sv
import cv2
from ultralytics import YOLO
import numpy as np
from collections import defaultdict, deque

# Pixel-space corners of the region of interest: the full 2868 x 1104 frame,
# listed clockwise from the top-left corner.
SOURCE = np.array([[0, 0], [2868, 0], [2868, 1104], [0, 1104]])

# Real-world size of the same region (presumably metres — confirm with the
# physical measurement used when calibrating).
TARGET_WIDTH = 0.758825
TARGET_HEIGHT = 0.29210

# Real-world corners, in the same clockwise order as SOURCE, so the
# perspective transform maps pixel corners onto real-world corners.
TARGET = np.array(
    [
        [0, 0],
        [TARGET_WIDTH, 0],
        [TARGET_WIDTH, TARGET_HEIGHT],
        [0, TARGET_HEIGHT],
    ]
)

class ViewTransformer:
    """Maps points from image (pixel) space to real-world space via a
    perspective (homography) transform computed from four corner pairs."""

    def __init__(self, source: np.ndarray, target: np.ndarray) -> None:
        """Build the transform.

        Args:
            source: (4, 2) array of pixel-space corner points.
            target: (4, 2) array of the corresponding real-world points.

        Note: the original signature used ``source=np.ndarray`` which made the
        *class object* the default value rather than a type annotation; both
        arguments are now required and properly annotated.
        """
        source = source.astype(np.float32)
        target = target.astype(np.float32)
        # 3x3 homography mapping source corners onto target corners.
        self.m = cv2.getPerspectiveTransform(source, target)

    def transform_points(self, points: np.ndarray) -> np.ndarray:
        """Apply the perspective transform to an (N, 2) array of points.

        Returns an (N, 2) float array; an empty input yields an empty (0, 2)
        array (previously a shape-(0,) array, inconsistent with the non-empty
        return shape).
        """
        if points.size == 0:
            return np.empty((0, 2), dtype=np.float32)
        reshaped_points = points.reshape(-1, 1, 2).astype(np.float32)
        transformed_points = cv2.perspectiveTransform(reshaped_points, self.m)
        return transformed_points.reshape(-1, 2)

def parse_arguments() -> argparse.Namespace:
    """Parse command-line options for the speed-estimation script.

    Returns:
        Namespace with ``source_video_path`` (str), defaulting to a local
        test video when the flag is omitted.
    """
    parser = argparse.ArgumentParser(
        description="Speed Estimation using Ultralytics and Supervision",
    )
    parser.add_argument(
        "--source_video_path",
        type=str,
        required=False,
        default="/home/harvestedlabs/Desktop/Codes/test.mp4",
        help="Path to the source video file",
    )
    return parser.parse_args()

def main():
    """Run speed estimation over the source video and display annotated frames.

    Pipeline per frame: YOLO detection -> zone filter -> ByteTrack tracking ->
    perspective-transform bottom-centre anchors into real-world coordinates ->
    estimate each track's speed (km/h) from its vertical displacement over the
    last ~1 second of positions.
    """
    args = parse_arguments()
    video_info = sv.VideoInfo.from_video_path(args.source_video_path)
    model = YOLO("/home/harvestedlabs/Desktop/Codes/Likith/token.pt")
    byte_track = sv.ByteTrack(frame_rate=video_info.fps)
    thickness = sv.calculate_optimal_line_thickness(resolution_wh=video_info.resolution_wh)
    text_scale = sv.calculate_optimal_text_scale(resolution_wh=video_info.resolution_wh)
    bounding_box_annotator = sv.BoundingBoxAnnotator(thickness=thickness)
    label_annotator = sv.LabelAnnotator(text_scale=text_scale, text_thickness=thickness)
    frame_generator = sv.get_video_frames_generator(args.source_video_path)
    polygon_zone = sv.PolygonZone(SOURCE, frame_resolution_wh=video_info.resolution_wh)
    view_transformer = ViewTransformer(SOURCE, TARGET)
    # One deque of recent y-positions per tracker id, holding ~1 second of
    # history. fps can be a float, so coerce for deque's maxlen.
    coordinates = defaultdict(lambda: deque(maxlen=int(video_info.fps)))

    for frame in frame_generator:
        result = model(frame)[0]
        detections = sv.Detections.from_ultralytics(result)
        detections = detections[polygon_zone.trigger(detections)]
        detections = byte_track.update_with_detections(detections=detections)

        points = detections.get_anchors_coordinates(anchor=sv.Position.BOTTOM_CENTER)
        if points.size > 0:
            # BUG FIX: do NOT cast to int here. TARGET is expressed in
            # real-world units smaller than 1 (e.g. 0.758 x 0.292), so every
            # transformed coordinate is < 1.0 and astype(int) truncated them
            # all to 0 — which is why every computed speed was 0.0 km/h.
            points = view_transformer.transform_points(points=points)
        else:
            print("No points detected in the frame.")

        labels = []
        for tracker_id, (_, y) in zip(detections.tracker_id, points):
            coordinates[tracker_id].append(y)
            if len(coordinates[tracker_id]) < video_info.fps / 2:
                # Not enough history yet for a stable estimate.
                labels.append(f"#{tracker_id}")
            else:
                # Oldest vs. newest y-position over the buffered window.
                y_oldest = coordinates[tracker_id][0]
                y_newest = coordinates[tracker_id][-1]
                distance = abs(y_newest - y_oldest)
                time = len(coordinates[tracker_id]) / video_info.fps
                # m/s -> km/h (assumes TARGET units are metres).
                speed = (distance / time) * 3.6
                labels.append(f"#{tracker_id}, {speed:.2f} kmph")

        annotated_frame = frame.copy()
        annotated_frame = bounding_box_annotator.annotate(scene=annotated_frame, detections=detections)
        annotated_frame = sv.draw_polygon(annotated_frame, polygon=SOURCE, color=sv.Color.RED)
        annotated_frame = label_annotator.annotate(scene=annotated_frame, detections=detections, labels=labels)

        cv2.namedWindow("Annotated Frame", cv2.WINDOW_NORMAL)
        cv2.imshow("Annotated Frame", annotated_frame)

        if cv2.waitKey(1) == ord("q"):
            break

    cv2.destroyAllWindows()

if __name__ == "__main__":
    main()

Can someone explain how to resolve this issue?

Additional

No response

Metadata

Metadata

Assignees

No one assigned

    Labels

question — Further information is requested

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions