Skip to content

Speed Estimation #1343

@likith1908

Description

@likith1908

Search before asking

  • I have searched the Supervision issues and found no similar feature requests.

Question

Resource used: I followed the steps shown in this YouTube video: https://www.youtube.com/watch?v=uWP6UjDeZvY

I am detecting tokens on a treadmill and want to estimate their speed, but I always get a speed of 0.0 km/h, since the belt moves very slowly. My source and target regions are given below:

SOURCE = np.array([[0, 0], [2868, 0], [2868, 1104], [0, 1104]])

TARGET_WIDTH = 0.758825
TARGET_HEIGHT = 0.29210

TARGET = np.array([
    [0, 0],
    [TARGET_WIDTH, 0],
    [TARGET_WIDTH, TARGET_HEIGHT],
    [0, TARGET_HEIGHT]
])

Video

link : https://drive.google.com/file/d/1z5QKJ9g7mCbz14N9NDoeso5YVskJwlig/view?usp=sharing

Since my video has a resolution of 2868 × 1104,
I have measured the corresponding real-life dimensions as follows.

My code

import argparse
import supervision as sv
import cv2
from ultralytics import YOLO
import numpy as np
from collections import defaultdict, deque

# Pixel-space corners of the region of interest: the full 2868 x 1104 frame,
# listed clockwise from the top-left corner.
SOURCE = np.array([[0, 0], [2868, 0], [2868, 1104], [0, 1104]])

# Real-world size of the same region (presumably metres — confirm with the
# physical measurement used when calibrating).
TARGET_WIDTH = 0.758825
TARGET_HEIGHT = 0.29210

# Real-world corners, in the same clockwise order as SOURCE, so the
# perspective transform maps pixel corners onto real-world corners.
TARGET = np.array(
    [
        [0, 0],
        [TARGET_WIDTH, 0],
        [TARGET_WIDTH, TARGET_HEIGHT],
        [0, TARGET_HEIGHT],
    ]
)

class ViewTransformer:
    """Maps points from image (pixel) space to real-world space via a
    perspective (homography) transform computed from four corner pairs."""

    def __init__(self, source: np.ndarray, target: np.ndarray) -> None:
        """Build the transform.

        Args:
            source: (4, 2) array of pixel-space corner points.
            target: (4, 2) array of the corresponding real-world points.

        Note: the original signature used ``source=np.ndarray`` which made the
        *class object* the default value rather than a type annotation; both
        arguments are now required and properly annotated.
        """
        source = source.astype(np.float32)
        target = target.astype(np.float32)
        # 3x3 homography mapping source corners onto target corners.
        self.m = cv2.getPerspectiveTransform(source, target)

    def transform_points(self, points: np.ndarray) -> np.ndarray:
        """Apply the perspective transform to an (N, 2) array of points.

        Returns an (N, 2) float array; an empty input yields an empty (0, 2)
        array (previously a shape-(0,) array, inconsistent with the non-empty
        return shape).
        """
        if points.size == 0:
            return np.empty((0, 2), dtype=np.float32)
        reshaped_points = points.reshape(-1, 1, 2).astype(np.float32)
        transformed_points = cv2.perspectiveTransform(reshaped_points, self.m)
        return transformed_points.reshape(-1, 2)

def parse_arguments() -> argparse.Namespace:
    """Parse command-line options for the speed-estimation script.

    Returns:
        Namespace with ``source_video_path`` (str), defaulting to a local
        test video when the flag is omitted.
    """
    parser = argparse.ArgumentParser(
        description="Speed Estimation using Ultralytics and Supervision",
    )
    parser.add_argument(
        "--source_video_path",
        type=str,
        required=False,
        default="/home/harvestedlabs/Desktop/Codes/test.mp4",
        help="Path to the source video file",
    )
    return parser.parse_args()

def main():
    """Run speed estimation over the source video and display annotated frames.

    Pipeline per frame: YOLO detection -> zone filter -> ByteTrack tracking ->
    perspective-transform bottom-centre anchors into real-world coordinates ->
    estimate each track's speed (km/h) from its vertical displacement over the
    last ~1 second of positions.
    """
    args = parse_arguments()
    video_info = sv.VideoInfo.from_video_path(args.source_video_path)
    model = YOLO("/home/harvestedlabs/Desktop/Codes/Likith/token.pt")
    byte_track = sv.ByteTrack(frame_rate=video_info.fps)
    thickness = sv.calculate_optimal_line_thickness(resolution_wh=video_info.resolution_wh)
    text_scale = sv.calculate_optimal_text_scale(resolution_wh=video_info.resolution_wh)
    bounding_box_annotator = sv.BoundingBoxAnnotator(thickness=thickness)
    label_annotator = sv.LabelAnnotator(text_scale=text_scale, text_thickness=thickness)
    frame_generator = sv.get_video_frames_generator(args.source_video_path)
    polygon_zone = sv.PolygonZone(SOURCE, frame_resolution_wh=video_info.resolution_wh)
    view_transformer = ViewTransformer(SOURCE, TARGET)
    # One deque of recent y-positions per tracker id, holding ~1 second of
    # history. fps can be a float, so coerce for deque's maxlen.
    coordinates = defaultdict(lambda: deque(maxlen=int(video_info.fps)))

    for frame in frame_generator:
        result = model(frame)[0]
        detections = sv.Detections.from_ultralytics(result)
        detections = detections[polygon_zone.trigger(detections)]
        detections = byte_track.update_with_detections(detections=detections)

        points = detections.get_anchors_coordinates(anchor=sv.Position.BOTTOM_CENTER)
        if points.size > 0:
            # BUG FIX: do NOT cast to int here. TARGET is expressed in
            # real-world units smaller than 1 (e.g. 0.758 x 0.292), so every
            # transformed coordinate is < 1.0 and astype(int) truncated them
            # all to 0 — which is why every computed speed was 0.0 km/h.
            points = view_transformer.transform_points(points=points)
        else:
            print("No points detected in the frame.")

        labels = []
        for tracker_id, (_, y) in zip(detections.tracker_id, points):
            coordinates[tracker_id].append(y)
            if len(coordinates[tracker_id]) < video_info.fps / 2:
                # Not enough history yet for a stable estimate.
                labels.append(f"#{tracker_id}")
            else:
                # Oldest vs. newest y-position over the buffered window.
                y_oldest = coordinates[tracker_id][0]
                y_newest = coordinates[tracker_id][-1]
                distance = abs(y_newest - y_oldest)
                time = len(coordinates[tracker_id]) / video_info.fps
                # m/s -> km/h (assumes TARGET units are metres).
                speed = (distance / time) * 3.6
                labels.append(f"#{tracker_id}, {speed:.2f} kmph")

        annotated_frame = frame.copy()
        annotated_frame = bounding_box_annotator.annotate(scene=annotated_frame, detections=detections)
        annotated_frame = sv.draw_polygon(annotated_frame, polygon=SOURCE, color=sv.Color.RED)
        annotated_frame = label_annotator.annotate(scene=annotated_frame, detections=detections, labels=labels)

        cv2.namedWindow("Annotated Frame", cv2.WINDOW_NORMAL)
        cv2.imshow("Annotated Frame", annotated_frame)

        if cv2.waitKey(1) == ord("q"):
            break

    cv2.destroyAllWindows()

if __name__ == "__main__":
    main()

Can someone explain how to resolve this issue?

Additional

No response

Metadata

Metadata

Assignees

No one assigned

    Labels

question — Further information is requested

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions