Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/reference_resources.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ on:
workflow_dispatch:
pull_request:
paths:
- test/generate_reference_resources.sh
- test/generate_reference_resources.py
- .github/workflows/reference_resources.yaml # self reference
schedule:
- cron: '0 0 * * 0' # on sunday
Expand Down Expand Up @@ -38,7 +38,7 @@ jobs:
- name: Update pip
run: python -m pip install --upgrade pip

- name: Instal generation dependencies
- name: Install generation dependencies
run: |
# Note that we're installing stable - this is for running a script where we're a normal PyTorch
# user, not for building TorchCodec.
Expand All @@ -50,4 +50,4 @@ jobs:

- name: Run generation reference resources
run: |
test/generate_reference_resources.sh
python test/generate_reference_resources.py
24 changes: 0 additions & 24 deletions test/convert_image_to_tensor.py

This file was deleted.

123 changes: 123 additions & 0 deletions test/generate_reference_resources.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import subprocess
from pathlib import Path

import numpy as np

import torch
from PIL import Image

# Run this script to update the resources used in unit tests. The resources are all derived
# from source media already checked into the repo.


def convert_image_to_tensor(image_path):
    # Convert an on-disk image (e.g. a .bmp extracted by ffmpeg) into a
    # serialized torch tensor saved next to it as <name>.pt, then delete
    # the intermediate image file.
    #
    # Silently does nothing when the image is missing: frame extraction is
    # best-effort, and a frame ffmpeg could not produce should not abort
    # the whole generation run.
    image_path = Path(image_path)
    if not image_path.exists():
        return
    # Get base filename without extension; ".pt" is appended below.
    base_filename = image_path.with_suffix("")
    # Use a context manager so the file handle is closed even if the numpy
    # conversion raises (the original leaked the PIL handle). np.asarray()
    # forces a full decode, so the data outlives the closed file.
    with Image.open(image_path) as pil_image:
        img_tensor = torch.from_numpy(np.asarray(pil_image))
    # Save tensor to disk
    torch.save(
        img_tensor, str(base_filename) + ".pt", _use_new_zipfile_serialization=True
    )
    # The image was only an intermediate artifact; the .pt is the resource.
    image_path.unlink()


def get_frame_by_index(video_path, frame, output_path, stream):
    """Extract one frame, addressed by 0-based index, from one stream of a
    video using ffmpeg, writing the result to output_path."""
    # select=eq(n\,FRAME) keeps exactly the frame whose decode index matches;
    # -vsync vfr stops ffmpeg from padding the dropped frames with duplicates.
    select_filter = f"select=eq(n\\,{frame})"
    subprocess.run(
        [
            "ffmpeg",
            "-y",  # overwrite any stale output file
            "-i", video_path,
            "-map", f"0:{stream}",  # restrict to the requested stream
            "-vf", select_filter,
            "-vsync", "vfr",
            "-q:v", "2",  # high-quality image output
            output_path,
        ],
        check=True,  # raise if ffmpeg exits non-zero
    )


def get_frame_by_timestamp(video_path, timestamp, output_path):
    """Extract the single frame at the given timestamp (in seconds) from a
    video using ffmpeg, writing the result to output_path."""
    # -ss placed before -i seeks on the input; -frames:v 1 stops after the
    # first decoded frame at that position.
    ffmpeg_args = ["ffmpeg", "-y"]
    ffmpeg_args += ["-ss", str(timestamp)]
    ffmpeg_args += ["-i", video_path]
    ffmpeg_args += ["-frames:v", "1"]
    ffmpeg_args.append(output_path)
    subprocess.run(ffmpeg_args, check=True)


def main():
    """Regenerate the reference resources used by the unit tests.

    For each source video under test/resources/, extract reference frames
    with ffmpeg (by index and, for the nasa video, by timestamp), convert
    each extracted .bmp into a serialized torch tensor, and delete the
    intermediate .bmp. Output filenames must not change: they are the
    lookup keys used by ./utils.py.
    """
    SCRIPT_DIR = Path(__file__).resolve().parent
    TORCHCODEC_PATH = SCRIPT_DIR.parent
    RESOURCES_DIR = TORCHCODEC_PATH / "test" / "resources"
    VIDEO_PATH = RESOURCES_DIR / "nasa_13013.mp4"

    # Last generated with ffmpeg version 4.3
    #
    # Note: The naming scheme used here must match the naming scheme used to load
    # tensors in ./utils.py.
    STREAMS = [0, 3]
    FRAMES = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 15, 20, 25, 30, 35, 386, 387, 388, 389]
    for stream in STREAMS:
        for frame in FRAMES:
            # Note that we are using 0-based index naming. Asking ffmpeg to number output
            # frames would result in 1-based index naming. We enforce 0-based index naming
            # so that the name of reference frames matches the index when accessing that
            # frame in the Python decoder.
            output_bmp = f"{VIDEO_PATH}.stream{stream}.frame{frame:06d}.bmp"
            get_frame_by_index(VIDEO_PATH, frame, output_bmp, stream=stream)
            convert_image_to_tensor(output_bmp)

    # Extract individual frames at specific timestamps, including the last frame of the video.
    # (Fixed: the original comprehension shadowed the list name with its own
    # loop variable.) The :06f format has default precision 6, producing the
    # exact names (e.g. "6.000000") that utils.py loads.
    seek_timestamps = [6.0, 6.1, 10.0, 12.979633]
    for timestamp in seek_timestamps:
        output_bmp = f"{VIDEO_PATH}.time{timestamp:06f}.bmp"
        get_frame_by_timestamp(VIDEO_PATH, timestamp, output_bmp)
        convert_image_to_tensor(output_bmp)

    # This video was generated by running the following:
    # conda install -c conda-forge x265
    # ./configure --enable-nonfree --enable-gpl --prefix=$(readlink -f ../bin) --enable-libx265 --enable-rpath --extra-ldflags=-Wl,-rpath=$CONDA_PREFIX/lib --enable-filter=drawtext --enable-libfontconfig --enable-libfreetype --enable-libharfbuzz
    # ffmpeg -f lavfi -i color=size=128x128:duration=1:rate=10:color=blue -vf "drawtext=fontsize=30:fontcolor=white:x=(w-text_w)/2:y=(h-text_h)/2:text='Frame %{frame_num}'" -vcodec libx265 -pix_fmt yuv420p -g 2 -crf 10 h265_video.mp4 -y
    # Note that this video only has 1 stream, at index 0.
    VIDEO_PATH = RESOURCES_DIR / "h265_video.mp4"
    FRAMES = [5]
    for frame in FRAMES:
        output_bmp = f"{VIDEO_PATH}.stream0.frame{frame:06d}.bmp"
        get_frame_by_index(VIDEO_PATH, frame, output_bmp, stream=0)
        convert_image_to_tensor(output_bmp)

    # This video was generated by running the following:
    # ffmpeg -f lavfi -i testsrc=duration=5:size=640x360:rate=25,format=yuv420p -c:v libaom-av1 -crf 30 -colorspace bt709 -color_primaries bt709 -color_trc bt709 av1_video.mkv
    # Note that this video only has 1 stream, at index 0.
    VIDEO_PATH = RESOURCES_DIR / "av1_video.mkv"
    FRAMES = [10]

    for frame in FRAMES:
        output_bmp = f"{VIDEO_PATH}.stream0.frame{frame:06d}.bmp"
        get_frame_by_index(VIDEO_PATH, frame, output_bmp, stream=0)
        convert_image_to_tensor(output_bmp)


if __name__ == "__main__":
    main()
79 changes: 0 additions & 79 deletions test/generate_reference_resources.sh

This file was deleted.

2 changes: 2 additions & 0 deletions test/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,8 @@ def sample_format(self) -> str:
return self.stream_infos[self.default_stream_index].sample_format


# This file was generated with:
# ffmpeg -y -i test/resources/nasa_13013.mp4 -b:a 192K -vn test/resources/nasa_13013.mp4.audio.mp3
NASA_AUDIO_MP3 = TestAudio(
filename="nasa_13013.mp4.audio.mp3",
default_stream_index=0,
Expand Down
Loading