Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/reference_resources.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ on:
workflow_dispatch:
pull_request:
paths:
- test/generate_reference_resources.sh
- test/generate_reference_resources.py
- .github/workflows/reference_resources.yaml # self reference
schedule:
- cron: '0 0 * * 0' # on sunday
Expand Down Expand Up @@ -38,7 +38,7 @@ jobs:
- name: Update pip
run: python -m pip install --upgrade pip

- name: Instal generation dependencies
- name: Install generation dependencies
run: |
# Note that we're installing stable - this is for running a script where we're a normal PyTorch
# user, not for building TorchCodec.
Expand All @@ -50,4 +50,4 @@ jobs:

- name: Run generation reference resources
run: |
test/generate_reference_resources.sh
python test/generate_reference_resources.py
24 changes: 0 additions & 24 deletions test/convert_image_to_tensor.py

This file was deleted.

123 changes: 123 additions & 0 deletions test/generate_reference_resources.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import subprocess
from pathlib import Path

import numpy as np

import torch
from PIL import Image

# Run this script to update the resources used in unit tests. The resources are all derived
# from source media already checked into the repo.


def convert_image_to_tensor(image_path):
    # Convert an on-disk image (e.g. a .bmp extracted by ffmpeg) into a
    # serialized torch tensor saved next to it as <name>.pt, then delete
    # the intermediate image file.
    #
    # Silently does nothing when the image is missing: frame extraction is
    # best-effort, and a frame ffmpeg could not produce should not abort
    # the whole generation run.
    image_path = Path(image_path)
    if not image_path.exists():
        return
    # Get base filename without extension; ".pt" is appended below.
    base_filename = image_path.with_suffix("")
    # Use a context manager so the file handle is closed even if the numpy
    # conversion raises (the original leaked the PIL handle). np.asarray()
    # forces a full decode, so the data outlives the closed file.
    with Image.open(image_path) as pil_image:
        img_tensor = torch.from_numpy(np.asarray(pil_image))
    # Save tensor to disk
    torch.save(
        img_tensor, str(base_filename) + ".pt", _use_new_zipfile_serialization=True
    )
    # The image was only an intermediate artifact; the .pt is the resource.
    image_path.unlink()


def get_frame_by_index(video_path, frame, output_path, stream):
    """Extract one frame, addressed by 0-based index, from one stream of a
    video using ffmpeg, writing the result to output_path."""
    # select=eq(n\,FRAME) keeps exactly the frame whose decode index matches;
    # -vsync vfr stops ffmpeg from padding the dropped frames with duplicates.
    select_filter = f"select=eq(n\\,{frame})"
    subprocess.run(
        [
            "ffmpeg",
            "-y",  # overwrite any stale output file
            "-i", video_path,
            "-map", f"0:{stream}",  # restrict to the requested stream
            "-vf", select_filter,
            "-vsync", "vfr",
            "-q:v", "2",  # high-quality image output
            output_path,
        ],
        check=True,  # raise if ffmpeg exits non-zero
    )


def get_frame_by_timestamp(video_path, timestamp, output_path):
    """Extract the single frame at the given timestamp (in seconds) from a
    video using ffmpeg, writing the result to output_path."""
    # -ss placed before -i seeks on the input; -frames:v 1 stops after the
    # first decoded frame at that position.
    ffmpeg_args = ["ffmpeg", "-y"]
    ffmpeg_args += ["-ss", str(timestamp)]
    ffmpeg_args += ["-i", video_path]
    ffmpeg_args += ["-frames:v", "1"]
    ffmpeg_args.append(output_path)
    subprocess.run(ffmpeg_args, check=True)


def main():
    """Regenerate the reference resources used by the unit tests.

    For each source video under test/resources/, extract reference frames
    with ffmpeg (by index and, for the nasa video, by timestamp), convert
    each extracted .bmp into a serialized torch tensor, and delete the
    intermediate .bmp. Output filenames must not change: they are the
    lookup keys used by ./utils.py.
    """
    SCRIPT_DIR = Path(__file__).resolve().parent
    TORCHCODEC_PATH = SCRIPT_DIR.parent
    RESOURCES_DIR = TORCHCODEC_PATH / "test" / "resources"
    VIDEO_PATH = RESOURCES_DIR / "nasa_13013.mp4"

    # Last generated with ffmpeg version 4.3
    #
    # Note: The naming scheme used here must match the naming scheme used to load
    # tensors in ./utils.py.
    STREAMS = [0, 3]
    FRAMES = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 15, 20, 25, 30, 35, 386, 387, 388, 389]
    for stream in STREAMS:
        for frame in FRAMES:
            # Note that we are using 0-based index naming. Asking ffmpeg to number output
            # frames would result in 1-based index naming. We enforce 0-based index naming
            # so that the name of reference frames matches the index when accessing that
            # frame in the Python decoder.
            output_bmp = f"{VIDEO_PATH}.stream{stream}.frame{frame:06d}.bmp"
            get_frame_by_index(VIDEO_PATH, frame, output_bmp, stream=stream)
            convert_image_to_tensor(output_bmp)

    # Extract individual frames at specific timestamps, including the last frame of the video.
    # (Fixed: the original comprehension shadowed the list name with its own
    # loop variable.) The :06f format has default precision 6, producing the
    # exact names (e.g. "6.000000") that utils.py loads.
    seek_timestamps = [6.0, 6.1, 10.0, 12.979633]
    for timestamp in seek_timestamps:
        output_bmp = f"{VIDEO_PATH}.time{timestamp:06f}.bmp"
        get_frame_by_timestamp(VIDEO_PATH, timestamp, output_bmp)
        convert_image_to_tensor(output_bmp)

    # This video was generated by running the following:
    # conda install -c conda-forge x265
    # ./configure --enable-nonfree --enable-gpl --prefix=$(readlink -f ../bin) --enable-libx265 --enable-rpath --extra-ldflags=-Wl,-rpath=$CONDA_PREFIX/lib --enable-filter=drawtext --enable-libfontconfig --enable-libfreetype --enable-libharfbuzz
    # ffmpeg -f lavfi -i color=size=128x128:duration=1:rate=10:color=blue -vf "drawtext=fontsize=30:fontcolor=white:x=(w-text_w)/2:y=(h-text_h)/2:text='Frame %{frame_num}'" -vcodec libx265 -pix_fmt yuv420p -g 2 -crf 10 h265_video.mp4 -y
    # Note that this video only has 1 stream, at index 0.
    VIDEO_PATH = RESOURCES_DIR / "h265_video.mp4"
    FRAMES = [5]
    for frame in FRAMES:
        output_bmp = f"{VIDEO_PATH}.stream0.frame{frame:06d}.bmp"
        get_frame_by_index(VIDEO_PATH, frame, output_bmp, stream=0)
        convert_image_to_tensor(output_bmp)

    # This video was generated by running the following:
    # ffmpeg -f lavfi -i testsrc=duration=5:size=640x360:rate=25,format=yuv420p -c:v libaom-av1 -crf 30 -colorspace bt709 -color_primaries bt709 -color_trc bt709 av1_video.mkv
    # Note that this video only has 1 stream, at index 0.
    VIDEO_PATH = RESOURCES_DIR / "av1_video.mkv"
    FRAMES = [10]

    for frame in FRAMES:
        output_bmp = f"{VIDEO_PATH}.stream0.frame{frame:06d}.bmp"
        get_frame_by_index(VIDEO_PATH, frame, output_bmp, stream=0)
        convert_image_to_tensor(output_bmp)


if __name__ == "__main__":
    main()
79 changes: 0 additions & 79 deletions test/generate_reference_resources.sh

This file was deleted.

2 changes: 2 additions & 0 deletions test/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,8 @@ def sample_format(self) -> str:
return self.stream_infos[self.default_stream_index].sample_format


# This file was generated with:
# ffmpeg -y -i test/resources/nasa_13013.mp4 -b:a 192K -vn test/resources/nasa_13013.mp4.audio.mp3
NASA_AUDIO_MP3 = TestAudio(
filename="nasa_13013.mp4.audio.mp3",
default_stream_index=0,
Expand Down
Loading