Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .ci/docker/ci_commit_pins/pytorch.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
659af3c353e49b35c191cdd2dba3b3c79d0e6822
a79095985ccd3a645bd2fc31c5ae4fcf215ef518
2 changes: 1 addition & 1 deletion .ci/scripts/test_model_e2e.sh
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ if [ "$AUDIO_URL" != "" ]; then
elif [[ "$MODEL_NAME" == *whisper* ]] || [ "$MODEL_NAME" = "voxtral_realtime" ]; then
conda install -y -c conda-forge "ffmpeg<8"
pip install datasets soundfile
pip install torchcodec==0.11.0.dev20260217 --extra-index-url https://download.pytorch.org/whl/nightly/cpu
pip install torchcodec==0.11.0.dev20260310 --extra-index-url https://download.pytorch.org/whl/nightly/cpu
python -c "from datasets import load_dataset;import soundfile as sf;sample = load_dataset('distil-whisper/librispeech_long', 'clean', split='validation')[0]['audio'];sf.write('${MODEL_DIR}/$AUDIO_FILE', sample['array'][:sample['sampling_rate']*30], sample['sampling_rate'])"
fi

Expand Down
2 changes: 1 addition & 1 deletion examples/models/moshi/mimi/install_requirements.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
set -x

sudo apt install ffmpeg -y
pip install torchcodec==0.11.0.dev20260217 --extra-index-url https://download.pytorch.org/whl/nightly/cpu
pip install torchcodec==0.11.0.dev20260310 --extra-index-url https://download.pytorch.org/whl/nightly/cpu
pip install moshi==0.2.11
pip install bitsandbytes soundfile einops
# Run llama2/install requirements for torchao deps
Expand Down
35 changes: 35 additions & 0 deletions runtime/core/portable_type/c10/c10/util/complex_math.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,41 @@ C10_HOST_DEVICE inline c10::complex<T> pow(
#endif
}

// Workaround for a regression in ROCm 7.2; see
// https://github.com/ROCm/rocm-libraries/pull/3836.
// Specializes pow for complex<float> on AMD GPUs so that it uses FMA-based
// multiplication.
#if defined(__HIPCC__)
namespace detail {
// Multiplies two single-precision complex numbers on AMD GPUs, writing each
// fused multiply-add out explicitly with __builtin_fmaf.
//
// Why: when the SLP vectorizer packs the scalar multiplies before the backend
// has had a chance to form FMA instructions, the result is rounded twice
// instead of once, which loses numerical precision in complex arithmetic.
// Requesting the FMA at source level avoids that.
C10_HOST_DEVICE inline thrust::complex<float> complex_mul_fma(
    thrust::complex<float> a,
    thrust::complex<float> b) {
  // (ar + ai*i) * (br + bi*i) = (ar*br - ai*bi) + (ar*bi + ai*br)*i
  const float ar = a.real();
  const float ai = a.imag();
  const float br = b.real();
  const float bi = b.imag();

  // One product of each component is computed on its own and then passed as
  // the addend of the FMA that forms the other product:
  //   real = FMA(ar, br, -(ai*bi))
  //   imag = FMA(ar, bi,   ai*br)
  const float re = __builtin_fmaf(ar, br, -(ai * bi));
  const float im = __builtin_fmaf(ar, bi, ai * br);
  return thrust::complex<float>(re, im);
}
} // namespace detail

// pow(x, y) for complex<float>, evaluated as exp(y * log(x)); the y * log(x)
// product goes through detail::complex_mul_fma rather than the regular
// complex multiplication operator.
template <>
C10_HOST_DEVICE inline c10::complex<float> pow(
    const c10::complex<float>& x,
    const c10::complex<float>& y) {
  using thrust_cfloat = thrust::complex<float>;

  // Convert to the thrust representation, take log(x), then scale it by y.
  const auto base_log = thrust::log(static_cast<thrust_cfloat>(x));
  const auto exponent = static_cast<thrust_cfloat>(y);
  const auto scaled_log = detail::complex_mul_fma(exponent, base_log);

  // Exponentiate and convert back to the c10 complex type.
  return static_cast<c10::complex<float>>(thrust::exp(scaled_log));
}
#endif

template <typename T>
C10_HOST_DEVICE inline c10::complex<T> pow(
const c10::complex<T>& x,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -629,7 +629,7 @@ __host__ __device__
// This macro is used to find older C++ compilers
// that don't support move optimization for return values.

#if (defined(__GNUC__) && __GNUC__ < 13) || \
#if (defined(__GNUC__) && __GNUC__ < 13 && __cplusplus < 202002L) || \
(defined(__clang_major__) && __clang_major__ < 13)
#define C10_RETURN_MOVE_IF_OLD_COMPILER 1
#else
Expand Down
4 changes: 2 additions & 2 deletions torch_pin.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
TORCH_VERSION = "2.11.0"
NIGHTLY_VERSION = "dev20260215"
TORCH_VERSION = "2.12.0"
NIGHTLY_VERSION = "dev20260310"
Loading