intel · antonkesy · Jan 14, 2026 · Jan 14, 2026 · Jan 14, 2026 · Jan 14, 2026
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -31,15 +31,15 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 set(CMAKE_INSTALL_RPATH $ORIGIN)
 
 # Need the torch package
-set(Torch_COMP_VERION "${CMAKE_PROJECT_VERSION_MAJOR}.${CMAKE_PROJECT_VERSION_MINOR}")
-find_package(Torch ${Torch_COMP_VERION} REQUIRED)
+set(Torch_COMP_VERSION "${CMAKE_PROJECT_VERSION_MAJOR}.${CMAKE_PROJECT_VERSION_MINOR}")
+find_package(Torch ${Torch_COMP_VERSION} REQUIRED)
 
 if(NOT EXISTS ${TORCH_INSTALL_PREFIX})
   message(FATAL_ERROR "Can NOT find torch install path at ${TORCH_INSTALL_PREFIX}!")
 endif()
 
-if(NOT ${Torch_COMP_VERION} VERSION_EQUAL "${Torch_VERSION_MAJOR}.${Torch_VERSION_MINOR}")
-  message(FATAL_ERROR "Not compatible Torch version ${Torch_VERSION} at ${TORCH_INSTALL_PREFIX}!\nTorch ${Torch_COMP_VERION} is needed!")
+if(NOT ${Torch_COMP_VERSION} VERSION_EQUAL "${Torch_VERSION_MAJOR}.${Torch_VERSION_MINOR}")
+  message(FATAL_ERROR "Not compatible Torch version ${Torch_VERSION} at ${TORCH_INSTALL_PREFIX}!\nTorch ${Torch_COMP_VERSION} is needed!")
 endif()
 
 include(${IPEX_ROOT_DIR}/cmake/Options.cmake)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -75,7 +75,7 @@ In case you want to reinstall, make sure that you uninstall Intel® Extension fo
 
 ### Tips and Debugging
 
-* A prerequisite to installing Intel® Extension for PyTorch\* is CMake. We recommend installing it with [Homebrew](https://brew.sh/) with `brew install cmake` if you are developing on MacOS or Linux system.
+* A prerequisite to installing Intel® Extension for PyTorch\* is CMake. We recommend installing it with [Homebrew](https://brew.sh/) with `brew install cmake` if you are developing on a macOS or Linux system.
 * Our `setup.py` requires Python >= 3.6
 * If you run into errors when running `python setup.py develop`, here are some debugging steps:
   1. Run `printf '#include <stdio.h>\nint main() { printf("Hello World");}'|clang -x c -; ./a.out` to make sure your CMake works and can compile this simple Hello World program without errors.

diff --git a/cmake/cppsdk/gen_self_extract.sh.in b/cmake/cppsdk/gen_self_extract.sh.in
@@ -32,5 +32,5 @@ if [ $? -gt 0 ]; then
     exit 23
 fi
 
-echo "Successfully generate self-extacting package at ${LIBIPEX_INSTALL_SCRIPT}"
+echo "Successfully generate self-extracting package at ${LIBIPEX_INSTALL_SCRIPT}"
 exit
diff --git a/cmake/cppsdk/libintel-ext-pt.installer.sh.in b/cmake/cppsdk/libintel-ext-pt.installer.sh.in
@@ -119,7 +119,7 @@ if [[ ${COMMAND} == "install" ]]; then
         echo "f|${comp}" >> ${LIBTORCH_PATH}/${LOGFILE}
     done
 
-    echo "Installation successed!"
+    echo "Installation succeeded!"
 
 # LIBIPEX Uninstallation
 elif [[ ${COMMAND} == "uninstall" ]]; then
@@ -144,7 +144,7 @@ elif [[ ${COMMAND} == "uninstall" ]]; then
         rm -f ${LIBTORCH_PATH}/${LOGFILE}
     fi
 
-    echo "Uninstallation successed!"
+    echo "Uninstallation succeeded!"
 fi
 
 exit

diff --git a/cmake/cpu/IsaCodegen.cmake b/cmake/cpu/IsaCodegen.cmake
@@ -141,7 +141,7 @@ endif(CXX_AVX2_FOUND)
 list(LENGTH CPU_CAPABILITY_NAMES NUM_CPU_CAPABILITY_NAMES)
 math(EXPR NUM_CPU_CAPABILITY_NAMES "${NUM_CPU_CAPABILITY_NAMES}-1")
 
-# The sources list might get reordered later based on the capabilites.
+# The sources list might get reordered later based on the capabilities.
 # See NOTE [ Linking AVX and non-AVX files ]
 foreach(i RANGE ${NUM_CPU_CAPABILITY_NAMES})
   foreach(IMPL ${cpu_kernel_cpp_in})

diff --git a/csrc/cpu/aten/AveragePool.cpp b/csrc/cpu/aten/AveragePool.cpp
@@ -308,7 +308,7 @@ at::Tensor avg_pool3d_out_cpu(
   } else {
     TORCH_CHECK(
         false,
-        "Unsupport memory format. Supports only ChannelsLast3d, Contiguous");
+        "Unsupported memory format. Supports only ChannelsLast3d, Contiguous");
   }
 
   TORCH_CHECK(
@@ -459,7 +459,7 @@ at::Tensor avg_pool3d_backward_out_cpu(
   } else {
     TORCH_CHECK(
         false,
-        "Unsupport memory format. Supports only ChannelsLast3d, Contiguous");
+        "Unsupported memory format. Supports only ChannelsLast3d, Contiguous");
   }
 
   TORCH_CHECK(

diff --git a/csrc/cpu/aten/Conv.cpp b/csrc/cpu/aten/Conv.cpp
@@ -104,7 +104,7 @@ at::Tensor convolution_kernel(
     at::MemoryFormat memory_format) {
   // Base convolution kernel, this base kernel will not change input's format,
   // so make sure you has make process the input's format before call this
-  // function, the output wil has same format with input.
+  // function, the output will have the same format as the input.
   // TODO: the input will be actively converted to channels last format
   // after the 5-D tensor supports channels last format.
   TORCH_CHECK(

diff --git a/csrc/cpu/aten/ConvTranspose.cpp b/csrc/cpu/aten/ConvTranspose.cpp
@@ -36,7 +36,7 @@ std::vector<int64_t> conv_input_size(
 static inline std::vector<int64_t> padding_r(
     at::IntArrayRef padding,
     at::IntArrayRef output_padding) {
-  // ConvTranpose padding adjustment
+  // ConvTranspose padding adjustment
   //
   // PyTorch uses padding/output_padding:
   //   osize = (isize - 1) * stride - 2 * padding + dilation * (kernel_size - 1)

diff --git a/csrc/cpu/aten/DistributedMergedEmb.cpp b/csrc/cpu/aten/DistributedMergedEmb.cpp
@@ -68,7 +68,7 @@ IPEX_DEFINE_DISPATCH(mergedemb_distribute_backward_merge_adagrad_update_stub);
  * distributed-merged-embedding-foward-lookup
  * 1. mergedemb_distribute_backward_local_cpu will finish the backward with
  * local grad (shape of [local BS * num_table * emb_dim]), the output grad will
- * be organzied by 3 TensorList: val Tensors, idx Tensors, ofs Tensors. The
+ * be organized by 3 TensorList: val Tensors, idx Tensors, ofs Tensors. The
  * number of the Tensors in 1 TensorList equal to world size. val[i], idx[i],
  * ofs[i] is the tensors will be transfer to rank i by sparse all to all. It
  * contains the grads for those indices on rank i.

diff --git a/csrc/cpu/aten/EmbeddingBag.cpp b/csrc/cpu/aten/EmbeddingBag.cpp
@@ -148,8 +148,8 @@ at::Tensor embedding_bag(
 } // namespace torch_ipex
 
 /*
-A namespace wrapper to keep API compatiable to callers.
-And also compatiable to new dyndisp.
+A namespace wrapper to keep the API compatible with callers.
+It is also compatible with the new dyndisp.
 */
 namespace torch_ipex {
 

diff --git a/csrc/cpu/aten/FlashAttention.cpp b/csrc/cpu/aten/FlashAttention.cpp
@@ -20,7 +20,7 @@ bool use_ipex_flash_attention(
 }
 
 /*
- *Caculate the flash attention SDPA with attention mask.
+ *Calculate the flash attention SDPA with attention mask.
  */
 std::tuple<at::Tensor, at::Tensor> flash_attention_forward_cpu(
     const at::Tensor& query,

diff --git a/csrc/cpu/aten/LayerNorm.cpp b/csrc/cpu/aten/LayerNorm.cpp
@@ -25,7 +25,7 @@ std::tuple<at::Tensor, at::Tensor, at::Tensor> layer_norm_impl(
     double eps) {
   TORCH_CHECK(
       gamma.scalar_type() == at::kFloat && beta.scalar_type() == at::kFloat,
-      "gamma adn beta's data type should be float");
+      "gamma and beta's data type should be float");
   ideep::tensor x = itensor_view_from_dense(X);
   const ideep::tensor scale = itensor_view_from_dense(gamma);
   const ideep::tensor shift = itensor_view_from_dense(beta);
@@ -148,7 +148,7 @@ at::Tensor layer_norm_forward(
  * Now, we only use oneDNN kernel when both weight and bias are provided.
  * ToDo: more scenarios to use oneDNN or remvoe this pass
  * when at::layer_norm performance is back compared to w/o
- * mergeing https://github.com/pytorch/pytorch/pull/59987
+ * merging https://github.com/pytorch/pytorch/pull/59987
  *
  * @param input: the source tensor to layernorm
  * @param normalized_shape: input shape from an expected input of size

diff --git a/csrc/cpu/aten/Linear.cpp b/csrc/cpu/aten/Linear.cpp
@@ -45,7 +45,7 @@ void linear_kernel_output(
       dim == 2 ? self_ : self_.reshape({-1, self.size(self.dim() - 1)});
   const ideep::tensor mkldnn_input = itensor_view_from_dense(self_reshaped);
   // output.sizes() will return a reference for output's size which will not
-  // hold the underlaying storage. It will be released if output are dead
+  // hold the underlying storage. It will be released if output is dead
   // (output = output.reshape(output_size_reshaped)) output.sizes().vec() will
   // trigger a copy and can hold the sizes vector.
   auto output_size = output.sizes().vec();

diff --git a/csrc/cpu/aten/MaskedMultiHeadAttention.cpp b/csrc/cpu/aten/MaskedMultiHeadAttention.cpp
@@ -10,7 +10,7 @@ IPEX_DEFINE_DISPATCH(deepseekv2_mla_kernel_stub);
 IPEX_DEFINE_DISPATCH(prepare_4d_causal_attention_mask_kernel_stub);
 
 /*
- *Caculate the masked multihead attention for decoder layer in decoder only
+ *Calculate the masked multihead attention for decoder layer in decoder only
  *model.
  *@param query
  *@param key

diff --git a/csrc/cpu/aten/MergedEmbeddingBag.h b/csrc/cpu/aten/MergedEmbeddingBag.h
@@ -50,7 +50,7 @@ class EMBROWFixLen {
  * EmbeddingRowCache with smaller memory usage.
  *
  * EmbeddingRowCache contains var length EmbRow hash map and Fixed length EmbRow
- * with len=64, 128, 256 And handle different lenght inside EmbeddingRowCache
+ * with len=64, 128, 256 And handle different length inside EmbeddingRowCache
  * without expose len info to users.
  *
  * The robin_hood::unordered_map<int64_t, T*> _cached_ptr is used because user
@@ -61,7 +61,7 @@ class EMBROWFixLen {
  *     We will allocate memory to hold emb row very frequently during Embedding
  * FW/BW, we wish to allocate the memory on stack by using temporal varalble
  * instead of allocating them in heap for performance consideration. So we use C
- * array to hold fixed length and use std::vector to hold var lenght
+ * array to hold fixed length and use std::vector to hold var length
  * (std::vector will use memory on heap).
  *
  * How to use:

diff --git a/csrc/cpu/aten/PagedAttention.cpp b/csrc/cpu/aten/PagedAttention.cpp
@@ -11,7 +11,7 @@ IPEX_DEFINE_DISPATCH(reshape_and_cache_kernel_stub);
 IPEX_DEFINE_DISPATCH(flash_attn_var_len_kernel_stub);
 
 /*
- *Caculate the masked multihead attention for decoder layer in decoder only
+ *Calculate the masked multihead attention for decoder layer in decoder only
  */
 at::Tensor single_query_cached_kv_attention_forward_cpu(
     at::Tensor& out, // [num_seqs, num_heads, head_size]

diff --git a/csrc/cpu/aten/Punica.cpp b/csrc/cpu/aten/Punica.cpp
@@ -17,52 +17,52 @@ at::Tensor punica_bgmv_shrink_forward_cpu(
     at::Tensor& out,
     at::Tensor& input,
     at::Tensor& weights,
-    at::Tensor& indicies,
+    at::Tensor& indices,
     const double scale) {
-  punica_bgmv_shrink_kernel_stub(kCPU, out, input, weights, indicies, scale);
+  punica_bgmv_shrink_kernel_stub(kCPU, out, input, weights, indices, scale);
   return out;
 }
 
 at::Tensor punica_sgmv_shrink_forward_cpu(
     at::Tensor& out,
     at::Tensor& input,
     at::Tensor& weights,
-    at::Tensor& indicies,
+    at::Tensor& indices,
     at::Tensor& seq_lens,
     const double scale) {
   punica_sgmv_shrink_kernel_stub(
-      kCPU, out, input, weights, indicies, seq_lens, scale);
+      kCPU, out, input, weights, indices, seq_lens, scale);
   return out;
 }
 
 at::Tensor punica_bgmv_expand_forward_cpu(
     at::Tensor& out,
     at::Tensor& input,
     at::Tensor& weights,
-    at::Tensor& indicies,
+    at::Tensor& indices,
     bool add_inputs) {
   punica_bgmv_expand_kernel_stub(
-      kCPU, out, input, weights, indicies, add_inputs);
+      kCPU, out, input, weights, indices, add_inputs);
   return out;
 }
 
 at::Tensor punica_sgmv_expand_forward_cpu(
     at::Tensor& out,
     at::Tensor& input,
     at::Tensor& weights,
-    at::Tensor& indicies,
+    at::Tensor& indices,
     at::Tensor& seq_lens,
     bool add_inputs) {
   punica_sgmv_expand_kernel_stub(
-      kCPU, out, input, weights, indicies, seq_lens, add_inputs);
+      kCPU, out, input, weights, indices, seq_lens, add_inputs);
   return out;
 }
 
 at::Tensor punica_bgmv_expand_slice_forward_cpu(
     at::Tensor& out,
     at::Tensor& input,
     at::Tensor& weights,
-    at::Tensor& indicies,
+    at::Tensor& indices,
     int64_t slice_offset,
     int64_t slice_size,
     bool add_inputs) {
@@ -71,7 +71,7 @@ at::Tensor punica_bgmv_expand_slice_forward_cpu(
       out,
       input,
       weights,
-      indicies,
+      indices,
       slice_offset,
       slice_size,
       add_inputs);
@@ -82,7 +82,7 @@ at::Tensor punica_sgmv_expand_slice_forward_cpu(
     at::Tensor& out,
     at::Tensor& input,
     at::Tensor& weights,
-    at::Tensor& indicies,
+    at::Tensor& indices,
     at::Tensor& seq_lens,
     int64_t slice_offset,
     int64_t slice_size,
@@ -92,7 +92,7 @@ at::Tensor punica_sgmv_expand_slice_forward_cpu(
       out,
       input,
       weights,
-      indicies,
+      indices,
       seq_lens,
       slice_offset,
       slice_size,

diff --git a/csrc/cpu/aten/Punica.h b/csrc/cpu/aten/Punica.h
@@ -12,37 +12,37 @@ void punica_bgmv_shrink(
     at::Tensor& out,
     at::Tensor& input,
     at::Tensor& weights,
-    at::Tensor& indicies,
+    at::Tensor& indices,
     const double scale);
 
 void punica_sgmv_shrink(
     at::Tensor& out,
     at::Tensor& input,
     at::Tensor& weights,
-    at::Tensor& indicies,
+    at::Tensor& indices,
     at::Tensor& seq_lens,
     const double scale);
 
 void punica_bgmv_expand(
     at::Tensor& out,
     at::Tensor& input,
     at::Tensor& weights,
-    at::Tensor& indicies,
+    at::Tensor& indices,
     bool add_inputs);
 
 void punica_sgmv_expand(
     at::Tensor& out,
     at::Tensor& input,
     at::Tensor& weights,
-    at::Tensor& indicies,
+    at::Tensor& indices,
     at::Tensor& seq_lens,
     bool add_inputs);
 
 void punica_bgmv_expand_slice(
     at::Tensor& out,
     at::Tensor& input,
     at::Tensor& weights,
-    at::Tensor& indicies,
+    at::Tensor& indices,
     int64_t slice_offset,
     int64_t slice_size,
     bool add_inputs);
@@ -51,7 +51,7 @@ void punica_sgmv_expand_slice(
     at::Tensor& out,
     at::Tensor& input,
     at::Tensor& weights,
-    at::Tensor& indicies,
+    at::Tensor& indices,
     at::Tensor& seq_lens,
     int64_t slice_offset,
     int64_t slice_size,
@@ -62,37 +62,37 @@ using punica_bgmv_shrink_fn = void (*)(
     at::Tensor& out,
     at::Tensor& input,
     at::Tensor& weights,
-    at::Tensor& indicies,
+    at::Tensor& indices,
     const double scale);
 
 using punica_sgmv_shrink_fn = void (*)(
     at::Tensor& out,
     at::Tensor& input,
     at::Tensor& weights,
-    at::Tensor& indicies,
+    at::Tensor& indices,
     at::Tensor& seq_lens,
     const double scale);
 
 using punica_bgmv_expand_fn = void (*)(
     at::Tensor& out,
     at::Tensor& input,
     at::Tensor& weights,
-    at::Tensor& indicies,
+    at::Tensor& indices,
     bool add_inputs);
 
 using punica_sgmv_expand_fn = void (*)(
     at::Tensor& out,
     at::Tensor& input,
     at::Tensor& weights,
-    at::Tensor& indicies,
+    at::Tensor& indices,
     at::Tensor& seq_lens,
     bool add_inputs);
 
 using punica_bgmv_expand_slice_fn = void (*)(
     at::Tensor& out,
     at::Tensor& input,
     at::Tensor& weights,
-    at::Tensor& indicies,
+    at::Tensor& indices,
     int64_t slice_offset,
     int64_t slice_size,
     bool add_inputs);
@@ -101,7 +101,7 @@ using punica_sgmv_expand_slice_fn = void (*)(
     at::Tensor& out,
     at::Tensor& input,
     at::Tensor& weights,
-    at::Tensor& indicies,
+    at::Tensor& indices,
     at::Tensor& seq_lens,
     int64_t slice_offset,
     int64_t slice_size,

diff --git a/csrc/cpu/aten/RotaryPositionEmbedding.cpp b/csrc/cpu/aten/RotaryPositionEmbedding.cpp
@@ -1,5 +1,5 @@
 
-// The orginal python code can be found in
+// The original Python code can be found in
 // https://github.com/huggingface/transformers/blob/main/src/transformers/models/gptj/modeling_gptj.py
 // apply_rotary_pos_emb
 #include "RotaryPositionEmbedding.h"
-Original file line number
+Diff line change
@@ Expand Up / @@ -32,5 +32,5 @@ if [ $? -gt 0 ]; then @@
         exit 23
     fi
-    echo "Successfully generate self-extacting package at ${LIBIPEX_INSTALL_SCRIPT}"
+    echo "Successfully generate self-extracting package at ${LIBIPEX_INSTALL_SCRIPT}"
     exit