Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,15 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_INSTALL_RPATH $ORIGIN)

# Need the torch package
set(Torch_COMP_VERION "${CMAKE_PROJECT_VERSION_MAJOR}.${CMAKE_PROJECT_VERSION_MINOR}")
find_package(Torch ${Torch_COMP_VERION} REQUIRED)
set(Torch_COMP_VERSION "${CMAKE_PROJECT_VERSION_MAJOR}.${CMAKE_PROJECT_VERSION_MINOR}")
find_package(Torch ${Torch_COMP_VERSION} REQUIRED)

if(NOT EXISTS ${TORCH_INSTALL_PREFIX})
message(FATAL_ERROR "Can NOT find torch install path at ${TORCH_INSTALL_PREFIX}!")
endif()

if(NOT ${Torch_COMP_VERION} VERSION_EQUAL "${Torch_VERSION_MAJOR}.${Torch_VERSION_MINOR}")
message(FATAL_ERROR "Not compatible Torch version ${Torch_VERSION} at ${TORCH_INSTALL_PREFIX}!\nTorch ${Torch_COMP_VERION} is needed!")
if(NOT ${Torch_COMP_VERSION} VERSION_EQUAL "${Torch_VERSION_MAJOR}.${Torch_VERSION_MINOR}")
message(FATAL_ERROR "Not compatible Torch version ${Torch_VERSION} at ${TORCH_INSTALL_PREFIX}!\nTorch ${Torch_COMP_VERSION} is needed!")
endif()

include(${IPEX_ROOT_DIR}/cmake/Options.cmake)
Expand Down
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ In case you want to reinstall, make sure that you uninstall Intel® Extension fo

### Tips and Debugging

* A prerequisite to installing Intel® Extension for PyTorch\* is CMake. We recommend installing it with [Homebrew](https://brew.sh/) with `brew install cmake` if you are developing on MacOS or Linux system.
* A prerequisite to installing Intel® Extension for PyTorch\* is CMake. We recommend installing it with [Homebrew](https://brew.sh/) with `brew install cmake` if you are developing on macOS or Linux system.
* Our `setup.py` requires Python >= 3.6
* If you run into errors when running `python setup.py develop`, here are some debugging steps:
1. Run `printf '#include <stdio.h>\nint main() { printf("Hello World");}'|clang -x c -; ./a.out` to make sure your CMake works and can compile this simple Hello World program without errors.
Expand Down
2 changes: 1 addition & 1 deletion cmake/cppsdk/gen_self_extract.sh.in
Original file line number Diff line number Diff line change
Expand Up @@ -32,5 +32,5 @@ if [ $? -gt 0 ]; then
exit 23
fi

echo "Successfully generate self-extacting package at ${LIBIPEX_INSTALL_SCRIPT}"
echo "Successfully generate self-extracting package at ${LIBIPEX_INSTALL_SCRIPT}"
exit
4 changes: 2 additions & 2 deletions cmake/cppsdk/libintel-ext-pt.installer.sh.in
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ if [[ ${COMMAND} == "install" ]]; then
echo "f|${comp}" >> ${LIBTORCH_PATH}/${LOGFILE}
done

echo "Installation successed!"
echo "Installation succeeded!"

# LIBIPEX Uninstallation
elif [[ ${COMMAND} == "uninstall" ]]; then
Expand All @@ -144,7 +144,7 @@ elif [[ ${COMMAND} == "uninstall" ]]; then
rm -f ${LIBTORCH_PATH}/${LOGFILE}
fi

echo "Uninstallation successed!"
echo "Uninstallation succeeded!"
fi

exit
Expand Down
2 changes: 1 addition & 1 deletion cmake/cpu/IsaCodegen.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ endif(CXX_AVX2_FOUND)
list(LENGTH CPU_CAPABILITY_NAMES NUM_CPU_CAPABILITY_NAMES)
math(EXPR NUM_CPU_CAPABILITY_NAMES "${NUM_CPU_CAPABILITY_NAMES}-1")

# The sources list might get reordered later based on the capabilites.
# The sources list might get reordered later based on the capabilities.
# See NOTE [ Linking AVX and non-AVX files ]
foreach(i RANGE ${NUM_CPU_CAPABILITY_NAMES})
foreach(IMPL ${cpu_kernel_cpp_in})
Expand Down
4 changes: 2 additions & 2 deletions csrc/cpu/aten/AveragePool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ at::Tensor avg_pool3d_out_cpu(
} else {
TORCH_CHECK(
false,
"Unsupport memory format. Supports only ChannelsLast3d, Contiguous");
"Unsupported memory format. Supports only ChannelsLast3d, Contiguous");
}

TORCH_CHECK(
Expand Down Expand Up @@ -459,7 +459,7 @@ at::Tensor avg_pool3d_backward_out_cpu(
} else {
TORCH_CHECK(
false,
"Unsupport memory format. Supports only ChannelsLast3d, Contiguous");
"Unsupported memory format. Supports only ChannelsLast3d, Contiguous");
}

TORCH_CHECK(
Expand Down
2 changes: 1 addition & 1 deletion csrc/cpu/aten/Conv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ at::Tensor convolution_kernel(
at::MemoryFormat memory_format) {
// Base convolution kernel, this base kernel will not change input's format,
// so make sure you have processed the input's format before calling this
// function, the output wil has same format with input.
// function, the output will has same format with input.
// TODO: the input will be actively converted to channels last format
// after the 5-D tensor supports channels last format.
TORCH_CHECK(
Expand Down
2 changes: 1 addition & 1 deletion csrc/cpu/aten/ConvTranspose.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ std::vector<int64_t> conv_input_size(
static inline std::vector<int64_t> padding_r(
at::IntArrayRef padding,
at::IntArrayRef output_padding) {
// ConvTranpose padding adjustment
// ConvTranspose padding adjustment
//
// PyTorch uses padding/output_padding:
// osize = (isize - 1) * stride - 2 * padding + dilation * (kernel_size - 1)
Expand Down
2 changes: 1 addition & 1 deletion csrc/cpu/aten/DistributedMergedEmb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ IPEX_DEFINE_DISPATCH(mergedemb_distribute_backward_merge_adagrad_update_stub);
* distributed-merged-embedding-forward-lookup
* 1. mergedemb_distribute_backward_local_cpu will finish the backward with
* local grad (shape of [local BS * num_table * emb_dim]), the output grad will
* be organzied by 3 TensorList: val Tensors, idx Tensors, ofs Tensors. The
* be organized by 3 TensorList: val Tensors, idx Tensors, ofs Tensors. The
* number of Tensors in each TensorList equals the world size. val[i], idx[i],
* ofs[i] are the tensors that will be transferred to rank i by sparse all-to-all.
* They contain the grads for those indices on rank i.
Expand Down
4 changes: 2 additions & 2 deletions csrc/cpu/aten/EmbeddingBag.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,8 @@ at::Tensor embedding_bag(
} // namespace torch_ipex

/*
A namespace wrapper to keep API compatiable to callers.
And also compatiable to new dyndisp.
A namespace wrapper to keep API compatible to callers.
And also compatible to new dyndisp.
*/
namespace torch_ipex {

Expand Down
2 changes: 1 addition & 1 deletion csrc/cpu/aten/FlashAttention.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ bool use_ipex_flash_attention(
}

/*
*Caculate the flash attention SDPA with attention mask.
*Calculate the flash attention SDPA with attention mask.
*/
std::tuple<at::Tensor, at::Tensor> flash_attention_forward_cpu(
const at::Tensor& query,
Expand Down
4 changes: 2 additions & 2 deletions csrc/cpu/aten/LayerNorm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ std::tuple<at::Tensor, at::Tensor, at::Tensor> layer_norm_impl(
double eps) {
TORCH_CHECK(
gamma.scalar_type() == at::kFloat && beta.scalar_type() == at::kFloat,
"gamma adn beta's data type should be float");
"gamma and beta's data type should be float");
ideep::tensor x = itensor_view_from_dense(X);
const ideep::tensor scale = itensor_view_from_dense(gamma);
const ideep::tensor shift = itensor_view_from_dense(beta);
Expand Down Expand Up @@ -148,7 +148,7 @@ at::Tensor layer_norm_forward(
* Now, we only use oneDNN kernel when both weight and bias are provided.
* ToDo: more scenarios to use oneDNN or remove this pass
* when at::layer_norm performance is back compared to w/o
* mergeing https://github.com/pytorch/pytorch/pull/59987
* merging https://github.com/pytorch/pytorch/pull/59987
*
* @param input: the source tensor to layernorm
* @param normalized_shape: input shape from an expected input of size
Expand Down
2 changes: 1 addition & 1 deletion csrc/cpu/aten/Linear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ void linear_kernel_output(
dim == 2 ? self_ : self_.reshape({-1, self.size(self.dim() - 1)});
const ideep::tensor mkldnn_input = itensor_view_from_dense(self_reshaped);
// output.sizes() will return a reference for output's size which will not
// hold the underlaying storage. It will be released if output are dead
// hold the underlying storage. It will be released if output are dead
// (output = output.reshape(output_size_reshaped)) output.sizes().vec() will
// trigger a copy and can hold the sizes vector.
auto output_size = output.sizes().vec();
Expand Down
2 changes: 1 addition & 1 deletion csrc/cpu/aten/MaskedMultiHeadAttention.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ IPEX_DEFINE_DISPATCH(deepseekv2_mla_kernel_stub);
IPEX_DEFINE_DISPATCH(prepare_4d_causal_attention_mask_kernel_stub);

/*
*Caculate the masked multihead attention for decoder layer in decoder only
*Calculate the masked multihead attention for decoder layer in decoder only
*model.
*@param query
*@param key
Expand Down
4 changes: 2 additions & 2 deletions csrc/cpu/aten/MergedEmbeddingBag.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ class EMBROWFixLen {
* EmbeddingRowCache with smaller memory usage.
*
* EmbeddingRowCache contains var length EmbRow hash map and Fixed length EmbRow
* with len=64, 128, 256 And handle different lenght inside EmbeddingRowCache
* with len=64, 128, 256 And handle different length inside EmbeddingRowCache
* without expose len info to users.
*
* The robin_hood::unordered_map<int64_t, T*> _cached_ptr is used because user
Expand All @@ -61,7 +61,7 @@ class EMBROWFixLen {
* We will allocate memory to hold emb row very frequently during Embedding
* FW/BW, we wish to allocate the memory on stack by using temporary variables
* instead of allocating them in heap for performance consideration. So we use C
* array to hold fixed length and use std::vector to hold var lenght
* array to hold fixed length and use std::vector to hold var length
* (std::vector will use memory on heap).
*
* How to use:
Expand Down
2 changes: 1 addition & 1 deletion csrc/cpu/aten/PagedAttention.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ IPEX_DEFINE_DISPATCH(reshape_and_cache_kernel_stub);
IPEX_DEFINE_DISPATCH(flash_attn_var_len_kernel_stub);

/*
*Caculate the masked multihead attention for decoder layer in decoder only
*Calculate the masked multihead attention for decoder layer in decoder only
*/
at::Tensor single_query_cached_kv_attention_forward_cpu(
at::Tensor& out, // [num_seqs, num_heads, head_size]
Expand Down
24 changes: 12 additions & 12 deletions csrc/cpu/aten/Punica.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,52 +17,52 @@ at::Tensor punica_bgmv_shrink_forward_cpu(
at::Tensor& out,
at::Tensor& input,
at::Tensor& weights,
at::Tensor& indicies,
at::Tensor& indices,
const double scale) {
punica_bgmv_shrink_kernel_stub(kCPU, out, input, weights, indicies, scale);
punica_bgmv_shrink_kernel_stub(kCPU, out, input, weights, indices, scale);
return out;
}

at::Tensor punica_sgmv_shrink_forward_cpu(
at::Tensor& out,
at::Tensor& input,
at::Tensor& weights,
at::Tensor& indicies,
at::Tensor& indices,
at::Tensor& seq_lens,
const double scale) {
punica_sgmv_shrink_kernel_stub(
kCPU, out, input, weights, indicies, seq_lens, scale);
kCPU, out, input, weights, indices, seq_lens, scale);
return out;
}

at::Tensor punica_bgmv_expand_forward_cpu(
at::Tensor& out,
at::Tensor& input,
at::Tensor& weights,
at::Tensor& indicies,
at::Tensor& indices,
bool add_inputs) {
punica_bgmv_expand_kernel_stub(
kCPU, out, input, weights, indicies, add_inputs);
kCPU, out, input, weights, indices, add_inputs);
return out;
}

at::Tensor punica_sgmv_expand_forward_cpu(
at::Tensor& out,
at::Tensor& input,
at::Tensor& weights,
at::Tensor& indicies,
at::Tensor& indices,
at::Tensor& seq_lens,
bool add_inputs) {
punica_sgmv_expand_kernel_stub(
kCPU, out, input, weights, indicies, seq_lens, add_inputs);
kCPU, out, input, weights, indices, seq_lens, add_inputs);
return out;
}

at::Tensor punica_bgmv_expand_slice_forward_cpu(
at::Tensor& out,
at::Tensor& input,
at::Tensor& weights,
at::Tensor& indicies,
at::Tensor& indices,
int64_t slice_offset,
int64_t slice_size,
bool add_inputs) {
Expand All @@ -71,7 +71,7 @@ at::Tensor punica_bgmv_expand_slice_forward_cpu(
out,
input,
weights,
indicies,
indices,
slice_offset,
slice_size,
add_inputs);
Expand All @@ -82,7 +82,7 @@ at::Tensor punica_sgmv_expand_slice_forward_cpu(
at::Tensor& out,
at::Tensor& input,
at::Tensor& weights,
at::Tensor& indicies,
at::Tensor& indices,
at::Tensor& seq_lens,
int64_t slice_offset,
int64_t slice_size,
Expand All @@ -92,7 +92,7 @@ at::Tensor punica_sgmv_expand_slice_forward_cpu(
out,
input,
weights,
indicies,
indices,
seq_lens,
slice_offset,
slice_size,
Expand Down
24 changes: 12 additions & 12 deletions csrc/cpu/aten/Punica.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,37 +12,37 @@ void punica_bgmv_shrink(
at::Tensor& out,
at::Tensor& input,
at::Tensor& weights,
at::Tensor& indicies,
at::Tensor& indices,
const double scale);

void punica_sgmv_shrink(
at::Tensor& out,
at::Tensor& input,
at::Tensor& weights,
at::Tensor& indicies,
at::Tensor& indices,
at::Tensor& seq_lens,
const double scale);

void punica_bgmv_expand(
at::Tensor& out,
at::Tensor& input,
at::Tensor& weights,
at::Tensor& indicies,
at::Tensor& indices,
bool add_inputs);

void punica_sgmv_expand(
at::Tensor& out,
at::Tensor& input,
at::Tensor& weights,
at::Tensor& indicies,
at::Tensor& indices,
at::Tensor& seq_lens,
bool add_inputs);

void punica_bgmv_expand_slice(
at::Tensor& out,
at::Tensor& input,
at::Tensor& weights,
at::Tensor& indicies,
at::Tensor& indices,
int64_t slice_offset,
int64_t slice_size,
bool add_inputs);
Expand All @@ -51,7 +51,7 @@ void punica_sgmv_expand_slice(
at::Tensor& out,
at::Tensor& input,
at::Tensor& weights,
at::Tensor& indicies,
at::Tensor& indices,
at::Tensor& seq_lens,
int64_t slice_offset,
int64_t slice_size,
Expand All @@ -62,37 +62,37 @@ using punica_bgmv_shrink_fn = void (*)(
at::Tensor& out,
at::Tensor& input,
at::Tensor& weights,
at::Tensor& indicies,
at::Tensor& indices,
const double scale);

using punica_sgmv_shrink_fn = void (*)(
at::Tensor& out,
at::Tensor& input,
at::Tensor& weights,
at::Tensor& indicies,
at::Tensor& indices,
at::Tensor& seq_lens,
const double scale);

using punica_bgmv_expand_fn = void (*)(
at::Tensor& out,
at::Tensor& input,
at::Tensor& weights,
at::Tensor& indicies,
at::Tensor& indices,
bool add_inputs);

using punica_sgmv_expand_fn = void (*)(
at::Tensor& out,
at::Tensor& input,
at::Tensor& weights,
at::Tensor& indicies,
at::Tensor& indices,
at::Tensor& seq_lens,
bool add_inputs);

using punica_bgmv_expand_slice_fn = void (*)(
at::Tensor& out,
at::Tensor& input,
at::Tensor& weights,
at::Tensor& indicies,
at::Tensor& indices,
int64_t slice_offset,
int64_t slice_size,
bool add_inputs);
Expand All @@ -101,7 +101,7 @@ using punica_sgmv_expand_slice_fn = void (*)(
at::Tensor& out,
at::Tensor& input,
at::Tensor& weights,
at::Tensor& indicies,
at::Tensor& indices,
at::Tensor& seq_lens,
int64_t slice_offset,
int64_t slice_size,
Expand Down
2 changes: 1 addition & 1 deletion csrc/cpu/aten/RotaryPositionEmbedding.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

// The orginal python code can be found in
// The original python code can be found in
// https://github.com/huggingface/transformers/blob/main/src/transformers/models/gptj/modeling_gptj.py
// apply_rotary_pos_emb
#include "RotaryPositionEmbedding.h"
Expand Down
Loading