
Commit dc8e22a

[large tensor] Use int64_t instead of int/int32_t for dims (PaddlePaddle#76290)
* add new enforce
* apply int32-dims
* fix
* add rule
* fix
* fix rule
* remove PADDLE_ENFORCE_LE_INT_MAX
* fix
* fix ast-grep
1 parent 6fd1723 commit dc8e22a

61 files changed, +629 −159 lines
(ast-grep snapshot file for this rule; path not shown in this view)

Lines changed: 39 additions & 0 deletions
@@ -0,0 +1,39 @@
+id: no-int32-type-dims
+snapshots:
+  int class_num = prob->dims()[1];:
+    fixed: |
+      int64_t class_num = prob->dims()[1];
+      // TODO(large-tensor): downstream functors may still use int; guard until upgraded.
+    labels:
+      - source: int class_num = prob->dims()[1];
+        style: primary
+        start: 0
+        end: 32
+  int input_ids_num = input.numel();:
+    fixed: |
+      int64_t input_ids_num = input.numel();
+      // TODO(large-tensor): downstream functors may still use int; guard until upgraded.
+      PADDLE_ENFORCE_LE_INT_MAX(input_ids_num, "input_ids_num");
+    labels:
+      - source: int input_ids_num = input.numel();
+        style: primary
+        start: 0
+        end: 34
+  int row_size = x.dims()[ndim - 1];:
+    fixed: |
+      int64_t row_size = x.dims()[ndim - 1];
+      // TODO(large-tensor): downstream functors may still use int; guard until upgraded.
+    labels:
+      - source: int row_size = x.dims()[ndim - 1];
+        style: primary
+        start: 0
+        end: 34
+  int32_t input_ids_num = input.numel();:
+    fixed: |
+      int64_t input_ids_num = input.numel();
+      // TODO(large-tensor): downstream functors may still use int; guard until upgraded.
+    labels:
+      - source: int32_t input_ids_num = input.numel();
+        style: primary
+        start: 0
+        end: 38
(ast-grep rule test file; path not shown in this view)

Lines changed: 9 additions & 0 deletions
@@ -0,0 +1,9 @@
+id: no-int32-type-dims
+valid:
+  - int64_t input_ids_num = input.numel();
+  - int64_t class_num = prob->dims()[1];
+  - int64_t row_size = x.dims()[ndim - 1];
+invalid:
+  - int32_t input_ids_num = input.numel();
+  - int class_num = prob->dims()[1];
+  - int row_size = x.dims()[ndim - 1];

ci/rules/no-int32-type-dims.yml

Lines changed: 54 additions & 0 deletions
@@ -0,0 +1,54 @@
+id: no-int32-type-dims
+language: cpp
+files:
+  - paddle/phi/kernels/**
+ignores:
+  - paddle/phi/kernels/legacy/**
+  - paddle/phi/kernels/xpu/**
+  - paddle/phi/kernels/custom/**
+  - paddle/phi/kernels/sparse/**
+severity: error
+message: |
+  Use int64_t instead of int/int32_t for dims/shape/strides/numel/offset (large tensor support).
+  If this is a false positive, please contact zrr1999 (recommended) or wanghuancoder for more information.
+note: dims/shape/strides/numel/offset may be >= INT32_MAX; dims()/numel() return int64_t
+rule:
+  any:
+    # int x = <RIGHT>;
+    - pattern: int $VAR = $RIGHT
+    # int32_t x = <RIGHT>;
+    - pattern: int32_t $VAR = $RIGHT
+    # direct-initializer: int x(<RIGHT>);
+    - pattern: int $VAR($RIGHT)
+    # direct-initializer: int32_t x(<RIGHT>);
+    - pattern: int32_t $VAR($RIGHT)
+constraints:
+  RIGHT:
+    any:
+      # dims[...] / expr.dims()[...] / expr->dims()[...]
+      - pattern: $E.dims[$INDEX]
+      - pattern: $E.dims()[$INDEX]
+      - pattern: $E->dims[$INDEX]
+      - pattern: $E->dims()[$INDEX]
+      # shape[...] / expr.shape()[...] / expr->shape()[...]
+      - pattern: $E.shape[$INDEX]
+      - pattern: $E.shape()[$INDEX]
+      - pattern: $E->shape[$INDEX]
+      - pattern: $E->shape()[$INDEX]
+      # strides[...] / expr.strides()[...] / expr->strides()[...]
+      - pattern: $E.strides[$INDEX]
+      - pattern: $E.strides()[$INDEX]
+      - pattern: $E->strides[$INDEX]
+      - pattern: $E->strides()[$INDEX]
+      # numel / numel()
+      - pattern: $E.numel
+      - pattern: $E.numel()
+      - pattern: $E->numel
+      - pattern: $E->numel()
+      # offset / offset()
+      # unsafe
+      # - pattern: $E.offset
+      # - pattern: $E.offset()
+fix: |
+  int64_t $VAR = $RIGHT;
+  // TODO(large-tensor): downstream functors may still use int; guard until upgraded.
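To make the rule's `note` concrete, here is a short, self-contained illustration (not part of the commit) of the wraparound that motivates it: any extent above INT32_MAX is mangled the moment it lands in an int.

#include <cstdint>
#include <iostream>

int main() {
  // A tensor of shape [3, 1000000000] has 3e9 elements; dims()/numel()
  // report this correctly as int64_t, but an int cannot hold it.
  int64_t numel = 3LL * 1000000000LL;
  int narrowed = static_cast<int>(numel);  // the pattern the rule rejects
  std::cout << numel << " -> " << narrowed << '\n';  // 3000000000 -> -1294967296
  return 0;
}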

paddle/common/enforce.h

Lines changed: 10 additions & 0 deletions
@@ -335,6 +335,16 @@ using CommonType2 = typename std::add_lvalue_reference<
 #define PADDLE_ENFORCE_LE(__VAL0, __VAL1, ...) \
   __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <=, >, __VA_ARGS__)
 
+#define PADDLE_ENFORCE_LE_INT_MAX(var, var_name)                             \
+  PADDLE_ENFORCE_LE(var,                                                     \
+                    std::numeric_limits<int>::max(),                         \
+                    common::errors::InvalidArgument(                         \
+                        "Tensor dimension %s=%ld exceeds the maximum value " \
+                        "that int can represent (%d).",                      \
+                        var_name,                                            \
+                        var,                                                 \
+                        std::numeric_limits<int>::max()))
+
 TEST_API bool RegisterLogSimplyStr(const std::string& type,
                                    const std::string& simply);
 template <typename T>
paddle/phi/kernels/cpu/tdm_child_kernel.cc

Lines changed: 4 additions & 1 deletion
@@ -36,7 +36,10 @@ void TDMChildInner(const Context &dev_ctx,
   int node_nums = info_dims[0];
   int length = info_dims[1];
 
-  int input_ids_num = input.numel();
+  int64_t input_ids_num = input.numel();
+  // TODO(large-tensor): downstream functors may still use int; guard until
+  // upgraded.
+
   VLOG(4) << "TDM child op: input numel -> " << input_ids_num;
 
   std::vector<OutT> child_vec{};
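The TODO left behind each widened declaration is the point, not an oversight: widening removes one truncation, but any downstream helper that still takes int re-introduces it at the call site. A hedged, self-contained sketch (ProcessIds is hypothetical, not the actual TDM code):

#include <cstdint>

// Hypothetical legacy helper that still takes int.
void ProcessIds(int ids_num) { (void)ids_num; }

int main() {
  int64_t input_ids_num = (1LL << 31);  // one past INT32_MAX
  // The cast wraps to INT32_MIN; widening the declaration alone does not
  // make this call safe, hence the TODO until ProcessIds is upgraded.
  ProcessIds(static_cast<int>(input_ids_num));
  return 0;
}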

paddle/phi/kernels/cpu/tdm_sampler_kernel.cc

Lines changed: 4 additions & 1 deletion
@@ -43,7 +43,10 @@ void TDMSamplerInner(const Context &dev_ctx,
                      phi::DenseTensor *label,
                      phi::DenseTensor *mask) {
   // get dimension
-  int input_ids_num = input_tensor.numel();
+  int64_t input_ids_num = input_tensor.numel();
+  // TODO(large-tensor): downstream functors may still use int; guard until
+  // upgraded.
+
   VLOG(3) << "TDM: input ids nums: " << input_ids_num;
   auto layer_nums = neg_samples_num_list.size();
   VLOG(3) << "TDM: tree layer nums: " << layer_nums;

paddle/phi/kernels/funcs/cross_entropy.cu

Lines changed: 8 additions & 2 deletions
@@ -122,8 +122,14 @@ void CrossEntropyFunctor<DeviceContext, T>::operator()(
   T* loss_data = dev_ctx.template Alloc<T>(out);
   const T* prob_data = prob->data<T>();
 
-  int batch_size = prob->dims()[0];
-  int class_num = prob->dims()[1];
+  int64_t batch_size = prob->dims()[0];
+  // TODO(large-tensor): downstream functors may still use int; guard until
+  // upgraded.
+
+  int64_t class_num = prob->dims()[1];
+  // TODO(large-tensor): downstream functors may still use int; guard until
+  // upgraded.
+
   constexpr int kMaxBlockDim = 512;
 
   // big tensor currently not supported

paddle/phi/kernels/funcs/fake_dequantize_functor.cu

Lines changed: 4 additions & 1 deletion
@@ -142,7 +142,10 @@ void ChannelDequantizeFunctor<Context, T>::operator()(
   // quantized on. `x_num_col_dims` is -1 for operator in ['matmul',
   // 'matmul_v2', 'mul'] and is 1 for other operators.
   int64_t num = in->numel();
-  int n_scales = in->dims()[x_num_col_dims];
+  int64_t n_scales = in->dims()[x_num_col_dims];
+  // TODO(large-tensor): downstream functors may still use int; guard until
+  // upgraded.
+
   const T* scale_one = scales[0]->data<T>();
   const T* scale_two = scales[1]->data<T>();

paddle/phi/kernels/funcs/im2col.cu

Lines changed: 90 additions & 24 deletions
@@ -121,10 +121,21 @@ class Im2ColFunctor<phi::funcs::ColFormat::kCFO, DeviceContext, T> {
         (data_layout != DataLayout::kNHWC ? im.dims()[1] : im.dims()[0]);
     int im_width =
         (data_layout != DataLayout::kNHWC ? im.dims()[2] : im.dims()[1]);
-    int filter_height = col->dims()[1];
-    int filter_width = col->dims()[2];
-    int col_height = col->dims()[3];
-    int col_width = col->dims()[4];
+    int64_t filter_height = col->dims()[1];
+    // TODO(large-tensor): downstream functors may still use int; guard until
+    // upgraded.
+
+    int64_t filter_width = col->dims()[2];
+    // TODO(large-tensor): downstream functors may still use int; guard until
+    // upgraded.
+
+    int64_t col_height = col->dims()[3];
+    // TODO(large-tensor): downstream functors may still use int; guard until
+    // upgraded.
+
+    int64_t col_width = col->dims()[4];
+    // TODO(large-tensor): downstream functors may still use int; guard until
+    // upgraded.
 
     int num_outputs = im_channels * col_height * col_width;
     int num_thread = 1024;
@@ -256,10 +267,21 @@ class Col2ImFunctor<phi::funcs::ColFormat::kCFO, DeviceContext, T> {
         (data_layout != DataLayout::kNHWC ? im->dims()[1] : im->dims()[0]);
     int im_width =
         (data_layout != DataLayout::kNHWC ? im->dims()[2] : im->dims()[1]);
-    int filter_height = col.dims()[1];
-    int filter_width = col.dims()[2];
-    int col_height = col.dims()[3];
-    int col_width = col.dims()[4];
+    int64_t filter_height = col.dims()[1];
+    // TODO(large-tensor): downstream functors may still use int; guard until
+    // upgraded.
+
+    int64_t filter_width = col.dims()[2];
+    // TODO(large-tensor): downstream functors may still use int; guard until
+    // upgraded.
+
+    int64_t col_height = col.dims()[3];
+    // TODO(large-tensor): downstream functors may still use int; guard until
+    // upgraded.
+
+    int64_t col_width = col.dims()[4];
+    // TODO(large-tensor): downstream functors may still use int; guard until
+    // upgraded.
 
     PADDLE_ENFORCE_EQ(
         (im_height + padding[0] + padding[2] -
@@ -406,13 +428,33 @@ class Im2ColFunctor<phi::funcs::ColFormat::kOCF, DeviceContext, T> {
             "the dims of tensor 'col' is [%s].",
             col->dims()));
 
-    int im_channels = im.dims()[0];
-    int im_height = im.dims()[1];
-    int im_width = im.dims()[2];
-    int filter_height = col->dims()[3];
-    int filter_width = col->dims()[4];
-    int col_height = col->dims()[0];
-    int col_width = col->dims()[1];
+    int64_t im_channels = im.dims()[0];
+    // TODO(large-tensor): downstream functors may still use int; guard until
+    // upgraded.
+
+    int64_t im_height = im.dims()[1];
+    // TODO(large-tensor): downstream functors may still use int; guard until
+    // upgraded.
+
+    int64_t im_width = im.dims()[2];
+    // TODO(large-tensor): downstream functors may still use int; guard until
+    // upgraded.
+
+    int64_t filter_height = col->dims()[3];
+    // TODO(large-tensor): downstream functors may still use int; guard until
+    // upgraded.
+
+    int64_t filter_width = col->dims()[4];
+    // TODO(large-tensor): downstream functors may still use int; guard until
+    // upgraded.
+
+    int64_t col_height = col->dims()[0];
+    // TODO(large-tensor): downstream functors may still use int; guard until
+    // upgraded.
+
+    int64_t col_width = col->dims()[1];
+    // TODO(large-tensor): downstream functors may still use int; guard until
+    // upgraded.
 
     int block_dim_x = 0;
     int block_dim_y = 0;
@@ -431,7 +473,9 @@ class Im2ColFunctor<phi::funcs::ColFormat::kOCF, DeviceContext, T> {
     }
 
     int block_dim_z = 1024 / block_dim_x / block_dim_y;
-    dim3 threads(block_dim_x, block_dim_y, std::min(block_dim_z, im_channels));
+    dim3 threads(block_dim_x,
+                 block_dim_y,
+                 std::min(block_dim_z, static_cast<int>(im_channels)));
     dim3 grid(col_width, col_height);
     im2colOCF<T><<<grid, threads, 0, dev_ctx.stream()>>>(im.data<T>(),
                                                          im_channels,
@@ -516,13 +560,33 @@ class Col2ImFunctor<phi::funcs::ColFormat::kOCF, DeviceContext, T> {
             "the dims of tensor 'col' is [%s].",
             col.dims()));
 
-    int im_channels = im->dims()[0];
-    int im_height = im->dims()[1];
-    int im_width = im->dims()[2];
-    int filter_height = col.dims()[3];
-    int filter_width = col.dims()[4];
-    int col_height = col.dims()[0];
-    int col_width = col.dims()[1];
+    int64_t im_channels = im->dims()[0];
+    // TODO(large-tensor): downstream functors may still use int; guard until
+    // upgraded.
+
+    int64_t im_height = im->dims()[1];
+    // TODO(large-tensor): downstream functors may still use int; guard until
+    // upgraded.
+
+    int64_t im_width = im->dims()[2];
+    // TODO(large-tensor): downstream functors may still use int; guard until
+    // upgraded.
+
+    int64_t filter_height = col.dims()[3];
+    // TODO(large-tensor): downstream functors may still use int; guard until
+    // upgraded.
+
+    int64_t filter_width = col.dims()[4];
+    // TODO(large-tensor): downstream functors may still use int; guard until
+    // upgraded.
+
+    int64_t col_height = col.dims()[0];
+    // TODO(large-tensor): downstream functors may still use int; guard until
+    // upgraded.
+
+    int64_t col_width = col.dims()[1];
+    // TODO(large-tensor): downstream functors may still use int; guard until
+    // upgraded.
 
     PADDLE_ENFORCE_EQ(
         (im_height + padding[0] + padding[2] -
@@ -558,7 +622,9 @@ class Col2ImFunctor<phi::funcs::ColFormat::kOCF, DeviceContext, T> {
     }
 
     int block_dim_z = 1024 / block_dim_x / block_dim_y;
-    dim3 threads(block_dim_x, block_dim_y, std::min(block_dim_z, im_channels));
+    dim3 threads(block_dim_x,
+                 block_dim_y,
+                 std::min(block_dim_z, static_cast<int>(im_channels)));
     dim3 grid(col_width, col_height);
     col2imOCF<T><<<grid, threads, 0, dev_ctx.stream()>>>(col.data<T>(),
                                                          im_channels,
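One non-mechanical detail in this file: widening im_channels to int64_t breaks std::min(block_dim_z, im_channels), because std::min deduces a single type for both arguments. The diff therefore casts back to int inside the launch-config computation, which is safe since CUDA block dimensions are capped far below INT_MAX. A self-contained illustration (not from the commit):

#include <algorithm>
#include <cstdint>

int main() {
  int block_dim_z = 8;
  int64_t im_channels = 512;
  // std::min(block_dim_z, im_channels);  // ill-formed: deduction conflict
  int z = std::min(block_dim_z, static_cast<int>(im_channels));  // compiles, z == 8
  return z == 8 ? 0 : 1;
}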

0 commit comments
