Skip to content

Fix compilation errors when building QUDA with HIP in a DTK environment #1617

@Jaden-Anderson

Description

@Jaden-Anderson

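I have just built QUDA for the HIP target in a DTK environment (specifically, Hygon DCU Toolkit 25.04), compiling with Clang. However, hundreds of compilation errors occurred at once; they are all similar to one another, and one of them is shown under "Compilation Errors" below. In each case the generic SharedMemoryCache constructor calls `Smem(ops, arg...)` with extra arguments, but the HIP `SharedMemory` class only provides a constructor taking a single `KernelOps` argument. Adding a constructor overload that also accepts the extra arguments resolves the errors: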

diff --git a/include/targets/hip/shared_memory_helper.h b/include/targets/hip/shared_memory_helper.h
index 69d8c095c..27d836475 100644
--- a/include/targets/hip/shared_memory_helper.h
+++ b/include/targets/hip/shared_memory_helper.h
@@ -85,6 +85,14 @@ namespace quda
     {
     }

+    /**
+       @brief Constructor for SharedMemory object.
+    */
+    template <typename... U, typename... Arg>
+    constexpr SharedMemory(const KernelOps<U...> &, const Arg &...) : data(cache(get_offset(target::block_dim())))
+    {
+    }
+
     /**
        @brief Return this SharedMemory object.
     */

Compilation Errors

In file included from /path/to/quda/build/lib/coarsecoarse_op_32_32.cu:1:
In file included from /path/to/quda/lib/coarsecoarse_op.hpp:9:
In file included from /path/to/quda/lib/coarse_op.cuh:3:
In file included from /path/to/quda/lib/../include/kernels/coarse_op_kernel.cuh:13:
In file included from /path/to/quda/include/targets/hip/shared_memory_cache_helper.h:1:
/path/to/quda/include/targets/hip/../generic/shared_memory_cache_helper.h:127:7: error: no matching constructor for initialization of 'SharedMemory<atom_t<complex<int>[8][8][4][2][2]>, SizeDims<DimsStaticConditional<2, 1, 1>, sizeof(complex<int>[8][8][4][2][2]) / sizeof(atom_t<complex<int>[8][8][4][2][2]>)>, void>' (aka 'SharedMemory<HIP_vector_type<int, 4>, SizeDims<quda::DimsStaticConditional<2, 1, 1>, sizeof(quda::complex<int>[8][8][4][2][2]) / sizeof(atom_t<complex<int>[8][8][4][2][2]>)>, void>')
      Smem(ops, arg...), block(D::dims(target::block_dim(), arg...)), stride(block.x * block.y * block.z)
      ^    ~~~~~~~~
/path/to/quda/lib/../include/kernels/coarse_op_kernel.cuh:1414:18: note: in instantiation of function template specialization 'quda::SharedMemoryCache<quda::complex<int>[8][8][4][2][2], quda::DimsStaticConditional<2, 1, 1>>::SharedMemoryCache<quda::SharedMemoryCache<quda::complex<int>[8][8][4][2][2], quda::DimsStaticConditional<2, 1, 1>>, quda::CalculateYArg<true, float, float, 2, 2, 32, 32, quda::gauge::FieldOrder<float, 64, 2, QUDA_QDP_GAUGE_ORDER>, quda::gauge::FieldOrder<float, 64, 2, QUDA_QDP_GAUGE_ORDER, true, int>, quda::gauge::FieldOrder<float, 64, 2, QUDA_QDP_GAUGE_ORDER>, quda::colorspinor::FieldOrderCB<float, 2, 32, 32, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER>, quda::colorspinor::FieldOrderCB<float, 4, 32, 32, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER>, quda::colorspinor::FieldOrderCB<float, 2, 32, 32, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER>, quda::gauge::FieldOrder<float, 64, 2, QUDA_QDP_GAUGE_ORDER>>>' requested here
      Cache<Arg> cache {ftor, arg};
                 ^
/path/to/quda/include/targets/hip/target_device.h:18:16: note: in instantiation of function template specialization 'quda::storeCoarseSharedAtomic_impl<true>::operator()<quda::MatrixTile<quda::complex<float>, 2, 2, false>[4], quda::Pack<QUDA_BACKWARDS>, quda::compute_vuv<quda::CalculateYArg<true, float, float, 2, 2, 32, 32, quda::gauge::FieldOrder<float, 64, 2, QUDA_QDP_GAUGE_ORDER>, quda::gauge::FieldOrder<float, 64, 2, QUDA_QDP_GAUGE_ORDER, true, int>, quda::gauge::FieldOrder<float, 64, 2, QUDA_QDP_GAUGE_ORDER>, quda::colorspinor::FieldOrderCB<float, 2, 32, 32, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER>, quda::colorspinor::FieldOrderCB<float, 4, 32, 32, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER>, quda::colorspinor::FieldOrderCB<float, 2, 32, 32, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER>, quda::gauge::FieldOrder<float, 64, 2, QUDA_QDP_GAUGE_ORDER>>>>' requested here
        return f<true>()(args...);
               ^
/path/to/quda/lib/../include/kernels/coarse_op_kernel.cuh:1645:7: note: in instantiation of function template specialization 'quda::storeCoarseSharedAtomic<quda::MatrixTile<quda::complex<float>, 2, 2, false>[4], quda::compute_vuv<quda::CalculateYArg<true, float, float, 2, 2, 32, 32, quda::gauge::FieldOrder<float, 64, 2, QUDA_QDP_GAUGE_ORDER>, quda::gauge::FieldOrder<float, 64, 2, QUDA_QDP_GAUGE_ORDER, true, int>, quda::gauge::FieldOrder<float, 64, 2, QUDA_QDP_GAUGE_ORDER>, quda::colorspinor::FieldOrderCB<float, 2, 32, 32, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER>, quda::colorspinor::FieldOrderCB<float, 4, 32, 32, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER>, quda::colorspinor::FieldOrderCB<float, 2, 32, 32, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER>, quda::gauge::FieldOrder<float, 64, 2, QUDA_QDP_GAUGE_ORDER>>>>' requested here
      storeCoarseSharedAtomic(vuv, isDiagonal, coarse_x_cb, coarse_parity, i0, j0, parity, ftor);
      ^
/path/to/quda/lib/../include/kernels/coarse_op_kernel.cuh:1734:7: note: in instantiation of function template specialization 'quda::computeVUV<1, quda::compute_vuv<quda::CalculateYArg<true, float, float, 2, 2, 32, 32, quda::gauge::FieldOrder<float, 64, 2, QUDA_QDP_GAUGE_ORDER>, quda::gauge::FieldOrder<float, 64, 2, QUDA_QDP_GAUGE_ORDER, true, int>, quda::gauge::FieldOrder<float, 64, 2, QUDA_QDP_GAUGE_ORDER>, quda::colorspinor::FieldOrderCB<float, 2, 32, 32, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER>, quda::colorspinor::FieldOrderCB<float, 4, 32, 32, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER>, quda::colorspinor::FieldOrderCB<float, 2, 32, 32, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER>, quda::gauge::FieldOrder<float, 64, 2, QUDA_QDP_GAUGE_ORDER>>>>' requested here
      computeVUV<nFace>(*this, parity, x_cb, c_row * arg.vuvTile.M, c_col * arg.vuvTile.N, parity_coarse, x_coarse_cb);
      ^
/path/to/quda/lib/../include/targets/generic/kernel_host.h:28:69: note: in instantiation of member function 'quda::compute_vuv<quda::CalculateYArg<true, float, float, 2, 2, 32, 32, quda::gauge::FieldOrder<float, 64, 2, QUDA_QDP_GAUGE_ORDER>, quda::gauge::FieldOrder<float, 64, 2, QUDA_QDP_GAUGE_ORDER, true, int>, quda::gauge::FieldOrder<float, 64, 2, QUDA_QDP_GAUGE_ORDER>, quda::colorspinor::FieldOrderCB<float, 2, 32, 32, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER>, quda::colorspinor::FieldOrderCB<float, 4, 32, 32, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER>, quda::colorspinor::FieldOrderCB<float, 2, 32, 32, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER>, quda::gauge::FieldOrder<float, 64, 2, QUDA_QDP_GAUGE_ORDER>>>::operator()' requested here
        for (int k = 0; k < static_cast<int>(arg.threads.z); k++) { f(i, j, k); }
                                                                    ^
/path/to/quda/lib/../include/tunable_nd.h:458:7: note: (skipping 2 contexts in backtrace; use -ftemplate-backtrace-limit=0 to see all)
      Kernel3D_host<Functor, Arg>(arg);
      ^
/path/to/quda/lib/coarse_op.cuh:580:7: note: in instantiation of function template specialization 'quda::CalculateY<false, QUDA_CPU_FIELD_LOCATION, quda::CalculateYArg<true, float, float, 2, 2, 32, 32, quda::gauge::FieldOrder<float, 64, 2, QUDA_QDP_GAUGE_ORDER>, quda::gauge::FieldOrder<float, 64, 2, QUDA_QDP_GAUGE_ORDER, true, int>, quda::gauge::FieldOrder<float, 64, 2, QUDA_QDP_GAUGE_ORDER>, quda::colorspinor::FieldOrderCB<float, 2, 32, 32, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER>, quda::colorspinor::FieldOrderCB<float, 4, 32, 32, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER>, quda::colorspinor::FieldOrderCB<float, 2, 32, 32, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER>, quda::gauge::FieldOrder<float, 64, 2, QUDA_QDP_GAUGE_ORDER>>>::Launch<QUDA_CPU_FIELD_LOCATION>' requested here
      Launch<location_template>(arg, tp, type, stream);
      ^
/path/to/quda/lib/coarse_op.cuh:1073:11: note: in instantiation of member function 'quda::CalculateY<false, QUDA_CPU_FIELD_LOCATION, quda::CalculateYArg<true, float, float, 2, 2, 32, 32, quda::gauge::FieldOrder<float, 64, 2, QUDA_QDP_GAUGE_ORDER>, quda::gauge::FieldOrder<float, 64, 2, QUDA_QDP_GAUGE_ORDER, true, int>, quda::gauge::FieldOrder<float, 64, 2, QUDA_QDP_GAUGE_ORDER>, quda::colorspinor::FieldOrderCB<float, 2, 32, 32, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER>, quda::colorspinor::FieldOrderCB<float, 4, 32, 32, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER>, quda::colorspinor::FieldOrderCB<float, 2, 32, 32, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER>, quda::gauge::FieldOrder<float, 64, 2, QUDA_QDP_GAUGE_ORDER>>>::apply' requested here
        y.apply(device::get_default_stream());
          ^
/path/to/quda/lib/coarsecoarse_op.hpp:61:7: note: in instantiation of function template specialization 'quda::calculateY<false, QUDA_CPU_FIELD_LOCATION, true, float, float, 2, 32, 2, 32, quda::colorspinor::FieldOrderCB<float, 2, 32, 32, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER>, quda::colorspinor::FieldOrderCB<float, 4, 32, 32, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER>, quda::colorspinor::FieldOrderCB<float, 2, 32, 32, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER>, quda::gauge::FieldOrder<float, 64, 2, QUDA_QDP_GAUGE_ORDER>, quda::gauge::FieldOrder<float, 64, 2, QUDA_QDP_GAUGE_ORDER, true, int>, quda::gauge::FieldOrder<float, 64, 2, QUDA_QDP_GAUGE_ORDER>, quda::gauge::FieldOrder<float, 64, 2, QUDA_QDP_GAUGE_ORDER>>' requested here
      calculateY<use_mma, QUDA_CPU_FIELD_LOCATION, true, Float, vFloat, fineSpin, fineColor, coarseSpin, coarseColor>(
      ^
/path/to/quda/lib/coarsecoarse_op.hpp:194:13: note: in instantiation of function template specialization 'quda::calculateYcoarse<false, float, float, 32, 32>' requested here
            calculateYcoarse<use_mma, float, float, fineColor, coarseColor>(Y, X, Yatomic, Xatomic, uv, T, g, clover,
            ^
/path/to/quda/build/lib/coarsecoarse_op_32_32.cu:45:5: note: in instantiation of function template specialization 'quda::calculateYcoarse<false, 32, 32>' requested here
    calculateYcoarse<use_mma, fineColor, coarseColor>(Y, X, *Yatomic, *Xatomic, *uv, T, gauge, clover, cloverInv, kappa, mass, mu, mu_factor, dirac, matpc,
    ^
/path/to/quda/include/targets/hip/shared_memory_helper.h:84:15: note: candidate constructor template not viable: requires 1 argument, but 2 were provided
    constexpr SharedMemory(const KernelOps<U...> &) : data(cache(get_offset(target::block_dim())))
              ^
/path/to/quda/include/targets/hip/shared_memory_helper.h:23:62: note: candidate constructor (the implicit copy constructor) not viable: requires 1 argument, but 2 were provided
  template <typename T, typename S, typename O = void> class SharedMemory
                                                             ^
/path/to/quda/include/targets/hip/shared_memory_helper.h:23:62: note: candidate constructor (the implicit move constructor) not viable: requires 1 argument, but 2 were provided
/path/to/quda/include/targets/hip/shared_memory_helper.h:78:15: note: candidate constructor not viable: requires 0 arguments, but 2 were provided
    constexpr SharedMemory() : data(cache(get_offset(target::block_dim()))) { }
              ^

One more thing

Errors like the following may also occur if one uses a locally installed Eigen3 rather than allowing QUDA to download its own copy.

/path/to/quda/lib/interface_quda.cpp:4811:15: error: reference to 'array' is ambiguous
  std::vector<array<double, 2>> ferm_epsilon(nvector);
              ^
/path/to/quda/lib/../include/array.h:12:39: note: candidate found by name lookup is 'quda::array'
  template <typename T, int n> struct array {
                                      ^
/path/to/eigen3/include/eigen3/Eigen/src/Core/util/EmulateArray.h:18:7: note: candidate found by name lookup is 'Eigen::array'
class array {
      ^

If you encounter these errors, the following changes can help:

diff --git a/lib/interface_quda.cpp b/lib/interface_quda.cpp
index 77fec65bf..0e78fd3b2 100644
--- a/lib/interface_quda.cpp
+++ b/lib/interface_quda.cpp
@@ -4808,7 +4808,7 @@ void computeCloverForceQuda(void *h_mom, double dt, void **h_x, void **, double

  std::vector<ColorSpinorField> x(nvector), x0(nvector);
  std::vector<double> force_coeff(nvector);
-  std::vector<array<double, 2>> ferm_epsilon(nvector);
+  std::vector<quda::array<double, 2>> ferm_epsilon(nvector);

  QudaParity parity = inv_param->matpc_type == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC ? QUDA_EVEN_PARITY : QUDA_ODD_PARITY;

@@ -4883,7 +4883,7 @@ void computeTMCloverForceQuda(void *h_mom, void **h_x, void **h_x0, double *coef

  std::vector<ColorSpinorField> x(nvector), x0(nvector);
  std::vector<double> force_coeff(nvector);
-  std::vector<array<double, 2>> ferm_epsilon(nvector);
+  std::vector<quda::array<double, 2>> ferm_epsilon(nvector);

  QudaParity parity = inv_param->matpc_type == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC ? QUDA_EVEN_PARITY : QUDA_ODD_PARITY;

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions