|  | 
|  | 1 | +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-device-code-split=per_kernel %s -o %t.out | 
|  | 2 | +// RUN: %HOST_RUN_PLACEHOLDER %t.out | 
|  | 3 | +// RUN: %CPU_RUN_PLACEHOLDER %t.out | 
|  | 4 | +// RUN: %GPU_RUN_PLACEHOLDER %t.out | 
|  | 5 | +// RUN: %ACC_RUN_PLACEHOLDER %t.out | 
|  | 6 | + | 
|  | 7 | +// OpenCL CPU driver does not support cl_khr_fp16 extension for this reason this | 
|  | 8 | +// test is compiled with the -fsycl-device-code-split flag | 
|  | 9 | + | 
|  | 10 | +#include <CL/sycl.hpp> | 
|  | 11 | +#include <cassert> | 
|  | 12 | + | 
|  | 13 | +template <typename T> void assert_out_of_bound(T val, T lower, T upper) { | 
|  | 14 | +  assert(sycl::all(lower < val && val < upper)); | 
|  | 15 | +} | 
|  | 16 | + | 
|  | 17 | +template <> | 
|  | 18 | +void assert_out_of_bound<float>(float val, float lower, float upper) { | 
|  | 19 | +  assert(lower < val && val < upper); | 
|  | 20 | +} | 
|  | 21 | + | 
|  | 22 | +template <> | 
|  | 23 | +void assert_out_of_bound<sycl::half>(sycl::half val, sycl::half lower, | 
|  | 24 | +                                     sycl::half upper) { | 
|  | 25 | +  assert(lower < val && val < upper); | 
|  | 26 | +} | 
|  | 27 | + | 
|  | 28 | +template <typename T> | 
|  | 29 | +void native_tanh_tester(sycl::queue q, T val, T up, T lo) { | 
|  | 30 | +  T r = val; | 
|  | 31 | + | 
|  | 32 | +#ifdef SYCL_EXT_ONEAPI_NATIVE_MATH | 
|  | 33 | +  { | 
|  | 34 | +    sycl::buffer<T, 1> BufR(&r, sycl::range<1>(1)); | 
|  | 35 | +    q.submit([&](sycl::handler &cgh) { | 
|  | 36 | +      auto AccR = BufR.template get_access<sycl::access::mode::read_write>(cgh); | 
|  | 37 | +      cgh.single_task([=]() { | 
|  | 38 | +        AccR[0] = sycl::ext::oneapi::experimental::native::tanh(AccR[0]); | 
|  | 39 | +      }); | 
|  | 40 | +    }); | 
|  | 41 | +  } | 
|  | 42 | + | 
|  | 43 | +  assert_out_of_bound(r, up, lo); | 
|  | 44 | +#else | 
|  | 45 | +  assert(!"SYCL_EXT_ONEAPI_NATIVE_MATH not supported"); | 
|  | 46 | +#endif | 
|  | 47 | +} | 
|  | 48 | + | 
|  | 49 | +template <typename T> | 
|  | 50 | +void native_exp2_tester(sycl::queue q, T val, T up, T lo) { | 
|  | 51 | +  T r = val; | 
|  | 52 | + | 
|  | 53 | +#ifdef SYCL_EXT_ONEAPI_NATIVE_MATH | 
|  | 54 | +  { | 
|  | 55 | +    sycl::buffer<T, 1> BufR(&r, sycl::range<1>(1)); | 
|  | 56 | +    q.submit([&](sycl::handler &cgh) { | 
|  | 57 | +      auto AccR = BufR.template get_access<sycl::access::mode::read_write>(cgh); | 
|  | 58 | +      cgh.single_task([=]() { | 
|  | 59 | +        AccR[0] = sycl::ext::oneapi::experimental::native::exp2(AccR[0]); | 
|  | 60 | +      }); | 
|  | 61 | +    }); | 
|  | 62 | +  } | 
|  | 63 | + | 
|  | 64 | +  assert_out_of_bound(r, up, lo); | 
|  | 65 | +#else | 
|  | 66 | +  assert(!"SYCL_EXT_ONEAPI_NATIVE_MATH not supported"); | 
|  | 67 | +#endif | 
|  | 68 | +} | 
|  | 69 | + | 
|  | 70 | +int main() { | 
|  | 71 | + | 
|  | 72 | +  sycl::queue q; | 
|  | 73 | + | 
|  | 74 | +  const double tv[16] = {-2.0, -1.5, -1.0, 0.0, 2.0,  1.5, 1.0,   0.0, | 
|  | 75 | +                         -1.7, 1.7,  -1.2, 1.2, -3.0, 3.0, -10.0, 10.0}; | 
|  | 76 | +  const double tl[16] = {-0.97, -0.91, -0.77, -0.1, 0.95, 0.89, 0.75,  -0.1, | 
|  | 77 | +                         -0.94, 0.92,  -0.84, 0.82, -1.0, 0.98, -1.10, 0.98}; | 
|  | 78 | +  const double tu[16] = {-0.95, -0.89, -0.75, 0.1,  0.97,  0.91, 0.77,  0.1, | 
|  | 79 | +                         -0.92, 0.94,  -0.82, 0.84, -0.98, 1.00, -0.98, 1.10}; | 
|  | 80 | + | 
|  | 81 | +  native_tanh_tester<float>(q, tv[0], tl[0], tu[0]); | 
|  | 82 | +  native_tanh_tester<sycl::float2>(q, {tv[0], tv[1]}, {tl[0], tl[1]}, | 
|  | 83 | +                                   {tu[0], tu[1]}); | 
|  | 84 | +  native_tanh_tester<sycl::float3>( | 
|  | 85 | +      q, {tv[0], tv[1], tv[2]}, {tl[0], tl[1], tl[2]}, {tu[0], tu[1], tu[2]}); | 
|  | 86 | +  native_tanh_tester<sycl::float4>(q, {tv[0], tv[1], tv[2], tv[3]}, | 
|  | 87 | +                                   {tl[0], tl[1], tl[2], tl[3]}, | 
|  | 88 | +                                   {tu[0], tu[1], tu[2], tu[3]}); | 
|  | 89 | +  native_tanh_tester<sycl::float8>( | 
|  | 90 | +      q, {tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7]}, | 
|  | 91 | +      {tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7]}, | 
|  | 92 | +      {tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7]}); | 
|  | 93 | +  native_tanh_tester<sycl::float16>( | 
|  | 94 | +      q, | 
|  | 95 | +      {tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7], tv[8], tv[9], | 
|  | 96 | +       tv[10], tv[11], tv[12], tv[13], tv[14], tv[15]}, | 
|  | 97 | +      {tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7], tl[8], tl[9], | 
|  | 98 | +       tl[10], tl[11], tl[12], tl[13], tl[14], tl[15]}, | 
|  | 99 | +      {tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7], tu[8], tu[9], | 
|  | 100 | +       tu[10], tu[11], tu[12], tu[13], tu[14], tu[15]}); | 
|  | 101 | + | 
|  | 102 | +  if (q.get_device().has(sycl::aspect::fp16)) { | 
|  | 103 | + | 
|  | 104 | +    native_tanh_tester<sycl::half>(q, tv[0], tl[0], tu[0]); | 
|  | 105 | +    native_tanh_tester<sycl::half2>(q, {tv[0], tv[1]}, {tl[0], tl[1]}, | 
|  | 106 | +                                    {tu[0], tu[1]}); | 
|  | 107 | +    native_tanh_tester<sycl::half3>( | 
|  | 108 | +        q, {tv[0], tv[1], tv[2]}, {tl[0], tl[1], tl[2]}, {tu[0], tu[1], tu[2]}); | 
|  | 109 | +    native_tanh_tester<sycl::half4>(q, {tv[0], tv[1], tv[2], tv[3]}, | 
|  | 110 | +                                    {tl[0], tl[1], tl[2], tl[3]}, | 
|  | 111 | +                                    {tu[0], tu[1], tu[2], tu[3]}); | 
|  | 112 | +    native_tanh_tester<sycl::half8>( | 
|  | 113 | +        q, {tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7]}, | 
|  | 114 | +        {tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7]}, | 
|  | 115 | +        {tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7]}); | 
|  | 116 | +    native_tanh_tester<sycl::half16>( | 
|  | 117 | +        q, | 
|  | 118 | +        {tv[0], tv[1], tv[2], tv[3], tv[4], tv[5], tv[6], tv[7], tv[8], tv[9], | 
|  | 119 | +         tv[10], tv[11], tv[12], tv[13], tv[14], tv[15]}, | 
|  | 120 | +        {tl[0], tl[1], tl[2], tl[3], tl[4], tl[5], tl[6], tl[7], tl[8], tl[9], | 
|  | 121 | +         tl[10], tl[11], tl[12], tl[13], tl[14], tl[15]}, | 
|  | 122 | +        {tu[0], tu[1], tu[2], tu[3], tu[4], tu[5], tu[6], tu[7], tu[8], tu[9], | 
|  | 123 | +         tu[10], tu[11], tu[12], tu[13], tu[14], tu[15]}); | 
|  | 124 | + | 
|  | 125 | +    const double ev[16] = {-2.0, -1.5, -1.0, 0.0, 2.0, 1.5, 1.0, 0.0, | 
|  | 126 | +                           -2.0, -1.5, -1.0, 0.0, 2.0, 1.5, 1.0, 0.0}; | 
|  | 127 | +    const double el[16] = {0.1, 0.34, 0.4, -0.9, 3.9, 2.7, 1.9, -0.9, | 
|  | 128 | +                           0.1, 0.34, 0.4, -0.9, 3.9, 2.7, 1.9, -0.9}; | 
|  | 129 | +    const double eu[16] = {0.3, 0.36, 0.6, 1.1, 4.1, 2.9, 2.1, 1.1, | 
|  | 130 | +                           0.3, 0.36, 0.6, 1.1, 4.1, 2.9, 2.1, 1.1}; | 
|  | 131 | + | 
|  | 132 | +    native_exp2_tester<sycl::half>(q, ev[0], el[0], eu[0]); | 
|  | 133 | +    native_exp2_tester<sycl::half2>(q, {ev[0], ev[1]}, {el[0], el[1]}, | 
|  | 134 | +                                    {eu[0], eu[1]}); | 
|  | 135 | +    native_exp2_tester<sycl::half3>( | 
|  | 136 | +        q, {ev[0], ev[1], ev[2]}, {el[0], el[1], el[2]}, {eu[0], eu[1], eu[2]}); | 
|  | 137 | +    native_exp2_tester<sycl::half4>(q, {ev[0], ev[1], ev[2], ev[3]}, | 
|  | 138 | +                                    {el[0], el[1], el[2], el[3]}, | 
|  | 139 | +                                    {eu[0], eu[1], eu[2], eu[3]}); | 
|  | 140 | +    native_exp2_tester<sycl::half8>( | 
|  | 141 | +        q, {ev[0], ev[1], ev[2], ev[3], ev[4], ev[5], ev[6], ev[7]}, | 
|  | 142 | +        {el[0], el[1], el[2], el[3], el[4], el[5], el[6], el[7]}, | 
|  | 143 | +        {eu[0], eu[1], eu[2], eu[3], eu[4], eu[5], eu[6], eu[7]}); | 
|  | 144 | +    native_exp2_tester<sycl::half16>( | 
|  | 145 | +        q, | 
|  | 146 | +        {ev[0], ev[1], ev[2], ev[3], ev[4], ev[5], ev[6], ev[7], ev[8], ev[9], | 
|  | 147 | +         ev[10], ev[11], ev[12], ev[13], ev[14], ev[15]}, | 
|  | 148 | +        {el[0], el[1], el[2], el[3], el[4], el[5], el[6], el[7], el[8], el[9], | 
|  | 149 | +         el[10], el[11], el[12], el[13], el[14], el[15]}, | 
|  | 150 | +        {eu[0], eu[1], eu[2], eu[3], eu[4], eu[5], eu[6], eu[7], eu[8], eu[9], | 
|  | 151 | +         eu[10], eu[11], eu[12], eu[13], eu[14], eu[15]}); | 
|  | 152 | +  } | 
|  | 153 | + | 
|  | 154 | +  return 0; | 
|  | 155 | +} | 
0 commit comments