@@ -5,24 +5,24 @@ gpu.module @test_round_robin_assignment {
5
5
// CHECK-SAME: %[[ARG_0:.*]]: memref<256x128xf32>
6
6
gpu.func @create_nd_tdesc (%src: memref <256 x128 xf32 >) {
7
7
// CHECK-COUNT-4: xegpu.create_nd_tdesc %[[ARG_0]][%{{.*}}, %{{.*}}] : memref<256x128xf32>
8
- // CHECK-SAME: -> !xegpu.tensor_desc<16x16xf32, #xegpu.layout<lane_layout = [8, 4 ], lane_data = [1, 1]>>
8
+ // CHECK-SAME: -> !xegpu.tensor_desc<16x16xf32, #xegpu.layout<lane_layout = [1, 16 ], lane_data = [1, 1]>>
9
9
// CHECK-NOT: xegpu.create_nd_tdesc
10
10
%tdesc = xegpu.create_nd_tdesc %src [0 , 0 ] : memref <256 x128 xf32 >
11
- -> !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [16 , 16 ], lane_layout = [8 , 4 ], lane_data = [1 , 1 ]>>
11
+ -> !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [16 , 16 ], lane_layout = [1 , 16 ], lane_data = [1 , 1 ]>>
12
12
gpu.return
13
13
}
14
14
15
15
// CHECK-LABEL: load_nd_tdesc
16
16
// CHECK-SAME: %[[ARG_0:.*]]: memref<256x128xf32>
17
17
gpu.func @load_nd_tdesc (%src: memref <256 x128 xf32 >) {
18
18
%tdesc = xegpu.create_nd_tdesc %src [0 , 0 ] : memref <256 x128 xf32 >
19
- -> !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [16 , 16 ], lane_layout = [8 , 4 ], lane_data = [1 , 1 ]>>
19
+ -> !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [16 , 16 ], lane_layout = [1 , 16 ], lane_data = [1 , 1 ]>>
20
20
// CHECK-COUNT-4: xegpu.load_nd %{{.*}}
21
- // CHECK-SAME-COUNT-4: : !xegpu.tensor_desc<2x2xf32, #xegpu.layout<lane_layout = [8, 4 ], lane_data = [1, 1]>>
21
+ // CHECK-SAME-COUNT-4: : !xegpu.tensor_desc<16x16xf32, #xegpu.layout<lane_layout = [1, 16 ], lane_data = [1, 1]>>
22
22
// CHECK-SAME-COUNT-4: -> vector<16x16xf32>
23
23
// CHECK-NOT: xegpu.load_nd
24
24
%load = xegpu.load_nd %tdesc
25
- : !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [16 , 16 ], lane_layout = [8 , 4 ], lane_data = [1 , 1 ]>>
25
+ : !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [16 , 16 ], lane_layout = [1 , 16 ], lane_data = [1 , 1 ]>>
26
26
-> vector <256 x128 xf32 >
27
27
gpu.return
28
28
}
@@ -31,36 +31,36 @@ gpu.module @test_round_robin_assignment {
31
31
// CHECK-SAME: %[[ARG_0:.*]]: memref<256x128xf32>
32
32
gpu.func @store_nd (%src: memref <256 x128 xf32 >) {
33
33
%tdesc = xegpu.create_nd_tdesc %src [0 , 0 ] : memref <256 x128 xf32 >
34
- -> !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [16 , 16 ], lane_layout = [8 , 4 ], lane_data = [1 , 1 ]>>
34
+ -> !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [16 , 16 ], lane_layout = [1 , 16 ], lane_data = [1 , 1 ]>>
35
35
// CHECK-COUNT-4: xegpu.store_nd %{{.*}}, %{{.*}}
36
- // CHECK-SAME-COUNT-4: : vector<16x16xf32>, !xegpu.tensor_desc<16x16xf32, #xegpu.layout<lane_layout = [8, 4 ], lane_data = [1, 1]>>
36
+ // CHECK-SAME-COUNT-4: : vector<16x16xf32>, !xegpu.tensor_desc<16x16xf32, #xegpu.layout<lane_layout = [1, 16 ], lane_data = [1, 1]>>
37
37
// CHECK-NOT: xegpu.store_nd
38
38
%load = xegpu.load_nd %tdesc
39
- : !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [16 , 16 ], lane_layout = [8 , 4 ], lane_data = [1 , 1 ]>>
39
+ : !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [16 , 16 ], lane_layout = [1 , 16 ], lane_data = [1 , 1 ]>>
40
40
-> vector <256 x128 xf32 >
41
41
xegpu.store_nd %load , %tdesc
42
- : vector <256 x128 xf32 >, !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [16 , 16 ], lane_layout = [8 , 4 ], lane_data = [1 , 1 ]>>
42
+ : vector <256 x128 xf32 >, !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [16 , 16 ], lane_layout = [1 , 16 ], lane_data = [1 , 1 ]>>
43
43
gpu.return
44
44
}
45
45
46
46
// CHECK-LABEL: update_nd
47
47
// CHECK-SAME: %[[ARG_0:.*]]: memref<256x128xf32>
48
48
gpu.func @update_nd (%src: memref <256 x128 xf32 >){
49
49
%tdesc = xegpu.create_nd_tdesc %src [0 , 0 ] : memref <256 x128 xf32 >
50
- -> !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [16 , 16 ], lane_layout = [8 , 4 ], lane_data = [1 , 1 ]>>
50
+ -> !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [16 , 16 ], lane_layout = [1 , 16 ], lane_data = [1 , 1 ]>>
51
51
// CHECK-COUNT-4: xegpu.update_nd_offset %{{.*}}, [0, 16]
52
- // CHECK-SAME-COUNT-4: : !xegpu.tensor_desc<16x16xf32, #xegpu.layout<lane_layout = [8, 4 ], lane_data = [1, 1]>>>
52
+ // CHECK-SAME-COUNT-4: : !xegpu.tensor_desc<16x16xf32, #xegpu.layout<lane_layout = [1, 16 ], lane_data = [1, 1]>>
53
53
// CHECK-NOT: xegpu.update_nd_offset
54
54
%update = xegpu.update_nd_offset %tdesc , [0 , 16 ]
55
- : !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [16 , 16 ], lane_layout = [8 , 4 ], lane_data = [1 , 1 ]>>
55
+ : !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [16 , 16 ], lane_layout = [1 , 16 ], lane_data = [1 , 1 ]>>
56
56
gpu.return
57
57
}
58
58
59
59
// CHECK-LABEL: dpas
60
60
// CHECK-SAME: (%[[ARG_0:.*]]: memref<256x128xf16>, %[[ARG_1:.*]]: memref<128x256xf16>)
61
61
gpu.func @dpas (%a: memref <256 x128 xf16 >, %b: memref <128 x256 xf16 >) {
62
62
// CHECK-COUNT-4: xegpu.create_nd_tdesc %[[ARG_0]][%{{.*}}, %{{.*}}] : memref<256x128xf16>
63
- // CHECK-SAME-COUNT-4: -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [8, 4 ], lane_data = [1, 1]>>
63
+ // CHECK-SAME-COUNT-4: -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16 ], lane_data = [1, 1]>>
64
64
// CHECK-NOT: xegpu.create_nd_tdesc
65
65
// CHECK-COUNT-4: xegpu.create_nd_tdesc %[[ARG_1]][%{{.*}}, %{{.*}}] : memref<128x256xf16>
66
66
// CHECK-SAME-COUNT-4: -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [4, 8], lane_data = [1, 1]>>
@@ -89,12 +89,12 @@ gpu.module @test_round_robin_assignment {
89
89
// CHECK-SAME: %[[ARG_0:.*]]: memref<256x128xf32>
90
90
gpu.func @prefetch_nd_tdesc (%src: memref <256 x128 xf32 >) {
91
91
// CHECK-COUNT-4: xegpu.prefetch_nd %{{.*}}
92
- // CHECK-SAME-COUNT-4: !xegpu.tensor_desc<256x128xf32, #xegpu.layout<lane_layout = [8, 4 ], lane_data = [1, 1]>>
92
+ // CHECK-SAME-COUNT-4: !xegpu.tensor_desc<256x128xf32, #xegpu.layout<lane_layout = [1, 16 ], lane_data = [1, 1]>>
93
93
// CHECK-NOT: xegpu.prefetch_nd
94
94
%tdesc = xegpu.create_nd_tdesc %src [0 , 0 ] : memref <256 x128 xf32 >
95
- -> !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [16 , 16 ], lane_layout = [8 , 4 ], lane_data = [1 , 1 ]>>
95
+ -> !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [16 , 16 ], lane_layout = [1 , 16 ], lane_data = [1 , 1 ]>>
96
96
xegpu.prefetch_nd %tdesc
97
- : !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [16 , 16 ], lane_layout = [8 , 4 ], lane_data = [1 , 1 ]>>
97
+ : !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [16 , 16 ], lane_layout = [1 , 16 ], lane_data = [1 , 1 ]>>
98
98
gpu.return
99
99
}
100
100
0 commit comments