Skip to content

Commit 0ddbd2f

Browse files
[intel] Remove nvidia_gpu usages (#998)
`nvidia_gpu` should not be used in the Intel backend. --------- Signed-off-by: Whitney Tsang <[email protected]>
1 parent b24013a commit 0ddbd2f

File tree

3 files changed

+0
-107
lines changed

3 files changed

+0
-107
lines changed

third_party/intel/backend/compiler.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -120,11 +120,6 @@ def make_ttir(mod, metadata, opt):
120120

121121
@staticmethod
122122
def make_ttgir(mod, metadata, opt, device_arch):
123-
cluster_info = intel.ClusterInfo()
124-
if opt.cluster_dims is not None:
125-
cluster_info.clusterDimX = opt.cluster_dims[0]
126-
cluster_info.clusterDimY = opt.cluster_dims[1]
127-
cluster_info.clusterDimZ = opt.cluster_dims[2]
128123
# TTIR -> TTGIR
129124
pm = ir.pass_manager(mod.context)
130125
pm.enable_debug()
@@ -148,7 +143,6 @@ def make_ttgir(mod, metadata, opt, device_arch):
148143
passes.common.add_symbol_dce(pm)
149144
passes.common.add_canonicalizer(pm)
150145
pm.run(mod)
151-
metadata["cluster_dims"] = (cluster_info.clusterDimX, cluster_info.clusterDimY, cluster_info.clusterDimZ)
152146
return mod
153147

154148
@staticmethod

third_party/intel/lib/TritonIntelGPUToLLVM/ConvertLayoutOpToLLVM.cpp

Lines changed: 0 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -369,89 +369,6 @@ struct ConvertLayoutOpConversion
369369
}
370370
}
371371

372-
LogicalResult
373-
lowerDistToDistWithDistSmem(triton::gpu::ConvertLayoutOp op,
374-
OpAdaptor adaptor,
375-
ConversionPatternRewriter &rewriter) const {
376-
auto loc = op.getLoc();
377-
auto typeConverter = getTypeConverter();
378-
auto srcTy = op.getSrc().getType();
379-
auto dstTy = op.getType();
380-
auto srcLayout = srcTy.getEncoding();
381-
auto dstLayout = dstTy.getEncoding();
382-
auto srcShapePerCTA = getShapePerCTA(srcTy);
383-
auto srcCTAsPerCGA = triton::gpu::getCTAsPerCGA(srcLayout);
384-
auto srcCTAOrder = triton::gpu::getCTAOrder(srcLayout);
385-
unsigned rank = srcShapePerCTA.size();
386-
387-
auto llvmElemTy = getTypeConverter()->convertType(dstTy.getElementType());
388-
auto elemPtrTy = ptr_ty(rewriter.getContext(), 3);
389-
390-
Value smemBase =
391-
LLVM::intel::getSharedMemoryBase(loc, rewriter, op.getOperation());
392-
smemBase = bitcast(smemBase, elemPtrTy);
393-
auto smemShape = convertType<unsigned, int64_t>(srcShapePerCTA);
394-
395-
// Store to local shared memory
396-
{
397-
auto inVals = unpackLLElements(loc, adaptor.getSrc(), rewriter);
398-
auto inIndices = ::intel::emitIndices(loc, rewriter, srcLayout, srcTy,
399-
/*withCTAOffset*/ false);
400-
401-
assert(inIndices.size() == inVals.size() &&
402-
"Unexpected number of indices emitted");
403-
404-
for (unsigned i = 0; i < inIndices.size(); ++i) {
405-
Value offset = linearize(rewriter, loc, inIndices[i], smemShape);
406-
Value ptr = gep(elemPtrTy, llvmElemTy, smemBase, offset);
407-
store(inVals[i], ptr);
408-
}
409-
}
410-
411-
// Cluster barrier
412-
rewriter.create<triton::nvidia_gpu::ClusterArriveOp>(loc, false);
413-
rewriter.create<triton::nvidia_gpu::ClusterWaitOp>(loc);
414-
415-
// Load from remote shared memory
416-
{
417-
SmallVector<Value> srcShapePerCTACache;
418-
for (unsigned i = 0; i < rank; ++i)
419-
srcShapePerCTACache.push_back(i32_val(srcShapePerCTA[i]));
420-
421-
SmallVector<Value> outVals;
422-
auto outIndices = ::intel::emitIndices(loc, rewriter, dstLayout, dstTy,
423-
/*withCTAOffset*/ true);
424-
425-
for (unsigned i = 0; i < outIndices.size(); ++i) {
426-
auto coord = outIndices[i];
427-
assert(coord.size() == rank && "Unexpected rank of index emitted");
428-
429-
SmallVector<Value> multiDimCTAId, localCoord;
430-
for (unsigned d = 0; d < rank; ++d) {
431-
multiDimCTAId.push_back(udiv(coord[d], srcShapePerCTACache[d]));
432-
localCoord.push_back(urem(coord[d], srcShapePerCTACache[d]));
433-
}
434-
435-
Value remoteCTAId =
436-
linearize(rewriter, loc, multiDimCTAId, srcCTAsPerCGA, srcCTAOrder);
437-
Value localOffset = linearize(rewriter, loc, localCoord, smemShape);
438-
439-
Value ptr = gep(elemPtrTy, llvmElemTy, smemBase, localOffset);
440-
outVals.push_back(load_dsmem(ptr, remoteCTAId, llvmElemTy));
441-
}
442-
443-
Value result =
444-
packLLElements(loc, getTypeConverter(), outVals, rewriter, dstTy);
445-
rewriter.replaceOp(op, result);
446-
}
447-
448-
// Cluster barrier
449-
rewriter.create<triton::nvidia_gpu::ClusterArriveOp>(loc, false);
450-
rewriter.create<triton::nvidia_gpu::ClusterWaitOp>(loc);
451-
452-
return success();
453-
}
454-
455372
// blocked/dpas -> blocked/dpas.
456373
// Data padding in shared memory to avoid bank conflict.
457374
LogicalResult
@@ -465,8 +382,6 @@ struct ConvertLayoutOpConversion
465382
Attribute srcLayout = srcTy.getEncoding();
466383
Attribute dstLayout = dstTy.getEncoding();
467384

468-
if (shouldUseDistSmem(srcLayout, dstLayout))
469-
return lowerDistToDistWithDistSmem(op, adaptor, rewriter);
470385
Value smemBase =
471386
LLVM::intel::getSharedMemoryBase(loc, rewriter, op.getOperation());
472387
auto elemPtrTy = ptr_ty(rewriter.getContext(), 3);

third_party/intel/triton_xpu.cc

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -62,22 +62,6 @@ void init_triton_intel(py::module &&m) {
6262
auto passes = m.def_submodule("passes");
6363
init_triton_intel_passes_ttgpuir(passes.def_submodule("ttgpuir"));
6464

65-
// cluster info
66-
py::class_<mlir::triton::nvidia_gpu::ClusterInfo>(m, "ClusterInfo")
67-
.def(py::init<>())
68-
.def_readwrite("clusterDimX",
69-
&mlir::triton::nvidia_gpu::ClusterInfo::clusterDimX)
70-
.def_readwrite("clusterDimY",
71-
&mlir::triton::nvidia_gpu::ClusterInfo::clusterDimY)
72-
.def_readwrite("clusterDimZ",
73-
&mlir::triton::nvidia_gpu::ClusterInfo::clusterDimZ)
74-
.def("__repr__", [](mlir::triton::nvidia_gpu::ClusterInfo &self) {
75-
std::ostringstream oss;
76-
oss << "(" << self.clusterDimX << ", " << self.clusterDimY << ", "
77-
<< self.clusterDimZ << ")";
78-
return oss.str();
79-
});
80-
8165
// load dialects
8266
m.def("load_dialects", [](mlir::MLIRContext &context) {
8367
mlir::DialectRegistry registry;

0 commit comments

Comments
 (0)