Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions shardy/dialect/sdy/ir/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ cc_library(
":op_interface_inc",
":ops_inc",
"//shardy/common:logging",
"//shardy/dialect/sdy/transforms/common:macros",
"@llvm-project//llvm:Support",
"@llvm-project//mlir:BytecodeOpInterface",
"@llvm-project//mlir:FuncDialect",
Expand Down
5 changes: 2 additions & 3 deletions shardy/dialect/sdy/ir/axis_list_ref.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,9 +170,8 @@ class AxisListRef {
// TODO(enver): Move this method to utilities.
// TODO(enver): Instead make this a method of AxisRefAttr, after moving
// AxesWithTail to a general data structure in Shardy.
// TODO(enver): Reuse getPrefixOfInputWithout method on
// shardy/dialect/sdy/transforms/propagation/basic_factor_propagation.cc,
// instead, after an iterater is added.
// TODO(enver): Reuse getPrefixWithoutOverlap method in
// shardy/dialect/sdy/ir/utils.h, after an iterator is added.
std::optional<AxisRefAttr> getPrefixOfInputWithoutOverlap(
AxisRefAttr axisRef) const;

Expand Down
12 changes: 12 additions & 0 deletions shardy/dialect/sdy/ir/utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ limitations under the License.
#include "mlir/Support/LLVM.h"
#include "shardy/dialect/sdy/ir/constants.h"
#include "shardy/dialect/sdy/ir/dialect.h"
#include "shardy/dialect/sdy/transforms/common/macros.h"

namespace mlir {
namespace sdy {
Expand Down Expand Up @@ -631,5 +632,16 @@ bool isUsedBy(Value value, Operation* user) {
});
}

// Computes the largest prefix of `axisRef` that overlaps with none of the axes
// in `otherAxisRefs`. The candidate starts as `axisRef` itself and is trimmed
// against each entry of `otherAxisRefs` in iteration order.
// NOTE(review): SDY_ASSIGN_OR_RETURN_IF_NULLOPT presumably returns
// `std::nullopt` from this function as soon as a trimming step yields no
// prefix; confirm against transforms/common/macros.h.
// TODO(enver): Use it in AxisListRef methods.
std::optional<AxisRefAttr> getPrefixWithoutOverlap(
    AxisRefAttr axisRef, ArrayRef<AxisRefAttr> otherAxisRefs) {
  AxisRefAttr prefix = axisRef;
  for (AxisRefAttr other : otherAxisRefs) {
    SDY_ASSIGN_OR_RETURN_IF_NULLOPT(prefix,
                                    prefix.getPrefixWithoutOverlap(other));
  }
  return prefix;
}

} // namespace sdy
} // namespace mlir
5 changes: 5 additions & 0 deletions shardy/dialect/sdy/ir/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -562,6 +562,11 @@ class AddAxisOrMergeInserter {
// Returns true if `value` is used by `user`.
bool isUsedBy(Value value, Operation* user);

// Returns the largest prefix of `axisRef` that does not overlap with any axes
// in `otherAxisRefs`.
// NOTE(review): the `std::optional` result is presumably `std::nullopt` when
// no such prefix exists; confirm against `AxisRefAttr::getPrefixWithoutOverlap`.
std::optional<AxisRefAttr> getPrefixWithoutOverlap(
AxisRefAttr axisRef, ArrayRef<AxisRefAttr> otherAxisRefs);

} // namespace sdy
} // namespace mlir

Expand Down
16 changes: 6 additions & 10 deletions shardy/dialect/sdy/transforms/export/explicit_reshards_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -671,22 +671,19 @@ int64_t findTensorIndexToPreferOnUnaryOperation(
//
// Guarantees to return a non-empty AxesPerFactor.
AxesPerFactor findCommonAxesOnUnaryOperation(
ArrayRef<TensorShardingAttr> inShardings,
ArrayRef<TensorShardingAttr> outShardings,
const ShardingProjection& shardingProjection,
OpShardingRuleAttr shardingRule, ArrayRef<int64_t> tensorSizes,
const SymbolTable& symbolTable, const Mesh& mesh) {
const Mesh& mesh) {
int64_t tensorIndexToPrefer = findTensorIndexToPreferOnUnaryOperation(
shardingProjection, shardingRule, tensorSizes, mesh);

// Set factor shardings to make sure factors that do not appear in the
// preferred tensor are sharded on the other tensor.
AxesPerFactor factorAxisRefs(shardingRule.getNumFactors());
// TODO(enver): Add and use forEachFactorSharding helper method.
for (const auto& [tensorIndex, tensorFactorSharding] :
llvm::enumerate(llvm::concat<const TensorFactorShardings>(
shardingProjection.getOperands(),
shardingProjection.getResults()))) {
for (const TensorFactorShardings& tensorFactorSharding :
llvm::concat<const TensorFactorShardings>(
shardingProjection.getOperands(), shardingProjection.getResults())) {
for (const auto& [factorIndex, factorSharding] :
tensorFactorSharding.factorIndexToSharding) {
if (!factorSharding.axisRefs.empty()) {
Expand Down Expand Up @@ -764,9 +761,8 @@ AxesPerFactor findCommonAxes(ArrayRef<TensorShardingAttr> inShardings,
if (shardingRule.getNonScalarTensorIndices().size() == 2 &&
shardingRule.getNeedReplicationFactors().empty() &&
!shardingRule.hasDimensionsWithMultipleFactors()) {
return findCommonAxesOnUnaryOperation(inShardings, outShardings,
shardingProjection, shardingRule,
tensorSizes, symbolTable, mesh);
return findCommonAxesOnUnaryOperation(shardingProjection, shardingRule,
tensorSizes, mesh);
}

AxesPerFactor factorCommonAxes = findCommonAxesUsingMajorityVoteHeuristic(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ ArrayRef<AxisRefAttr> getUnreducedAxes(Value value);
SmallVector<int64_t> getTensorSizes(Operation* op);

// Returns reduction axes that are the union of all axes on reduction factors.
// The result axes are not necessarilly canonicalized.
// The result axes are not necessarily canonicalized.
SmallVector<AxisRefAttr> getReductionAxes(const AxesPerFactor& axesPerFactor,
OpShardingRuleAttr shardingRule);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,6 @@ limitations under the License.

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Threading.h"
#include "mlir/IR/Diagnostics.h"
#include "mlir/IR/Value.h"
#include "mlir/Support/LLVM.h"
Expand All @@ -41,23 +39,6 @@ limitations under the License.
namespace mlir {
namespace sdy {

namespace {

// Returns the largest prefix of `axisRef` that does not overlap with any axes
// in `otherAxisRefs`.
// Starts from `axisRef` itself and narrows it against each element of
// `otherAxisRefs` in iteration order.
// TODO(enver): Move to ir/utils and use in AxisListRef methods.
std::optional<AxisRefAttr> getPrefixWithoutOverlap(
AxisRefAttr axisRef, ArrayRef<AxisRefAttr> otherAxisRefs) {
AxisRefAttr result = axisRef;
for (AxisRefAttr otherAxisRef : otherAxisRefs) {
// NOTE(review): the macro presumably returns `std::nullopt` from this
// function when the per-axis call yields no prefix; confirm in macros.h.
SDY_ASSIGN_OR_RETURN_IF_NULLOPT(
result, result.getPrefixWithoutOverlap(otherAxisRef));
}
return result;
}

} // namespace

std::optional<AxisRefAttr>
BasicFactorPropagation::compatiblePrefixNoConflictsAcrossFactors(
AxisRefAttr axisRef, const FactorIndexToSharding& factorIndexToSharding,
Expand Down
Loading