Skip to content

Commit f36f51a

Browse files
committed
[Storage] Add --no-attribute-reordering CLI option.
1 parent b09ddc7 commit f36f51a

File tree

1 file changed

+56
-20
lines changed

1 file changed

+56
-20
lines changed

src/storage/DataLayoutFactory.cpp

Lines changed: 56 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -10,17 +10,53 @@ using namespace m;
1010
using namespace m::storage;
1111

1212

13+
namespace m {
14+
15+
namespace options {
16+
17+
/** Whether to *skip* reordering attributes when creating data layouts.  Set to `true` by `--no-attribute-reordering`. */
18+
bool no_attribute_reordering = false;
19+
20+
}
21+
22+
}
23+
24+
namespace {
25+
26+
__attribute__((constructor(201)))
27+
static void add_storage_args()
28+
{
29+
Catalog &C = Catalog::Get();
30+
31+
/*----- Command-line arguments -----*/
32+
C.arg_parser().add<bool>(
33+
/* group= */ "Storage",
34+
/* short= */ nullptr,
35+
/* long= */ "--no-attribute-reordering",
36+
/* description= */ "do not reorder attributes when creating data layouts, e.g. to minimize padding",
37+
/* callback= */ [](bool){ options::no_attribute_reordering = true; }
38+
);
39+
}
40+
41+
}
42+
43+
44+
/** Computes the order for attributes of types \p types and returns this permutation as an array of indices.  Attributes
45+
* are reordered by their alignment requirement to minimize padding unless the CLI option `--no-attribute-reordering`
46+
* is set. */
1347
std::unique_ptr<std::size_t[]>
14-
sorted_by_alignment(const std::vector<const Type*> &types)
48+
compute_attribute_order(const std::vector<const Type*> &types)
1549
{
1650
/*----- Collect all indices. -----*/
1751
auto indices = std::make_unique<std::size_t[]>(types.size());
1852
std::iota(indices.get(), indices.get() + types.size(), 0);
1953

20-
/*----- Sort indices by alignment. -----*/
21-
std::stable_sort(indices.get(), indices.get() + types.size(), [&](std::size_t left, std::size_t right) {
22-
return types[left]->alignment() > types[right]->alignment();
23-
});
54+
if (not options::no_attribute_reordering) {
55+
/*----- Sort indices by alignment. -----*/
56+
std::stable_sort(indices.get(), indices.get() + types.size(), [&](std::size_t left, std::size_t right) {
57+
return types[left]->alignment() > types[right]->alignment();
58+
});
59+
}
2460

2561
return indices;
2662
}
@@ -29,18 +65,18 @@ DataLayout RowLayoutFactory::make(std::vector<const Type*> types, std::size_t nu
2965
{
3066
M_insist(not types.empty(), "cannot make layout for zero types");
3167

32-
auto indices = sorted_by_alignment(types);
68+
auto indices = compute_attribute_order(types);
3369
uint64_t offsets[types.size()]; // in bits
3470

3571
/*----- Compute offsets. -----*/
3672
uint64_t offset_in_bits = 0;
3773
uint64_t alignment_in_bits = 8;
3874

3975
for (std::size_t idx = 0; idx != types.size(); ++idx) {
40-
const auto sorted_idx = indices[idx];
41-
offsets[sorted_idx] = offset_in_bits;
42-
offset_in_bits += types[sorted_idx]->size();
43-
alignment_in_bits = std::max(alignment_in_bits, types[sorted_idx]->alignment());
76+
const auto mapped_idx = indices[idx];
77+
offsets[mapped_idx] = offset_in_bits;
78+
offset_in_bits += types[mapped_idx]->size();
79+
alignment_in_bits = std::max(alignment_in_bits, types[mapped_idx]->alignment());
4480
}
4581

4682
const uint64_t null_bitmap_offset = offset_in_bits;
@@ -70,7 +106,7 @@ DataLayout PAXLayoutFactory::make(std::vector<const Type*> types, std::size_t nu
70106
{
71107
M_insist(not types.empty(), "cannot make layout for zero types");
72108

73-
auto indices = sorted_by_alignment(types);
109+
auto indices = compute_attribute_order(types);
74110
uint64_t offsets[types.size() + 1]; // in bits
75111

76112
/*----- Compute attribute offsets in a virtual row. -----*/
@@ -79,11 +115,11 @@ DataLayout PAXLayoutFactory::make(std::vector<const Type*> types, std::size_t nu
79115
std::size_t num_not_byte_aligned = 0;
80116

81117
for (std::size_t idx = 0; idx != types.size(); ++idx) {
82-
const auto sorted_idx = indices[idx];
83-
offsets[sorted_idx] = offset_in_bits;
84-
offset_in_bits += types[sorted_idx]->size();
85-
alignment_in_bits = std::max(alignment_in_bits, types[sorted_idx]->alignment());
86-
if (types[sorted_idx]->size() % 8)
118+
const auto mapped_idx = indices[idx];
119+
offsets[mapped_idx] = offset_in_bits;
120+
offset_in_bits += types[mapped_idx]->size();
121+
alignment_in_bits = std::max(alignment_in_bits, types[mapped_idx]->alignment());
122+
if (types[mapped_idx]->size() % 8)
87123
++num_not_byte_aligned;
88124
}
89125

@@ -109,10 +145,10 @@ DataLayout PAXLayoutFactory::make(std::vector<const Type*> types, std::size_t nu
109145
/*----- Compute column offsets. -----*/
110146
uint64_t running_padding = 0;
111147
for (std::size_t idx = 0; idx != types.size(); ++idx) {
112-
const auto sorted_idx = indices[idx];
113-
offsets[sorted_idx] = offsets[sorted_idx] * num_rows_per_block + running_padding;
114-
M_insist(offsets[sorted_idx] % 8 == 0, "attribute column must be byte aligned");
115-
if (uint64_t bit_offset = (types[sorted_idx]->size() * num_rows_per_block) % 8; bit_offset)
148+
const auto mapped_idx = indices[idx];
149+
offsets[mapped_idx] = offsets[mapped_idx] * num_rows_per_block + running_padding;
150+
M_insist(offsets[mapped_idx] % 8 == 0, "attribute column must be byte aligned");
151+
if (uint64_t bit_offset = (types[mapped_idx]->size() * num_rows_per_block) % 8; bit_offset)
116152
running_padding += 8UL - bit_offset;
117153
}
118154
offsets[types.size()] = offsets[types.size()] * num_rows_per_block + running_padding;

0 commit comments

Comments
 (0)