Skip to content

Commit db646f8

Browse files
committed
First draft of f16 library
1 parent c5e9223 commit db646f8

File tree

12 files changed

+131
-27
lines changed

12 files changed

+131
-27
lines changed

compiler-rt/include/profile/MemProfData.inc

Lines changed: 36 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -218,11 +218,7 @@ void Merge(const MemInfoBlock &newMIB) {
218218
ShorterHistogramSize = newMIB.AccessHistogramSize;
219219
}
220220
for (size_t i = 0; i < ShorterHistogramSize; ++i) {
221-
// Cast to uint8_t* and cap the sum at 255 to prevent overflow
222-
uint8_t *CurrentHistPtr = (uint8_t *)AccessHistogram;
223-
uint8_t *ShorterHistPtr = (uint8_t *)ShorterHistogram;
224-
uint32_t sum = CurrentHistPtr[i] + ShorterHistPtr[i];
225-
CurrentHistPtr[i] = (sum > 255) ? 255 : (uint8_t)sum;
221+
((uint64_t *)AccessHistogram)[i] += ((uint64_t *)ShorterHistogram)[i];
226222
}
227223
}
228224

@@ -232,6 +228,41 @@ void Merge(const MemInfoBlock &newMIB) {
232228
} __attribute__((__packed__));
233229
#endif
234230

231+
// Layout of the 16-bit scaled-integer histogram counter:
//   bits [15:12]  exponent E  (ExponentBits wide)
//   bits [11:0]   mantissa M  (MantissaBits wide)
// The value represented by an encoded counter is M << E.
constexpr int MantissaBits = 12;
constexpr int ExponentBits = 4;
constexpr uint16_t MaxMantissa = (1U << MantissaBits) - 1;
constexpr uint16_t MaxExponent = (1U << ExponentBits) - 1;
static_assert(MantissaBits + ExponentBits == 16,
              "mantissa and exponent must exactly fill a uint16_t");

// Encodes a 64-bit unsigned integer into a 16-bit scaled integer format.
//
// - Counts up to MaxMantissa (including 0) are stored verbatim with E == 0,
//   so they round-trip exactly.
// - Counts above MaxMantissa << MaxExponent saturate to that maximum.
// - Anything in between is halved, rounding up at every step, until it fits
//   in the mantissa. Repeated round-up halving is equivalent to
//   M = ceil(Count / 2^E), so the decoded value is never smaller than Count
//   and overshoots it by less than 2^E. Because M >= 2^(MantissaBits - 1)
//   whenever E > 0, the relative error stays below 1 / 2^(MantissaBits - 1).
inline uint16_t encodeHistogramCount(uint64_t Count) {
  const uint64_t MaxRepresentableValue = static_cast<uint64_t>(MaxMantissa)
                                         << MaxExponent;
  if (Count > MaxRepresentableValue)
    Count = MaxRepresentableValue;

  // After the clamp above the loop runs at most MaxExponent times, so the
  // exponent always fits in ExponentBits and `Mantissa + 1` cannot overflow.
  uint64_t Mantissa = Count;
  uint64_t Exponent = 0;
  while (Mantissa > MaxMantissa) {
    Mantissa = (Mantissa + 1) >> 1;
    ++Exponent;
  }

  // Both fields are in range here; the cast only makes the narrowing explicit.
  return static_cast<uint16_t>((Exponent << MantissaBits) | Mantissa);
}

// Decodes a 16-bit scaled integer and returns the
// decoded 64-bit unsigned integer (mantissa << exponent). Every 16-bit
// pattern is a valid input; the largest result is MaxMantissa << MaxExponent.
inline uint64_t decodeHistogramCount(uint16_t EncodedValue) {
  const unsigned Exponent = EncodedValue >> MantissaBits;
  const uint64_t Mantissa = EncodedValue & MaxMantissa;
  return Mantissa << Exponent;
}
265+
235266
} // namespace memprof
236267
} // namespace llvm
237268

compiler-rt/lib/memprof/memprof_allocator.cpp

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ void Print(const MemInfoBlock &M, const u64 id, bool print_terse) {
7777
? MAX_HISTOGRAM_PRINT_SIZE
7878
: M.AccessHistogramSize;
7979
for (size_t i = 0; i < PrintSize; ++i) {
80-
Printf("%u ", ((uint8_t *)M.AccessHistogram)[i]);
80+
Printf("%llu ", ((uint64_t *)M.AccessHistogram)[i]);
8181
}
8282
Printf("\n");
8383
}
@@ -327,14 +327,12 @@ struct Allocator {
327327
uint32_t HistogramSize =
328328
RoundUpTo(user_size, HISTOGRAM_GRANULARITY) / HISTOGRAM_GRANULARITY;
329329
uintptr_t Histogram =
330-
(uintptr_t)InternalAlloc(HistogramSize * sizeof(uint8_t));
331-
memset((void *)Histogram, 0, HistogramSize * sizeof(uint8_t));
330+
(uintptr_t)InternalAlloc(HistogramSize * sizeof(uint64_t));
331+
memset((void *)Histogram, 0, HistogramSize * sizeof(uint64_t));
332332
for (size_t i = 0; i < HistogramSize; ++i) {
333333
u8 Counter =
334334
*((u8 *)HISTOGRAM_MEM_TO_SHADOW(p + HISTOGRAM_GRANULARITY * i));
335-
// Cap the counter at HISTOGRAM_MAX_COUNTER (255) to prevent overflow
336-
((uint8_t *)Histogram)[i] =
337-
(Counter > HISTOGRAM_MAX_COUNTER) ? HISTOGRAM_MAX_COUNTER : Counter;
335+
((uint64_t *)Histogram)[i] = (uint64_t)Counter;
338336
}
339337
MemInfoBlock newMIB(user_size, c, m->timestamp_ms, curtime, m->cpu_id,
340338
GetCpuId(), Histogram, HistogramSize);

compiler-rt/lib/memprof/memprof_rawprofile.cpp

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ using ::__sanitizer::Vector;
1919
using ::llvm::memprof::MemInfoBlock;
2020
using SegmentEntry = ::llvm::memprof::SegmentEntry;
2121
using Header = ::llvm::memprof::Header;
22+
using ::llvm::memprof::encodeHistogramCount;
2223

2324
namespace {
2425
template <class T> char *WriteBytes(const T &Pod, char *Buffer) {
@@ -169,14 +170,15 @@ void SerializeMIBInfoToBuffer(MIBMapTy &MIBMap, const Vector<u64> &StackIds,
169170
// FIXME: We unnecessarily serialize the AccessHistogram pointer. Adding a
170171
// serialization schema will fix this issue. See also FIXME in
171172
// deserialization.
172-
Ptr = WriteBytes((*h)->mib, Ptr);
173-
for (u64 j = 0; j < (*h)->mib.AccessHistogramSize; ++j) {
174-
// Read as uint8_t and write as uint8_t
175-
uint8_t HistogramEntry = ((uint8_t *)((*h)->mib.AccessHistogram))[j];
173+
auto &MIB = (*h)->mib;
174+
Ptr = WriteBytes(MIB, Ptr);
175+
for (u64 j = 0; j < MIB.AccessHistogramSize; ++j) {
176+
u16 HistogramEntry =
177+
encodeHistogramCount(((u64 *)(MIB.AccessHistogram))[j]);
176178
Ptr = WriteBytes(HistogramEntry, Ptr);
177179
}
178-
if ((*h)->mib.AccessHistogramSize > 0) {
179-
InternalFree((void *)((*h)->mib.AccessHistogram));
180+
if (MIB.AccessHistogramSize > 0) {
181+
InternalFree((void *)MIB.AccessHistogram);
180182
}
181183
}
182184
CHECK(ExpectedNumBytes >= static_cast<u64>(Ptr - Buffer) &&
@@ -250,7 +252,7 @@ u64 SerializeToRawProfile(MIBMapTy &MIBMap, ArrayRef<LoadedModule> Modules,
250252
},
251253
reinterpret_cast<void *>(&TotalAccessHistogramEntries));
252254
const u64 NumHistogramBytes =
253-
RoundUpTo(TotalAccessHistogramEntries * sizeof(uint8_t), 8);
255+
RoundUpTo(TotalAccessHistogramEntries * sizeof(uint16_t), 8);
254256

255257
const u64 NumStackBytes = RoundUpTo(StackSizeBytes(StackIds), 8);
256258

compiler-rt/lib/memprof/tests/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ set(MEMPROF_SOURCES
2626
../memprof_rawprofile.cpp)
2727

2828
set(MEMPROF_UNITTESTS
29+
histogram_encoding.cpp
2930
rawprofile.cpp
3031
driver.cpp)
3132

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
#include <cstdint>
2+
#include <vector>
3+
4+
#include "profile/MemProfData.inc"
5+
#include "gtest/gtest.h"
6+
7+
namespace llvm {
8+
namespace memprof {
9+
namespace {
10+
// Round-trips a spread of counts through encodeHistogramCount /
// decodeHistogramCount and checks three regimes:
//   * values that fit in the mantissa must come back exactly,
//   * values at or above the largest representable count must saturate,
//   * everything in between must come back within a small relative error.
TEST(MemProf, F16EncodeDecode) {
  // Largest count the 16-bit format can hold; computed once, outside the loop.
  const uint64_t MaxRepresentable = static_cast<uint64_t>(MaxMantissa)
                                    << MaxExponent;

  const std::vector<uint64_t> TestCases = {
      0, 100, 4095, 4096, 5000, 8191, 65535, 1000000,
      // Exercise the saturation boundary itself and both of its neighbours.
      MaxRepresentable - 1, MaxRepresentable, MaxRepresentable + 1,
      134213640, 200000000,
      // Largest possible input.
      ~static_cast<uint64_t>(0),
  };

  for (const uint64_t TestCase : TestCases) {
    const uint16_t Encoded = encodeHistogramCount(TestCase);
    const uint64_t Decoded = decodeHistogramCount(Encoded);

    if (TestCase >= MaxRepresentable) {
      // Out-of-range inputs clamp to the maximum.
      EXPECT_EQ(Decoded, MaxRepresentable);
    } else if (TestCase <= MaxMantissa) {
      // Small values are stored verbatim, so the round trip is lossless.
      EXPECT_EQ(Decoded, TestCase);
    } else {
      // The decoded value should be close to the original value.
      // The error should be less than 1/1024 for larger numbers.
      EXPECT_NEAR(Decoded, TestCase, static_cast<double>(TestCase) / 1024.0);
    }
  }
}
33+
} // namespace
34+
} // namespace memprof
35+
} // namespace llvm

compiler-rt/test/memprof/TestCases/memprof_histogram_uint8.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
// Test the histogram support in memprof using the text format output.
2+
// Shadow memory counters per object are limited to 8 bits. In-memory counters
3+
// aggregating counts across multiple objects are 64 bits wide.
4+
15
// RUN: %clangxx_memprof -O0 -mllvm -memprof-histogram -mllvm -memprof-use-callbacks=true %s -o %t && %env_memprof_opts=print_text=1:histogram=1:log_path=stdout %run %t 2>&1 | FileCheck %s
26

37
#include <stdio.h>

llvm/include/llvm/ProfileData/MemProfData.inc

Lines changed: 36 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -218,11 +218,7 @@ void Merge(const MemInfoBlock &newMIB) {
218218
ShorterHistogramSize = newMIB.AccessHistogramSize;
219219
}
220220
for (size_t i = 0; i < ShorterHistogramSize; ++i) {
221-
// Cast to uint8_t* and cap the sum at 255 to prevent overflow
222-
uint8_t *CurrentHistPtr = (uint8_t *)AccessHistogram;
223-
uint8_t *ShorterHistPtr = (uint8_t *)ShorterHistogram;
224-
uint32_t sum = CurrentHistPtr[i] + ShorterHistPtr[i];
225-
CurrentHistPtr[i] = (sum > 255) ? 255 : (uint8_t)sum;
221+
((uint64_t *)AccessHistogram)[i] += ((uint64_t *)ShorterHistogram)[i];
226222
}
227223
}
228224

@@ -232,6 +228,41 @@ void Merge(const MemInfoBlock &newMIB) {
232228
} __attribute__((__packed__));
233229
#endif
234230

231+
// Layout of the 16-bit scaled-integer histogram counter:
//   bits [15:12]  exponent E  (ExponentBits wide)
//   bits [11:0]   mantissa M  (MantissaBits wide)
// The value represented by an encoded counter is M << E.
constexpr int MantissaBits = 12;
constexpr int ExponentBits = 4;
constexpr uint16_t MaxMantissa = (1U << MantissaBits) - 1;
constexpr uint16_t MaxExponent = (1U << ExponentBits) - 1;
static_assert(MantissaBits + ExponentBits == 16,
              "mantissa and exponent must exactly fill a uint16_t");

// Encodes a 64-bit unsigned integer into a 16-bit scaled integer format.
//
// - Counts up to MaxMantissa (including 0) are stored verbatim with E == 0,
//   so they round-trip exactly.
// - Counts above MaxMantissa << MaxExponent saturate to that maximum.
// - Anything in between is halved, rounding up at every step, until it fits
//   in the mantissa. Repeated round-up halving is equivalent to
//   M = ceil(Count / 2^E), so the decoded value is never smaller than Count
//   and overshoots it by less than 2^E. Because M >= 2^(MantissaBits - 1)
//   whenever E > 0, the relative error stays below 1 / 2^(MantissaBits - 1).
inline uint16_t encodeHistogramCount(uint64_t Count) {
  const uint64_t MaxRepresentableValue = static_cast<uint64_t>(MaxMantissa)
                                         << MaxExponent;
  if (Count > MaxRepresentableValue)
    Count = MaxRepresentableValue;

  // After the clamp above the loop runs at most MaxExponent times, so the
  // exponent always fits in ExponentBits and `Mantissa + 1` cannot overflow.
  uint64_t Mantissa = Count;
  uint64_t Exponent = 0;
  while (Mantissa > MaxMantissa) {
    Mantissa = (Mantissa + 1) >> 1;
    ++Exponent;
  }

  // Both fields are in range here; the cast only makes the narrowing explicit.
  return static_cast<uint16_t>((Exponent << MantissaBits) | Mantissa);
}

// Decodes a 16-bit scaled integer and returns the
// decoded 64-bit unsigned integer (mantissa << exponent). Every 16-bit
// pattern is a valid input; the largest result is MaxMantissa << MaxExponent.
inline uint64_t decodeHistogramCount(uint16_t EncodedValue) {
  const unsigned Exponent = EncodedValue >> MantissaBits;
  const uint64_t Mantissa = EncodedValue & MaxMantissa;
  return Mantissa << Exponent;
}
265+
235266
} // namespace memprof
236267
} // namespace llvm
237268

llvm/lib/ProfileData/MemProfReader.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -217,10 +217,12 @@ readMemInfoBlocksV5(const char *Ptr) {
217217
MIB.AccessHistogram =
218218
(uintptr_t)malloc(MIB.AccessHistogramSize * sizeof(uint64_t));
219219
for (uint64_t J = 0; J < MIB.AccessHistogramSize; J++) {
220-
// The on-disk format for V5 uses uint8_t.
221-
const uint8_t Val =
222-
endian::readNext<uint8_t, llvm::endianness::little, unaligned>(Ptr);
223-
((uint64_t *)MIB.AccessHistogram)[J] = Val;
220+
// The on-disk format for V5 uses uint16_t which is then decoded to
221+
// uint64_t.
222+
const uint16_t Val =
223+
endian::readNext<uint16_t, llvm::endianness::little, unaligned>(
224+
Ptr);
225+
((uint64_t *)MIB.AccessHistogram)[J] = decodeHistogramCount(Val);
224226
}
225227
}
226228
Items.push_back({Id, MIB});
Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)