Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,6 @@ To design custom accelerators for your application, use Vitis library functions
Combine domain-specific Vitis libraries with pre-optimized deep learning models from the Vitis AI library or the Vitis AI development kit to accelerate your whole application and meet the overall system-level functionality and performance goals.

![Scalable and Flexible Library Functions](https://xilinx.github.io/Vitis_Libraries/_images/1568760747007.png)

# Support & Contact
To report any issues or request support, please post your question to the Vitis section of the [Adaptive SoC & FPGA Community Forums](https://adaptivesupport.amd.com/s/topic/0TO2E000000YKXhWAO/vitis?language=en_US).
36 changes: 20 additions & 16 deletions codec/L2/demos/webpEnc/host/src/dec/vp8l.c
Original file line number Diff line number Diff line change
Expand Up @@ -214,9 +214,10 @@ static int ReadHuffmanCodeLengths(VP8LDecoder* const dec,
int symbol;
int max_symbol;
int prev_code_len = DEFAULT_CODE_LENGTH;
HuffmanCode table[1 << LENGTHS_TABLE_BITS];
HuffmanTables tables;

if (!VP8LBuildHuffmanTable(table, LENGTHS_TABLE_BITS, code_length_code_lengths, NUM_CODE_LENGTH_CODES)) {
if (!!VP8LHuffmanTablesAllocate(1 << LENGTHS_TABLE_BITS, &tables) ||
!VP8LBuildHuffmanTable(&tables, LENGTHS_TABLE_BITS, code_length_code_lengths, NUM_CODE_LENGTH_CODES)) {
goto End;
}

Expand All @@ -236,7 +237,7 @@ static int ReadHuffmanCodeLengths(VP8LDecoder* const dec,
int code_len;
if (max_symbol-- == 0) break;
VP8LFillBitWindow(br);
p = &table[VP8LPrefetchBits(br) & LENGTHS_TABLE_MASK];
p = &tables.curr_segment->start[VP8LPrefetchBits(br) & LENGTHS_TABLE_MASK];
VP8LSetBitPos(br, br->bit_pos_ + p->bits);
code_len = p->value;
if (code_len < kCodeLengthLiterals) {
Expand All @@ -259,6 +260,7 @@ static int ReadHuffmanCodeLengths(VP8LDecoder* const dec,
ok = 1;

End:
VP8LHuffmanTablesDeallocate(&tables);
if (!ok) dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
return ok;
}
Expand All @@ -268,7 +270,7 @@ static int ReadHuffmanCodeLengths(VP8LDecoder* const dec,
static int ReadHuffmanCode(int alphabet_size,
VP8LDecoder* const dec,
int* const code_lengths,
HuffmanCode* const table) {
HuffmanTables* const table) {
int ok = 0;
int size = 0;
VP8LBitReader* const br = &dec->br_;
Expand Down Expand Up @@ -320,13 +322,16 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize, int co
VP8LMetadata* const hdr = &dec->hdr_;
uint32_t* huffman_image = NULL;
HTreeGroup* htree_groups = NULL;
HuffmanCode* huffman_tables = NULL;
HuffmanCode* next = NULL;
HuffmanTables* huffman_tables = &hdr->huffman_tables_;
int num_htree_groups = 1;
int max_alphabet_size = 0;
int* code_lengths = NULL;
const int table_size = kTableSize[color_cache_bits];

// Check the table has been 0 initialized (through InitMetadata).
assert(huffman_tables->root.start == NULL);
assert(huffman_tables->curr_segment == NULL);

if (allow_recursion && VP8LReadBits(br, 1)) {
// use meta Huffman codes.
const int huffman_precision = VP8LReadBits(br, 3) + 2;
Expand Down Expand Up @@ -360,16 +365,14 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize, int co
}
}

huffman_tables = (HuffmanCode*)WebPSafeMalloc(num_htree_groups * table_size, sizeof(*huffman_tables));
htree_groups = VP8LHtreeGroupsNew(num_htree_groups);
code_lengths = (int*)WebPSafeCalloc((uint64_t)max_alphabet_size, sizeof(*code_lengths));

if (htree_groups == NULL || code_lengths == NULL || huffman_tables == NULL) {
if (htree_groups == NULL || code_lengths == NULL || !VP8LHuffmanTablesAllocate(num_htree_groups * table_size, huffman_tables)) {
dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
goto Error;
}

next = huffman_tables;
for (i = 0; i < num_htree_groups; ++i) {
HTreeGroup* const htree_group = &htree_groups[i];
HuffmanCode** const htrees = htree_group->htrees;
Expand All @@ -383,15 +386,17 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize, int co
if (j == 0 && color_cache_bits > 0) {
alphabet_size += 1 << color_cache_bits;
}
size = ReadHuffmanCode(alphabet_size, dec, code_lengths, next);
size =
ReadHuffmanCode(alphabet_size, dec, code_lengths, huffman_tables);
htrees[j] = huffman_tables->curr_segment->curr_table;
if (size == 0) {
goto Error;
}
if (is_trivial_literal && kLiteralMap[j] == 1) {
is_trivial_literal = (next->bits == 0);
is_trivial_literal = (htrees[j]->bits == 0);
}
total_size += next->bits;
next += size;
total_size += htrees[j]->bits;
huffman_tables->curr_segment->curr_table += size;
if (j <= ALPHA) {
int local_max_bits = code_lengths[0];
int k;
Expand Down Expand Up @@ -424,7 +429,6 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize, int co
hdr->huffman_image_ = huffman_image;
hdr->num_htree_groups_ = num_htree_groups;
hdr->htree_groups_ = htree_groups;
hdr->huffman_tables_ = huffman_tables;
return 1;

Error:
Expand Down Expand Up @@ -1215,7 +1219,7 @@ static void ClearMetadata(VP8LMetadata* const hdr) {
assert(hdr != NULL);

WebPSafeFree(hdr->huffman_image_);
WebPSafeFree(hdr->huffman_tables_);
VP8LHuffmanTablesDeallocate(&hdr->huffman_tables_);
VP8LHtreeGroupsFree(hdr->htree_groups_);
VP8LColorCacheClear(&hdr->color_cache_);
VP8LColorCacheClear(&hdr->saved_color_cache_);
Expand Down Expand Up @@ -1537,7 +1541,7 @@ int VP8LDecodeImage(VP8LDecoder* const dec) {
// Sanity checks.
if (dec == NULL) return 0;

assert(dec->hdr_.huffman_tables_ != NULL);
assert(dec->hdr_.huffman_tables_.root.start != NULL);
assert(dec->hdr_.htree_groups_ != NULL);
assert(dec->hdr_.num_htree_groups_ > 0);

Expand Down
2 changes: 1 addition & 1 deletion codec/L2/demos/webpEnc/host/src/dec/vp8li.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ typedef struct {
uint32_t* huffman_image_;
int num_htree_groups_;
HTreeGroup* htree_groups_;
HuffmanCode* huffman_tables_;
HuffmanTables huffman_tables_;
} VP8LMetadata;

typedef struct VP8LDecoder VP8LDecoder;
Expand Down
48 changes: 42 additions & 6 deletions codec/L2/demos/webpEnc/host/src/utils/huffman.c
Original file line number Diff line number Diff line change
Expand Up @@ -175,17 +175,21 @@ int VP8LBuildHuffmanTable(HuffmanCode* const root_table,
for (; count[len] > 0; --count[len]) {
HuffmanCode code;
if ((key & mask) != low) {
table += table_size;
if (root_table != NULL) table += table_size;
table_bits = NextTableBitSize(count, len, root_bits);
table_size = 1 << table_bits;
total_size += table_size;
low = key & mask;
root_table[low].bits = (uint8_t)(table_bits + root_bits);
root_table[low].value = (uint16_t)((table - root_table) - low);
if (root_table != NULL) {
root_table[low].bits = (uint8_t)(table_bits + root_bits);
root_table[low].value = (uint16_t)((table - root_table) - low);
}
}
if (root_table != NULL) {
code.bits = (uint8_t)(len - root_bits);
code.value = (uint16_t)sorted[symbol++];
ReplicateValue(&table[key >> root_bits], step, table_size, code);
}
code.bits = (uint8_t)(len - root_bits);
code.value = (uint16_t)sorted[symbol++];
ReplicateValue(&table[key >> root_bits], step, table_size, code);
key = GetNextKey(key, len);
}
}
Expand All @@ -200,3 +204,35 @@ int VP8LBuildHuffmanTable(HuffmanCode* const root_table,
WebPSafeFree(sorted);
return total_size;
}

// Prepares 'huffman_tables' for use: the embedded root segment becomes the
// current segment and receives a buffer of 'size' HuffmanCode entries.
// Returns 1 on success and 0 on memory allocation error.
// On failure every field is still left in a defined state (no buffer, no
// chained segment), so VP8LHuffmanTablesDeallocate() may be called safely on
// the result even when 'huffman_tables' was not zero-initialized beforehand.
int VP8LHuffmanTablesAllocate(int size, HuffmanTables* huffman_tables) {
  HuffmanTablesSegment* const root = &huffman_tables->root;

  // Initialize the bookkeeping *before* allocating. Callers jump to their
  // cleanup path on failure and that path walks 'root->next'; leaving it
  // indeterminate would make the deallocator follow a garbage pointer.
  huffman_tables->curr_segment = root;
  root->next = NULL;
  root->curr_table = NULL;
  root->size = 0;

  // Allocate root.
  root->start = (HuffmanCode*)WebPSafeMalloc(size, sizeof(*root->start));
  if (root->start == NULL) return 0;

  // Writing starts at the beginning of the new buffer.
  root->curr_table = root->start;
  root->size = size;
  return 1;
}

// Releases all memory reachable from 'huffman_tables'. Passing NULL is a no-op.
// The root segment is embedded in the HuffmanTables object, so only its buffer
// is freed and its 'start'/'next' fields are reset to NULL. Every chained
// segment is freed together with its buffer.
void VP8LHuffmanTablesDeallocate(HuffmanTables* const huffman_tables) {
  HuffmanTablesSegment* root;
  HuffmanTablesSegment* node;

  if (huffman_tables == NULL) return;

  // Root: detach the chain first, then drop the buffer and clear the links.
  root = &huffman_tables->root;
  node = root->next;
  WebPSafeFree(root->start);
  root->start = NULL;
  root->next = NULL;

  // Remaining segments: the successor must be read before the node is freed.
  while (node != NULL) {
    HuffmanTablesSegment* const following = node->next;
    WebPSafeFree(node->start);
    WebPSafeFree(node);
    node = following;
  }
}
25 changes: 24 additions & 1 deletion codec/L2/demos/webpEnc/host/src/utils/huffman.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,29 @@ typedef struct {
// or non-literal symbol otherwise
} HuffmanCode32;

// Contiguous memory segment of HuffmanCodes.
// Segments are linked through 'next' into a singly linked list that ends with
// a NULL 'next'. VP8LHuffmanTablesDeallocate() walks that list.
typedef struct HuffmanTablesSegment {
// First entry of this segment's buffer. Obtained from WebPSafeMalloc() for the
// root segment and released with WebPSafeFree() on deallocation.
HuffmanCode* start;
// Pointer to where we are writing into the segment. Starts at 'start' and
// cannot go beyond 'start' + 'size'.
HuffmanCode* curr_table;
// Pointer to the next segment in the chain.
struct HuffmanTablesSegment* next;
// Capacity of the buffer at 'start', counted in HuffmanCode entries.
int size;
} HuffmanTablesSegment;

// Chained memory segments of HuffmanCodes.
typedef struct HuffmanTables {
// First segment, stored by value: deallocation frees its buffer but never the
// segment object itself.
HuffmanTablesSegment root;
// Currently processed segment. At first, this is 'root'.
// VP8LHuffmanTablesAllocate() points it at 'root'.
HuffmanTablesSegment* curr_segment;
} HuffmanTables;

// Allocates a HuffmanTables with 'size' contiguous HuffmanCodes. Returns 0 on
// memory allocation error, 1 otherwise.
int VP8LHuffmanTablesAllocate(int size, HuffmanTables* huffman_tables);
// Frees the root segment's buffer and every chained segment (buffer and node),
// then resets the root's 'start' and 'next' to NULL. Does nothing when
// 'huffman_tables' is NULL.
void VP8LHuffmanTablesDeallocate(HuffmanTables* const huffman_tables);

#define HUFFMAN_PACKED_BITS 6
#define HUFFMAN_PACKED_TABLE_SIZE (1u << HUFFMAN_PACKED_BITS)

Expand Down Expand Up @@ -77,7 +100,7 @@ void VP8LHtreeGroupsFree(HTreeGroup* const htree_groups);
// the huffman table.
// Returns built table size or 0 in case of error (invalid tree or
// memory error).
int VP8LBuildHuffmanTable(HuffmanCode* const root_table,
int VP8LBuildHuffmanTable(HuffmanTables* const root_table,
int root_bits,
const int code_lengths[],
int code_lengths_size);
Expand Down
6 changes: 3 additions & 3 deletions dsp/Jenkinsfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
@Library('pipeline-library')_

VitisLibPipeline (branch: 'next', libname: 'xf_dsp', TARGETS: 'hls_csim:hls_csynth:hls_cosim:vitis_sw_emu:vitis_hw_emu:vitis_hw_build:vitis_aie_sim:vitis_aie_x86sim',
upstream_dependencies: 'xf_utils_hw,next,../utils; xf_data_mover,next,../data_mover; dsplib_internal_scripts,main,../dsplib_internal_scripts',
devtest: 'RunDeploy.sh', TOOLVERSION: '2024.2_stable_latest',
VitisLibPipeline (branch: 'main', libname: 'xf_dsp', TARGETS: 'hls_csim:hls_csynth:hls_cosim:vitis_sw_emu:vitis_hw_emu:vitis_hw_build:vitis_aie_sim:vitis_aie_x86sim',
upstream_dependencies: 'xf_utils_hw,main,../utils; xf_data_mover,main,../data_mover; dsplib_internal_scripts,main,../dsplib_internal_scripts',
devtest: 'RunDeploy.sh', TOOLVERSION: '2024.2_released',
email: '[email protected]',
post_launch: '../dsplib_internal_scripts/scripts/jenkins/post_launch_wrapper.sh |& tee -a reporting_log.txt')
11 changes: 2 additions & 9 deletions dsp/L1/include/aie/fir_tdm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -200,10 +200,7 @@ class kernelFilterClass {
#if __HAS_ACCUM_PERMUTES__ == 1
// cint16/int16 combo can be overloaded with 2 column MUL/MACs.
static constexpr unsigned int columnMultiple =
(std::is_same<TT_DATA, cint16>::value && std::is_same<TT_COEFF, int16>::value) &&
(TP_TDM_CHANNELS > m_kVOutSize) && (TP_TDM_CHANNELS % (2 * m_kVOutSize) == 0)
? 2
: 1;
(std::is_same<TT_DATA, cint16>::value && std::is_same<TT_COEFF, int16>::value) ? 2 : 1;
static constexpr unsigned int coeffToDataMultiple = 1;
#else
static constexpr unsigned int columnMultiple = 1;
Expand Down Expand Up @@ -256,11 +253,7 @@ class kernelFilterClass {
// Operate on multiple frames in parallel, when possible.
// Optimized to reduce data loads, handy when 512-bits of data and 256-bits of coeffs are needed on each clock
// cycle.
static constexpr unsigned int useEvenFrames =
(TP_NUM_FRAMES % 2 == 0 && columnMultiple == 2 && TP_TDM_CHANNELS > m_kVOutSize &&
TP_TDM_CHANNELS % kSamplesInVectData == 0)
? 1
: 0;
static constexpr unsigned int useEvenFrames = (TP_NUM_FRAMES % 2 == 0 && columnMultiple == 2) ? 1 : 0;
// TDM FIR Margin = (TP_FIR_LEN-1)*TP_TDM_CHANNELS
// or set to 0, if handled with internal buffer.
static constexpr unsigned int enableInternalMargin = __HAS_ACCUM_PERMUTES__ ? 1 : 0;
Expand Down
48 changes: 30 additions & 18 deletions dsp/L1/src/aie/fir_tdm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -547,7 +547,6 @@ INLINE_DECL void kernelFilterClass<TT_DATA,
// Rewind by
inRdItr -= (TP_FIR_RANGE_LEN)*TP_TDM_LOOP_SIZE - 1;
}
// inRdItr += m_kFirCoeffOffset * columnMultiple * TP_TDM_CHANNELS / kSamplesInVectData;
}
};

Expand Down Expand Up @@ -615,22 +614,38 @@ kernelFilterClass<TT_DATA,
using accVect_t = ::aie::accum<typename tTDMAccBaseType<TT_DATA, TT_COEFF>::type, kSamplesInVectAcc>;

dataVect_t dataVect;
dataVect_t* __restrict inPointer;
dataRead_t* __restrict inPointer;
outDataVect_t outVect, outVect2;
coeffVect_t* __restrict coeffVectPtr;

coeffVect_t coeffVect;
accVect_t acc, acc2;
input_circular_buffer<TT_DATA, extents<internalBufferSize>, margin<0> > inWindowCirc(&m_inputBuffer[0],
internalBufferSize, 0);
auto inWrItr = ::aie::begin_vector_random_circular<kSamplesInVectData>(m_inputBuffer, internalBufferSize);
auto inRdItr = ::aie::begin_vector_random_circular<kSamplesInVectAcc>(inWindowCirc);
auto inWrItr = ::aie::begin_vector_random_circular<kSamplesInVectAcc>(m_inputBuffer, internalBufferSize);
auto inRdItr = ::aie::begin_vector_random_circular<kSamplesInVectAcc>(m_inputBuffer, internalBufferSize);
dataVect_t* frameStart = (dataVect_t*)inInterface.inWindow;
// #undef _DSPLIB_FIR_TDM_HPP_DEBUG_

inWrItr += (marginFrame)*TP_TDM_CHANNELS / kSamplesInVectData;
inWrItr += (marginFrame)*TP_TDM_CHANNELS / kSamplesInVectAcc;
int readIncr = ((marginFrame + 2 + m_kFirCoeffOffset)) * columnMultiple * TP_TDM_CHANNELS / kSamplesInVectData;
inRdItr += readIncr;
// precalculate margin frame prior to jumping into inner loop.
// Alternatively, calculate margin frame within inner loop, to avoid a costly div.
// Calculating frame margin inside inner loop benefits cases that operate on a fairly small number of frames.
constexpr unsigned int precalculatedMarginFrame = (TP_NUM_FRAMES > internalBufferFrames) ? 1 : 0;
if
constexpr(m_kFirMargin == 0) {
if
constexpr(precalculatedMarginFrame == 1) {
marginFrame = (((marginFrame + TP_NUM_FRAMES) >= internalBufferFrames)
? ((marginFrame + TP_NUM_FRAMES) % internalBufferFrames)
: (marginFrame + TP_NUM_FRAMES));
}
}
else {
// Margin has been copied externally and is as part of the window
marginFrame = 0;
}

// Loop through 2 frames at a time
for (int frame = 0; frame < TP_NUM_FRAMES / 2; frame++)
Expand All @@ -640,25 +655,23 @@ kernelFilterClass<TT_DATA,
// Embed margin handling here, as this would reduce the amount of buffer size.

for (int j = 0; j < 2; j++) {
dataVect_t* frameStart =
(dataVect_t*)inInterface.inWindow + j * TP_TDM_CHANNELS / kSamplesInVectData;
dataRead_t* frameStart =
(dataRead_t*)inInterface.inWindow + j * TP_TDM_CHANNELS / kSamplesInVectAcc;
// Copy margin for 2 frames at a time
for (int i = 0; i < TP_TDM_CHANNELS / kSamplesInVectData; i++) {
inPointer =
((dataVect_t*)frameStart) + i + 2 * frame * TP_TDM_CHANNELS / kSamplesInVectData;
for (int i = 0; i < TP_TDM_CHANNELS / kSamplesInVectAcc; i++) {
inPointer = ((dataRead_t*)frameStart) + i + 2 * frame * TP_TDM_CHANNELS / kSamplesInVectAcc;
// dataVect = *inPointer;
// *inWrItr++ = dataVect;
*inWrItr++ = *inPointer;
}
// Copying 2 frames at a time.
marginFrame = (marginFrame == (internalBufferFrames - 1) ? 0 : marginFrame + 1);
if
constexpr(precalculatedMarginFrame == 0) {
marginFrame = (marginFrame == (internalBufferFrames - 1) ? 0 : marginFrame + 1);
}
}
chess_memory_fence();
chess_separator_scheduler();
}
else {
// Margin has been copied externally and is as part of the window
marginFrame = 0;
}
// Read once, prior to the loop
if
constexpr(columnMultiple == 2) {
Expand Down Expand Up @@ -713,7 +726,6 @@ kernelFilterClass<TT_DATA,
}
if
constexpr(TP_CASC_IN == CASC_IN_TRUE) {
// acc = (accVect_t)readincr_v<kSamplesInVectAcc>(inInterface.inCascade);
acc2 = readCascade<TT_DATA, TT_COEFF>(inInterface, acc2);
acc2 = macTdm2(acc2, dataVect, coeffVect);
}
Expand Down
Loading