diff --git a/CMakeLists.txt b/CMakeLists.txt
index e175847..98c70ff 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,14 +3,14 @@
 # https://rix0r.nl/blog/2015/08/13/cmake-guide/
 #
 
-cmake_minimum_required(VERSION 3.5 FATAL_ERROR)
+cmake_minimum_required(VERSION 3.9 FATAL_ERROR)
 project(mantis VERSION 0.2 LANGUAGES C CXX)
 
 if (NOT CMAKE_BUILD_TYPE)
   set (CMAKE_BUILD_TYPE "Release")
 endif()
 
 # We require C++11
-set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_STANDARD 14)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_C_STANDARD 11)
 set(CMAKE_C_STANDARD_REQUIRED ON)
diff --git a/Makefile.deprecated b/Makefile.deprecated
index 3d8c450..622f9f0 100644
--- a/Makefile.deprecated
+++ b/Makefile.deprecated
@@ -1,4 +1,4 @@
-TARGETS= mantis
+TARGETS= mantis monochromatic_component_iterator
 
 ifdef D
 DEBUG=-g -DDEBUG
@@ -33,8 +33,8 @@ CFLAGS += -Wall $(DEBUG) $(PROFILE) $(OPT) $(ARCH) -m64 -I. -I$(LOC_INCLUDE)\
 -Wno-unused-result -Wno-strict-aliasing -Wno-unused-function -Wno-sign-compare \
 -Wno-implicit-function-declaration
 
-LDFLAGS += $(DEBUG) $(PROFILE) $(OPT) -lsdsl -lpthread -lboost_system \
--lboost_thread -lm -lz -lrt
+LDFLAGS += $(DEBUG) $(PROFILE) $(OPT) -lpthread -lboost_system \
+-lboost_thread -lm -lz -lrt lib/libsdsl.a
 
 #
 # declaration of dependencies
@@ -45,6 +45,8 @@ all: $(TARGETS)
 
 # dependencies between programs and .o files
 mantis: $(OBJDIR)/kmer.o $(OBJDIR)/mantis.o $(OBJDIR)/validatemantis.o $(OBJDIR)/gqf.o $(OBJDIR)/hashutil.o $(OBJDIR)/query.o $(OBJDIR)/coloreddbg.o $(OBJDIR)/bitvector.o $(OBJDIR)/util.o $(OBJDIR)/MantisFS.o
+monochromatic_component_iterator: $(OBJDIR)/kmer.o $(OBJDIR)/gqf.o $(OBJDIR)/hashutil.o $(OBJDIR)/monochromatic_component_iterator.o
+
 # dependencies between .o files and .h files
 $(OBJDIR)/mantis.o: $(LOC_SRC)/mantis.cc
 $(OBJDIR)/MantisFs.o: $(LOC_SRC)/MantisFS.cc $(LOC_INCLUDE)/MantisFS.h
@@ -59,7 +61,7 @@ $(OBJDIR)/hashutil.o: $(LOC_INCLUDE)/hashutil.h
 
 # dependencies between .o files and .cc (or .c) files
 $(OBJDIR)/gqf.o: $(LOC_SRC)/cqf/gqf.c $(LOC_INCLUDE)/cqf/gqf.h
-
+$(OBJDIR)/monochromatic_component_iterator.o: $(LOC_INCLUDE)/cqf.h $(LOC_INCLUDE)/monochromatic_component_iterator.h $(LOC_SRC)/monochromatic_component_iterator.cc
 #
 # generic build rules
 #
diff --git a/data/SRR191403-k20-Cut1.squeakr b/data/SRR191403-k20-Cut1.squeakr
new file mode 100644
index 0000000..c922c2f
Binary files /dev/null and b/data/SRR191403-k20-Cut1.squeakr differ
diff --git a/data/SRR191403_exact.ser b/data/SRR191403_exact.ser
deleted file mode 100644
index 4a7b0dd..0000000
Binary files a/data/SRR191403_exact.ser and /dev/null differ
diff --git a/data/SRR191411-k20-Cut1.squeakr b/data/SRR191411-k20-Cut1.squeakr
new file mode 100644
index 0000000..464d2c8
Binary files /dev/null and b/data/SRR191411-k20-Cut1.squeakr differ
diff --git a/data/SRR191411_exact.ser b/data/SRR191411_exact.ser
deleted file mode 100644
index 204b4c4..0000000
Binary files a/data/SRR191411_exact.ser and /dev/null differ
diff --git a/include/MSF.h b/include/MSF.h
new file mode 100644
index 0000000..c0af9c1
--- /dev/null
+++ b/include/MSF.h
@@ -0,0 +1,301 @@
+//
+// Created by Fatemeh Almodaresi on 7/20/18.
+//
+
+#ifndef MANTIS_MSF_H
+#define MANTIS_MSF_H
+#include
+#include
+#include
+#include "clipp.h"
+#include "bitvector.h"
+//#include "sdsl/bits.hpp"
+
+#define EQS_PER_SLOT 20000000
+
+using namespace std;
+
+typedef std::vector<sdsl::rrr_vector<63>> eqvec;
+
+struct Edge {
+    uint32_t n1;
+    uint32_t n2;
+    uint16_t weight;
+
+    Edge(uint32_t inN1, uint32_t inN2, uint16_t inWeight)
+            : n1(inN1), n2(inN2), weight(inWeight) {}
+};
+
+struct EdgePtr {
+    uint16_t bucket;
+    uint32_t idx;
+
+    EdgePtr(uint16_t bucketIn, uint32_t idxIn) : bucket(bucketIn), idx(idxIn) {}
+};
+
+struct Child {
+    uint32_t id;
+    uint16_t weight;
+
+    Child(uint32_t inN1, uint16_t inWeight) : id(inN1), weight(inWeight) {}
+};
+
+struct Path {
+    uint32_t id;
+    uint32_t steps;
+    uint64_t weight;
+
+    Path(uint32_t idIn,
+         uint32_t stepsIn,
+         uint64_t weightIn) : id(idIn), steps(stepsIn), weight(weightIn) {}
+};
+
+struct DisjointSetNode {
+    uint32_t parent{0};
+    uint64_t rnk{0}, w{0}, edges{0};
+
+    void setParent(uint32_t p) { parent = p; }
+
+    void mergeWith(DisjointSetNode &n, uint16_t edgeW, uint32_t id) {
+        n.setParent(parent);
+        w += (n.w + static_cast<uint64_t>(edgeW));
+        edges += (n.edges + 1);
+        n.edges = 0;
+        n.w = 0;
+        if (rnk == n.rnk) {
+            rnk++;
+        }
+    }
+};
+
+// To represent Disjoint Sets
+struct DisjointSets {
+    std::vector<DisjointSetNode> els;
+    uint64_t n;
+
+    // Constructor.
+    DisjointSets(uint64_t n) {
+        // Allocate memory
+        this->n = n;
+        els.resize(n);
+        // Initially, all vertices are in
+        // different sets and have rank 0.
+        for (uint64_t i = 0; i <= n; i++) {
+            //every element is parent of itself
+            els[i].setParent(i);
+        }
+    }
+
+    // Find the parent of a node 'u'
+    // Path Compression
+    uint32_t find(uint32_t u) {
+        /* Make the parent of the nodes in the path
+           from u--> parent[u] point to parent[u] */
+        if (u != els[u].parent)
+            els[u].parent = find(els[u].parent);
+        return els[u].parent;
+    }
+
+    // Union by rank
+    void merge(uint32_t x, uint32_t y, uint16_t edgeW) {
+        x = find(x), y = find(y);
+
+        /* Make tree with smaller height
+           a subtree of the other tree */
+        if (els[x].rnk > els[y].rnk) {
+            els[x].mergeWith(els[y], edgeW, x);
+
+        } else {// If rnk[x] <= rnk[y]
+            els[y].mergeWith(els[x], edgeW, y);
+        }
+    }
+};
+
+// Structure to represent a graph
+struct Graph {
+
+    uint64_t V;
+
+    vector<vector<Edge>> edges;
+    vector<vector<EdgePtr>> mst;
+
+    uint64_t mst_totalWeight{0};
+
+    Graph(uint64_t bucketCnt) { edges.resize(bucketCnt); }
+
+    // Utility function to add an edge
+    void addEdge(uint32_t u, uint32_t v, uint16_t w) {
+        edges[w - 1].emplace_back(u, v, w);
+        //edges.emplace_back(u, v, w);
+    }
+
+    // Function to find MST using Kruskal's
+    // MST algorithm
+    DisjointSets kruskalMSF(uint32_t bucketCnt) {
+        int mst_wt = 0; // Initialize result
+
+        // Create disjoint sets
+        DisjointSets ds(V);
+
+        std::string tmp;
+        uint64_t n1{0}, n2{0}, cntr{0}, mergeCntr{0};
+        uint32_t w{0};
+        sdsl::bit_vector nodes(V, 0);
+        // Iterate through all sorted edges
+        for (auto bucketCntr = 0; bucketCntr < bucketCnt; bucketCntr++) {
+            //ifstream file(filename);
+            /*std::getline(file, tmp);
+            while (file.good()) {
+                file >> n1 >> n2 >> w;*/
+            uint32_t edgeIdxInBucket = 0;
+            for (auto it = edges[bucketCntr].begin(); it != edges[bucketCntr].end(); it++) {
+                //if (w == bucketCntr) {
+                w = it->weight;
+                uint32_t u = it->n1;
+                uint32_t v = it->n2;
+                uint32_t set_u = ds.find(u);
+                uint32_t set_v = ds.find(v);
+
+                // Check if the selected edge is creating
+                // a cycle or not (Cycle is created if u
+                // and v belong to same set)
+                if (set_u != set_v) {
+                    // Current edge will be in the MST
+                    // Merge two sets
+                    ds.merge(set_u, set_v, w);
+                    mst[u].emplace_back(bucketCntr, edgeIdxInBucket);
+                    mst[v].emplace_back(bucketCntr, edgeIdxInBucket);
+                    nodes[u] = 1;
+                    nodes[v] = 1;
+                    mst_totalWeight += w;
+                    mergeCntr++;
+                }/* else {
+                    if (nodes.find(u) == nodes.end() || nodes.find(v) == nodes.end())
+                        std::cerr << u << " " << v << " " << set_u << " " << set_v << "\n";
+                }*/
+                cntr++;
+                if (cntr % 1000000 == 0) {
+                    std::cerr << "edge " << cntr << " " << mergeCntr << "\n";
+                }
+                edgeIdxInBucket++;
+                //}
+            }
+            /*file.clear();
+            file.seekg(0, file.beg);*/
+
+        }
+        //file.close();
+        uint64_t distinctNodes{0};
+        for (uint64_t i = 0; i < V; i += 64) {
+            distinctNodes += sdsl::bits::cnt(nodes.get_int(i, 64));
+        }
+
+        std::cerr << "final # of edges: " << cntr
+                  << "\n# of merges: " << mergeCntr
+                  << "\n# of distinct nodes: " << distinctNodes
+                  << "\n";
+        return ds;
+    }
+};
+
+void loadEqs(std::string filename, eqvec &bvs) {
+    bvs.reserve(20);
+    std::string eqfile;
+    std::ifstream eqlist(filename);
+    if (eqlist.is_open()) {
+        uint64_t accumTotalEqCls = 0;
+        while (getline(eqlist, eqfile)) {
+            sdsl::rrr_vector<63> bv;
+            bvs.push_back(bv);
+            sdsl::load_from_file(bvs.back(), eqfile);
+        }
+    }
+    std::cerr << "loaded all the equivalence classes: "
+              << ((bvs.size() - 1) * EQS_PER_SLOT + bvs.back().size())
+              << "\n";
+}
+
+void buildColor(eqvec &bvs,
+                std::vector<uint64_t> &eq,
+                uint64_t eqid,
+                uint64_t num_samples) {
+    uint64_t i{0}, bitcnt{0}, wrdcnt{0};
+    uint64_t idx = eqid / EQS_PER_SLOT;
+    uint64_t offset = eqid % EQS_PER_SLOT;
+    //std::cerr << eqid << " " << num_samples << " " << idx << " " << offset << "\n";
+    while (i eq;
+    eq.resize(numWrds);
+    buildColor(bvs, eq, eqid, num_samples);
+    for (uint64_t i = 0; i < eq.size(); i += 1) {
+        res += (uint16_t)sdsl::bits::cnt(eq[i]);
+    }
+    return res;
+}
+
+// for two non-zero nodes, delta list is positions that xor of the bits was 1
+std::vector getDeltaList(eqvec &bvs,
+                         uint64_t eqid1,uint64_t eqid2, uint64_t num_samples, uint64_t numWrds) {
+    std::vector res;
+    std::vector<uint64_t> eq1, eq2;
+    eq1.resize(numWrds);
+    eq2.resize(numWrds);
+    buildColor(bvs, eq1, eqid1, num_samples);
+    buildColor(bvs, eq2, eqid2, num_samples);
+
+    for (uint32_t i = 0; i < eq1.size(); i += 1) {
+        uint64_t eq12xor = eq1[i] ^ eq2[i];
+        for (uint32_t j = 0; j < 64; j++) {
+            if ( (eq12xor >> j) & 0x01 ) {
+                res.push_back(i*64+j);
+            }
+        }
+    }
+
+    return res; // rely on c++ optimization
+}
+
+// for those connected to node zero, delta list is position of set bits
+std::vector getDeltaList(eqvec &bvs,
+                         uint64_t eqid1, uint64_t num_samples, uint64_t numWrds) {
+    std::vector res;
+    std::vector<uint64_t> eq1;
+    eq1.resize(numWrds);
+    buildColor(bvs, eq1, eqid1, num_samples);
+
+    for (uint32_t i = 0; i < eq1.size(); i += 1) {
+        for (uint32_t j = 0; j < 64; j++) {
+            if ( (eq1[i] >> j) & 0x01 ) {
+                res.push_back(i*64+j);
+            }
+        }
+    }
+
+    return res; // rely on c++ optimization
+}
+
+uint64_t hammingDist(eqvec &bvs, uint64_t eqid1, uint64_t eqid2, uint64_t num_samples) {
+    uint64_t dist{0};
+    std::vector<uint64_t> eq1(((num_samples - 1) / 64) + 1), eq2(((num_samples - 1) / 64) + 1);
+    buildColor(bvs, eq1, eqid1, num_samples);
+    buildColor(bvs, eq2, eqid2, num_samples);
+
+    for (uint64_t i = 0; i < eq1.size(); i++) {
+        if (eq1[i] != eq2[i])
+            dist += sdsl::bits::cnt(eq1[i] ^ eq2[i]);
+    }
+    return dist;
+}
+
+#endif //MANTIS_MSF_H
diff --git a/include/SIMDCompressionAndIntersection/VarIntG8IU.h b/include/SIMDCompressionAndIntersection/VarIntG8IU.h
new file mode 100644
index 0000000..b187f9a
--- /dev/null
+++ b/include/SIMDCompressionAndIntersection/VarIntG8IU.h
@@ -0,0 +1,237 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. + */ +#ifndef __SSSE3__ +#pragma message \ + "Disabling varintg8iu due to lack of SSSE3 support, try adding -mssse3 or the equivalent on your compiler" +#else +#ifndef VARINTG8IU_H__ +#define VARINTG8IU_H__ +#include +#include "codecs.h" +#include "delta.h" +#ifdef __GNUC__ +#define PREDICT_FALSE(x) (__builtin_expect(x, 0)) +#else +#define PREDICT_FALSE(x) x +#endif + +namespace SIMDCompressionLib { +/** + * + * Implementation of varint-G8IU taken from + * Stepanov et al., SIMD-Based Decoding of Posting Lists, CIKM 2011 + * + * Update: D. Lemire believes that this scheme was patented by Rose, Stepanov et + * al. (patent 20120221539). + * We wrote this code before the patent was published (August 2012). + * + * By Maxime Caron and Daniel Lemire + * + * This code was originally written by M. Caron and then + * optimized by D. Lemire. + * + * + * + */ + +template class VarIntG8IU : public IntegerCODEC { + +public: + // For all possible values of the + // descriptor we build a table of any shuffle sequence + // that might be needed at decode time. + VarIntG8IU() { + char mask[256][32]; + for (int desc = 0; desc <= 255; desc++) { + memset(mask[desc], -1, 32); + int bitmask = 0x00000001; + int bitindex = 0; + // count number of 0 in the char + int complete = 0; + int ithSize[8]; + int lastpos = -1; + while (bitindex < 8) { + if ((desc & bitmask) == 0) { + ithSize[complete] = bitindex - lastpos; + lastpos = bitindex; + complete++; + } + bitindex++; + bitmask = bitmask << 1; + } + maskOutputSize[desc] = complete; + + int j = 0; + int k = 0; + for (int i = 0; i < complete; i++) { + for (int n = 0; n < 4; n++) { + if (n < ithSize[i]) { + mask[desc][k] = static_cast(j); + j = j + 1; + } else { + mask[desc][k] = -1; + } + k = k + 1; + } + } + } + for (int desc = 0; desc <= 255; desc++) { + vecmask[desc][0] = + _mm_lddqu_si128(reinterpret_cast<__m128i const *>(mask[desc])); + vecmask[desc][1] = + _mm_lddqu_si128(reinterpret_cast<__m128i const *>(mask[desc] + 16)); + } + } + + void encodeArray(uint32_t *in, const size_t length, uint32_t *out, + size_t &nvalue) { + uint32_t prev = 0; // for deltas + const uint32_t *src = in; + size_t srclength = length * 4; // number of input bytes + + unsigned char *dst = reinterpret_cast(out); + nvalue = nvalue * 4; // output bytes + + size_t compressed_size = 0; + while (srclength > 0 && nvalue >= 9) { + compressed_size += encodeBlock(src, srclength, dst, nvalue, prev); + } + // Ouput might not be a multiple of 4 so we make it so + nvalue = ((compressed_size + 3) / 4); + } + const uint32_t *decodeArray(const uint32_t *in, const size_t length, + uint32_t *out, size_t &nvalue) { + __m128i mprev = _mm_setzero_si128(); // for deltas + const unsigned char *src = reinterpret_cast(in); + const uint32_t *const initdst = out; + + uint32_t *dst = out; + size_t srclength = length * 4; + for (; srclength >= 22; srclength -= 8, src += 8) { + unsigned char desc = *src; + src += 1; + srclength -= 1; + const __m128i data = + _mm_lddqu_si128(reinterpret_cast<__m128i const *>(src)); + const __m128i result = _mm_shuffle_epi8(data, vecmask[desc][0]); + if (delta) { + mprev = RegularDeltaSIMD::PrefixSum(result, mprev); + _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), mprev); + } else { + _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), result); + } + int readSize = maskOutputSize[desc]; + if (readSize > 4) { + const __m128i result2 = _mm_shuffle_epi8(data, 
vecmask[desc][1]); + if (delta) { + mprev = RegularDeltaSIMD::PrefixSum(result2, mprev); + _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + 4), mprev); + } else { + _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + 4), result2); + } + } + dst += readSize; + } + while (srclength >= 9) { + unsigned char desc = *src; + src += 1; + srclength -= 1; + char buff[32]; + memcpy(buff, src, 8); + const __m128i data = + _mm_lddqu_si128(reinterpret_cast<__m128i const *>(buff)); + const __m128i result = _mm_shuffle_epi8(data, vecmask[desc][0]); + if (delta) { + mprev = RegularDeltaSIMD::PrefixSum(result, mprev); + _mm_storeu_si128(reinterpret_cast<__m128i *>(buff), mprev); + } else { + _mm_storeu_si128(reinterpret_cast<__m128i *>(buff), result); + } + int readSize = maskOutputSize[desc]; + if (readSize > 4) { + const __m128i result2 = _mm_shuffle_epi8(data, vecmask[desc][1]); + if (delta) { + mprev = RegularDeltaSIMD::PrefixSum(result2, mprev); + _mm_storeu_si128(reinterpret_cast<__m128i *>(buff + 16), mprev); + } else { + _mm_storeu_si128(reinterpret_cast<__m128i *>(buff + 16), result2); + } + } + memcpy(dst, buff, 4 * readSize); + dst += readSize; + srclength -= 8; + src += 8; + } + + nvalue = (dst - initdst); + return reinterpret_cast((reinterpret_cast(src) + 3) & + ~3); + } + + virtual std::string name() const { + return std::string("VarIntG8IU") + + ((delta == 1) ? "scalardelta" : ((delta == 2) ? "delta" : "")); + } + + int encodeBlock(const uint32_t *&src, size_t &srclength, unsigned char *&dest, + size_t &dstlength, uint32_t &prev) { + unsigned char desc = 0xFF; + unsigned char bitmask = 0x01; + uint32_t buffer[8]; + int ithSize[8]; + int length = 0; + int numInt = 0; + + while (srclength > 0) { + const uint32_t *temp = src; + + int byteNeeded = + delta ? getNumByteNeeded(*temp - prev) : getNumByteNeeded(*temp); + + if (PREDICT_FALSE(length + byteNeeded > 8)) { + break; + } + + // flip the correct bit in desc + bitmask = static_cast(bitmask << (byteNeeded - 1)); + desc = desc ^ bitmask; + bitmask = static_cast(bitmask << 1); + + ithSize[numInt] = byteNeeded; + length += byteNeeded; + buffer[numInt] = delta ? *temp - prev : *temp; + if (delta) + prev = *temp; + src = src + 1; + srclength -= 4; + numInt++; + } + + dest[0] = desc; + int written = 1; + for (int i = 0; i < numInt; i++) { + int size = ithSize[i]; + uint32_t value = buffer[i]; + for (int j = 0; j < size; j++) { + dest[written] = static_cast(value >> (j * 8)); + written++; + } + } + dest += 9; + dstlength -= 9; + return 9; + } + +private: + int maskOutputSize[256]; + __m128i vecmask[256][2]; + + int getNumByteNeeded(const uint32_t val) { + return ((__builtin_clz(val | 255) ^ 31) >> 3) + 1; + } +}; +} +#endif // VARINTG8IU_H__ +#endif //__SSE3__ diff --git a/include/SIMDCompressionAndIntersection/binarypacking.h b/include/SIMDCompressionAndIntersection/binarypacking.h new file mode 100644 index 0000000..43955fb --- /dev/null +++ b/include/SIMDCompressionAndIntersection/binarypacking.h @@ -0,0 +1,187 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. 
+ * + * (c) Daniel Lemire, http://lemire.me/en/ + */ + +#ifndef SIMDCompressionAndIntersection_BINARYPACKING_H_ +#define SIMDCompressionAndIntersection_BINARYPACKING_H_ + +#include "codecs.h" +#include "bitpackinghelpers.h" +#include "util.h" + +namespace SIMDCompressionLib { + +struct BasicBlockPacker { + static void inline unpackblock(const uint32_t *in, uint32_t *out, + const uint32_t bit, uint32_t &initoffset) { + BitPackingHelpers::fastunpack(in, out, bit); + if (bit < 32) + inverseDelta(initoffset, out); + initoffset = *(out + BitPackingHelpers::BlockSize - 1); + } + + static uint32_t maxbits(const uint32_t *in, uint32_t &initoffset) { + uint32_t accumulator = in[0] - initoffset; + for (uint32_t k = 1; k < BitPackingHelpers::BlockSize; ++k) { + accumulator |= in[k] - in[k - 1]; + } + initoffset = in[BitPackingHelpers::BlockSize - 1]; + return gccbits(accumulator); + } + + static void inline packblockwithoutmask(uint32_t *in, uint32_t *out, + const uint32_t bit, + uint32_t &initoffset) { + const uint32_t nextoffset = *(in + BitPackingHelpers::BlockSize - 1); + if (bit < 32) + delta(initoffset, in); + BitPackingHelpers::fastpackwithoutmask(in, out, bit); + initoffset = nextoffset; + } + static string name() { return "BasicBlockPacker"; } +}; + +struct NoDeltaBlockPacker { + static void inline unpackblock(const uint32_t *in, uint32_t *out, + const uint32_t bit, uint32_t &) { + BitPackingHelpers::fastunpack(in, out, bit); + } + static void inline packblockwithoutmask(uint32_t *in, uint32_t *out, + const uint32_t bit, uint32_t &) { + BitPackingHelpers::fastpackwithoutmask(in, out, bit); + } + + static uint32_t maxbits(const uint32_t *in, uint32_t &) { + uint32_t accumulator = 0; + for (uint32_t k = 0; k < BitPackingHelpers::BlockSize; ++k) { + accumulator |= in[k]; + } + return gccbits(accumulator); + } + + static string name() { return "NoDeltaBlockPacker"; } +}; + +struct IntegratedBlockPacker { + PURE_FUNCTION + static uint32_t maxbits(const uint32_t *in, uint32_t &initoffset) { + uint32_t accumulator = in[0] - initoffset; + for (uint32_t k = 1; k < BitPackingHelpers::BlockSize; ++k) { + accumulator |= in[k] - in[k - 1]; + } + initoffset = in[BitPackingHelpers::BlockSize - 1]; + return gccbits(accumulator); + } + + static void inline packblockwithoutmask(const uint32_t *in, uint32_t *out, + const uint32_t bit, + uint32_t &initoffset) { + BitPackingHelpers::integratedfastpackwithoutmask(initoffset, in, out, bit); + initoffset = *(in + BitPackingHelpers::BlockSize - 1); + } + static void inline unpackblock(const uint32_t *in, uint32_t *out, + const uint32_t bit, uint32_t &initoffset) { + BitPackingHelpers::integratedfastunpack(initoffset, in, out, bit); + initoffset = *(out + BitPackingHelpers::BlockSize - 1); + } + static string name() { return "IntegratedBlockPacker"; } +}; + +template class BinaryPacking : public IntegerCODEC { +public: + static const uint32_t MiniBlockSize = 32; + static const uint32_t HowManyMiniBlocks = 4; + static const uint32_t BlockSize = + MiniBlockSize; // HowManyMiniBlocks * MiniBlockSize; + static const uint32_t bits32 = 8; + + void encodeArray(uint32_t *in, const size_t length, uint32_t *out, + size_t &nvalue) { + checkifdivisibleby(length, BlockSize); + const uint32_t *const initout(out); + *out++ = static_cast(length); + uint32_t Bs[HowManyMiniBlocks]; + uint32_t init = 0; + const uint32_t *const final = in + length; + for (; in + HowManyMiniBlocks * MiniBlockSize <= final; + in += HowManyMiniBlocks * MiniBlockSize) { + uint32_t tmpinit = init; + for 
(uint32_t i = 0; i < HowManyMiniBlocks; ++i) { + Bs[i] = BlockPacker::maxbits(in + i * MiniBlockSize, tmpinit); + } + *out++ = (Bs[0] << 24) | (Bs[1] << 16) | (Bs[2] << 8) | Bs[3]; + for (uint32_t i = 0; i < HowManyMiniBlocks; ++i) { + BlockPacker::packblockwithoutmask(in + i * MiniBlockSize, out, Bs[i], + init); + out += Bs[i]; + } + } + if (in < final) { + size_t howmany = (final - in) / MiniBlockSize; + uint32_t tmpinit = init; + memset(&Bs[0], 0, HowManyMiniBlocks * sizeof(uint32_t)); + for (uint32_t i = 0; i < howmany; ++i) { + Bs[i] = BlockPacker::maxbits(in + i * MiniBlockSize, tmpinit); + } + *out++ = (Bs[0] << 24) | (Bs[1] << 16) | (Bs[2] << 8) | Bs[3]; + for (uint32_t i = 0; i < howmany; ++i) { + BlockPacker::packblockwithoutmask(in + i * MiniBlockSize, out, Bs[i], + init); + out += Bs[i]; + } + } + nvalue = out - initout; + } + + const uint32_t *decodeArray(const uint32_t *in, const size_t /*length*/, + uint32_t *out, size_t &nvalue) { + const uint32_t actuallength = *in++; + checkifdivisibleby(actuallength, BlockSize); + const uint32_t *const initout(out); + uint32_t Bs[HowManyMiniBlocks]; + uint32_t init = 0; + for (; out < initout + + actuallength / (HowManyMiniBlocks * MiniBlockSize) * + HowManyMiniBlocks * MiniBlockSize; + out += HowManyMiniBlocks * MiniBlockSize) { + Bs[0] = static_cast(in[0] >> 24); + Bs[1] = static_cast(in[0] >> 16); + Bs[2] = static_cast(in[0] >> 8); + Bs[3] = static_cast(in[0]); + ++in; + for (uint32_t i = 0; i < HowManyMiniBlocks; ++i) { + BlockPacker::unpackblock(in, out + i * MiniBlockSize, Bs[i], init); + in += Bs[i]; + } + } + if (out < initout + actuallength) { + size_t howmany = (initout + actuallength - out) / MiniBlockSize; + Bs[0] = static_cast(in[0] >> 24); + Bs[1] = static_cast(in[0] >> 16); + Bs[2] = static_cast(in[0] >> 8); + Bs[3] = static_cast(in[0]); + ++in; + for (uint32_t i = 0; i < howmany; ++i) { + BlockPacker::unpackblock(in, out + i * MiniBlockSize, Bs[i], init); + in += Bs[i]; + } + out += howmany * MiniBlockSize; + } + nvalue = out - initout; + return in; + } + + string name() const { + ostringstream convert; + convert << "BinaryPacking" + << "With" << BlockPacker::name() << MiniBlockSize; + return convert.str(); + } +}; + +} // namespace SIMDCompressionLib + +#endif /* SIMDCompressionAndIntersection_BINARYPACKING_H_ */ diff --git a/include/SIMDCompressionAndIntersection/bitpacking.h b/include/SIMDCompressionAndIntersection/bitpacking.h new file mode 100644 index 0000000..6f67e96 --- /dev/null +++ b/include/SIMDCompressionAndIntersection/bitpacking.h @@ -0,0 +1,174 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. 
+ * + * (c) Daniel Lemire, http://lemire.me/en/ + */ +#ifndef SIMDCompressionAndIntersection_BITPACKING +#define SIMDCompressionAndIntersection_BITPACKING +#include +#include "platform.h" + +namespace SIMDCompressionLib { + +void __fastunpack0(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); +void __fastunpack1(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); +void __fastunpack2(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); +void __fastunpack3(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); +void __fastunpack4(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); +void __fastunpack5(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); +void __fastunpack6(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); +void __fastunpack7(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); +void __fastunpack8(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); +void __fastunpack9(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); +void __fastunpack10(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastunpack11(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastunpack12(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastunpack13(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastunpack14(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastunpack15(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastunpack16(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastunpack17(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastunpack18(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastunpack19(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastunpack20(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastunpack21(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastunpack22(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastunpack23(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastunpack24(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastunpack25(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastunpack26(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastunpack27(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastunpack28(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastunpack29(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastunpack30(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastunpack31(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastunpack32(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); + +void __fastpack0(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); +void __fastpack1(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); +void __fastpack2(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); +void __fastpack3(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); +void __fastpack4(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); +void __fastpack5(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); +void __fastpack6(const uint32_t *__restrict__ in, uint32_t *__restrict__ 
out); +void __fastpack7(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); +void __fastpack8(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); +void __fastpack9(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); +void __fastpack10(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); +void __fastpack11(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); +void __fastpack12(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); +void __fastpack13(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); +void __fastpack14(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); +void __fastpack15(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); +void __fastpack16(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); +void __fastpack17(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); +void __fastpack18(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); +void __fastpack19(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); +void __fastpack20(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); +void __fastpack21(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); +void __fastpack22(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); +void __fastpack23(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); +void __fastpack24(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); +void __fastpack25(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); +void __fastpack26(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); +void __fastpack27(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); +void __fastpack28(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); +void __fastpack29(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); +void __fastpack30(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); +void __fastpack31(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); +void __fastpack32(const uint32_t *__restrict__ in, uint32_t *__restrict__ out); + +void __fastpackwithoutmask0(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastpackwithoutmask1(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastpackwithoutmask2(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastpackwithoutmask3(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastpackwithoutmask4(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastpackwithoutmask5(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastpackwithoutmask6(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastpackwithoutmask7(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastpackwithoutmask8(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastpackwithoutmask9(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastpackwithoutmask10(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastpackwithoutmask11(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastpackwithoutmask12(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastpackwithoutmask13(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastpackwithoutmask14(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastpackwithoutmask15(const 
uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastpackwithoutmask16(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastpackwithoutmask17(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastpackwithoutmask18(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastpackwithoutmask19(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastpackwithoutmask20(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastpackwithoutmask21(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastpackwithoutmask22(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastpackwithoutmask23(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastpackwithoutmask24(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastpackwithoutmask25(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastpackwithoutmask26(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastpackwithoutmask27(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastpackwithoutmask28(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastpackwithoutmask29(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastpackwithoutmask30(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastpackwithoutmask31(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __fastpackwithoutmask32(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); + +} // namespace SIMDCompressionLib + +#endif // SIMDCompressionAndIntersection_BITPACKING diff --git a/include/SIMDCompressionAndIntersection/bitpackinghelpers.h b/include/SIMDCompressionAndIntersection/bitpackinghelpers.h new file mode 100644 index 0000000..a8ef3e8 --- /dev/null +++ b/include/SIMDCompressionAndIntersection/bitpackinghelpers.h @@ -0,0 +1,691 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. + * + * (c) Leonid Boytsov, Nathan Kurz and Daniel Lemire + */ + +#ifndef SIMDCompressionAndIntersection_BITPACKINGHELPERS_H_ +#define SIMDCompressionAndIntersection_BITPACKINGHELPERS_H_ + +#include "bitpacking.h" +#include "integratedbitpacking.h" +#include "delta.h" +#include "util.h" + +namespace SIMDCompressionLib { + +struct BitPackingHelpers { + const static unsigned BlockSize = 32; + + static void inline fastunpack(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out, + const uint32_t bit) { + // Could have used function pointers instead of switch. + // Switch calls do offer the compiler more opportunities for optimization in + // theory. In this case, it makes no difference with a good compiler. 
+ switch (bit) { + case 0: + __fastunpack0(in, out); + break; + case 1: + __fastunpack1(in, out); + break; + case 2: + __fastunpack2(in, out); + break; + case 3: + __fastunpack3(in, out); + break; + case 4: + __fastunpack4(in, out); + break; + case 5: + __fastunpack5(in, out); + break; + case 6: + __fastunpack6(in, out); + break; + case 7: + __fastunpack7(in, out); + break; + case 8: + __fastunpack8(in, out); + break; + case 9: + __fastunpack9(in, out); + break; + case 10: + __fastunpack10(in, out); + break; + case 11: + __fastunpack11(in, out); + break; + case 12: + __fastunpack12(in, out); + break; + case 13: + __fastunpack13(in, out); + break; + case 14: + __fastunpack14(in, out); + break; + case 15: + __fastunpack15(in, out); + break; + case 16: + __fastunpack16(in, out); + break; + case 17: + __fastunpack17(in, out); + break; + case 18: + __fastunpack18(in, out); + break; + case 19: + __fastunpack19(in, out); + break; + case 20: + __fastunpack20(in, out); + break; + case 21: + __fastunpack21(in, out); + break; + case 22: + __fastunpack22(in, out); + break; + case 23: + __fastunpack23(in, out); + break; + case 24: + __fastunpack24(in, out); + break; + case 25: + __fastunpack25(in, out); + break; + case 26: + __fastunpack26(in, out); + break; + case 27: + __fastunpack27(in, out); + break; + case 28: + __fastunpack28(in, out); + break; + case 29: + __fastunpack29(in, out); + break; + case 30: + __fastunpack30(in, out); + break; + case 31: + __fastunpack31(in, out); + break; + case 32: + __fastunpack32(in, out); + break; + default: + break; + } + } + + static void inline fastpack(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out, const uint32_t bit) { + // Could have used function pointers instead of switch. + // Switch calls do offer the compiler more opportunities for optimization in + // theory. In this case, it makes no difference with a good compiler. 
+ switch (bit) { + case 0: + __fastpack0(in, out); + break; + case 1: + __fastpack1(in, out); + break; + case 2: + __fastpack2(in, out); + break; + case 3: + __fastpack3(in, out); + break; + case 4: + __fastpack4(in, out); + break; + case 5: + __fastpack5(in, out); + break; + case 6: + __fastpack6(in, out); + break; + case 7: + __fastpack7(in, out); + break; + case 8: + __fastpack8(in, out); + break; + case 9: + __fastpack9(in, out); + break; + case 10: + __fastpack10(in, out); + break; + case 11: + __fastpack11(in, out); + break; + case 12: + __fastpack12(in, out); + break; + case 13: + __fastpack13(in, out); + break; + case 14: + __fastpack14(in, out); + break; + case 15: + __fastpack15(in, out); + break; + case 16: + __fastpack16(in, out); + break; + case 17: + __fastpack17(in, out); + break; + case 18: + __fastpack18(in, out); + break; + case 19: + __fastpack19(in, out); + break; + case 20: + __fastpack20(in, out); + break; + case 21: + __fastpack21(in, out); + break; + case 22: + __fastpack22(in, out); + break; + case 23: + __fastpack23(in, out); + break; + case 24: + __fastpack24(in, out); + break; + case 25: + __fastpack25(in, out); + break; + case 26: + __fastpack26(in, out); + break; + case 27: + __fastpack27(in, out); + break; + case 28: + __fastpack28(in, out); + break; + case 29: + __fastpack29(in, out); + break; + case 30: + __fastpack30(in, out); + break; + case 31: + __fastpack31(in, out); + break; + case 32: + __fastpack32(in, out); + break; + default: + break; + } + } + + /*assumes that integers fit in the prescribed number of bits*/ + static void inline fastpackwithoutmask(const uint32_t *__restrict__ in, + uint32_t *__restrict__ out, + const uint32_t bit) { + // Could have used function pointers instead of switch. + // Switch calls do offer the compiler more opportunities for optimization in + // theory. In this case, it makes no difference with a good compiler. 
+ switch (bit) { + case 0: + __fastpackwithoutmask0(in, out); + break; + case 1: + __fastpackwithoutmask1(in, out); + break; + case 2: + __fastpackwithoutmask2(in, out); + break; + case 3: + __fastpackwithoutmask3(in, out); + break; + case 4: + __fastpackwithoutmask4(in, out); + break; + case 5: + __fastpackwithoutmask5(in, out); + break; + case 6: + __fastpackwithoutmask6(in, out); + break; + case 7: + __fastpackwithoutmask7(in, out); + break; + case 8: + __fastpackwithoutmask8(in, out); + break; + case 9: + __fastpackwithoutmask9(in, out); + break; + case 10: + __fastpackwithoutmask10(in, out); + break; + case 11: + __fastpackwithoutmask11(in, out); + break; + case 12: + __fastpackwithoutmask12(in, out); + break; + case 13: + __fastpackwithoutmask13(in, out); + break; + case 14: + __fastpackwithoutmask14(in, out); + break; + case 15: + __fastpackwithoutmask15(in, out); + break; + case 16: + __fastpackwithoutmask16(in, out); + break; + case 17: + __fastpackwithoutmask17(in, out); + break; + case 18: + __fastpackwithoutmask18(in, out); + break; + case 19: + __fastpackwithoutmask19(in, out); + break; + case 20: + __fastpackwithoutmask20(in, out); + break; + case 21: + __fastpackwithoutmask21(in, out); + break; + case 22: + __fastpackwithoutmask22(in, out); + break; + case 23: + __fastpackwithoutmask23(in, out); + break; + case 24: + __fastpackwithoutmask24(in, out); + break; + case 25: + __fastpackwithoutmask25(in, out); + break; + case 26: + __fastpackwithoutmask26(in, out); + break; + case 27: + __fastpackwithoutmask27(in, out); + break; + case 28: + __fastpackwithoutmask28(in, out); + break; + case 29: + __fastpackwithoutmask29(in, out); + break; + case 30: + __fastpackwithoutmask30(in, out); + break; + case 31: + __fastpackwithoutmask31(in, out); + break; + case 32: + __fastpackwithoutmask32(in, out); + break; + default: + break; + } + } + + static void inline integratedfastunpack(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out, + const uint32_t bit) { + // Could have used function pointers instead of switch. + // Switch calls do offer the compiler more opportunities for optimization in + // theory. In this case, it makes no difference with a good compiler. 
+ switch (bit) { + case 0: + __integratedfastunpack0(initoffset, in, out); + break; + case 1: + __integratedfastunpack1(initoffset, in, out); + break; + case 2: + __integratedfastunpack2(initoffset, in, out); + break; + case 3: + __integratedfastunpack3(initoffset, in, out); + break; + case 4: + __integratedfastunpack4(initoffset, in, out); + break; + case 5: + __integratedfastunpack5(initoffset, in, out); + break; + case 6: + __integratedfastunpack6(initoffset, in, out); + break; + case 7: + __integratedfastunpack7(initoffset, in, out); + break; + case 8: + __integratedfastunpack8(initoffset, in, out); + break; + case 9: + __integratedfastunpack9(initoffset, in, out); + break; + case 10: + __integratedfastunpack10(initoffset, in, out); + break; + case 11: + __integratedfastunpack11(initoffset, in, out); + break; + case 12: + __integratedfastunpack12(initoffset, in, out); + break; + case 13: + __integratedfastunpack13(initoffset, in, out); + break; + case 14: + __integratedfastunpack14(initoffset, in, out); + break; + case 15: + __integratedfastunpack15(initoffset, in, out); + break; + case 16: + __integratedfastunpack16(initoffset, in, out); + break; + case 17: + __integratedfastunpack17(initoffset, in, out); + break; + case 18: + __integratedfastunpack18(initoffset, in, out); + break; + case 19: + __integratedfastunpack19(initoffset, in, out); + break; + case 20: + __integratedfastunpack20(initoffset, in, out); + break; + case 21: + __integratedfastunpack21(initoffset, in, out); + break; + case 22: + __integratedfastunpack22(initoffset, in, out); + break; + case 23: + __integratedfastunpack23(initoffset, in, out); + break; + case 24: + __integratedfastunpack24(initoffset, in, out); + break; + case 25: + __integratedfastunpack25(initoffset, in, out); + break; + case 26: + __integratedfastunpack26(initoffset, in, out); + break; + case 27: + __integratedfastunpack27(initoffset, in, out); + break; + case 28: + __integratedfastunpack28(initoffset, in, out); + break; + case 29: + __integratedfastunpack29(initoffset, in, out); + break; + case 30: + __integratedfastunpack30(initoffset, in, out); + break; + case 31: + __integratedfastunpack31(initoffset, in, out); + break; + case 32: + __integratedfastunpack32(initoffset, in, out); + break; + default: + break; + } + } + + /*assumes that integers fit in the prescribed number of bits*/ + static void inline integratedfastpackwithoutmask( + const uint32_t initoffset, const uint32_t *__restrict__ in, + uint32_t *__restrict__ out, const uint32_t bit) { + // Could have used function pointers instead of switch. + // Switch calls do offer the compiler more opportunities for optimization in + // theory. In this case, it makes no difference with a good compiler. 
+ switch (bit) { + case 0: + __integratedfastpack0(initoffset, in, out); + break; + case 1: + __integratedfastpack1(initoffset, in, out); + break; + case 2: + __integratedfastpack2(initoffset, in, out); + break; + case 3: + __integratedfastpack3(initoffset, in, out); + break; + case 4: + __integratedfastpack4(initoffset, in, out); + break; + case 5: + __integratedfastpack5(initoffset, in, out); + break; + case 6: + __integratedfastpack6(initoffset, in, out); + break; + case 7: + __integratedfastpack7(initoffset, in, out); + break; + case 8: + __integratedfastpack8(initoffset, in, out); + break; + case 9: + __integratedfastpack9(initoffset, in, out); + break; + case 10: + __integratedfastpack10(initoffset, in, out); + break; + case 11: + __integratedfastpack11(initoffset, in, out); + break; + case 12: + __integratedfastpack12(initoffset, in, out); + break; + case 13: + __integratedfastpack13(initoffset, in, out); + break; + case 14: + __integratedfastpack14(initoffset, in, out); + break; + case 15: + __integratedfastpack15(initoffset, in, out); + break; + case 16: + __integratedfastpack16(initoffset, in, out); + break; + case 17: + __integratedfastpack17(initoffset, in, out); + break; + case 18: + __integratedfastpack18(initoffset, in, out); + break; + case 19: + __integratedfastpack19(initoffset, in, out); + break; + case 20: + __integratedfastpack20(initoffset, in, out); + break; + case 21: + __integratedfastpack21(initoffset, in, out); + break; + case 22: + __integratedfastpack22(initoffset, in, out); + break; + case 23: + __integratedfastpack23(initoffset, in, out); + break; + case 24: + __integratedfastpack24(initoffset, in, out); + break; + case 25: + __integratedfastpack25(initoffset, in, out); + break; + case 26: + __integratedfastpack26(initoffset, in, out); + break; + case 27: + __integratedfastpack27(initoffset, in, out); + break; + case 28: + __integratedfastpack28(initoffset, in, out); + break; + case 29: + __integratedfastpack29(initoffset, in, out); + break; + case 30: + __integratedfastpack30(initoffset, in, out); + break; + case 31: + __integratedfastpack31(initoffset, in, out); + break; + case 32: + __integratedfastpack32(initoffset, in, out); + break; + default: + break; + } + } + + static void inline ipackwithoutmask(const uint32_t *in, const size_t Qty, + uint32_t *out, const uint32_t bit) { + if (Qty % BlockSize) { + throw std::logic_error("Incorrect # of entries."); + } + uint32_t initoffset = 0; + + for (size_t k = 0; k < Qty / BlockSize; ++k) { + integratedfastpackwithoutmask(initoffset, in + k * BlockSize, + out + k * bit, bit); + initoffset = *(in + k * BlockSize + BlockSize - 1); + } + } + + static void inline pack(uint32_t *in, const size_t Qty, uint32_t *out, + const uint32_t bit) { + if (Qty % BlockSize) { + throw std::logic_error("Incorrect # of entries."); + } + uint32_t initoffset = 0; + + for (size_t k = 0; k < Qty / BlockSize; ++k) { + const uint32_t nextoffset = *(in + k * BlockSize + BlockSize - 1); + if (bit < 32) + delta(initoffset, in + k * BlockSize); + fastpack(in + k * BlockSize, out + k * bit, bit); + initoffset = nextoffset; + } + } + + static void inline packWithoutDelta(uint32_t *in, const size_t Qty, + uint32_t *out, const uint32_t bit) { + for (size_t k = 0; k < Qty / BlockSize; ++k) { + fastpack(in + k * BlockSize, out + k * bit, bit); + } + } + + static void inline unpack(const uint32_t *in, const size_t Qty, uint32_t *out, + const uint32_t bit) { + if (Qty % BlockSize) { + throw std::logic_error("Incorrect # of entries."); + } + uint32_t 
initoffset = 0; + + for (size_t k = 0; k < Qty / BlockSize; ++k) { + fastunpack(in + k * bit, out + k * BlockSize, bit); + if (bit < 32) + inverseDelta(initoffset, out + k * BlockSize); + initoffset = *(out + k * BlockSize + BlockSize - 1); + } + } + + static void inline unpackWithoutDelta(const uint32_t *in, const size_t Qty, + uint32_t *out, const uint32_t bit) { + for (size_t k = 0; k < Qty / BlockSize; ++k) { + fastunpack(in + k * bit, out + k * BlockSize, bit); + } + } + + static void inline packwithoutmask(uint32_t *in, const size_t Qty, + uint32_t *out, const uint32_t bit) { + if (Qty % BlockSize) { + throw std::logic_error("Incorrect # of entries."); + } + uint32_t initoffset = 0; + + for (size_t k = 0; k < Qty / BlockSize; ++k) { + const uint32_t nextoffset = *(in + k * BlockSize + BlockSize - 1); + if (bit < 32) + delta(initoffset, in + k * BlockSize); + fastpackwithoutmask(in + k * BlockSize, out + k * bit, bit); + initoffset = nextoffset; + } + } + + static void inline packwithoutmaskWithoutDelta(uint32_t *in, const size_t Qty, + uint32_t *out, + const uint32_t bit) { + for (size_t k = 0; k < Qty / BlockSize; ++k) { + fastpackwithoutmask(in + k * BlockSize, out + k * bit, bit); + } + } + + static void inline iunpack(const uint32_t *in, const size_t Qty, + uint32_t *out, const uint32_t bit) { + if (Qty % BlockSize) { + throw std::logic_error("Incorrect # of entries."); + } + + uint32_t initoffset = 0; + for (size_t k = 0; k < Qty / BlockSize; ++k) { + integratedfastunpack(initoffset, in + k * bit, out + k * BlockSize, bit); + initoffset = *(out + k * BlockSize + BlockSize - 1); + } + } + + /*static void GenRandom(std::vector& data, int b) { + data[0] = random(b); + + for(size_t i = 1 ; i < data.size() ; ++i ) + data[i] = random(b) + data[i-1]; + }*/ + + static void CheckMaxDiff(const std::vector &refdata, unsigned bit) { + for (size_t i = 1; i < refdata.size(); ++i) { + if (gccbits(refdata[i] - refdata[i - 1]) > bit) + throw std::runtime_error("bug"); + } + } +}; + +} // namespace SIMDCompressionLib + +#endif /* SIMDCompressionAndIntersection_BITPACKINGHELPERS_H_ */ diff --git a/include/SIMDCompressionAndIntersection/boolarray.h b/include/SIMDCompressionAndIntersection/boolarray.h new file mode 100644 index 0000000..6b0f9d0 --- /dev/null +++ b/include/SIMDCompressionAndIntersection/boolarray.h @@ -0,0 +1,181 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. + * + */ + +#ifndef SIMDCompressionAndIntersection_BOOLARRAY_H_ +#define SIMDCompressionAndIntersection_BOOLARRAY_H_ + +#include "common.h" + +namespace SIMDCompressionLib { + +using namespace std; + +static inline int numberOfTrailingZeros(uint64_t x) { + if (x == 0) + return 64; + return __builtin_ctzl(x); +} + +class BoolArray { +public: + vector buffer; + size_t sizeinbits; + BoolArray(const size_t n, const uint64_t initval = 0) + : buffer(n / 64 + (n % 64 == 0 ? 0 : 1), initval), sizeinbits(n) {} + + BoolArray() : buffer(), sizeinbits(0) {} + + BoolArray(const BoolArray &ba) + : buffer(ba.buffer), sizeinbits(ba.sizeinbits) {} + + void inplaceIntersect(const BoolArray &other) { + assert(other.buffer.size() == buffer.size()); + for (size_t i = 0; i < buffer.size(); ++i) + buffer[i] &= other.buffer[i]; + } + + // this is no faster because the compiler will vectorize + // inplaceIntersect automagically? 
+ void SIMDinplaceIntersect(const BoolArray &other) { + assert(other.buffer.size() == buffer.size()); + __m128i *bin = reinterpret_cast<__m128i *>(buffer.data()); + const __m128i *bo = reinterpret_cast(other.buffer.data()); + for (size_t i = 0; i < buffer.size() / 2; ++i) { + __m128i p1 = MM_LOAD_SI_128(bin + i); + __m128i p2 = MM_LOAD_SI_128(bo + i); + __m128i andp1p2 = _mm_and_si128(p1, p2); + _mm_storeu_si128(bin + i, andp1p2); + } + for (size_t i = buffer.size() / 2 * 2; i < buffer.size(); ++i) + buffer[i] &= other.buffer[i]; + } + + void intersect(const BoolArray &other, BoolArray &output) { + assert(other.buffer.size() == buffer.size()); + output.buffer.resize(buffer.size()); + for (size_t i = 0; i < buffer.size(); ++i) + output.buffer[i] = buffer[i] & other.buffer[i]; + } + + // this is no faster because the compiler will vectorize + // intersect automagically? + void SIMDintersect(const BoolArray &other, BoolArray &output) { + assert(other.buffer.size() == buffer.size()); + output.buffer.resize(buffer.size()); + const __m128i *bin = reinterpret_cast(buffer.data()); + const __m128i *bo = reinterpret_cast(other.buffer.data()); + __m128i *bout = reinterpret_cast<__m128i *>(output.buffer.data()); + + for (size_t i = 0; i < buffer.size() / 2; ++i) { + __m128i p1 = MM_LOAD_SI_128(bin + i); + __m128i p2 = MM_LOAD_SI_128(bo + i); + __m128i andp1p2 = _mm_and_si128(p1, p2); + _mm_storeu_si128(bout + i, andp1p2); + } + for (size_t i = buffer.size() / 2 * 2; i < buffer.size(); ++i) + output.buffer[i] = buffer[i] & other.buffer[i]; + } + + void setSizeInBits(const size_t sizeib) { sizeinbits = sizeib; } + + /** + * Write out this bitmap to a vector as a list of integers corresponding + * to set bits. The caller should have allocated enough memory. + */ + void toArray(vector &ans) { + uint32_t pos = 0; + for (uint32_t k = 0; k < buffer.size(); ++k) { + uint64_t myword = buffer[k]; + while (myword != 0) { + int ntz = __builtin_ctzl(myword); + ans[pos++] = k * 64 + ntz; + myword ^= (1ll << ntz); + } + } + ans.resize(pos); + } + + /** + * This is a version of toArray where we write to a pointer. + * Returns the number of written ints. + */ + size_t toInts(uint32_t *out) { + size_t pos = 0; + for (uint32_t k = 0; k < buffer.size(); ++k) { + const uint64_t myword = buffer[k]; + for (int offset = 0; offset < 64; ++offset) { + if ((myword >> offset) == 0) + break; + offset += numberOfTrailingZeros((myword >> offset)); + out[pos++] = 64 * k + offset; + } + } + return pos; + } + BoolArray &operator=(const BoolArray &x) { + this->buffer = x.buffer; + this->sizeinbits = x.sizeinbits; + return *this; + } + + /** + * set to true (whether it was already set to true or not) + * + * This is an expensive (random access) API, you really ought to + * prepare a new word and then append it. + */ + ALWAYS_INLINE + void set(const size_t pos) { + buffer[pos / 64] |= (static_cast(1) << (pos % 64)); + } + + /** + * set to false (whether it was already set to false or not) + * + * This is an expensive (random access) API, you really ought to + * prepare a new word and then append it. + */ + ALWAYS_INLINE + void unset(const size_t pos) { + buffer[pos / 64] |= ~(static_cast(1) << (pos % 64)); + } + + /** + * true of false? (set or unset) + */ + ALWAYS_INLINE + bool get(const size_t pos) const { + return (buffer[pos / 64] & (static_cast(1) << (pos % 64))) != 0; + } + + /** + * set all bits to 0 + */ + void reset() { + memset(buffer.data(), 0, + sizeof(uint64_t) * + buffer.size()); // memset can be slow, does it matter? 
+ sizeinbits = 0; + } + + size_t sizeInBits() const { return sizeinbits; } + + size_t sizeInBytes() const { return buffer.size() * sizeof(uint64_t); } + + /** + * Return memory usage of a bitmap spanning n bits + */ + static size_t sizeInBytes(size_t n) { + size_t buffersize = n / 64 + (n % 64 == 0 ? 0 : 1); + return buffersize * sizeof(uint64_t); + } + + ~BoolArray() {} +}; + +} // namespace SIMDCompressionLib + +#endif /* SIMDCompressionAndIntersection_BOOLARRAY_H_ */ diff --git a/include/SIMDCompressionAndIntersection/codecfactory.h b/include/SIMDCompressionAndIntersection/codecfactory.h new file mode 100644 index 0000000..ddaf7d8 --- /dev/null +++ b/include/SIMDCompressionAndIntersection/codecfactory.h @@ -0,0 +1,185 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. + * + * (c) Daniel Lemire, http://lemire.me/en/ + */ + +#ifndef SIMDCompressionAndIntersection_CODECFACTORY_H_ +#define SIMDCompressionAndIntersection_CODECFACTORY_H_ + +#include "common.h" +#include "codecs.h" +#include "common.h" +#include "compositecodec.h" +#include "bitpackinghelpers.h" +#include "simdbitpackinghelpers.h" +#include "delta.h" +#include "util.h" +#include "synthetic.h" +#include "binarypacking.h" +#include "simdbinarypacking.h" +#include "simdvariablebyte.h" +#include "fastpfor.h" +#include "simdfastpfor.h" +#include "variablebyte.h" +#include "varintgb.h" +#include "streamvariablebyte.h" +#include "VarIntG8IU.h" // warning: patented scheme +#include "frameofreference.h" +#include "forcodec.h" + +namespace SIMDCompressionLib { + +using namespace std; + +typedef VariableByte leftovercodec; + +static std::map> initializefactory() { + std::map> schemes; +#ifdef __SSSE3__ + schemes["varintg8iu"] = shared_ptr(new VarIntG8IU()); +#endif /* __SSSE3__ */ + schemes["fastpfor"] = shared_ptr( + new CompositeCodec, leftovercodec>()); + + schemes["copy"] = shared_ptr(new JustCopy()); + schemes["varint"] = shared_ptr(new VariableByte()); + schemes["vbyte"] = shared_ptr(new VByte()); + schemes["maskedvbyte"] = shared_ptr(new MaskedVByte()); + schemes["streamvbyte"] = shared_ptr(new StreamVByteD1()); + schemes["frameofreference"] = + shared_ptr(new FrameOfReference()); + + schemes["simdframeofreference"] = + shared_ptr(new SIMDFrameOfReference()); + + schemes["varintgb"] = std::shared_ptr(new VarIntGB()); + + schemes["s4-fastpfor-d4"] = shared_ptr( + new CompositeCodec, leftovercodec>()); + schemes["s4-fastpfor-dm"] = shared_ptr( + new CompositeCodec, VariableByte>()); + schemes["s4-fastpfor-d1"] = shared_ptr( + new CompositeCodec, leftovercodec>()); + schemes["s4-fastpfor-d2"] = shared_ptr( + new CompositeCodec, leftovercodec>()); + + schemes["bp32"] = shared_ptr( + new CompositeCodec, + VariableByte>()); + schemes["ibp32"] = shared_ptr( + new CompositeCodec, + leftovercodec>()); + + schemes["s4-bp128-d1-ni"] = shared_ptr( + new CompositeCodec< + SIMDBinaryPacking>, + leftovercodec>()); + schemes["s4-bp128-d2-ni"] = shared_ptr( + new CompositeCodec< + SIMDBinaryPacking>, + leftovercodec>()); + schemes["s4-bp128-d4-ni"] = shared_ptr( + new CompositeCodec< + SIMDBinaryPacking>, + leftovercodec>()); + schemes["s4-bp128-dm-ni"] = shared_ptr( + new CompositeCodec< + SIMDBinaryPacking>, + leftovercodec>()); + + schemes["s4-bp128-d1"] = shared_ptr( + new CompositeCodec< + SIMDBinaryPacking>, + leftovercodec>()); + schemes["s4-bp128-d2"] = shared_ptr( + new CompositeCodec< + SIMDBinaryPacking>, + leftovercodec>()); + schemes["s4-bp128-d4"] = shared_ptr( + new 
CompositeCodec< + SIMDBinaryPacking>, + leftovercodec>()); + schemes["s4-bp128-dm"] = shared_ptr( + new CompositeCodec< + SIMDBinaryPacking>, + leftovercodec>()); + schemes["for"] = shared_ptr(new ForCODEC()); + return schemes; +} + +class CODECFactory { +public: + static map> scodecmap; + static shared_ptr defaultptr; + + // hacked for convenience + static vector> allSchemes() { + vector> ans; + for (auto i = scodecmap.begin(); i != scodecmap.end(); ++i) { + ans.push_back(i->second); + } + return ans; + } + + static vector allNames() { + vector ans; + for (auto i = scodecmap.begin(); i != scodecmap.end(); ++i) { + ans.push_back(i->first); + } + return ans; + } + + /** + * This function tries to determine whether the + * input is modified during compression. + */ + static bool modifiesInputDuringCompression(IntegerCODEC &v) { + vector test; + const uint32_t N = 2049; + for (uint32_t k = 0; k < N; ++k) + test.emplace_back(k); + vector out(N + 1024); + size_t outsize = out.size(); + v.encodeArray(test.data(), N, out.data(), outsize); + for (uint32_t k = 0; k < N; ++k) + if (test[k] != k) + return true; + return false; // granted this is not full-proof, but is ok in our context + } + + static string getName(IntegerCODEC &v) { + for (auto i = scodecmap.begin(); i != scodecmap.end(); ++i) { + if (i->second.get() == &v) + return i->first; + } + return "UNKNOWN"; + } + + static bool valid(string name) { + return (scodecmap.find(name) != scodecmap.end()); + } + + static shared_ptr &getFromName(string name) { + if (scodecmap.find(name) == scodecmap.end()) { + cerr << "name " << name << " does not refer to a CODEC." << endl; + cerr << "possible choices:" << endl; + for (auto i = scodecmap.begin(); i != scodecmap.end(); ++i) { + cerr << static_cast(i->first) + << endl; // useless cast, but just to be clear + } + return defaultptr; + } + return scodecmap[name]; + } +}; + +map> CODECFactory::scodecmap = + initializefactory(); + +shared_ptr CODECFactory::defaultptr = + shared_ptr(nullptr); +} // namespace SIMDCompressionLib + +#endif /* SIMDCompressionAndIntersection_CODECFACTORY_H_ */ diff --git a/include/SIMDCompressionAndIntersection/codecs.h b/include/SIMDCompressionAndIntersection/codecs.h new file mode 100644 index 0000000..f90dddc --- /dev/null +++ b/include/SIMDCompressionAndIntersection/codecs.h @@ -0,0 +1,131 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. + * + * (c) Daniel Lemire, http://lemire.me/en/ + */ + +#ifndef SIMDCompressionAndIntersection_CODECS_H_ +#define SIMDCompressionAndIntersection_CODECS_H_ + +#include "common.h" +#include "util.h" +#include "bitpackinghelpers.h" + +namespace SIMDCompressionLib { + +using namespace std; + +class NotEnoughStorage : public std::runtime_error { +public: + size_t required; // number of 32-bit symbols required + NotEnoughStorage(const size_t req) : runtime_error(""), required(req) {} +}; + +class IntegerCODEC { +public: + /** + * You specify input and input length, as well as + * output and output length. nvalue gets modified to + * reflect how much was used. If the new value of + * nvalue is more than the original value, we can + * consider this a buffer overrun. + * + * You are responsible for allocating the memory (length + * for *in and nvalue for *out). + */ + virtual void encodeArray(uint32_t *in, const size_t length, uint32_t *out, + size_t &nvalue) = 0; + + /** + * Usage is similar to encodeArray except that it returns a pointer + * incremented from in. 
In theory it should be in+length. If the + * returned pointer is less than in+length, then this generally means + * that the decompression is not finished (some scheme compress + * the bulk of the data one way, and they then they compress remaining + * integers using another scheme). + * + * As with encodeArray, you need to have length elements allocated + * for *in and at least nvalue elements allocated for out. The value + * of the variable nvalue gets updated with the number actually used + * (if nvalue exceeds the original value, there might be a buffer + * overrun). + */ + virtual const uint32_t *decodeArray(const uint32_t *in, const size_t length, + uint32_t *out, size_t &nvalue) = 0; + virtual ~IntegerCODEC() {} + + /** + * Will compress the content of a vector into + * another vector. + * + * This is offered for convenience. It might be slow. + */ + virtual vector compress(vector &data) { + vector compresseddata(data.size() * 2 + + 1024); // allocate plenty of memory + size_t memavailable = compresseddata.size(); + encodeArray(data.data(), data.size(), compresseddata.data(), memavailable); + compresseddata.resize(memavailable); + return compresseddata; + } + + /** + * Will uncompress the content of a vector into + * another vector. Some CODECs know exactly how much data to uncompress, + * others need to uncompress it all to know how data there is to uncompress... + * So it useful to have a hint (expected_uncompressed_size) that tells how + * much data there will be to uncompress. Otherwise, the code will + * try to guess, but the result is uncertain and inefficient. You really + * ought to keep track of how many symbols you had compressed. + * + * For convenience. Might be slow. + */ + virtual vector uncompress(vector &compresseddata, + size_t expected_uncompressed_size = 0) { + vector data( + expected_uncompressed_size); // allocate plenty of memory + size_t memavailable = data.size(); + try { + decodeArray(compresseddata.data(), compresseddata.size(), data.data(), + memavailable); + } catch (NotEnoughStorage &nes) { + data.resize(nes.required + 1024); + decodeArray(compresseddata.data(), compresseddata.size(), data.data(), + memavailable); + } + data.resize(memavailable); + return data; + } + + virtual string name() const = 0; +}; + +/****************** + * This just copies the data, no compression. + */ +class JustCopy : public IntegerCODEC { +public: + void encodeArray(uint32_t *in, const size_t length, uint32_t *out, + size_t &nvalue) { + memcpy(out, in, sizeof(uint32_t) * length); + nvalue = length; + } + // like encodeArray, but we don't actually copy + void fakeencodeArray(const uint32_t * /*in*/, const size_t length, + size_t &nvalue) { + nvalue = length; + } + + const uint32_t *decodeArray(const uint32_t *in, const size_t length, + uint32_t *out, size_t &nvalue) { + memcpy(out, in, sizeof(uint32_t) * length); + nvalue = length; + return in + length; + } + string name() const { return "JustCopy"; } +}; + +} // namespace SIMDCompressionLib + +#endif /* SIMDCompressionAndIntersection_CODECS_H_ */ diff --git a/include/SIMDCompressionAndIntersection/common.h b/include/SIMDCompressionAndIntersection/common.h new file mode 100644 index 0000000..ffb22d3 --- /dev/null +++ b/include/SIMDCompressionAndIntersection/common.h @@ -0,0 +1,60 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. 
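The encodeArray()/decodeArray() contract described above (the caller allocates both buffers; nvalue is a capacity on entry and the number of 32-bit words actually used on exit) is easiest to see with the trivial JustCopy codec. A sketch, not taken from the patch:

#include <cassert>
#include <cstdint>
#include <vector>
#include "codecs.h"

int main() {
  using namespace SIMDCompressionLib;
  JustCopy codec;
  std::vector<uint32_t> in{5, 10, 15, 20};
  std::vector<uint32_t> compressed(in.size() + 1024); // caller allocates generously
  size_t nvalue = compressed.size();                  // in: capacity, out: words written
  codec.encodeArray(in.data(), in.size(), compressed.data(), nvalue);
  assert(nvalue == in.size());                        // JustCopy is one-to-one

  std::vector<uint32_t> recovered(in.size());
  size_t outsize = recovered.size();
  codec.decodeArray(compressed.data(), nvalue, recovered.data(), outsize);
  assert(outsize == in.size() && recovered == in);
  return 0;
}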
+ * + * (c) Daniel Lemire, http://lemire.me/en/ + */ +#ifndef SIMDCompressionAndIntersection_COMMON_H_ +#define SIMDCompressionAndIntersection_COMMON_H_ + +#include +#include +#include +#include +#include +#ifndef _MSC_VER +#include +#endif +#include +#include +#include +#include +#ifndef _MSC_VER +#include +#include +#endif +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "platform.h" + +#ifdef USE_ALIGNED +#define MM_LOAD_SI_128 _mm_load_si128 +#define MM_STORE_SI_128 _mm_store_si128 +#else +#define MM_LOAD_SI_128 _mm_loadu_si128 +#define MM_STORE_SI_128 _mm_storeu_si128 +#endif + +namespace SIMDCompressionLib {} // namespace SIMDCompressionLib + +#endif /* SIMDCompressionAndIntersection_COMMON_H_ */ diff --git a/include/SIMDCompressionAndIntersection/compositecodec.h b/include/SIMDCompressionAndIntersection/compositecodec.h new file mode 100644 index 0000000..c1e1577 --- /dev/null +++ b/include/SIMDCompressionAndIntersection/compositecodec.h @@ -0,0 +1,69 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. + * + * (c) Daniel Lemire, http://lemire.me/en/ + */ +#ifndef SIMDCompressionAndIntersection_COMPOSITECODEC_H_ +#define SIMDCompressionAndIntersection_COMPOSITECODEC_H_ + +#include "common.h" +#include "util.h" +#include "codecs.h" + +namespace SIMDCompressionLib { + +/** + * This is a useful class for CODEC that only compress + * data having length a multiple of some unit length. + */ +template +class CompositeCodec : public IntegerCODEC { +public: + CompositeCodec() : codec1(), codec2() {} + Codec1 codec1; + Codec2 codec2; + void encodeArray(uint32_t *in, const size_t length, uint32_t *out, + size_t &nvalue) { + const size_t roundedlength = length / Codec1::BlockSize * Codec1::BlockSize; + size_t nvalue1 = nvalue; + codec1.encodeArray(in, roundedlength, out, nvalue1); + + if (roundedlength < length) { + ASSERT(nvalue >= nvalue1, nvalue << " " << nvalue1); + size_t nvalue2 = nvalue - nvalue1; + codec2.encodeArray(in + roundedlength, length - roundedlength, + out + nvalue1, nvalue2); + nvalue = nvalue1 + nvalue2; + } else { + nvalue = nvalue1; + } + } + const uint32_t *decodeArray(const uint32_t *in, const size_t length, + uint32_t *out, size_t &nvalue) { + const uint32_t *const initin(in); + size_t mynvalue1 = nvalue; + const uint32_t *in2 = codec1.decodeArray(in, length, out, mynvalue1); + if (length + in > in2) { + assert(nvalue > mynvalue1); + size_t nvalue2 = nvalue - mynvalue1; + const uint32_t *in3 = codec2.decodeArray(in2, length - (in2 - in), + out + mynvalue1, nvalue2); + nvalue = mynvalue1 + nvalue2; + assert(initin + length >= in3); + return in3; + } + nvalue = mynvalue1; + assert(initin + length >= in2); + return in2; + } + string name() const { + ostringstream convert; + convert << codec1.name() << "+" << codec2.name(); + return convert.str(); + } +}; + +} // namespace SIMDCompressionLib + +#endif /* SIMDCompressionAndIntersection_COMPOSITECODEC_H_ */ diff --git a/include/SIMDCompressionAndIntersection/delta.h b/include/SIMDCompressionAndIntersection/delta.h new file mode 100644 index 0000000..4ca74c4 --- /dev/null +++ b/include/SIMDCompressionAndIntersection/delta.h @@ -0,0 +1,86 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. 
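As a concrete illustration of the split that CompositeCodec::encodeArray performs above: codec1 receives the longest prefix whose length is a multiple of its BlockSize, codec2 the leftover tail. The numbers below are arbitrary; BlockSize 128 matches the SIMD bit-packing codecs registered in codecfactory.h:

#include <cstddef>
#include <iostream>

int main() {
  const std::size_t BlockSize = 128; // Codec1::BlockSize (assumed value)
  const std::size_t length = 1000;   // total integers passed to encodeArray
  const std::size_t roundedlength = length / BlockSize * BlockSize;
  std::cout << "codec1 packs " << roundedlength                 // 896
            << " ints, codec2 packs " << (length - roundedlength) // 104
            << " ints\n";
  return 0;
}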
+ * + * (c) Leonid Boytsov, Nathan Kurz and Daniel Lemire + */ + +#ifndef SIMDCompressionAndIntersection_DELTA_H_ +#define SIMDCompressionAndIntersection_DELTA_H_ + +#include "common.h" + +namespace SIMDCompressionLib { + +/** + * To avoid crazy dependencies, this header should not + * include any other header file. + */ + +template void delta(const T initoffset, T *data, const size_t size) { + if (size == 0) + return; // nothing to do + if (size > 1) + for (size_t i = size - 1; i > 0; --i) { + data[i] -= data[i - 1]; + } + data[0] -= initoffset; +} + +template void delta(const T initoffset, T *data) { + if (size == 0) + return; // nothing to do + if (size > 1) + for (size_t i = size - 1; i > 0; --i) { + data[i] -= data[i - 1]; + } + data[0] -= initoffset; +} + +template +void inverseDelta(const T initoffset, T *data, const size_t size) { + if (size == 0) + return; // nothing to do + data[0] += initoffset; + const size_t UnrollQty = 4; + const size_t sz0 = + (size / UnrollQty) * UnrollQty; // equal to 0, if size < UnrollQty + size_t i = 1; + if (sz0 >= UnrollQty) { + T a = data[0]; + for (; i < sz0 - UnrollQty; i += UnrollQty) { + a = data[i] += a; + a = data[i + 1] += a; + a = data[i + 2] += a; + a = data[i + 3] += a; + } + } + for (; i != size; ++i) { + data[i] += data[i - 1]; + } +} +template void inverseDelta(const T initoffset, T *data) { + if (size == 0) + return; // nothing to do + data[0] += initoffset; + const size_t UnrollQty = 4; + const size_t sz0 = + (size / UnrollQty) * UnrollQty; // equal to 0, if size < UnrollQty + size_t i = 1; + if (sz0 >= UnrollQty) { + T a = data[0]; + for (; i < sz0 - UnrollQty; i += UnrollQty) { + a = data[i] += a; + a = data[i + 1] += a; + a = data[i + 2] += a; + a = data[i + 3] += a; + } + } + for (; i != size; ++i) { + data[i] += data[i - 1]; + } +} + +} // namespace SIMDCompressionLib + +#endif /* SIMDCompressionAndIntersection_DELTA_H_ */ diff --git a/include/SIMDCompressionAndIntersection/deltatemplates.h b/include/SIMDCompressionAndIntersection/deltatemplates.h new file mode 100644 index 0000000..160c36d --- /dev/null +++ b/include/SIMDCompressionAndIntersection/deltatemplates.h @@ -0,0 +1,164 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. + * + * (c) Leonid Boytsov, Nathan Kurz and Daniel Lemire + */ + +#ifndef SIMDCompressionAndIntersection_DELTATEMPLATES_H_ +#define SIMDCompressionAndIntersection_DELTATEMPLATES_H_ + +#include "common.h" + +namespace SIMDCompressionLib { + +/** + * To avoid crazy dependencies, this header should not + * include any other header file. + */ + +/** + * The structs RegularDeltaSIMD, NoDelta, CoarseDelta4SIMD, CoarseDelta2SIMD, + * Max4DeltaSIMD + * are used in templates to specify which type of differential encoding to use + * (if any). 
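A small round-trip sketch for the helpers above (illustrative, not from the original header): delta() rewrites an array in place as successive differences relative to initoffset, and inverseDelta() restores the original values:

#include <cassert>
#include <cstdint>
#include "delta.h"

int main() {
  using namespace SIMDCompressionLib;
  uint32_t docids[6] = {3, 7, 12, 12, 20, 31};
  delta(uint32_t(0), docids, 6);        // in place: {3, 4, 5, 0, 8, 11}
  inverseDelta(uint32_t(0), docids, 6); // prefix sums restore the input
  assert(docids[0] == 3 && docids[5] == 31);
  return 0;
}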
+ * + * See SIMDDeltaProcessor + */ + +struct RegularDeltaSIMD { + // Folklore code, unknown origin of this idea + ALWAYS_INLINE + static __m128i PrefixSum(__m128i curr, __m128i prev) { + const __m128i _tmp1 = _mm_add_epi32(_mm_slli_si128(curr, 8), curr); + const __m128i _tmp2 = _mm_add_epi32(_mm_slli_si128(_tmp1, 4), _tmp1); + return _mm_add_epi32(_tmp2, _mm_shuffle_epi32(prev, 0xff)); + } + + ALWAYS_INLINE + static __m128i Delta(__m128i curr, __m128i prev) { + return _mm_sub_epi32( + curr, _mm_or_si128(_mm_slli_si128(curr, 4), _mm_srli_si128(prev, 12))); + } + + static bool usesDifferentialEncoding() { return true; } + + static std::string name() { return "Delta1"; } +}; + +struct NoDelta { + ALWAYS_INLINE + static __m128i PrefixSum(__m128i curr, __m128i) { return curr; } + ALWAYS_INLINE + static __m128i Delta(__m128i curr, __m128i) { return curr; } + + static bool usesDifferentialEncoding() { return false; } + static std::string name() { return "NoDelta"; } +}; + +struct CoarseDelta4SIMD { + ALWAYS_INLINE + // Proposed and implemented by L. Boytosv + static __m128i PrefixSum(__m128i curr, __m128i prev) { + return _mm_add_epi32(curr, prev); + } + ALWAYS_INLINE + static __m128i Delta(__m128i curr, __m128i prev) { + return _mm_sub_epi32(curr, prev); + } + + static bool usesDifferentialEncoding() { return true; } + + static std::string name() { return "Delta4"; } +}; + +struct CoarseDelta2SIMD { + ALWAYS_INLINE + // Proposed and implemented by L. Boytosv + static __m128i PrefixSum(__m128i curr, __m128i prev) { + const __m128i _tmp1 = _mm_add_epi32(_mm_slli_si128(curr, 8), curr); + return _mm_add_epi32(_tmp1, + _mm_shuffle_epi32(prev, _MM_SHUFFLE(3, 2, 3, 2))); + } + ALWAYS_INLINE + static __m128i Delta(__m128i curr, __m128i prev) { + return _mm_sub_epi32( + curr, _mm_or_si128(_mm_slli_si128(curr, 8), _mm_srli_si128(prev, 8))); + } + static bool usesDifferentialEncoding() { return true; } + + static std::string name() { return "Delta2"; } +}; + +struct Max4DeltaSIMD { + ALWAYS_INLINE + // The idea is due to N. Kurz + static __m128i PrefixSum(__m128i curr, __m128i prev) { + return _mm_add_epi32(curr, _mm_shuffle_epi32(prev, 0xff)); + } + ALWAYS_INLINE + static __m128i Delta(__m128i curr, __m128i prev) { + return _mm_sub_epi32(curr, _mm_shuffle_epi32(prev, 0xff)); + } + static std::string name() { return "DeltaM4"; } + + static bool usesDifferentialEncoding() { return true; } +}; + +/** + * Wrapper around the structs RegularDeltaSIMD, NoDelta, CoarseDelta4SIMD, + * CoarseDelta2SIMD, Max4DeltaSIMD + * to compute differential encoding and prefix sums. + */ +template struct SIMDDeltaProcessor { + static __m128i runPrefixSum(__m128i initOffset, uint32_t *pData) { + const size_t QtyDivBy4 = TotalQty / 4; + // The block should contain 8N 32-bit integers, where N is some integer + assert(QtyDivBy4 % 2 == 0); + + __m128i *pCurr = reinterpret_cast<__m128i *>(pData); + const __m128i *pEnd = pCurr + QtyDivBy4; + + // Leonid Boytsov: manual loop unrolling may be crucial here. 
+ while (pCurr < pEnd) { + initOffset = DeltaHelper::PrefixSum(MM_LOAD_SI_128(pCurr), initOffset); + MM_STORE_SI_128(pCurr++, initOffset); + + initOffset = DeltaHelper::PrefixSum(MM_LOAD_SI_128(pCurr), initOffset); + MM_STORE_SI_128(pCurr++, initOffset); + } + + return initOffset; + } + + static void runDelta(__m128i initOffset, uint32_t *pData) { + const size_t QtyDivBy4 = TotalQty / 4; + // The block should contain 8N 32-bit integers, where N is some integer + assert(QtyDivBy4 && QtyDivBy4 % 2 == 0); + __m128i *pCurr = reinterpret_cast<__m128i *>(pData) + QtyDivBy4 - 1; + __m128i *pStart = reinterpret_cast<__m128i *>(pData); + __m128i a = MM_LOAD_SI_128(pCurr); + // Leonid Boytsov: manual loop unrolling may be crucial here. + while (pCurr > pStart + 1) { + __m128i b = MM_LOAD_SI_128(pCurr - 1); + MM_STORE_SI_128(pCurr, DeltaHelper::Delta(a, b)); + a = b; + --pCurr; + + b = MM_LOAD_SI_128(pCurr - 1); + MM_STORE_SI_128(pCurr, DeltaHelper::Delta(a, b)); + a = b; + --pCurr; + } + + __m128i b = MM_LOAD_SI_128(pStart); + MM_STORE_SI_128(pStart + 1, DeltaHelper::Delta(a, b)); + a = b; + + MM_STORE_SI_128(pStart, DeltaHelper::Delta(a, initOffset)); + } +}; + +} // namespace SIMDCompressionLib + +#endif /* SIMDCompressionAndIntersection_DELTATEMPLATES_H_ */ diff --git a/include/SIMDCompressionAndIntersection/fastpfor.h b/include/SIMDCompressionAndIntersection/fastpfor.h new file mode 100644 index 0000000..217a061 --- /dev/null +++ b/include/SIMDCompressionAndIntersection/fastpfor.h @@ -0,0 +1,367 @@ +/* + * This is the non-SIMD version of FastPFOR. + * It is not recommended per se, only provided for + * comparison purposes. + */ + +#ifndef SIMDCompressionAndIntersection_FASTPFOR_H_ +#define SIMDCompressionAndIntersection_FASTPFOR_H_ + +#include "common.h" +#include "codecs.h" +#include "bitpackinghelpers.h" +#include "util.h" +#include "delta.h" + +namespace SIMDCompressionLib { + +class ScalarSortedBitPacker { +public: + enum { DEFAULTSIZE = 128 }; + uint32_t buffer[32]; + + ScalarSortedBitPacker() { + for (uint32_t i = 0; i < 32; ++i) { + data[i] = new uint32_t[DEFAULTSIZE]; + memset(data[i], 0, DEFAULTSIZE * sizeof(uint32_t)); + actualsizes[i] = DEFAULTSIZE; + } + clear(); + } + + void reset() { + for (uint32_t i = 0; i < 32; ++i) { + delete[] data[i]; + data[i] = new uint32_t[DEFAULTSIZE]; + memset(data[i], 0, DEFAULTSIZE * sizeof(uint32_t)); + actualsizes[i] = DEFAULTSIZE; + } + clear(); + } + + ~ScalarSortedBitPacker() { free(); } + void free() { + clear(); + for (uint32_t i = 0; i < 32; ++i) + if (data[i] != NULL) { + delete[] data[i]; + data[i] = NULL; + actualsizes[i] = 0; + } + } + void directAppend(uint32_t i, uint32_t val) { data[i][sizes[i]++] = val; } + + const uint32_t *get(int i) { return data[i]; } + + void ensureCapacity(int i, uint32_t datatoadd) { + if (sizes[i] + datatoadd > actualsizes[i]) { + actualsizes[i] = (sizes[i] + datatoadd + 127) / 128 * 128 * 2; + uint32_t *tmp = new uint32_t[actualsizes[i]]; + for (uint32_t j = 0; j < sizes[i]; ++j) + tmp[j] = data[i][j]; + delete[] data[i]; + data[i] = tmp; + } + } + + void clear() { + for (uint32_t i = 0; i < 32; ++i) + sizes[i] = 0; // memset "might" be faster. 
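A sketch of driving SIMDDeltaProcessor above directly; the template arguments <RegularDeltaSIMD, 128> follow the DeltaHelper/TotalQty roles described in its comment, the buffer length must satisfy the 8N assertion, and an SSE2-capable build is assumed. Illustrative only, not part of the patch:

#include <cassert>
#include <cstdint>
#include <emmintrin.h>
#include "deltatemplates.h"

int main() {
  using namespace SIMDCompressionLib;
  uint32_t block[128];
  for (uint32_t i = 0; i < 128; ++i) block[i] = i * 3; // sorted input

  const __m128i zero = _mm_setzero_si128(); // running offset starts at 0
  // Replace the block with element-wise deltas, then undo it with a prefix sum.
  SIMDDeltaProcessor<RegularDeltaSIMD, 128>::runDelta(zero, block);
  SIMDDeltaProcessor<RegularDeltaSIMD, 128>::runPrefixSum(zero, block);
  for (uint32_t i = 0; i < 128; ++i) assert(block[i] == i * 3);
  return 0;
}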
+ } + + uint32_t *write(uint32_t *out) { + uint32_t bitmap = 0; + for (uint32_t k = 1; k < 32; ++k) { + if (sizes[k] != 0) + bitmap |= (1U << k); + } + *(out++) = bitmap; + + for (uint32_t k = 1; k < 32; ++k) { + if (sizes[k] != 0) { + *out = sizes[k]; + out++; + uint32_t j = 0; + for (; j < sizes[k]; j += 32) { + BitPackingHelpers::fastpackwithoutmask(&data[k][j], out, k + 1); + out += k + 1; + } + out -= (j - sizes[k]) * (k + 1) / 32; + } + } + return out; + } + + const uint32_t *read(const uint32_t *in) { + clear(); + const uint32_t bitmap = *(in++); + + for (uint32_t k = 1; k < 32; ++k) { + if ((bitmap & (1U << k)) != 0) { + sizes[k] = *in++; + if (actualsizes[k] < sizes[k]) { + delete[] data[k]; + actualsizes[k] = (sizes[k] + 31) / 32 * 32; + data[k] = new uint32_t[actualsizes[k]]; + } + uint32_t j = 0; + for (; j + 31 < sizes[k]; j += 32) { + BitPackingHelpers::fastunpack(in, &data[k][j], k + 1); + in += k + 1; + } + uint32_t remaining = sizes[k] - j; + memcpy(buffer, in, (remaining * (k + 1) + 31) / 32 * sizeof(uint32_t)); + uint32_t *bpointer = buffer; + in += ((sizes[k] + 31) / 32 * 32 - j) / 32 * (k + 1); + for (; j < sizes[k]; j += 32) { + BitPackingHelpers::fastunpack(bpointer, &data[k][j], k + 1); + bpointer += k + 1; + } + in -= (j - sizes[k]) * (k + 1) / 32; + } + } + return in; + } + +private: + uint32_t *data[32]; + uint32_t sizes[32]; + uint32_t actualsizes[32]; + + // we don't want anyone to start copying this class + ScalarSortedBitPacker(const ScalarSortedBitPacker &); + ScalarSortedBitPacker &operator=(const ScalarSortedBitPacker &); +}; + +/** + * FastPFor + * + * Reference and documentation: + * + * Daniel Lemire and Leonid Boytsov, Decoding billions of integers per second + * through vectorization + * http://arxiv.org/abs/1209.2137 + * + * Designed by D. Lemire with ideas from Leonid Boytsov. This scheme is NOT + * patented. + * + */ +template // BlockSizeInUnitsOfPackSize should be 4 or 8 +class FastPFor : public IntegerCODEC { +public: + /** + * ps (page size) should be a multiple of BlockSize, any "large" + * value should do. + */ + FastPFor(uint32_t ps = 65536) + : PageSize(ps), bitsPageSize(gccbits(PageSize)), bpacker(), + bytescontainer(PageSize + 3 * PageSize / BlockSize) { + assert(ps / BlockSize * BlockSize == ps); + assert(gccbits(BlockSizeInUnitsOfPackSize * PACKSIZE - 1) <= 8); + } + enum { + PACKSIZE = 32, + overheadofeachexcept = 8, + overheadduetobits = 8, + overheadduetonmbrexcept = 8, + BlockSize = BlockSizeInUnitsOfPackSize * PACKSIZE + }; + + const uint32_t PageSize; + const uint32_t bitsPageSize; + ScalarSortedBitPacker bpacker; + vector bytescontainer; + + const uint32_t *decodeArray(const uint32_t *in, const size_t length, + uint32_t *out, size_t &nvalue) { + const uint32_t *const initin(in); + const size_t mynvalue = *in; + ++in; + if (mynvalue > nvalue) + throw NotEnoughStorage(mynvalue); + nvalue = mynvalue; + const uint32_t *const finalout(out + nvalue); + uint32_t prev = 0; + while (out != finalout) { + size_t thisnvalue(0); + size_t thissize = static_cast( + finalout > PageSize + out ? PageSize : (finalout - out)); + + __decodeArray(in, thisnvalue, out, thissize, prev); + in += thisnvalue; + out += thissize; + } + assert(initin + length >= in); + bpacker.reset(); // if you don't do this, the codec has a "memory". + return in; + } + + /** + * If you save the output and recover it in memory, you are + * responsible to ensure that the alignment is preserved. 
+ * + * The input size (length) should be a multiple of + * BlockSizeInUnitsOfPackSize * PACKSIZE. (This was done + * to simplify slightly the implementation.) + */ + void encodeArray(uint32_t *in, const size_t length, uint32_t *out, + size_t &nvalue) { + checkifdivisibleby(length, BlockSize); + const uint32_t *const initout(out); + const uint32_t *const finalin(in + length); + + *out++ = static_cast(length); + const size_t oldnvalue = nvalue; + nvalue = 1; + uint32_t prev = 0; + while (in != finalin) { + size_t thissize = static_cast( + finalin > PageSize + in ? PageSize : (finalin - in)); + size_t thisnvalue(0); + __encodeArray(in, thissize, out, thisnvalue, prev); + nvalue += thisnvalue; + out += thisnvalue; + in += thissize; + } + assert(out == nvalue + initout); + if (oldnvalue < nvalue) + std::cerr + << "It is possible we have a buffer overrun. You reported having allocated " + << oldnvalue * sizeof(uint32_t) + << " bytes for the compressed data but we needed " + << nvalue * sizeof(uint32_t) + << " bytes. Please increase the available memory" + " for compressed data or check the value of the last parameter provided " + " to the encodeArray method." << std::endl; + bpacker.reset(); // if you don't do this, the buffer has a memory + } + + void getBestBFromData(const uint32_t *in, uint8_t &bestb, + uint8_t &bestcexcept, uint8_t &maxb) { + uint32_t freqs[33]; + for (uint32_t k = 0; k <= 32; ++k) + freqs[k] = 0; + for (uint32_t k = 0; k < BlockSize; ++k) { + freqs[asmbits(in[k])]++; + } + bestb = 32; + while (freqs[bestb] == 0) + bestb--; + maxb = bestb; + uint32_t bestcost = bestb * BlockSize; + uint32_t cexcept = 0; + bestcexcept = static_cast(cexcept); + for (uint32_t b = bestb - 1; b < 32; --b) { + cexcept += freqs[b + 1]; + uint32_t thiscost = cexcept * overheadofeachexcept + + cexcept * (maxb - b) + b * BlockSize + + 8; // the extra 8 is the cost of storing maxbits + if (bestb - b == 1) + thiscost -= cexcept; + if (thiscost < bestcost) { + bestcost = thiscost; + bestb = static_cast(b); + bestcexcept = static_cast(cexcept); + } + } + } + + void __encodeArray(uint32_t *in, const size_t length, uint32_t *out, + size_t &nvalue, uint32_t &prev) { + uint32_t *const initout = out; // keep track of this + checkifdivisibleby(length, BlockSize); + uint32_t *const headerout = out++; // keep track of this + bpacker.clear(); + uint8_t *bc = bytescontainer.data(); + for (const uint32_t *const final = in + length; (in + BlockSize <= final); + in += BlockSize) { + uint8_t bestb, bestcexcept, maxb; + if (useDelta) { + uint32_t nextprev = in[BlockSize - 1]; + delta(prev, in, BlockSize); + prev = nextprev; + } + getBestBFromData(in, bestb, bestcexcept, maxb); + *bc++ = bestb; + *bc++ = bestcexcept; + if (bestcexcept > 0) { + *bc++ = maxb; + bpacker.ensureCapacity(maxb - bestb - 1, bestcexcept); + const uint32_t maxval = 1U << bestb; + for (uint32_t k = 0; k < BlockSize; ++k) { + if (in[k] >= maxval) { + bpacker.directAppend(maxb - bestb - 1, in[k] >> bestb); + *bc++ = static_cast(k); + } + } + } + for (size_t k = 0; k < BlockSizeInUnitsOfPackSize; ++k) { + BitPackingHelpers::fastpack(in + k * 32, out + k * bestb, bestb); + } + out += BlockSizeInUnitsOfPackSize * bestb; + } + headerout[0] = static_cast(out - headerout); + const uint32_t bytescontainersize = + static_cast(bc - bytescontainer.data()); + *(out++) = bytescontainersize; + memcpy(out, bytescontainer.data(), bytescontainersize); + out += (bytescontainersize + sizeof(uint32_t) - 1) / sizeof(uint32_t); + const uint32_t *const lastout = 
bpacker.write(out); + nvalue = lastout - initout; + } + + void __decodeArray(const uint32_t *in, size_t &length, uint32_t *out, + const size_t nvalue, uint32_t &prev) { + const uint32_t *const initin = in; + const uint32_t *const headerin = in++; + const uint32_t wheremeta = headerin[0]; + const uint32_t *inexcept = headerin + wheremeta; + const uint32_t bytesize = *inexcept++; + const uint8_t *bytep = reinterpret_cast(inexcept); + + inexcept += (bytesize + sizeof(uint32_t) - 1) / sizeof(uint32_t); + inexcept = bpacker.read(inexcept); + length = inexcept - initin; + const uint32_t *unpackpointers[32 + 1]; + for (uint32_t k = 1; k <= 32; ++k) { + unpackpointers[k] = bpacker.get(k - 1); + } + for (uint32_t run = 0; run < nvalue / BlockSize; ++run, out += BlockSize) { + const uint8_t b = *bytep++; + const uint8_t cexcept = *bytep++; + for (size_t k = 0; k < BlockSizeInUnitsOfPackSize; ++k) { + BitPackingHelpers::fastunpack(in + k * b, out + k * 32, b); + } + in += BlockSizeInUnitsOfPackSize * b; + if (cexcept > 0) { + const uint8_t maxbits = *bytep++; + if (maxbits - b == 1) { + for (uint32_t k = 0; k < cexcept; ++k) { + const uint8_t pos = *(bytep++); + out[pos] |= static_cast(1) << b; + } + } else { + const uint32_t *vals = unpackpointers[maxbits - b]; + unpackpointers[maxbits - b] += cexcept; + for (uint32_t k = 0; k < cexcept; ++k) { + const uint8_t pos = *(bytep++); + out[pos] |= vals[k] << b; + } + } + } + if (useDelta) { + inverseDelta(prev, out, BlockSize); + prev = out[BlockSize - 1]; + } + } + + assert(in == headerin + wheremeta); + } + + string name() const { return string("FastPFor") + (useDelta ? "Delta" : ""); } +}; + +} // namespace SIMDCompressionLib + +#endif /* SIMDCompressionAndIntersection_FASTPFOR_H_ */ diff --git a/include/SIMDCompressionAndIntersection/for.h b/include/SIMDCompressionAndIntersection/for.h new file mode 100644 index 0000000..cc989f6 --- /dev/null +++ b/include/SIMDCompressionAndIntersection/for.h @@ -0,0 +1,285 @@ +/* + * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de). + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * A fast implementation for Frame of Reference encoding. + * + * See the README.md file for more information, example code and references. + * + * Feel free to send comments/questions to chris@crupp.de. I am available + * for consulting. + */ + +#ifndef FOR_H_5580af15_4570_41f9_ba2b_8afb1400e81e +#define FOR_H_5580af15_4570_41f9_ba2b_8afb1400e81e + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Returns the size required to compress a sequence of |length| ints, + * each compressed with |bits| bits + * + * This function will NOT include any overhead required by + * for_compress_sorted() and for_compress_unsorted(). + * + * Invariant: bits <= 32 + */ +extern uint32_t for_compressed_size_bits(uint32_t length, uint32_t bits); + +/** + * Returns the size required to compress an unsorted sequence of |length| ints. 
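A usage sketch for the FastPFor codec defined above (not part of the patch). The template arguments are assumed to be <BlockSizeInUnitsOfPackSize, useDelta>; the one hard requirement, per the encodeArray comment, is that the input length be a multiple of BlockSize (128 when BlockSizeInUnitsOfPackSize is 4):

#include <cassert>
#include <cstdint>
#include <vector>
#include "fastpfor.h"

int main() {
  using namespace SIMDCompressionLib;
  FastPFor<4, false> codec;            // BlockSize = 4 * 32 = 128, no delta coding
  const size_t N = 4096;               // multiple of 128
  std::vector<uint32_t> in(N);
  for (size_t i = 0; i < N; ++i) in[i] = static_cast<uint32_t>(i % 1000);

  std::vector<uint32_t> compressed(N + 1024);
  size_t nvalue = compressed.size();
  codec.encodeArray(in.data(), N, compressed.data(), nvalue);

  std::vector<uint32_t> out(N);
  size_t outsize = out.size();
  codec.decodeArray(compressed.data(), nvalue, out.data(), outsize);
  assert(outsize == N && out == in);
  return 0;
}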
+ * + * This routine scans |in| for the min/max values and then calls + * for_compressed_size_bits(). + * + * The returned size will include the overhead required for + * for_compress_sorted() and for_compressed_unsorted(). + */ +extern uint32_t for_compressed_size_unsorted(const uint32_t *in, + uint32_t length); + +/** + * Returns the size required to compress a sorted sequence of |length| ints. + * + * This routine extracts min/max values at the beginning and end of + * the sequence, then calls for_compressed_size_bits(). It is therefore + * slightly faster than for_compressed_size_unsorted(). + * + * The returned size will include the overhead required for + * for_compress_sorted() and for_compressed_unsorted(). + */ +extern uint32_t for_compressed_size_sorted(const uint32_t *in, uint32_t length); + +/** + * Compresses a sequence of |length| ints at |in| and stores the result + * in |out|. + * + * |base| is the "offset" (or common delta value) of all ints. It is usually + * set to the minimum value of the uncompressed sequence. + * + * |bits| are the bits required to store a single integer. + * + * Returns the number of bytes used for compression. + * + * This is for advanced users who opt for storing |base| and |bits| on their + * own. This function is called by for_compress_sorted() and + * for_compress_unsorted(). + * + * Invariant: bits <= 32 + */ +extern uint32_t for_compress_bits(const uint32_t *in, uint8_t *out, + uint32_t length, uint32_t base, + uint32_t bits); + +/** + * Compresses an unsorted sequence of |length| ints at |in| and stores the + * result in |out|. + * + * This routine scans |in| for the min/max values and then calls + * for_compress_bits(). + * + * The minimun value and the bits are stored as metadata in |out|. + */ +extern uint32_t for_compress_unsorted(const uint32_t *in, uint8_t *out, + uint32_t length); + +/** + * Compresses a sorted sequence of |length| ints at |in| and stores the + * result in |out|. + * + * This routine extracts min/max values at the beginning and end of + * the sequence, then calls for_compress_bits(). + * + * The minimun value and the bits are stored as metadata in |out|. + */ +extern uint32_t for_compress_sorted(const uint32_t *in, uint8_t *out, + uint32_t length); + +/** + * Uncompresses a sequence of |length| ints at |in| and stores the + * result in |out|. + * + * |base| is the "offset" (or common delta value) of all ints. It is usually + * set to the minimum value of the uncompressed sequence. + * + * |bits| are the bits required to store a single integer. + * + * Returns the number of compressed bytes processed. + * + * This function is for advanced users. It is the counterpart of + * for_compress_bits(). + * + * Invariant: bits <= 32 + */ +extern uint32_t for_uncompress_bits(const uint8_t *in, uint32_t *out, + uint32_t length, uint32_t base, + uint32_t bits); + +/** + * Uncompresses a sequence of |length| ints at |in| and stores the + * result in |out|. + * + * This function is a convenience wrapper for for_uncompress_bits(). It + * expects metadata at the beginning of |in|. Use in combination with + * for_compress_sorted() and for_compress_unsorted(). + * + * Returns the number of compressed bytes processed. + */ +extern uint32_t for_uncompress(const uint8_t *in, uint32_t *out, + uint32_t length); + +/** + * Appends a |value| to a compressed sequence of unsorted integers. + * + * This function is optimized for appending new values at the end of an + * encoded sequence. 
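The C functions declared above compose as follows; a minimal sketch in which the output buffer is sized with for_compressed_size_unsorted(), which already accounts for the metadata written by for_compress_unsorted():

#include <cassert>
#include <cstdint>
#include <vector>
#include "for.h"

int main() {
  const uint32_t length = 1000;
  std::vector<uint32_t> in(length);
  for (uint32_t i = 0; i < length; ++i) in[i] = 1000u + 3u * i;

  // Size estimate includes the min/bits metadata stored at the front.
  std::vector<uint8_t> compressed(for_compressed_size_unsorted(in.data(), length));
  uint32_t used = for_compress_unsorted(in.data(), compressed.data(), length);
  assert(used <= compressed.size());

  std::vector<uint32_t> out(length);
  for_uncompress(compressed.data(), out.data(), length);
  assert(out == in);
  return 0;
}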
This is only possible if the new value (more precisely: + * the delta of the new value) can be stored in the same amount of bits that + * were used to encode the other integers. + * + * If this is not the case then memory is allocated, the whole sequence is + * decoded and re-encoded using more bits. This requires a heap allocation + * with malloc(). + * + * Returns the size (in bytes) of the compressed data, or 0 if malloc() fails. + */ +extern uint32_t for_append_unsorted(uint8_t *in, uint32_t length, + uint32_t value); + +/** + * Appends a |value| to a compressed sequence of sorted integers. + * + * This function is optimized for appending new values at the end of an + * encoded sequence. This is only possible if the new value (more precisely: + * the delta of the new value) can be stored in the same amount of bits that + * were used to encode the other integers. + * + * If this is not the case then memory is allocated, the whole sequence is + * decoded and re-encoded using more bits. This requires a heap allocation + * with malloc(). + * + * Returns the size (in bytes) of the compressed data, or 0 if malloc() fails. + */ +extern uint32_t for_append_sorted(uint8_t *in, uint32_t length, uint32_t value); + +/** + * Appends a |value| to a compressed integer sequence. + * + * |base| is the "offset" (or common delta value) of all ints. It is usually + * set to the minimum value of the uncompressed sequence. + * + * |bits| are the bits required to store a single integer. + * + * Returns the size (in bytes) of the compressed data. + * + * Invariant: bits <= 32 + * Invariant: the new |value| (more precisely: |value - base|) can be stored + * in |bits| bits. Details can be found in the implementation of + * for_append() in for.c. + */ +extern uint32_t for_append_bits(uint8_t *in, uint32_t length, uint32_t base, + uint32_t bits, uint32_t value); + +/** + * Returns the value at the given |index| from a compressed sequence. + * + * Make sure that |index| does not exceed the length of the sequence. + * + * |base| is the "offset" (or common delta value) of all ints. It is usually + * set to the minimum value of the uncompressed sequence. + * + * Invariant: bits <= 32 + */ +extern uint32_t for_select_bits(const uint8_t *in, uint32_t base, uint32_t bits, + uint32_t index); + +/** + * Returns the value at the given |index| from a compressed sequence. + * + * Make sure that |index| does not exceed the length of the sequence. + * + * This function is a convenience wrapper for for_select_bits(). It + * expects metadata at the beginning of |in|. Use in combination with + * for_compress_sorted() and for_compress_unsorted(). + */ +extern uint32_t for_select(const uint8_t *in, uint32_t index); + +/** + * Performs a linear search for |value|. + * + * Returns the index of the found element, or |length| if the key was not + * found. + * + * This function is a convenience wrapper for for_linear_search_bits(). It + * expects metadata at the beginning of |in|. Use in combination with + * for_compress_sorted() and for_compress_unsorted(). + */ +extern uint32_t for_linear_search(const uint8_t *in, uint32_t length, + uint32_t value); + +/** + * Performs a linear search for |value|. + * + * Returns the index of the found element, or |length| if the key was not + * found. + * + * |base| is the "offset" (or common delta value) of all ints. It is usually + * set to the minimum value of the uncompressed sequence. 
+ * + * Invariant: bits <= 32 + */ +extern uint32_t for_linear_search_bits(const uint8_t *in, uint32_t length, + uint32_t base, uint32_t bits, + uint32_t value); + +/** + * Performs lower bound binary search search for |value|. + * + * A lower bound search returns the first element in the sequence which does + * not compare less than |value|. + * The actual result is stored in |*actual|. + * + * This function is a convenience wrapper for for_lower_bound_search_bits(). It + * expects metadata at the beginning of |in|. Use in combination with + * for_compress_sorted() and for_compress_unsorted(). + */ +extern uint32_t for_lower_bound_search(const uint8_t *in, uint32_t length, + uint32_t value, uint32_t *actual); + +/** + * Performs lower bound binary search search for |value|. + * + * A lower bound search returns the first element in the sequence which does + * not compare less than |value|. + * The actual result is stored in |*actual|. + * + * |base| is the "offset" (or common delta value) of all ints. It is usually + * set to the minimum value of the uncompressed sequence. + * + * Invariant: bits <= 32 + */ +extern uint32_t for_lower_bound_search_bits(const uint8_t *in, uint32_t length, + uint32_t base, uint32_t bits, + uint32_t value, uint32_t *actual); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* FOR_H_5580af15_4570_41f9_ba2b_8afb1400e81e */ diff --git a/include/SIMDCompressionAndIntersection/forcodec.h b/include/SIMDCompressionAndIntersection/forcodec.h new file mode 100644 index 0000000..6c53aeb --- /dev/null +++ b/include/SIMDCompressionAndIntersection/forcodec.h @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de). + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Frame of Reference encoding + * + * based on code from http://github.com/cruppstahl/for + */ + +#ifndef INCLUDE_FOR_H +#define INCLUDE_FOR_H + +#include "common.h" +#include "codecs.h" +#include "util.h" +#include "for.h" + +namespace SIMDCompressionLib { + +/* + * Optimized scalar implementation of FOR (frame-of-reference) compression + */ +class ForCODEC : public IntegerCODEC { +public: + void encodeArray(uint32_t *in, const size_t length, uint32_t *out, + size_t &nvalue) { + uint32_t cappedLength = static_cast( + std::min(length, std::numeric_limits::max())); + *(uint32_t *)out = cappedLength; + out++; + // for_compress_sorted() would be a bit faster, but requires + // sorted input + nvalue = (4 + for_compress_unsorted((const uint32_t *)in, (uint8_t *)out, + cappedLength) + + 3) / + 4; + } + + const uint32_t *decodeArray(const uint32_t *in, const size_t, uint32_t *out, + size_t &nvalue) { + nvalue = *in; + in++; + return in + for_uncompress((const uint8_t *)in, out, + static_cast(nvalue)); + } + + // append a key. 
+ // Returns the new size of the compressed array *in bytes* + size_t appendToByteArray(uint8_t *in, const size_t bytesize, + uint32_t /*previous_key*/, uint32_t key) { + return append(in, bytesize, key); + } + + size_t append(uint8_t *in, const size_t /* unused */, uint32_t value) { + uint32_t length = *(uint32_t *)in; + size_t s = for_append_unsorted(in + 4, length, value); + *(uint32_t *)in = length + 1; + return s + 4; + } + + size_t findLowerBound(const uint32_t *in, const size_t, uint32_t key, + uint32_t *presult) { + uint32_t length = *in; + in++; + return (size_t)for_lower_bound_search((const uint8_t *)in, length, key, + presult); + } + + uint32_t select(const uint32_t *in, size_t index) { + in++; // Skip length + return for_select((const uint8_t *)in, static_cast(index)); + } + + string name() const { return "For"; } +}; + +} /* namespace */ + +#endif /* INCLUDE_FOR_H */ diff --git a/include/SIMDCompressionAndIntersection/frameofreference.h b/include/SIMDCompressionAndIntersection/frameofreference.h new file mode 100644 index 0000000..8bf35c3 --- /dev/null +++ b/include/SIMDCompressionAndIntersection/frameofreference.h @@ -0,0 +1,134 @@ + + +#ifndef INCLUDE_FRAMEOFREFERENCE_H_ +#define INCLUDE_FRAMEOFREFERENCE_H_ + +#include "common.h" +#include "codecs.h" +#include "util.h" + +namespace SIMDCompressionLib { + +/** + * Simple implementation of FOR compression using blocks of + * 32 integers. + * + * This implementation is inferior to ForCODEC. Please use + * ForCODEC instead. + * + * FOR does not compress particularly well but it supports + * fast random access. + */ +class FrameOfReference : public IntegerCODEC { +public: + void encodeArray(uint32_t *in, const size_t length, uint32_t *out, + size_t &nvalue) { + *out = static_cast( + std::min(length, std::numeric_limits::max())); + uint32_t *finalout = compress_length(in, *out, out + 1); + nvalue = finalout - out; + } + + const uint32_t *uncompress_length(const uint32_t *in, uint32_t *out, + uint32_t nvalue); + uint32_t *compress_length(const uint32_t *in, uint32_t length, uint32_t *out); + + const uint32_t *decodeArray(const uint32_t *in, const size_t, uint32_t *out, + size_t &nvalue) { + nvalue = *in; + in++; + return uncompress_length(in, out, static_cast(nvalue)); + } + + // appends the value "value" at the end of the compressed stream. Assumes that + // we have + // the space to do so. + // returns the next (total) size of the compressed output in bytes + // the "currentcompressedsizeinbytes" should be zero when no data has been + // compressed yet + size_t append(uint8_t *inbyte, const size_t currentcompressedsizeinbytes, + uint32_t value); + + // append a key. + // Returns the new size of the compressed array *in bytes* + size_t appendToByteArray(uint8_t *in, const size_t bytesize, + uint32_t /*previous_key*/, uint32_t key) { + return append(in, bytesize, key); + } + + // Performs a lower bound find in the encoded array. + // Returns the index + size_t findLowerBound(const uint32_t *in, const size_t length, uint32_t key, + uint32_t *presult); + + // Returns a decompressed value in an encoded array + uint32_t select(const uint32_t *in, size_t index); + + string name() const { return "FrameOfReference"; } + +private: +}; + +/** + * Accelerated implementation of FOR compression using blocks of + * 128 integers. . + * + * + * FOR does not compress particularly well but it supports + * fast random access. 
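Because FOR stores every value at the same bit width, ForCODEC above can answer select() and findLowerBound() directly on the compressed words without decoding the whole list; a sketch (illustrative only):

#include <cassert>
#include <cstdint>
#include <vector>
#include "forcodec.h"

int main() {
  using namespace SIMDCompressionLib;
  ForCODEC codec;
  std::vector<uint32_t> in{2, 3, 5, 8, 13, 21, 34, 55}; // sorted posting list
  std::vector<uint32_t> compressed(in.size() + 1024);
  size_t nvalue = compressed.size();
  codec.encodeArray(in.data(), in.size(), compressed.data(), nvalue);

  // Random access into the compressed stream: element at index 4.
  assert(codec.select(compressed.data(), 4) == 13);

  // Lower bound: the first element not less than 20 is 21, at index 5.
  uint32_t found = 0;
  size_t idx = codec.findLowerBound(compressed.data(), in.size(), 20, &found);
  assert(idx == 5 && found == 21);
  return 0;
}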
+ */ +class SIMDFrameOfReference : public IntegerCODEC { +public: + void encodeArray(uint32_t *in, const size_t length, uint32_t *out, + size_t &nvalue) { + *out = static_cast( + std::min(length, std::numeric_limits::max())); + uint32_t *finalout = simd_compress_length(in, *out, out + 1); + nvalue = finalout - out; + } + + const uint32_t *simd_uncompress_length(const uint32_t *in, uint32_t *out, + uint32_t nvalue); + uint32_t *simd_compress_length(const uint32_t *in, uint32_t length, + uint32_t *out); + uint32_t *simd_compress_length_sorted(const uint32_t *in, uint32_t length, + uint32_t *out); + + const uint32_t *decodeArray(const uint32_t *in, const size_t, uint32_t *out, + size_t &nvalue) { + nvalue = *in; + in++; + return simd_uncompress_length(in, out, static_cast(nvalue)); + } + + // appends the value "value" at the end of the compressed stream. Assumes that + // we have + // the space to do so. + // returns the next (total) size of the compressed output in bytes + // the "currentcompressedsizeinbytes" should be zero when no data has been + // compressed yet + size_t append(uint8_t *inbyte, const size_t currentcompressedsizeinbytes, + uint32_t value); + + // append a key. + // Returns the new size of the compressed array *in bytes* + size_t appendToByteArray(uint8_t *in, const size_t bytesize, + uint32_t /* previous_key*/, uint32_t key) { + return append(in, bytesize, key); + } + + // Performs a lower bound find in the encoded array. + // Returns the index + size_t findLowerBound(const uint32_t *in, const size_t length, uint32_t key, + uint32_t *presult); + + // Returns a decompressed value in an encoded array + uint32_t select(const uint32_t *in, size_t index); + + string name() const { return "SIMDFrameOfReference"; } + +private: +}; +} + +#endif /* INCLUDE_FRAMEOFREFERENCE_H_ */ diff --git a/include/SIMDCompressionAndIntersection/hybm2.h b/include/SIMDCompressionAndIntersection/hybm2.h new file mode 100644 index 0000000..c01fb78 --- /dev/null +++ b/include/SIMDCompressionAndIntersection/hybm2.h @@ -0,0 +1,642 @@ +/* + * This is an implementation of the hyb+m2 method proposed in: + * + * J. S. Culpepper and A. Moffat. Efficient set intersection for + * inverted indexing. ACM Trans. Inf. Syst., 29(1):1:1Ð1:25, Dec. 2010. + * + * Implemented by Daniel Lemire + */ + +#ifndef SIMDCompressionAndIntersection_HYBM2_H_ +#define SIMDCompressionAndIntersection_HYBM2_H_ + +#include "common.h" +#include "codecs.h" +#include "codecfactory.h" +#include "boolarray.h" +#include "intersection.h" +#include "skipping.h" + +namespace SIMDCompressionLib { + +class HybM2 { +public: + // th = 0 means that we select bitmaps as needed + HybM2(IntegerCODEC &c, intersectionfunction inter, uint32_t MaxId, + vector &recovbuffer, uint32_t th = 32) + : bitmapmap(), shortlistmap(), mapuncompsizes(), mMaxId(MaxId), + threshold(th), recovbuffer(recovbuffer), codec(c), Inter(inter) {} + + /** + * Returns the *uncompressed* size of a given posting list (specified + * by ID). + */ + size_t getSizeInInts(uint32_t postId) { return mapuncompsizes[postId]; } + + /** + * Load an array (data) of length "length" as the posting list corresponding + * to id postid + * into the data structure. The data will be either converted to a bitmap or + * compressed. 
+ */ + size_t load(const uint32_t postid, const uint32_t *data, + const uint32_t length) { + if (threshold == 0) + return loadOptimized(postid, data, length); + else if (length * threshold >= mMaxId) + return loadAsBitmap(postid, data, length); + else + return loadAsShortArray(postid, data, length); + } + + /** + * Check whether we have a posting list corresponding to postid + */ + bool hasBeenLoaded(const uint32_t postid) { + return ((shortlistmap.find(postid) != shortlistmap.end()) || + (bitmapmap.find(postid) != bitmapmap.end())); + } + + /** + * Compute the total of the volume of of posting lists + * corresponding to a query. + */ + size_t computeUnpackVolume(const vector &ids) { + size_t answer = 0; + for (uint32_t id : ids) { + answer += mapuncompsizes[id]; + } + return answer; + } + + /** + * Compute the total of the intersection of of posting lists + * corresponding to a query, and output how many integers are + * in + * + * ids: the query as a set of posting ids + * out: write to result + * sizeout: will indicate how many integers were written to out. + * + * return unpack volume defined as the total volume of the posting + * lists that were needed to compute the intersection (which can + * be less than the total volume possible due to early abandoning). + */ + size_t intersect(const vector &ids, uint32_t *out, + size_t &sizeout) { + if (ids.empty()) { + sizeout = 0; + return 0; + } + vector>>> shortlists; + vector>> bitmaps; + // vector bitmapscard; + + for (uint32_t id : ids) { + if (shortlistmap.find(id) != shortlistmap.end()) + shortlists.push_back(make_pair(mapuncompsizes[id], shortlistmap[id])); + else { + assert(bitmapmap.find(id) != bitmapmap.end()); + bitmaps.push_back(make_pair(mapuncompsizes[id], bitmapmap[id])); + } + } + size_t unpackVolume = 0; + if (shortlists.empty()) { + if (bitmaps.size() == 1) { + sizeout = bitmaps.front().second->toInts(out); + unpackVolume += sizeout; + return unpackVolume; + } + + BoolArray answer(mMaxId); + bitmaps[0].second->intersect(*bitmaps[1].second, answer); + unpackVolume += bitmaps[0].first + bitmaps[1].first; + for (uint32_t i = 2; i < bitmaps.size(); ++i) { + answer.inplaceIntersect(*bitmaps[i].second); + unpackVolume += bitmaps[i].first; + } + sizeout = answer.toInts(out); + return unpackVolume; + } else { + sort(shortlists.begin(), shortlists.end()); + sort(bitmaps.begin(), bitmaps.end()); + codec.decodeArray(shortlists[0].second->data(), + shortlists[0].second->size(), out, sizeout); + assert(shortlists[0].first == sizeout); + unpackVolume += sizeout; + assert(sizeout == shortlists[0].first); + for (uint32_t i = 1; (sizeout > 0) && (i < shortlists.size()); ++i) { + size_t thissize = recovbuffer.size(); + codec.decodeArray(shortlists[i].second->data(), + shortlists[i].second->size(), recovbuffer.data(), + thissize); + unpackVolume += thissize; + assert(shortlists[i].first == thissize); + sizeout = Inter(out, sizeout, recovbuffer.data(), thissize, out); + } + size_t pos = 0; + for (uint32_t i = 0; (sizeout > 0) && (i < bitmaps.size()); ++i) { + unpackVolume += bitmaps[i].first; + shared_ptr &ba = bitmaps[i].second; + pos = 0; + for (uint32_t i = 0; i < sizeout; ++i) { + if (!ba->get(out[i])) + continue; + else + out[pos++] = out[i]; + } + sizeout = pos; + } + return unpackVolume; + } + } + + ~HybM2() {} + + /** + * Estimate of the volume of data used by this object. 
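To make the bitmap-versus-short-list rule in load() above concrete: with the default threshold of 32, a posting list becomes a bitmap as soon as it covers at least 1/32 of the ID space. A worked example with an arbitrary universe size:

#include <cstdint>
#include <iostream>

int main() {
  const uint64_t MaxId = 10000000;  // universe size handed to the HybM2 constructor
  const uint64_t threshold = 32;    // default value of the th parameter
  for (uint64_t length : {1000ULL, 312500ULL, 2000000ULL}) {
    const bool asBitmap = length * threshold >= MaxId;
    std::cout << "length " << length << " -> "
              << (asBitmap ? "bitmap" : "compressed short list") << "\n";
  }
  return 0;
}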
+ */ + size_t storageInBytes() const { + size_t answer = 0; + for (auto i : bitmapmap) + answer += i.second->sizeInBytes(); + for (auto i : shortlistmap) + answer += i.second->size() * sizeof(uint32_t); + return answer; + } + + size_t sizeOfRecoveryBufferInWords() const { return recovbuffer.size(); } + +private: + // load as either a bitmap or a compressed short list + size_t loadOptimized(const uint32_t postid, const uint32_t *data, + const uint32_t length) { + if (mapuncompsizes.find(postid) != mapuncompsizes.end()) + return 0; + vector *compressedbuffer = new vector(length + 1024); + size_t outlength = compressedbuffer->size(); + vector tmp( + data, + data + length); // use the buffer because some codecs modify the input + codec.encodeArray(tmp.data(), length, compressedbuffer->data(), outlength); + if (outlength * sizeof(uint32_t) < + BoolArray::sizeInBytes(mMaxId)) { // we are good + if (recovbuffer.size() < length) + recovbuffer.resize(length); + compressedbuffer->resize(outlength); + compressedbuffer->shrink_to_fit(); + shortlistmap[postid] = shared_ptr>(compressedbuffer); + mapuncompsizes[postid] = length; + return compressedbuffer->size(); + } else { + delete compressedbuffer; + return loadAsBitmap(postid, data, length); + } + } + /** + * Load an array (data) of length "length" as the posting list corresponding + * to id postid + * as a bitmap. + * + * Do not call this directly, call load() instead. + */ + size_t loadAsBitmap(const uint32_t postid, const uint32_t *data, + const uint32_t length) { + if (bitmapmap.find(postid) != bitmapmap.end()) + return 0; + BoolArray *ba = new BoolArray(mMaxId); + for (uint32_t k = 0; k < length; ++k) + ba->set(data[k]); + bitmapmap[postid] = shared_ptr(ba); + mapuncompsizes[postid] = length; + return ba->sizeInBytes() / sizeof(uint32_t); + } + + /** + * Load an array (data) of length "length" as the posting list corresponding + * to id postid + * as a short array. + * + * Do not call this directly, call load() instead. + */ + size_t loadAsShortArray(const uint32_t postid, const uint32_t *data, + const uint32_t length) { + if (shortlistmap.find(postid) != shortlistmap.end()) + return 0; + if (recovbuffer.size() < length) + recovbuffer.resize(length); + vector *compressedbuffer = new vector(length + 1024); + size_t outlength = compressedbuffer->size(); + for (size_t i = 0; i < length; + ++i) // use the buffer because some codecs modify the input + recovbuffer[i] = data[i]; + codec.encodeArray(recovbuffer.data(), length, compressedbuffer->data(), + outlength); + compressedbuffer->resize(outlength); + compressedbuffer->shrink_to_fit(); + shortlistmap[postid] = shared_ptr>(compressedbuffer); + mapuncompsizes[postid] = length; + return compressedbuffer->size(); + } + + map> bitmapmap; + map>> shortlistmap; + map mapuncompsizes; + + const size_t mMaxId; // max value that can be stored in a list + const size_t threshold; //// 32 seems to be the recommended setting, no need + ///to change it? + + vector &recovbuffer; + + IntegerCODEC &codec; // how we compress the short lists + intersectionfunction Inter; +}; + +/** + * This is a version of HybM2 without compression (other than the bitmaps). + */ +class UncompressedHybM2 { +public: + UncompressedHybM2(intersectionfunction inter, uint32_t MaxId, + uint32_t th = 32) + : bitmapmap(), shortlistmap(), mapuncompsizes(), mMaxId(MaxId), + threshold(th), Inter(inter) {} + + /** + * Returns the *uncompressed* size of a given posting list (specified + * by ID). 
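Pulling the pieces together, a sketch of loading two posting lists into the HybM2 class above and intersecting them. The intersection routine below is a hand-written stand-in: the real intersectionfunction typedef lives in intersection.h, which is not part of this excerpt, so its signature is inferred from how intersect() invokes Inter():

#include <cstdint>
#include <iostream>
#include <vector>
#include "codecfactory.h"
#include "hybm2.h"

// Scalar stand-in for an intersection routine from intersection.h (not shown
// here); signature inferred from the Inter() call sites above. Writing through
// `out` is safe even when it aliases `a`, since k never outruns i.
static size_t scalarIntersect(const uint32_t *a, size_t lena,
                              const uint32_t *b, size_t lenb, uint32_t *out) {
  size_t i = 0, j = 0, k = 0;
  while (i < lena && j < lenb) {
    if (a[i] < b[j]) ++i;
    else if (b[j] < a[i]) ++j;
    else { out[k++] = a[i]; ++i; ++j; }
  }
  return k;
}

int main() {
  using namespace SIMDCompressionLib;
  const uint32_t MaxId = 1 << 20;
  std::vector<uint32_t> recovbuffer(4096); // scratch buffer; load() grows it as needed
  IntegerCODEC &codec = *CODECFactory::getFromName("s4-bp128-d1");
  HybM2 index(codec, scalarIntersect, MaxId, recovbuffer);

  std::vector<uint32_t> p1{1, 5, 9, 40, 1000};
  std::vector<uint32_t> p2{5, 9, 41, 1000, 4000};
  index.load(1, p1.data(), static_cast<uint32_t>(p1.size()));
  index.load(2, p2.data(), static_cast<uint32_t>(p2.size()));

  std::vector<uint32_t> query{1, 2};
  std::vector<uint32_t> out(p1.size());        // enough room for the shorter list
  size_t sizeout = out.size();
  index.intersect(query, out.data(), sizeout); // expect {5, 9, 1000}
  std::cout << "intersection has " << sizeout << " ids\n";
  return 0;
}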
+ */ + size_t getSizeInInts(uint32_t postId) { return mapuncompsizes[postId]; } + + /** + * Load an array (data) of length "length" as the posting list corresponding + * to id postid + * into the data structure. The data will be either converted to a bitmap or + * compressed. + */ + size_t load(const uint32_t postid, const uint32_t *data, + const uint32_t length) { + if (length * threshold >= mMaxId) + return loadAsBitmap(postid, data, length); + else + return loadAsShortArray(postid, data, length); + } + + /** + * Check whether we have a posting list corresponding to postid + */ + bool hasBeenLoaded(const uint32_t postid) { + return ((shortlistmap.find(postid) != shortlistmap.end()) || + (bitmapmap.find(postid) != bitmapmap.end())); + } + + /** + * Compute the total of the volume of of posting lists + * corresponding to a query. + */ + size_t computeUnpackVolume(const vector &ids) { + size_t answer = 0; + for (uint32_t id : ids) { + answer += mapuncompsizes[id]; + } + return answer; + } + + /** + * Compute the total of the intersection of of posting lists + * corresponding to a query, and output how many integers are + * in + * + * ids: the query as a set of posting ids + * out: write to result + * sizeout: will indicate how many integers were written to out. + * + * return unpack volume defined as the total volume of the posting + * lists that were needed to compute the intersection (which can + * be less than the total volume possible due to early abandoning). + */ + size_t intersect(const vector &ids, uint32_t *out, + size_t &sizeout) { + if (ids.empty()) { + sizeout = 0; + return 0; + } + vector>>> shortlists; + vector>> bitmaps; + // vector bitmapscard; + + for (uint32_t id : ids) { + if (shortlistmap.find(id) != shortlistmap.end()) + shortlists.push_back(make_pair(mapuncompsizes[id], shortlistmap[id])); + else { + assert(bitmapmap.find(id) != bitmapmap.end()); + bitmaps.push_back(make_pair(mapuncompsizes[id], bitmapmap[id])); + } + } + size_t unpackVolume = 0; + if (shortlists.empty()) { + if (bitmaps.size() == 1) { + sizeout = bitmaps.front().second->toInts(out); + unpackVolume += sizeout; + return unpackVolume; + } + + BoolArray answer(mMaxId); + bitmaps[0].second->intersect(*bitmaps[1].second, answer); + unpackVolume += bitmaps[0].first + bitmaps[1].first; + for (uint32_t i = 2; i < bitmaps.size(); ++i) { + answer.inplaceIntersect(*bitmaps[i].second); + unpackVolume += bitmaps[i].first; + } + sizeout = answer.toInts(out); + return unpackVolume; + } else { + sort(shortlists.begin(), shortlists.end()); + sort(bitmaps.begin(), bitmaps.end()); + assert(sizeout >= shortlists[0].second->size()); + sizeout = shortlists[0].second->size(); + unpackVolume += shortlists[0].second->size(); + assert(sizeout == shortlists[0].first); + // we have to make a copy because by convention the output is not directly + // from the index + const vector &firstvector = *shortlists[0].second; + for (uint32_t i = 0; i < firstvector.size(); ++i) + out[i] = firstvector[i]; + for (uint32_t i = 1; (sizeout > 0) && (i < shortlists.size()); ++i) { + unpackVolume += shortlists[i].first; + sizeout = Inter(out, sizeout, shortlists[i].second->data(), + shortlists[i].second->size(), out); + } + size_t pos = 0; + for (uint32_t i = 0; (sizeout > 0) && (i < bitmaps.size()); ++i) { + unpackVolume += bitmaps[i].first; + shared_ptr &ba = bitmaps[i].second; + pos = 0; + for (uint32_t i = 0; i < sizeout; ++i) { + if (!ba->get(out[i])) + continue; + else + out[pos++] = out[i]; + } + sizeout = pos; + } + return unpackVolume; + 
} + } + + ~UncompressedHybM2() {} + + /** + * Estimate of the volume of data used by this object. + */ + size_t storageInBytes() const { + size_t answer = 0; + for (auto i : bitmapmap) + answer += i.second->sizeInBytes(); + for (auto i : shortlistmap) + answer += i.second->size() * sizeof(uint32_t); + return answer; + } + +private: + /** + * Load an array (data) of length "length" as the posting list corresponding + * to id postid + * as a bitmap. + * + * Do not call this directly, call load() instead. + */ + size_t loadAsBitmap(const uint32_t postid, const uint32_t *data, + const uint32_t length) { + if (bitmapmap.find(postid) != bitmapmap.end()) + return 0; + BoolArray *ba = new BoolArray(mMaxId); + for (uint32_t k = 0; k < length; ++k) + ba->set(data[k]); + bitmapmap[postid] = shared_ptr(ba); + mapuncompsizes[postid] = length; + return ba->sizeInBytes() / sizeof(uint32_t); + } + + /** + * Load an array (data) of length "length" as the posting list corresponding + * to id postid + * as a short array. + * + * Do not call this directly, call load() instead. + */ + size_t loadAsShortArray(const uint32_t postid, const uint32_t *data, + const uint32_t length) { + if (shortlistmap.find(postid) != shortlistmap.end()) + return 0; + mapuncompsizes[postid] = length; + vector *compressedbuffer = + new vector(data, data + length); + shortlistmap[postid] = shared_ptr>(compressedbuffer); + return compressedbuffer->size(); + } + + map> bitmapmap; + map>> shortlistmap; + map mapuncompsizes; + + const size_t mMaxId; // max value that can be stored in a list + const size_t threshold; //// 32 seems to be the recommended setting, no need + ///to change it? + + intersectionfunction Inter; +}; + +/** + * This is a version of HybM2 with a skipping data structure akin to what is + * described + * by Culpepper and Moffat. We seem to get no gain from this approach. + */ +class SkippingHybM2 { +public: + SkippingHybM2(uint32_t MaxId, uint32_t th = 32, uint32_t BS = 8) + : bitmapmap(), shortlistmap(), mapuncompsizes(), mMaxId(MaxId), + threshold(th), BlockSizeLog(BS) {} + + /** + * Returns the *uncompressed* size of a given posting list (specified + * by ID). + */ + size_t getSizeInInts(uint32_t postId) { return mapuncompsizes[postId]; } + + /** + * Load an array (data) of length "length" as the posting list corresponding + * to id postid + * into the data structure. The data will be either converted to a bitmap or + * compressed. + */ + size_t load(const uint32_t postid, const uint32_t *data, + const uint32_t length) { + if (length * threshold >= mMaxId) + return loadAsBitmap(postid, data, length); + else + return loadAsShortArray(postid, data, length); + } + + /** + * Check whether we have a posting list corresponding to postid + */ + bool hasBeenLoaded(const uint32_t postid) { + return ((shortlistmap.find(postid) != shortlistmap.end()) || + (bitmapmap.find(postid) != bitmapmap.end())); + } + + /** + * Compute the total of the volume of of posting lists + * corresponding to a query. + */ + size_t computeUnpackVolume(const vector &ids) { + size_t answer = 0; + for (uint32_t id : ids) { + answer += mapuncompsizes[id]; + } + return answer; + } + + /** + * Compute the total of the intersection of of posting lists + * corresponding to a query, and output how many integers are + * in + * + * ids: the query as a set of posting ids + * out: write to result + * sizeout: will indicate how many integers were written to out. 
+ * + * return unpack volume defined as the total volume of the posting + * lists that were needed to compute the intersection (which can + * be less than the total volume possible due to early abandoning). + */ + size_t intersect(const vector &ids, uint32_t *out, + size_t &sizeout) { + if (ids.empty()) { + sizeout = 0; + return 0; + } + vector>> shortlists; + vector>> bitmaps; + for (uint32_t id : ids) { + if (shortlistmap.find(id) != shortlistmap.end()) + shortlists.push_back(make_pair(mapuncompsizes[id], shortlistmap[id])); + else { + assert(bitmapmap.find(id) != bitmapmap.end()); + bitmaps.push_back(make_pair(mapuncompsizes[id], bitmapmap[id])); + } + } + size_t unpackVolume = 0; + if (shortlists.empty()) { + if (bitmaps.size() == 1) { + sizeout = bitmaps.front().second->toInts(out); + unpackVolume += sizeout; + return unpackVolume; + } + + BoolArray answer(mMaxId); + bitmaps[0].second->intersect(*bitmaps[1].second, answer); + unpackVolume += bitmaps[0].first + bitmaps[1].first; + for (uint32_t i = 2; i < bitmaps.size(); ++i) { + answer.inplaceIntersect(*bitmaps[i].second); + unpackVolume += bitmaps[i].first; + } + sizeout = answer.toInts(out); + return unpackVolume; + } else { + sort(shortlists.begin(), shortlists.end()); + sort(bitmaps.begin(), bitmaps.end()); + if (shortlists.size() == 1) { + sizeout = shortlists[0].second->decompress(out); + unpackVolume += shortlists[0].second->Length; + } else { + unpackVolume += shortlists[0].second->Length; + unpackVolume += shortlists[1].second->Length; + sizeout = shortlists[0].second->intersect(*shortlists[1].second, out); + for (uint32_t i = 2; (sizeout > 0) && (i < shortlists.size()); ++i) { + unpackVolume += shortlists[i].first; + sizeout = shortlists[i].second->intersect(out, sizeout, out); + } + } + size_t pos = 0; + for (uint32_t i = 0; (sizeout > 0) && (i < bitmaps.size()); ++i) { + unpackVolume += bitmaps[i].first; + shared_ptr &ba = bitmaps[i].second; + pos = 0; + for (uint32_t i = 0; i < sizeout; ++i) { + if (!ba->get(out[i])) + continue; + else + out[pos++] = out[i]; + } + sizeout = pos; + } + return unpackVolume; + } + } + + ~SkippingHybM2() {} + + /** + * Estimate of the volume of data used by this object. + */ + size_t storageInBytes() const { + size_t answer = 0; + for (auto i : bitmapmap) + answer += i.second->sizeInBytes(); + for (auto i : shortlistmap) + answer += i.second->storageInBytes(); + return answer; + } + +private: + /** + * Load an array (data) of length "length" as the posting list corresponding + * to id postid + * as a bitmap. + * + * Do not call this directly, call load() instead. + */ + size_t loadAsBitmap(const uint32_t postid, const uint32_t *data, + const uint32_t length) { + if (bitmapmap.find(postid) != bitmapmap.end()) + return 0; + BoolArray *ba = new BoolArray(mMaxId); + for (uint32_t k = 0; k < length; ++k) + ba->set(data[k]); + bitmapmap[postid] = shared_ptr(ba); + mapuncompsizes[postid] = length; + return ba->sizeInBytes() / sizeof(uint32_t); + } + + /** + * Load an array (data) of length "length" as the posting list corresponding + * to id postid + * as a short array. + * + * Do not call this directly, call load() instead. 
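Both intersect() implementations finish the same way: the surviving candidates in out[0..sizeout) are filtered through each remaining bitmap and compacted in place. A hedged sketch of just that loop, with std::vector<bool> standing in for the library's BoolArray:

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

// Keep only the candidates whose bit is set, compacting in place and
// returning the new count; bitmap[v] stands in for BoolArray::get(v).
size_t filterThroughBitmap(uint32_t *out, size_t sizeout,
                           const std::vector<bool> &bitmap) {
  size_t pos = 0;
  for (size_t i = 0; i < sizeout; ++i)
    if (bitmap[out[i]])
      out[pos++] = out[i];
  return pos;
}

int main() {
  std::vector<uint32_t> candidates{2, 5, 9, 12};
  std::vector<bool> bitmap(16, false);
  bitmap[5] = true;
  bitmap[12] = true;
  size_t n = filterThroughBitmap(candidates.data(), candidates.size(), bitmap);
  for (size_t i = 0; i < n; ++i) std::cout << candidates[i] << " ";  // 5 12
  std::cout << "\n";
}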
+ */ + size_t loadAsShortArray(const uint32_t postid, const uint32_t *data, + const uint32_t length) { + if (shortlistmap.find(postid) != shortlistmap.end()) + return 0; + + Skipping *compressedbuffer = new Skipping(BlockSizeLog, data, length); + shortlistmap[postid] = shared_ptr(compressedbuffer); + return compressedbuffer->storageInBytes() / sizeof(uint32_t); + } + + map> bitmapmap; + map> shortlistmap; + map mapuncompsizes; + + const size_t mMaxId; // max value that can be stored in a list + const size_t threshold; //// 32 seems to be the recommended setting, no need + ///to change it? + uint32_t BlockSizeLog; +}; +} // namespace SIMDCompressionLib + +#endif /* SIMDCompressionAndIntersection_HYBM2_H_ */ diff --git a/include/SIMDCompressionAndIntersection/integratedbitpacking.h b/include/SIMDCompressionAndIntersection/integratedbitpacking.h new file mode 100644 index 0000000..bf18c3b --- /dev/null +++ b/include/SIMDCompressionAndIntersection/integratedbitpacking.h @@ -0,0 +1,216 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. + * + * (c) Daniel Lemire, http://lemire.me/en/ + */ +#ifndef SIMDCompressionAndIntersection_INTEGRATEDBITPACKING +#define SIMDCompressionAndIntersection_INTEGRATEDBITPACKING +#include +#include "platform.h" + +namespace SIMDCompressionLib { + +void __integratedfastunpack0(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastunpack1(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastunpack2(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastunpack3(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastunpack4(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastunpack5(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastunpack6(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastunpack7(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastunpack8(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastunpack9(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastunpack10(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastunpack11(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastunpack12(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastunpack13(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastunpack14(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastunpack15(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastunpack16(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastunpack17(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t 
*__restrict__ out); +void __integratedfastunpack18(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastunpack19(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastunpack20(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastunpack21(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastunpack22(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastunpack23(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastunpack24(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastunpack25(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastunpack26(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastunpack27(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastunpack28(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastunpack29(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastunpack30(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastunpack31(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastunpack32(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); + +void __integratedfastpack0(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastpack1(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastpack2(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastpack3(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastpack4(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastpack5(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastpack6(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastpack7(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastpack8(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastpack9(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastpack10(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastpack11(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastpack12(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastpack13(const uint32_t initoffset, + const uint32_t *__restrict__ in, + 
uint32_t *__restrict__ out); +void __integratedfastpack14(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastpack15(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastpack16(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastpack17(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastpack18(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastpack19(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastpack20(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastpack21(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastpack22(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastpack23(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastpack24(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastpack25(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastpack26(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastpack27(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastpack28(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastpack29(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastpack30(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastpack31(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); +void __integratedfastpack32(const uint32_t initoffset, + const uint32_t *__restrict__ in, + uint32_t *__restrict__ out); + +} // namespace SIMDCompressionLib + +#endif // SIMDCompressionAndIntersection_INTEGRATEDBITPACKING diff --git a/include/SIMDCompressionAndIntersection/intersection.h b/include/SIMDCompressionAndIntersection/intersection.h new file mode 100644 index 0000000..fb89d7b --- /dev/null +++ b/include/SIMDCompressionAndIntersection/intersection.h @@ -0,0 +1,108 @@ + + +#ifndef SIMDCompressionAndIntersection_INTERSECTION_H_ +#define SIMDCompressionAndIntersection_INTERSECTION_H_ + +#include + +namespace SIMDCompressionLib { + +using namespace std; +/* + * Given two arrays, this writes the intersection to out. Returns the + * cardinality of the intersection. + */ +typedef size_t (*intersectionfunction)(const uint32_t *set1, + const size_t length1, + const uint32_t *set2, + const size_t length2, uint32_t *out); + +/* + * Given two arrays, this writes the intersection to out. Returns the + * cardinality of the intersection. + * + * This is a mix of very fast vectorized intersection algorithms, several + * designed by N. Kurz, with adaptations by D. Lemire. 
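Every routine declared in intersection.h follows the intersectionfunction signature defined above: two sorted uint32_t arrays in, the common values written to out, the cardinality returned. A plain scalar merge with that shape (a reference stand-in for exposition, not the SIMD kernels declared here) looks like this; note that the hybrid classes above call these functions with out aliasing the first input, which a forward merge tolerates:

#include <cstddef>
#include <cstdint>
#include <iostream>

// Same shape as intersectionfunction: sorted inputs, intersection written to
// out, cardinality returned. The merge never writes ahead of what it has
// already read, so out may alias set1.
size_t scalar_intersect(const uint32_t *set1, const size_t length1,
                        const uint32_t *set2, const size_t length2,
                        uint32_t *out) {
  size_t i = 0, j = 0, k = 0;
  while (i < length1 && j < length2) {
    if (set1[i] < set2[j]) {
      ++i;
    } else if (set2[j] < set1[i]) {
      ++j;
    } else {
      out[k++] = set1[i];
      ++i;
      ++j;
    }
  }
  return k;
}

int main() {
  uint32_t a[] = {1, 4, 7, 9};
  uint32_t b[] = {4, 5, 9, 11};
  uint32_t out[4];
  size_t n = scalar_intersect(a, 4, b, 4, out);
  for (size_t i = 0; i < n; ++i) std::cout << out[i] << " ";  // 4 9
  std::cout << "\n";
}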
+ */ +size_t SIMDintersection(const uint32_t *set1, const size_t length1, + const uint32_t *set2, const size_t length2, + uint32_t *out); + +#ifdef __AVX2__ +#include + +/* + * Straight port of SIMDintersection to AVX2. + */ +size_t SIMDintersection_avx2(const uint32_t *set1, const size_t length1, + const uint32_t *set2, const size_t length2, + uint32_t *out); + +#endif +/* + * Given two arrays, this writes the intersection to out. Returns the + * cardinality of the intersection. + * + * This is a well-written, but otherwise unsophisticated function. + * Written by N. Kurz. + */ +size_t nate_scalar(const uint32_t *set1, const size_t length1, + const uint32_t *set2, const size_t length2, uint32_t *out); + +/* + * Given two arrays, this writes the intersection to out. Returns the + * cardinality of the intersection. + * + * This applies a state-of-the-art algorithm. First coded by O. Kaser, adapted + * by D. Lemire. + */ +size_t onesidedgallopingintersection(const uint32_t *smallset, + const size_t smalllength, + const uint32_t *largeset, + const size_t largelength, uint32_t *out); + +class IntersectionFactory { +public: + static std::map intersection_schemes; + + static vector allNames() { + vector ans; + for (auto i = intersection_schemes.begin(); i != intersection_schemes.end(); + ++i) { + ans.push_back(i->first); + } + return ans; + } + + static string getName(intersectionfunction v) { + for (auto i = intersection_schemes.begin(); i != intersection_schemes.end(); + ++i) { + if (i->second == v) + return i->first; + } + return "UNKNOWN"; + } + + static bool valid(string name) { + return (intersection_schemes.find(name) != intersection_schemes.end()); + } + + static intersectionfunction getFromName(string name) { + if (intersection_schemes.find(name) == intersection_schemes.end()) { + cerr << "name " << name << " does not refer to an intersection procedure." + << endl; + cerr << "possible choices:" << endl; + for (auto i = intersection_schemes.begin(); + i != intersection_schemes.end(); ++i) { + cerr << static_cast(i->first) + << endl; // useless cast, but just to be clear + } + return NULL; + } + return intersection_schemes[name]; + } +}; + +} // namespace SIMDCompressionLib + +#endif /* SIMDCompressionAndIntersection_INTERSECTION_H_ */ diff --git a/include/SIMDCompressionAndIntersection/mersenne.h b/include/SIMDCompressionAndIntersection/mersenne.h new file mode 100644 index 0000000..f00dcb1 --- /dev/null +++ b/include/SIMDCompressionAndIntersection/mersenne.h @@ -0,0 +1,93 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. + */ + +#ifndef SIMDCompressionAndIntersection_MERSENNE_H_ +#define SIMDCompressionAndIntersection_MERSENNE_H_ + +#include "common.h" +#include "util.h" + +namespace SIMDCompressionLib { + +/** + * Mersenne twister - random number generator. + * Generate uniform distribution of 32 bit integers with the MT19937 algorithm. + * source: http://bannister.us/weblog/?s=Mersenne + */ +class ZRandom { + +public: + enum { N = 624, M = 397 }; + unsigned int MT[N + 1]; + unsigned int *map[N]; + int nValues; + + ZRandom(unsigned int iSeed = 20070102); + void seed(unsigned iSeed); + unsigned int getValue(); + unsigned int getValue(const uint32_t MaxValue); + double getDouble(); + bool test(const double p); +}; + +ZRandom::ZRandom(unsigned iSeed) : nValues(0) { seed(iSeed); } + +void ZRandom::seed(unsigned iSeed) { + nValues = 0; + // Seed the array used in random number generation. 
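onesidedgallopingintersection, declared a little earlier in intersection.h, targets the case where one list is much shorter than the other: for each element of the small list, the matching position in the large list is located by doubling ("galloping") steps followed by a binary search over the bracketed range, instead of a linear merge. A compact scalar sketch of that general strategy (the library's tuned implementation may differ in detail):

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

// For every value of the small sorted list, gallop through the large sorted
// list: double the step until the target is bracketed, then binary-search
// that range. Returns the number of matches written to out.
size_t gallopingIntersect(const uint32_t *smallset, size_t smalllength,
                          const uint32_t *largeset, size_t largelength,
                          uint32_t *out) {
  size_t k = 0, lo = 0;
  for (size_t i = 0; i < smalllength && lo < largelength; ++i) {
    const uint32_t target = smallset[i];
    size_t step = 1, hi = lo;
    while (hi < largelength && largeset[hi] < target) {  // gallop forward
      lo = hi;
      hi = std::min(hi + step, largelength);
      step *= 2;
    }
    const uint32_t *pos = std::lower_bound(largeset + lo, largeset + hi, target);
    lo = static_cast<size_t>(pos - largeset);
    if (lo < largelength && largeset[lo] == target)
      out[k++] = target;
  }
  return k;
}

int main() {
  std::vector<uint32_t> smallset{7, 40, 900};
  std::vector<uint32_t> largeset(1000);
  for (uint32_t i = 0; i < 1000; ++i) largeset[i] = i;
  std::vector<uint32_t> out(smallset.size());
  size_t n = gallopingIntersect(smallset.data(), smallset.size(),
                                largeset.data(), largeset.size(), out.data());
  for (size_t i = 0; i < n; ++i) std::cout << out[i] << " ";  // 7 40 900
  std::cout << "\n";
}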
+ MT[0] = iSeed; + for (int i = 1; i < N; ++i) { + MT[i] = 1 + (69069 * MT[i - 1]); + } + // Compute map once to avoid % in inner loop. + for (int i = 0; i < N; ++i) { + map[i] = MT + ((i + M) % N); + } +} + +inline bool ZRandom::test(const double p) { return getDouble() <= p; } +inline double ZRandom::getDouble() { + return double(getValue()) * (1.0 / 4294967296.0); +} + +unsigned int ZRandom::getValue(const uint32_t MaxValue) { + unsigned int used = MaxValue; + used |= used >> 1; + used |= used >> 2; + used |= used >> 4; + used |= used >> 8; + used |= used >> 16; + + // Draw numbers until one is found in [0,n] + unsigned int i; + do + i = getValue() & used; // toss unused bits to shorten search + while (i > MaxValue); + return i; +} + +unsigned int ZRandom::getValue() { + if (0 == nValues) { + MT[N] = MT[0]; + for (int i = 0; i < N; ++i) { + unsigned y = (0x80000000 & MT[i]) | (0x7FFFFFFF & MT[i + 1]); + unsigned v = *(map[i]) ^ (y >> 1); + if (1 & y) + v ^= 2567483615; + MT[i] = v; + } + nValues = N; + } + unsigned y = MT[N - nValues--]; + y ^= y >> 11; + y ^= static_cast((y << 7) & 2636928640); + y ^= static_cast((y << 15) & 4022730752); + y ^= y >> 18; + return y; +} + +} // namespace SIMDCompressionLib + +#endif /* SIMDCompressionAndIntersection_MERSENNE_H_ */ diff --git a/include/SIMDCompressionAndIntersection/platform.h b/include/SIMDCompressionAndIntersection/platform.h new file mode 100644 index 0000000..309aef0 --- /dev/null +++ b/include/SIMDCompressionAndIntersection/platform.h @@ -0,0 +1,44 @@ +/* platform.h: Cross-platform macros and compatibility shims. */ + +#pragma once + +#include + +#if defined(_MSC_VER) +#define ALWAYS_INLINE __forceinline +#define CONST_FUNCTION +#define PURE_FUNCTION +#define __restrict__ __restrict +#define SIMDCOMP_ALIGNED(x) __declspec(align(x)) +#else +#if defined(__GNUC__) +#define ALWAYS_INLINE __attribute__((always_inline)) inline +#define CONST_FUNCTION __attribute__((const)) +#define PURE_FUNCTION __attribute__((pure)) +#define SIMDCOMP_ALIGNED(x) __attribute__((aligned(x))) +#endif +#endif + +#ifdef _MSC_VER +#include + +uint32_t __inline __builtin_clz(uint32_t value) { + unsigned long leading_zero = 0; + return _BitScanReverse(&leading_zero, value) == 0 ? 0 : (31 - leading_zero); +} + +uint32_t __inline __builtin_ctz(uint32_t value) { + unsigned long trailing_zero = 0; + return _BitScanForward(&trailing_zero, value) == 0 ? 32 : trailing_zero; +} + +uint32_t __inline __builtin_ctzl(uint64_t value) { +#ifdef _M_X64 + unsigned long trailing_zero = 0; + return _BitScanForward64(&trailing_zero, value) == 0 ? 64 : trailing_zero; +#else + return ((value & 0xFFFFFFFF) == 0) ? (__builtin_ctz(value >> 32) + 32) + : __builtin_ctz(value & 0xFFFFFFFF); +#endif +} +#endif diff --git a/include/SIMDCompressionAndIntersection/simdbinarypacking.h b/include/SIMDCompressionAndIntersection/simdbinarypacking.h new file mode 100644 index 0000000..879064b --- /dev/null +++ b/include/SIMDCompressionAndIntersection/simdbinarypacking.h @@ -0,0 +1,403 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. 
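ZRandom::getValue(MaxValue) above avoids modulo bias by widening MaxValue to an all-ones bit mask, masking every draw to those bits, and rejecting draws that still exceed MaxValue. The same trick in isolation, with std::mt19937 as the underlying source (illustrative only; the seed is arbitrary):

#include <cstdint>
#include <iostream>
#include <random>

// Uniform draw in [0, maxValue] without modulo bias: widen maxValue to an
// all-ones mask, mask each draw to those bits, reject draws that overshoot.
uint32_t boundedDraw(std::mt19937 &gen, uint32_t maxValue) {
  uint32_t mask = maxValue;
  mask |= mask >> 1;
  mask |= mask >> 2;
  mask |= mask >> 4;
  mask |= mask >> 8;
  mask |= mask >> 16;  // mask is now the smallest 2^k - 1 covering maxValue
  uint32_t v;
  do {
    v = static_cast<uint32_t>(gen()) & mask;  // toss the unused high bits
  } while (v > maxValue);                     // rejection keeps it uniform
  return v;
}

int main() {
  std::mt19937 gen(12345);  // arbitrary seed
  for (int i = 0; i < 5; ++i) std::cout << boundedDraw(gen, 10) << " ";
  std::cout << "\n";
}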
+ * + * (c) Daniel Lemire, http://lemire.me/en/ + */ + +#ifndef SIMDCompressionAndIntersection_SIMDBINARYPACKING_H_ +#define SIMDCompressionAndIntersection_SIMDBINARYPACKING_H_ + +#include "codecs.h" +#include "simdbitpackinghelpers.h" +#include "util.h" + +namespace SIMDCompressionLib { + +extern "C" { + +/* searches "bit" 128-bit vectors from "in" (= 128 encoded integers) for the +* first encoded uint32 value + * which is >= |key|, and returns its position. It is assumed that the values + * stored are in sorted order. + * The encoded key is stored in "*presult". If no value is larger or equal to +* the key, +* 128 is returned */ +int simdsearchd1(__m128i *initOffset, const __m128i *in, uint32_t bit, + uint32_t key, uint32_t *presult); + +/** + * Simply scan, updating initOffset as it proceeds. + */ +void simdscand1(__m128i *initOffset, const __m128i *in, uint32_t bit); + +/* returns the value stored at the specified "slot". */ +uint32_t simdselectd1(__m128i *initOffset, const __m128i *in, uint32_t bit, + int slot); +} + +template struct SIMDBlockPacker { + typedef SIMDDeltaProcessor DeltaProcessor; + static void unpackblock(const uint32_t *in, uint32_t *out, const uint32_t bit, + __m128i &initoffset) { + if (ArrayDispatch) + ArrayDispatch::SIMDunpack(reinterpret_cast(in), out, + bit); + else + simdunpack(reinterpret_cast(in), out, bit); + if (bit < 32) { + initoffset = DeltaProcessor::runPrefixSum(initoffset, out); + } else { + initoffset = + MM_LOAD_SI_128(reinterpret_cast<__m128i *>(out + SIMDBlockSize - 4)); + } + } + + static uint32_t maxbits(const uint32_t *in, __m128i &initoffset) { + const __m128i *pin = reinterpret_cast(in); + __m128i newvec = MM_LOAD_SI_128(pin); + __m128i accumulator = DeltaHelper::Delta(newvec, initoffset); + __m128i oldvec = newvec; + for (uint32_t k = 1; 4 * k < SIMDBlockSize; ++k) { + newvec = MM_LOAD_SI_128(pin + k); + accumulator = + _mm_or_si128(accumulator, DeltaHelper::Delta(newvec, oldvec)); + oldvec = newvec; + } + initoffset = oldvec; + return maxbitas32int(accumulator); + } + + static void packblockwithoutmask(uint32_t *in, uint32_t *out, + const uint32_t bit, __m128i &initoffset) { + __m128i nextoffset = + MM_LOAD_SI_128(reinterpret_cast<__m128i *>(in + SIMDBlockSize - 4)); + if (bit < 32) + DeltaProcessor::runDelta(initoffset, in); + if (ArrayDispatch) + ArrayDispatch::SIMDpackwithoutmask(in, reinterpret_cast<__m128i *>(out), + bit); + else + simdpackwithoutmask(in, reinterpret_cast<__m128i *>(out), bit); + initoffset = nextoffset; + } + + static string name() { + if (ArrayDispatch) + return string("SIMDBlockPackerAD+") + DeltaHelper::name(); + else + return string("SIMDBlockPacker+") + DeltaHelper::name(); + } +}; + +template +struct SIMDIntegratedBlockPacker { + + static void unpackblock(const uint32_t *in, uint32_t *out, const uint32_t bit, + __m128i &initoffset) { + if (ArrayDispatch) + initoffset = IntegratedArrayDispatch::SIMDiunpack( + initoffset, reinterpret_cast(in), out, bit); + else + initoffset = SIMDiunpack( + initoffset, reinterpret_cast(in), out, bit); + } + + static uint32_t maxbits(const uint32_t *in, __m128i &initoffset) { + const __m128i *pin = reinterpret_cast(in); + __m128i newvec = MM_LOAD_SI_128(pin); + __m128i accumulator = DeltaHelper::Delta(newvec, initoffset); + __m128i oldvec = newvec; + for (uint32_t k = 1; 4 * k < SIMDBlockSize; ++k) { + newvec = MM_LOAD_SI_128(pin + k); + accumulator = + _mm_or_si128(accumulator, DeltaHelper::Delta(newvec, oldvec)); + oldvec = newvec; + } + initoffset = oldvec; + return 
maxbitas32int(accumulator); + } + + static void packblockwithoutmask(uint32_t *in, uint32_t *out, + const uint32_t bit, __m128i &initoffset) { + __m128i nextoffset = + MM_LOAD_SI_128(reinterpret_cast<__m128i *>(in + SIMDBlockSize - 4)); + if (ArrayDispatch) + IntegratedArrayDispatch::SIMDipackwithoutmask( + initoffset, in, reinterpret_cast<__m128i *>(out), bit); + else + SIMDipackwithoutmask(initoffset, in, + reinterpret_cast<__m128i *>(out), bit); + initoffset = nextoffset; + } + + static string name() { + if (ArrayDispatch) + return string("SIMDIntegratedBlockPackerAD+") + DeltaHelper::name(); + else + return string("SIMDIntegratedBlockPacker+") + DeltaHelper::name(); + } +}; + +/** + * + * + * Code data in miniblocks of 128 integers. + * To preserve alignment, we regroup + * 8 such miniblocks into a block of 8 * 128 = 1024 + * integers. + * + */ +template class SIMDBinaryPacking : public IntegerCODEC { +public: +#ifdef USE_ALIGNED + static const uint32_t CookiePadder = 123456; // just some made up number +#endif + static const uint32_t MiniBlockSize = 128; + static const uint32_t HowManyMiniBlocks = 16; + static const uint32_t BlockSize = + MiniBlockSize; // HowManyMiniBlocks * MiniBlockSize; + + void encodeArray(uint32_t *in, const size_t length, uint32_t *out, + size_t &nvalue) { + checkifdivisibleby(length, BlockSize); + const uint32_t *const initout(out); +#ifdef USE_ALIGNED + if (needPaddingTo128Bits(out) or needPaddingTo128Bits(in)) + throw std::runtime_error( + "alignment issue: pointers should be aligned on 128-bit boundaries"); +#endif + *out++ = static_cast(length); +#ifdef USE_ALIGNED + while (needPaddingTo128Bits(out)) + *out++ = CookiePadder; +#endif + uint32_t Bs[HowManyMiniBlocks]; + __m128i init = _mm_set1_epi32(0); + const uint32_t *const final = in + length; + for (; in + HowManyMiniBlocks * MiniBlockSize <= final; + in += HowManyMiniBlocks * MiniBlockSize) { + __m128i tmpinit = init; + for (uint32_t i = 0; i < HowManyMiniBlocks; ++i) { + Bs[i] = BlockPacker::maxbits(in + i * MiniBlockSize, tmpinit); + } + *out++ = (Bs[0] << 24) | (Bs[1] << 16) | (Bs[2] << 8) | Bs[3]; + *out++ = (Bs[4] << 24) | (Bs[5] << 16) | (Bs[6] << 8) | Bs[7]; + *out++ = (Bs[8] << 24) | (Bs[9] << 16) | (Bs[10] << 8) | Bs[11]; + *out++ = (Bs[12] << 24) | (Bs[13] << 16) | (Bs[14] << 8) | Bs[15]; + for (uint32_t i = 0; i < HowManyMiniBlocks; ++i) { + BlockPacker::packblockwithoutmask(in + i * MiniBlockSize, out, Bs[i], + init); + out += MiniBlockSize / 32 * Bs[i]; + } + } + if (in < final) { + const size_t howmany = (final - in) / MiniBlockSize; + __m128i tmpinit = init; + memset(&Bs[0], 0, HowManyMiniBlocks * sizeof(uint32_t)); + for (uint32_t i = 0; i < howmany; ++i) { + Bs[i] = BlockPacker::maxbits(in + i * MiniBlockSize, tmpinit); + } + *out++ = (Bs[0] << 24) | (Bs[1] << 16) | (Bs[2] << 8) | Bs[3]; + *out++ = (Bs[4] << 24) | (Bs[5] << 16) | (Bs[6] << 8) | Bs[7]; + *out++ = (Bs[8] << 24) | (Bs[9] << 16) | (Bs[10] << 8) | Bs[11]; + *out++ = (Bs[12] << 24) | (Bs[13] << 16) | (Bs[14] << 8) | Bs[15]; + for (uint32_t i = 0; i < howmany; ++i) { + BlockPacker::packblockwithoutmask(in + i * MiniBlockSize, out, Bs[i], + init); + out += MiniBlockSize / 32 * Bs[i]; + } + in += howmany * MiniBlockSize; + assert(in == final); + } + nvalue = out - initout; + } + + const uint32_t *decodeArray(const uint32_t *in, const size_t /*length*/, + uint32_t *out, size_t &nvalue) { +#ifdef USE_ALIGNED + if (needPaddingTo128Bits(out) or needPaddingTo128Bits(in)) + throw std::runtime_error( + "alignment issue: pointers 
should be aligned on 128-bit boundaries"); +#endif + const uint32_t actuallength = *in++; +#ifdef USE_ALIGNED + while (needPaddingTo128Bits(in)) { + if (in[0] != CookiePadder) + throw logic_error("SIMDBinaryPacking alignment issue."); + ++in; + } +#endif + const uint32_t *const initout(out); + uint32_t Bs[HowManyMiniBlocks]; + __m128i init = _mm_set1_epi32(0); + for (; out < initout + + actuallength / (HowManyMiniBlocks * MiniBlockSize) * + HowManyMiniBlocks * MiniBlockSize; + out += HowManyMiniBlocks * MiniBlockSize) { + for (uint32_t i = 0; i < 4; ++i, ++in) { + Bs[0 + 4 * i] = static_cast(in[0] >> 24); + Bs[1 + 4 * i] = static_cast(in[0] >> 16); + Bs[2 + 4 * i] = static_cast(in[0] >> 8); + Bs[3 + 4 * i] = static_cast(in[0]); + } + for (uint32_t i = 0; i < HowManyMiniBlocks; ++i) { + BlockPacker::unpackblock(in, out + i * MiniBlockSize, Bs[i], init); + in += MiniBlockSize / 32 * Bs[i]; + } + } + + if (out < initout + actuallength) { + const size_t howmany = (initout + actuallength - out) / MiniBlockSize; + for (uint32_t i = 0; i < 4; ++i, ++in) { + Bs[0 + 4 * i] = static_cast(in[0] >> 24); + Bs[1 + 4 * i] = static_cast(in[0] >> 16); + Bs[2 + 4 * i] = static_cast(in[0] >> 8); + Bs[3 + 4 * i] = static_cast(in[0]); + } + for (uint32_t i = 0; i < howmany; ++i) { + BlockPacker::unpackblock(in, out + i * MiniBlockSize, Bs[i], init); + in += MiniBlockSize / 32 * Bs[i]; + } + out += howmany * MiniBlockSize; + assert(out == initout + actuallength); + } + nvalue = out - initout; + return in; + } + + // Returns a decompressed value in an encoded array + // could be greatly optimized in the non-differential coding case: currently + // just for delta coding + // WARNING: THIS IMPLEMENTATION WILL ONLY PROVIDE THE CORRECT RESULT + // WHEN USING REGULAR (D1) DIFFERENTIAL CODING. TODO: Generalize the + // support. TODO: Should check the type. 
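encodeArray/decodeArray above prefix every group of 16 miniblocks with four 32-bit header words, each holding four 8-bit bit widths: (Bs[0] << 24) | (Bs[1] << 16) | (Bs[2] << 8) | Bs[3], and so on. A small sketch of just that header packing and its inverse, independent of the SIMD kernels:

#include <array>
#include <cstdint>
#include <iostream>

// Pack 16 per-miniblock bit widths (each <= 32, so it fits in 8 bits) into
// 4 header words, mirroring the layout written by encodeArray above.
std::array<uint32_t, 4> packWidths(const std::array<uint32_t, 16> &Bs) {
  std::array<uint32_t, 4> h{};
  for (int i = 0; i < 4; ++i)
    h[i] = (Bs[4 * i] << 24) | (Bs[4 * i + 1] << 16) |
           (Bs[4 * i + 2] << 8) | Bs[4 * i + 3];
  return h;
}

// Inverse of packWidths, mirroring the Bs[] extraction loop in decodeArray.
std::array<uint32_t, 16> unpackWidths(const std::array<uint32_t, 4> &h) {
  std::array<uint32_t, 16> Bs{};
  for (int i = 0; i < 4; ++i) {
    Bs[4 * i + 0] = (h[i] >> 24) & 0xFF;
    Bs[4 * i + 1] = (h[i] >> 16) & 0xFF;
    Bs[4 * i + 2] = (h[i] >> 8) & 0xFF;
    Bs[4 * i + 3] = h[i] & 0xFF;
  }
  return Bs;
}

int main() {
  std::array<uint32_t, 16> Bs{1, 3, 7, 32, 0, 5, 9, 12, 2, 2, 2, 2, 31, 4, 6, 8};
  std::cout << (unpackWidths(packWidths(Bs)) == Bs ? "round-trip ok"
                                                   : "mismatch") << "\n";
}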
+ uint32_t select(uint32_t *in, size_t index) { +#ifdef USE_ALIGNED + if (needPaddingTo128Bits(in)) + throw std::runtime_error( + "alignment issue: pointers should be aligned on 128-bit boundaries"); +#endif + const uint32_t actuallength = *in++; +#ifdef USE_ALIGNED + while (needPaddingTo128Bits(in)) { + if (in[0] != CookiePadder) + throw logic_error("SIMDBinaryPacking alignment issue."); + ++in; + } +#endif + uint32_t Bs[HowManyMiniBlocks]; + __m128i init = _mm_set1_epi32(0); + size_t runningindex = 0; + for (; runningindex < actuallength / (HowManyMiniBlocks * MiniBlockSize) * + HowManyMiniBlocks * MiniBlockSize;) { + for (uint32_t i = 0; i < 4; ++i, ++in) { + Bs[0 + 4 * i] = static_cast(in[0] >> 24); + Bs[1 + 4 * i] = static_cast(in[0] >> 16); + Bs[2 + 4 * i] = static_cast(in[0] >> 8); + Bs[3 + 4 * i] = static_cast(in[0]); + } + for (uint32_t i = 0; i < HowManyMiniBlocks; ++i) { + if (runningindex + 128 > index) { + return simdselectd1(&init, (const __m128i *)in, Bs[i], + static_cast(index - runningindex)); + } + simdscand1(&init, (const __m128i *)in, Bs[i]); + runningindex += MiniBlockSize; + in += MiniBlockSize / 32 * Bs[i]; + } + } + + if (runningindex < actuallength) { + const size_t howmany = (actuallength - runningindex) / MiniBlockSize; + for (uint32_t i = 0; i < 4; ++i, ++in) { + Bs[0 + 4 * i] = static_cast(in[0] >> 24); + Bs[1 + 4 * i] = static_cast(in[0] >> 16); + Bs[2 + 4 * i] = static_cast(in[0] >> 8); + Bs[3 + 4 * i] = static_cast(in[0]); + } + for (uint32_t i = 0; i < howmany; ++i) { + if (runningindex + 128 > index) { + return simdselectd1(&init, (const __m128i *)in, Bs[i], + static_cast(index - runningindex)); + } + simdscand1(&init, (const __m128i *)in, Bs[i]); + runningindex += MiniBlockSize; + in += MiniBlockSize / 32 * Bs[i]; + } + } + return static_cast(runningindex); + } + + // Performs a lower bound find in the encoded array. + // Returns the index + // WARNING: THIS IMPLEMENTATION WILL ONLY PROVIDE THE CORRECT RESULT + // WHEN USING REGULAR (D1) DIFFERENTIAL CODING. TODO: Generalize the + // support. TODO: Should check the type. 
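select() above, and findLowerBound() below, both walk the encoded stream one 128-integer miniblock at a time, maintaining a running D1 offset and a running index, and only descend into the block that can contain the answer. A scalar sketch of the same traversal over plainly delta-coded data, with the SIMD scan/search/select kernels replaced by ordinary loops (for exposition only):

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

constexpr size_t kBlock = 128;

// 'deltas' holds d1-coded values (value[i] - value[i-1], the first relative
// to 0). Recover the element at 'index' by skipping whole blocks and
// prefix-summing only inside the block that holds the answer.
uint32_t selectD1(const std::vector<uint32_t> &deltas, size_t index) {
  uint32_t running = 0;
  size_t i = 0;
  while (i + kBlock <= index) {                 // blocks entirely before index
    for (size_t k = 0; k < kBlock; ++k) running += deltas[i + k];
    i += kBlock;
  }
  for (size_t k = i; k <= index; ++k) running += deltas[k];
  return running;
}

// Smallest position whose decoded value is >= key; the value itself is
// written to *result, loosely mirroring findLowerBound()'s contract.
size_t lowerBoundD1(const std::vector<uint32_t> &deltas, uint32_t key,
                    uint32_t *result) {
  uint32_t running = 0;
  for (size_t i = 0; i < deltas.size(); ++i) {
    running += deltas[i];
    if (running >= key) {
      *result = running;
      return i;
    }
  }
  *result = 0;
  return deltas.size();  // past the end: nothing is >= key
}

int main() {
  std::vector<uint32_t> deltas(300, 2);         // encodes 2, 4, 6, ..., 600
  std::cout << selectD1(deltas, 149) << "\n";   // 300
  uint32_t value = 0;
  size_t idx = lowerBoundD1(deltas, 259, &value);
  std::cout << idx << " " << value << "\n";     // 129 260
}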
+ size_t findLowerBound(const uint32_t *in, const size_t /*length*/, + uint32_t key, uint32_t *presult) { +#ifdef USE_ALIGNED + if (needPaddingTo128Bits(in)) + throw std::runtime_error( + "alignment issue: pointers should be aligned on 128-bit boundaries"); +#endif + const uint32_t actuallength = *in++; +#ifdef USE_ALIGNED + while (needPaddingTo128Bits(in)) { + if (in[0] != CookiePadder) + throw logic_error("SIMDBinaryPacking alignment issue."); + ++in; + } +#endif + uint32_t Bs[HowManyMiniBlocks]; + __m128i init = _mm_set1_epi32(0); + size_t runningindex = 0; + for (; runningindex < actuallength / (HowManyMiniBlocks * MiniBlockSize) * + HowManyMiniBlocks * MiniBlockSize;) { + for (uint32_t i = 0; i < 4; ++i, ++in) { + Bs[0 + 4 * i] = static_cast(in[0] >> 24); + Bs[1 + 4 * i] = static_cast(in[0] >> 16); + Bs[2 + 4 * i] = static_cast(in[0] >> 8); + Bs[3 + 4 * i] = static_cast(in[0]); + } + for (uint32_t i = 0; i < HowManyMiniBlocks; ++i) { + size_t index = + simdsearchd1(&init, (const __m128i *)in, Bs[i], key, presult); + runningindex += index; + if (index < MiniBlockSize) + return runningindex; + in += MiniBlockSize / 32 * Bs[i]; + } + } + + if (runningindex < actuallength) { + const size_t howmany = (actuallength - runningindex) / MiniBlockSize; + for (uint32_t i = 0; i < 4; ++i, ++in) { + Bs[0 + 4 * i] = static_cast(in[0] >> 24); + Bs[1 + 4 * i] = static_cast(in[0] >> 16); + Bs[2 + 4 * i] = static_cast(in[0] >> 8); + Bs[3 + 4 * i] = static_cast(in[0]); + } + for (uint32_t i = 0; i < howmany; ++i) { + size_t index = + simdsearchd1(&init, (const __m128i *)in, Bs[i], key, presult); + runningindex += index; + if (index < MiniBlockSize) + return runningindex; + in += MiniBlockSize / 32 * Bs[i]; + } + } + return runningindex; + } + + string name() const { + ostringstream convert; + convert << "SIMDBinaryPacking" + << "With" << BlockPacker::name() << MiniBlockSize; + return convert.str(); + } +}; + +} // namespace SIMDCompressionLib + +#endif /* SIMDCompressionAndIntersection_SIMDBINARYPACKING_H_ */ diff --git a/include/SIMDCompressionAndIntersection/simdbitpacking.h b/include/SIMDCompressionAndIntersection/simdbitpacking.h new file mode 100644 index 0000000..f3a4e3d --- /dev/null +++ b/include/SIMDCompressionAndIntersection/simdbitpacking.h @@ -0,0 +1,117 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. 
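Each __SIMD_fastpack*/__SIMD_fastunpack* kernel declared in this header handles one fixed bit width b for a block of 128 integers, producing or consuming exactly b 128-bit vectors (4*b uint32_t words). A generic scalar reference of the same fixed-width packing, to show the layout and the size arithmetic (the real kernels are generated per width and vectorized):

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

// Scalar reference bit packing: each of the n values occupies exactly 'bit'
// bits, packed little-endian into 32-bit words; 128 values at width b
// therefore occupy 4*b words, matching the SIMD kernels' output size.
std::vector<uint32_t> packBits(const std::vector<uint32_t> &in, uint32_t bit) {
  std::vector<uint32_t> out((in.size() * bit + 31) / 32, 0);
  for (size_t i = 0; i < in.size(); ++i) {
    assert(bit == 32 || in[i] < (uint32_t{1} << bit));  // value fits in 'bit' bits
    size_t pos = i * bit;
    out[pos / 32] |= in[i] << (pos % 32);
    if (pos % 32 + bit > 32)                  // value straddles a word boundary
      out[pos / 32 + 1] |= in[i] >> (32 - pos % 32);
  }
  return out;
}

std::vector<uint32_t> unpackBits(const std::vector<uint32_t> &in, size_t n,
                                 uint32_t bit) {
  std::vector<uint32_t> out(n, 0);
  const uint32_t mask = (bit == 32) ? 0xFFFFFFFFu : (uint32_t{1} << bit) - 1;
  for (size_t i = 0; i < n; ++i) {
    size_t pos = i * bit;
    uint32_t v = in[pos / 32] >> (pos % 32);
    if (pos % 32 + bit > 32)
      v |= in[pos / 32 + 1] << (32 - pos % 32);
    out[i] = v & mask;
  }
  return out;
}

int main() {
  std::vector<uint32_t> data(128);
  for (size_t i = 0; i < data.size(); ++i) data[i] = i % 32;  // fits in 5 bits
  auto packed = packBits(data, 5);
  std::cout << packed.size() << " words\n";                   // 20 = 4 * 5
  std::cout << (unpackBits(packed, 128, 5) == data) << "\n";  // 1
}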
+ * + * (c) Daniel Lemire + */ +#ifndef SIMDCompressionAndIntersection_SIMDBITPACKING_H_ +#define SIMDCompressionAndIntersection_SIMDBITPACKING_H_ + +#include "common.h" + +namespace SIMDCompressionLib { + +void __SIMD_fastunpack1(const __m128i *, uint32_t *); +void __SIMD_fastunpack2(const __m128i *, uint32_t *); +void __SIMD_fastunpack3(const __m128i *, uint32_t *); +void __SIMD_fastunpack4(const __m128i *, uint32_t *); +void __SIMD_fastunpack5(const __m128i *, uint32_t *); +void __SIMD_fastunpack6(const __m128i *, uint32_t *); +void __SIMD_fastunpack7(const __m128i *, uint32_t *); +void __SIMD_fastunpack8(const __m128i *, uint32_t *); +void __SIMD_fastunpack9(const __m128i *, uint32_t *); +void __SIMD_fastunpack10(const __m128i *, uint32_t *); +void __SIMD_fastunpack11(const __m128i *, uint32_t *); +void __SIMD_fastunpack12(const __m128i *, uint32_t *); +void __SIMD_fastunpack13(const __m128i *, uint32_t *); +void __SIMD_fastunpack14(const __m128i *, uint32_t *); +void __SIMD_fastunpack15(const __m128i *, uint32_t *); +void __SIMD_fastunpack16(const __m128i *, uint32_t *); +void __SIMD_fastunpack17(const __m128i *, uint32_t *); +void __SIMD_fastunpack18(const __m128i *, uint32_t *); +void __SIMD_fastunpack19(const __m128i *, uint32_t *); +void __SIMD_fastunpack20(const __m128i *, uint32_t *); +void __SIMD_fastunpack21(const __m128i *, uint32_t *); +void __SIMD_fastunpack22(const __m128i *, uint32_t *); +void __SIMD_fastunpack23(const __m128i *, uint32_t *); +void __SIMD_fastunpack24(const __m128i *, uint32_t *); +void __SIMD_fastunpack25(const __m128i *, uint32_t *); +void __SIMD_fastunpack26(const __m128i *, uint32_t *); +void __SIMD_fastunpack27(const __m128i *, uint32_t *); +void __SIMD_fastunpack28(const __m128i *, uint32_t *); +void __SIMD_fastunpack29(const __m128i *, uint32_t *); +void __SIMD_fastunpack30(const __m128i *, uint32_t *); +void __SIMD_fastunpack31(const __m128i *, uint32_t *); +void __SIMD_fastunpack32(const __m128i *, uint32_t *); + +void __SIMD_fastpackwithoutmask0(const uint32_t *, __m128i *); +void __SIMD_fastpackwithoutmask1(const uint32_t *, __m128i *); +void __SIMD_fastpackwithoutmask2(const uint32_t *, __m128i *); +void __SIMD_fastpackwithoutmask3(const uint32_t *, __m128i *); +void __SIMD_fastpackwithoutmask4(const uint32_t *, __m128i *); +void __SIMD_fastpackwithoutmask5(const uint32_t *, __m128i *); +void __SIMD_fastpackwithoutmask6(const uint32_t *, __m128i *); +void __SIMD_fastpackwithoutmask7(const uint32_t *, __m128i *); +void __SIMD_fastpackwithoutmask8(const uint32_t *, __m128i *); +void __SIMD_fastpackwithoutmask9(const uint32_t *, __m128i *); +void __SIMD_fastpackwithoutmask10(const uint32_t *, __m128i *); +void __SIMD_fastpackwithoutmask11(const uint32_t *, __m128i *); +void __SIMD_fastpackwithoutmask12(const uint32_t *, __m128i *); +void __SIMD_fastpackwithoutmask13(const uint32_t *, __m128i *); +void __SIMD_fastpackwithoutmask14(const uint32_t *, __m128i *); +void __SIMD_fastpackwithoutmask15(const uint32_t *, __m128i *); +void __SIMD_fastpackwithoutmask16(const uint32_t *, __m128i *); +void __SIMD_fastpackwithoutmask17(const uint32_t *, __m128i *); +void __SIMD_fastpackwithoutmask18(const uint32_t *, __m128i *); +void __SIMD_fastpackwithoutmask19(const uint32_t *, __m128i *); +void __SIMD_fastpackwithoutmask20(const uint32_t *, __m128i *); +void __SIMD_fastpackwithoutmask21(const uint32_t *, __m128i *); +void __SIMD_fastpackwithoutmask22(const uint32_t *, __m128i *); +void __SIMD_fastpackwithoutmask23(const uint32_t *, __m128i *); +void 
__SIMD_fastpackwithoutmask24(const uint32_t *, __m128i *); +void __SIMD_fastpackwithoutmask25(const uint32_t *, __m128i *); +void __SIMD_fastpackwithoutmask26(const uint32_t *, __m128i *); +void __SIMD_fastpackwithoutmask27(const uint32_t *, __m128i *); +void __SIMD_fastpackwithoutmask28(const uint32_t *, __m128i *); +void __SIMD_fastpackwithoutmask29(const uint32_t *, __m128i *); +void __SIMD_fastpackwithoutmask30(const uint32_t *, __m128i *); +void __SIMD_fastpackwithoutmask31(const uint32_t *, __m128i *); +void __SIMD_fastpackwithoutmask32(const uint32_t *, __m128i *); + +void __SIMD_fastpack0(const uint32_t *, __m128i *); +void __SIMD_fastpack1(const uint32_t *, __m128i *); +void __SIMD_fastpack2(const uint32_t *, __m128i *); +void __SIMD_fastpack3(const uint32_t *, __m128i *); +void __SIMD_fastpack4(const uint32_t *, __m128i *); +void __SIMD_fastpack5(const uint32_t *, __m128i *); +void __SIMD_fastpack6(const uint32_t *, __m128i *); +void __SIMD_fastpack7(const uint32_t *, __m128i *); +void __SIMD_fastpack8(const uint32_t *, __m128i *); +void __SIMD_fastpack9(const uint32_t *, __m128i *); +void __SIMD_fastpack10(const uint32_t *, __m128i *); +void __SIMD_fastpack11(const uint32_t *, __m128i *); +void __SIMD_fastpack12(const uint32_t *, __m128i *); +void __SIMD_fastpack13(const uint32_t *, __m128i *); +void __SIMD_fastpack14(const uint32_t *, __m128i *); +void __SIMD_fastpack15(const uint32_t *, __m128i *); +void __SIMD_fastpack16(const uint32_t *, __m128i *); +void __SIMD_fastpack17(const uint32_t *, __m128i *); +void __SIMD_fastpack18(const uint32_t *, __m128i *); +void __SIMD_fastpack19(const uint32_t *, __m128i *); +void __SIMD_fastpack20(const uint32_t *, __m128i *); +void __SIMD_fastpack21(const uint32_t *, __m128i *); +void __SIMD_fastpack22(const uint32_t *, __m128i *); +void __SIMD_fastpack23(const uint32_t *, __m128i *); +void __SIMD_fastpack24(const uint32_t *, __m128i *); +void __SIMD_fastpack25(const uint32_t *, __m128i *); +void __SIMD_fastpack26(const uint32_t *, __m128i *); +void __SIMD_fastpack27(const uint32_t *, __m128i *); +void __SIMD_fastpack28(const uint32_t *, __m128i *); +void __SIMD_fastpack29(const uint32_t *, __m128i *); +void __SIMD_fastpack30(const uint32_t *, __m128i *); +void __SIMD_fastpack31(const uint32_t *, __m128i *); +void __SIMD_fastpack32(const uint32_t *, __m128i *); + +} // namespace SIMDCompressionLib + +#endif /* SIMDCompressionAndIntersection_SIMDBITPACKING_H_ */ diff --git a/include/SIMDCompressionAndIntersection/simdbitpackinghelpers.h b/include/SIMDCompressionAndIntersection/simdbitpackinghelpers.h new file mode 100644 index 0000000..27c66fe --- /dev/null +++ b/include/SIMDCompressionAndIntersection/simdbitpackinghelpers.h @@ -0,0 +1,1156 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. 
+ * + * (c) Leonid Boytsov, Nathan Kurz and Daniel Lemire + */ + +#ifndef SIMDCompressionAndIntersection_SIMD_BITPACKING_HELPERS_H_ +#define SIMDCompressionAndIntersection_SIMD_BITPACKING_HELPERS_H_ + +#include "common.h" +#include "simdbitpacking.h" +#include "usimdbitpacking.h" +#include "simdintegratedbitpacking.h" +#include "delta.h" +#include "util.h" + +namespace SIMDCompressionLib { + +const size_t SIMDBlockSize = 128; + +void SIMD_nullunpacker32(const __m128i *__restrict__, + uint32_t *__restrict__ out) { + memset(out, 0, 32 * 4 * 4); +} +void uSIMD_nullunpacker32(const __m128i *__restrict__, + uint32_t *__restrict__ out) { + memset(out, 0, 32 * 4 * 4); +} + +void simdunpack(const __m128i *__restrict__ in, uint32_t *__restrict__ out, + const uint32_t bit) { + switch (bit) { + case 0: + SIMD_nullunpacker32(in, out); + return; + + case 1: + __SIMD_fastunpack1(in, out); + return; + + case 2: + __SIMD_fastunpack2(in, out); + return; + + case 3: + __SIMD_fastunpack3(in, out); + return; + + case 4: + __SIMD_fastunpack4(in, out); + return; + + case 5: + __SIMD_fastunpack5(in, out); + return; + + case 6: + __SIMD_fastunpack6(in, out); + return; + + case 7: + __SIMD_fastunpack7(in, out); + return; + + case 8: + __SIMD_fastunpack8(in, out); + return; + + case 9: + __SIMD_fastunpack9(in, out); + return; + + case 10: + __SIMD_fastunpack10(in, out); + return; + + case 11: + __SIMD_fastunpack11(in, out); + return; + + case 12: + __SIMD_fastunpack12(in, out); + return; + + case 13: + __SIMD_fastunpack13(in, out); + return; + + case 14: + __SIMD_fastunpack14(in, out); + return; + + case 15: + __SIMD_fastunpack15(in, out); + return; + + case 16: + __SIMD_fastunpack16(in, out); + return; + + case 17: + __SIMD_fastunpack17(in, out); + return; + + case 18: + __SIMD_fastunpack18(in, out); + return; + + case 19: + __SIMD_fastunpack19(in, out); + return; + + case 20: + __SIMD_fastunpack20(in, out); + return; + + case 21: + __SIMD_fastunpack21(in, out); + return; + + case 22: + __SIMD_fastunpack22(in, out); + return; + + case 23: + __SIMD_fastunpack23(in, out); + return; + + case 24: + __SIMD_fastunpack24(in, out); + return; + + case 25: + __SIMD_fastunpack25(in, out); + return; + + case 26: + __SIMD_fastunpack26(in, out); + return; + + case 27: + __SIMD_fastunpack27(in, out); + return; + + case 28: + __SIMD_fastunpack28(in, out); + return; + + case 29: + __SIMD_fastunpack29(in, out); + return; + + case 30: + __SIMD_fastunpack30(in, out); + return; + + case 31: + __SIMD_fastunpack31(in, out); + return; + + case 32: + __SIMD_fastunpack32(in, out); + return; + + default: + break; + } + throw std::logic_error("number of bits is unsupported"); +} + +/*assumes that integers fit in the prescribed number of bits*/ +void simdpackwithoutmask(const uint32_t *__restrict__ in, + __m128i *__restrict__ out, const uint32_t bit) { + switch (bit) { + case 0: + return; + + case 1: + __SIMD_fastpackwithoutmask1(in, out); + return; + + case 2: + __SIMD_fastpackwithoutmask2(in, out); + return; + + case 3: + __SIMD_fastpackwithoutmask3(in, out); + return; + + case 4: + __SIMD_fastpackwithoutmask4(in, out); + return; + + case 5: + __SIMD_fastpackwithoutmask5(in, out); + return; + + case 6: + __SIMD_fastpackwithoutmask6(in, out); + return; + + case 7: + __SIMD_fastpackwithoutmask7(in, out); + return; + + case 8: + __SIMD_fastpackwithoutmask8(in, out); + return; + + case 9: + __SIMD_fastpackwithoutmask9(in, out); + return; + + case 10: + __SIMD_fastpackwithoutmask10(in, out); + return; + + case 11: + 
__SIMD_fastpackwithoutmask11(in, out); + return; + + case 12: + __SIMD_fastpackwithoutmask12(in, out); + return; + + case 13: + __SIMD_fastpackwithoutmask13(in, out); + return; + + case 14: + __SIMD_fastpackwithoutmask14(in, out); + return; + + case 15: + __SIMD_fastpackwithoutmask15(in, out); + return; + + case 16: + __SIMD_fastpackwithoutmask16(in, out); + return; + + case 17: + __SIMD_fastpackwithoutmask17(in, out); + return; + + case 18: + __SIMD_fastpackwithoutmask18(in, out); + return; + + case 19: + __SIMD_fastpackwithoutmask19(in, out); + return; + + case 20: + __SIMD_fastpackwithoutmask20(in, out); + return; + + case 21: + __SIMD_fastpackwithoutmask21(in, out); + return; + + case 22: + __SIMD_fastpackwithoutmask22(in, out); + return; + + case 23: + __SIMD_fastpackwithoutmask23(in, out); + return; + + case 24: + __SIMD_fastpackwithoutmask24(in, out); + return; + + case 25: + __SIMD_fastpackwithoutmask25(in, out); + return; + + case 26: + __SIMD_fastpackwithoutmask26(in, out); + return; + + case 27: + __SIMD_fastpackwithoutmask27(in, out); + return; + + case 28: + __SIMD_fastpackwithoutmask28(in, out); + return; + + case 29: + __SIMD_fastpackwithoutmask29(in, out); + return; + + case 30: + __SIMD_fastpackwithoutmask30(in, out); + return; + + case 31: + __SIMD_fastpackwithoutmask31(in, out); + return; + + case 32: + __SIMD_fastpackwithoutmask32(in, out); + return; + + default: + break; + } + throw std::logic_error("number of bits is unsupported"); +} + +/*assumes that integers fit in the prescribed number of bits*/ +void simdpack(const uint32_t *__restrict__ in, __m128i *__restrict__ out, + const uint32_t bit) { + switch (bit) { + case 0: + return; + + case 1: + __SIMD_fastpack1(in, out); + return; + + case 2: + __SIMD_fastpack2(in, out); + return; + + case 3: + __SIMD_fastpack3(in, out); + return; + + case 4: + __SIMD_fastpack4(in, out); + return; + + case 5: + __SIMD_fastpack5(in, out); + return; + + case 6: + __SIMD_fastpack6(in, out); + return; + + case 7: + __SIMD_fastpack7(in, out); + return; + + case 8: + __SIMD_fastpack8(in, out); + return; + + case 9: + __SIMD_fastpack9(in, out); + return; + + case 10: + __SIMD_fastpack10(in, out); + return; + + case 11: + __SIMD_fastpack11(in, out); + return; + + case 12: + __SIMD_fastpack12(in, out); + return; + + case 13: + __SIMD_fastpack13(in, out); + return; + + case 14: + __SIMD_fastpack14(in, out); + return; + + case 15: + __SIMD_fastpack15(in, out); + return; + + case 16: + __SIMD_fastpack16(in, out); + return; + + case 17: + __SIMD_fastpack17(in, out); + return; + + case 18: + __SIMD_fastpack18(in, out); + return; + + case 19: + __SIMD_fastpack19(in, out); + return; + + case 20: + __SIMD_fastpack20(in, out); + return; + + case 21: + __SIMD_fastpack21(in, out); + return; + + case 22: + __SIMD_fastpack22(in, out); + return; + + case 23: + __SIMD_fastpack23(in, out); + return; + + case 24: + __SIMD_fastpack24(in, out); + return; + + case 25: + __SIMD_fastpack25(in, out); + return; + + case 26: + __SIMD_fastpack26(in, out); + return; + + case 27: + __SIMD_fastpack27(in, out); + return; + + case 28: + __SIMD_fastpack28(in, out); + return; + + case 29: + __SIMD_fastpack29(in, out); + return; + + case 30: + __SIMD_fastpack30(in, out); + return; + + case 31: + __SIMD_fastpack31(in, out); + return; + + case 32: + __SIMD_fastpack32(in, out); + return; + + default: + break; + } + throw std::logic_error("number of bits is unsupported"); +} + +void usimdunpack(const __m128i *__restrict__ in, uint32_t *__restrict__ out, + const uint32_t bit) { + 
switch (bit) { + case 0: + uSIMD_nullunpacker32(in, out); + return; + + case 1: + __uSIMD_fastunpack1(in, out); + return; + + case 2: + __uSIMD_fastunpack2(in, out); + return; + + case 3: + __uSIMD_fastunpack3(in, out); + return; + + case 4: + __uSIMD_fastunpack4(in, out); + return; + + case 5: + __uSIMD_fastunpack5(in, out); + return; + + case 6: + __uSIMD_fastunpack6(in, out); + return; + + case 7: + __uSIMD_fastunpack7(in, out); + return; + + case 8: + __uSIMD_fastunpack8(in, out); + return; + + case 9: + __uSIMD_fastunpack9(in, out); + return; + + case 10: + __uSIMD_fastunpack10(in, out); + return; + + case 11: + __uSIMD_fastunpack11(in, out); + return; + + case 12: + __uSIMD_fastunpack12(in, out); + return; + + case 13: + __uSIMD_fastunpack13(in, out); + return; + + case 14: + __uSIMD_fastunpack14(in, out); + return; + + case 15: + __uSIMD_fastunpack15(in, out); + return; + + case 16: + __uSIMD_fastunpack16(in, out); + return; + + case 17: + __uSIMD_fastunpack17(in, out); + return; + + case 18: + __uSIMD_fastunpack18(in, out); + return; + + case 19: + __uSIMD_fastunpack19(in, out); + return; + + case 20: + __uSIMD_fastunpack20(in, out); + return; + + case 21: + __uSIMD_fastunpack21(in, out); + return; + + case 22: + __uSIMD_fastunpack22(in, out); + return; + + case 23: + __uSIMD_fastunpack23(in, out); + return; + + case 24: + __uSIMD_fastunpack24(in, out); + return; + + case 25: + __uSIMD_fastunpack25(in, out); + return; + + case 26: + __uSIMD_fastunpack26(in, out); + return; + + case 27: + __uSIMD_fastunpack27(in, out); + return; + + case 28: + __uSIMD_fastunpack28(in, out); + return; + + case 29: + __uSIMD_fastunpack29(in, out); + return; + + case 30: + __uSIMD_fastunpack30(in, out); + return; + + case 31: + __uSIMD_fastunpack31(in, out); + return; + + case 32: + __uSIMD_fastunpack32(in, out); + return; + + default: + break; + } + throw std::logic_error("number of bits is unsupported"); +} + +/*assumes that integers fit in the prescribed number of bits*/ +void usimdpackwithoutmask(const uint32_t *__restrict__ in, + __m128i *__restrict__ out, const uint32_t bit) { + switch (bit) { + case 0: + return; + + case 1: + __uSIMD_fastpackwithoutmask1(in, out); + return; + + case 2: + __uSIMD_fastpackwithoutmask2(in, out); + return; + + case 3: + __uSIMD_fastpackwithoutmask3(in, out); + return; + + case 4: + __uSIMD_fastpackwithoutmask4(in, out); + return; + + case 5: + __uSIMD_fastpackwithoutmask5(in, out); + return; + + case 6: + __uSIMD_fastpackwithoutmask6(in, out); + return; + + case 7: + __uSIMD_fastpackwithoutmask7(in, out); + return; + + case 8: + __uSIMD_fastpackwithoutmask8(in, out); + return; + + case 9: + __uSIMD_fastpackwithoutmask9(in, out); + return; + + case 10: + __uSIMD_fastpackwithoutmask10(in, out); + return; + + case 11: + __uSIMD_fastpackwithoutmask11(in, out); + return; + + case 12: + __uSIMD_fastpackwithoutmask12(in, out); + return; + + case 13: + __uSIMD_fastpackwithoutmask13(in, out); + return; + + case 14: + __uSIMD_fastpackwithoutmask14(in, out); + return; + + case 15: + __uSIMD_fastpackwithoutmask15(in, out); + return; + + case 16: + __uSIMD_fastpackwithoutmask16(in, out); + return; + + case 17: + __uSIMD_fastpackwithoutmask17(in, out); + return; + + case 18: + __uSIMD_fastpackwithoutmask18(in, out); + return; + + case 19: + __uSIMD_fastpackwithoutmask19(in, out); + return; + + case 20: + __uSIMD_fastpackwithoutmask20(in, out); + return; + + case 21: + __uSIMD_fastpackwithoutmask21(in, out); + return; + + case 22: + __uSIMD_fastpackwithoutmask22(in, out); + 
return; + + case 23: + __uSIMD_fastpackwithoutmask23(in, out); + return; + + case 24: + __uSIMD_fastpackwithoutmask24(in, out); + return; + + case 25: + __uSIMD_fastpackwithoutmask25(in, out); + return; + + case 26: + __uSIMD_fastpackwithoutmask26(in, out); + return; + + case 27: + __uSIMD_fastpackwithoutmask27(in, out); + return; + + case 28: + __uSIMD_fastpackwithoutmask28(in, out); + return; + + case 29: + __uSIMD_fastpackwithoutmask29(in, out); + return; + + case 30: + __uSIMD_fastpackwithoutmask30(in, out); + return; + + case 31: + __uSIMD_fastpackwithoutmask31(in, out); + return; + + case 32: + __uSIMD_fastpackwithoutmask32(in, out); + return; + + default: + break; + } + throw std::logic_error("number of bits is unsupported"); +} + +void usimdpack(const uint32_t *__restrict__ in, __m128i *__restrict__ out, + const uint32_t bit) { + switch (bit) { + case 0: + return; + + case 1: + __uSIMD_fastpack1(in, out); + return; + + case 2: + __uSIMD_fastpack2(in, out); + return; + + case 3: + __uSIMD_fastpack3(in, out); + return; + + case 4: + __uSIMD_fastpack4(in, out); + return; + + case 5: + __uSIMD_fastpack5(in, out); + return; + + case 6: + __uSIMD_fastpack6(in, out); + return; + + case 7: + __uSIMD_fastpack7(in, out); + return; + + case 8: + __uSIMD_fastpack8(in, out); + return; + + case 9: + __uSIMD_fastpack9(in, out); + return; + + case 10: + __uSIMD_fastpack10(in, out); + return; + + case 11: + __uSIMD_fastpack11(in, out); + return; + + case 12: + __uSIMD_fastpack12(in, out); + return; + + case 13: + __uSIMD_fastpack13(in, out); + return; + + case 14: + __uSIMD_fastpack14(in, out); + return; + + case 15: + __uSIMD_fastpack15(in, out); + return; + + case 16: + __uSIMD_fastpack16(in, out); + return; + + case 17: + __uSIMD_fastpack17(in, out); + return; + + case 18: + __uSIMD_fastpack18(in, out); + return; + + case 19: + __uSIMD_fastpack19(in, out); + return; + + case 20: + __uSIMD_fastpack20(in, out); + return; + + case 21: + __uSIMD_fastpack21(in, out); + return; + + case 22: + __uSIMD_fastpack22(in, out); + return; + + case 23: + __uSIMD_fastpack23(in, out); + return; + + case 24: + __uSIMD_fastpack24(in, out); + return; + + case 25: + __uSIMD_fastpack25(in, out); + return; + + case 26: + __uSIMD_fastpack26(in, out); + return; + + case 27: + __uSIMD_fastpack27(in, out); + return; + + case 28: + __uSIMD_fastpack28(in, out); + return; + + case 29: + __uSIMD_fastpack29(in, out); + return; + + case 30: + __uSIMD_fastpack30(in, out); + return; + + case 31: + __uSIMD_fastpack31(in, out); + return; + + case 32: + __uSIMD_fastpack32(in, out); + return; + + default: + break; + } + throw std::logic_error("number of bits is unsupported"); +} + +namespace ArrayDispatch { +typedef void (*unpackingfunction)(const __m128i *, uint32_t *); +typedef void (*packingfunction)(const uint32_t *, __m128i *); + +constexpr unpackingfunction unpack[33] = { + SIMD_nullunpacker32, __SIMD_fastunpack1, __SIMD_fastunpack2, + __SIMD_fastunpack3, __SIMD_fastunpack4, __SIMD_fastunpack5, + __SIMD_fastunpack6, __SIMD_fastunpack7, __SIMD_fastunpack8, + __SIMD_fastunpack9, __SIMD_fastunpack10, __SIMD_fastunpack11, + __SIMD_fastunpack12, __SIMD_fastunpack13, __SIMD_fastunpack14, + __SIMD_fastunpack15, __SIMD_fastunpack16, __SIMD_fastunpack17, + __SIMD_fastunpack18, __SIMD_fastunpack19, __SIMD_fastunpack20, + __SIMD_fastunpack21, __SIMD_fastunpack22, __SIMD_fastunpack23, + __SIMD_fastunpack24, __SIMD_fastunpack25, __SIMD_fastunpack26, + __SIMD_fastunpack27, __SIMD_fastunpack28, __SIMD_fastunpack29, + __SIMD_fastunpack30, 
__SIMD_fastunpack31, __SIMD_fastunpack32}; + +ALWAYS_INLINE +void SIMDunpack(const __m128i *__restrict__ in, uint32_t *__restrict__ out, + const uint32_t bit) { + return unpack[bit](in, out); +} + +constexpr packingfunction packwithoutmask[33] = { + __SIMD_fastpackwithoutmask0, __SIMD_fastpackwithoutmask1, + __SIMD_fastpackwithoutmask2, __SIMD_fastpackwithoutmask3, + __SIMD_fastpackwithoutmask4, __SIMD_fastpackwithoutmask5, + __SIMD_fastpackwithoutmask6, __SIMD_fastpackwithoutmask7, + __SIMD_fastpackwithoutmask8, __SIMD_fastpackwithoutmask9, + __SIMD_fastpackwithoutmask10, __SIMD_fastpackwithoutmask11, + __SIMD_fastpackwithoutmask12, __SIMD_fastpackwithoutmask13, + __SIMD_fastpackwithoutmask14, __SIMD_fastpackwithoutmask15, + __SIMD_fastpackwithoutmask16, __SIMD_fastpackwithoutmask17, + __SIMD_fastpackwithoutmask18, __SIMD_fastpackwithoutmask19, + __SIMD_fastpackwithoutmask20, __SIMD_fastpackwithoutmask21, + __SIMD_fastpackwithoutmask22, __SIMD_fastpackwithoutmask23, + __SIMD_fastpackwithoutmask24, __SIMD_fastpackwithoutmask25, + __SIMD_fastpackwithoutmask26, __SIMD_fastpackwithoutmask27, + __SIMD_fastpackwithoutmask28, __SIMD_fastpackwithoutmask29, + __SIMD_fastpackwithoutmask30, __SIMD_fastpackwithoutmask31, + __SIMD_fastpackwithoutmask32}; + +ALWAYS_INLINE +void SIMDpackwithoutmask(const uint32_t *__restrict__ in, + __m128i *__restrict__ out, const uint32_t bit) { + packwithoutmask[bit](in, out); +} +constexpr packingfunction pack[33] = { + __SIMD_fastpack0, __SIMD_fastpack1, __SIMD_fastpack2, __SIMD_fastpack3, + __SIMD_fastpack4, __SIMD_fastpack5, __SIMD_fastpack6, __SIMD_fastpack7, + __SIMD_fastpack8, __SIMD_fastpack9, __SIMD_fastpack10, __SIMD_fastpack11, + __SIMD_fastpack12, __SIMD_fastpack13, __SIMD_fastpack14, __SIMD_fastpack15, + __SIMD_fastpack16, __SIMD_fastpack17, __SIMD_fastpack18, __SIMD_fastpack19, + __SIMD_fastpack20, __SIMD_fastpack21, __SIMD_fastpack22, __SIMD_fastpack23, + __SIMD_fastpack24, __SIMD_fastpack25, __SIMD_fastpack26, __SIMD_fastpack27, + __SIMD_fastpack28, __SIMD_fastpack29, __SIMD_fastpack30, __SIMD_fastpack31, + __SIMD_fastpack32}; + +ALWAYS_INLINE +void SIMDpack(const uint32_t *__restrict__ in, __m128i *__restrict__ out, + const uint32_t bit) { + pack[bit](in, out); +} + +constexpr unpackingfunction Uunpack[33] = { + uSIMD_nullunpacker32, __uSIMD_fastunpack1, __uSIMD_fastunpack2, + __uSIMD_fastunpack3, __uSIMD_fastunpack4, __uSIMD_fastunpack5, + __uSIMD_fastunpack6, __uSIMD_fastunpack7, __uSIMD_fastunpack8, + __uSIMD_fastunpack9, __uSIMD_fastunpack10, __uSIMD_fastunpack11, + __uSIMD_fastunpack12, __uSIMD_fastunpack13, __uSIMD_fastunpack14, + __uSIMD_fastunpack15, __uSIMD_fastunpack16, __uSIMD_fastunpack17, + __uSIMD_fastunpack18, __uSIMD_fastunpack19, __uSIMD_fastunpack20, + __uSIMD_fastunpack21, __uSIMD_fastunpack22, __uSIMD_fastunpack23, + __uSIMD_fastunpack24, __uSIMD_fastunpack25, __uSIMD_fastunpack26, + __uSIMD_fastunpack27, __uSIMD_fastunpack28, __uSIMD_fastunpack29, + __uSIMD_fastunpack30, __uSIMD_fastunpack31, __uSIMD_fastunpack32}; + +ALWAYS_INLINE +void uSIMDunpack(const __m128i *__restrict__ in, uint32_t *__restrict__ out, + const uint32_t bit) { + return Uunpack[bit](in, out); +} + +constexpr packingfunction Upackwithoutmask[33] = { + __uSIMD_fastpackwithoutmask0, __uSIMD_fastpackwithoutmask1, + __uSIMD_fastpackwithoutmask2, __uSIMD_fastpackwithoutmask3, + __uSIMD_fastpackwithoutmask4, __uSIMD_fastpackwithoutmask5, + __uSIMD_fastpackwithoutmask6, __uSIMD_fastpackwithoutmask7, + __uSIMD_fastpackwithoutmask8, __uSIMD_fastpackwithoutmask9, + 
__uSIMD_fastpackwithoutmask10, __uSIMD_fastpackwithoutmask11, + __uSIMD_fastpackwithoutmask12, __uSIMD_fastpackwithoutmask13, + __uSIMD_fastpackwithoutmask14, __uSIMD_fastpackwithoutmask15, + __uSIMD_fastpackwithoutmask16, __uSIMD_fastpackwithoutmask17, + __uSIMD_fastpackwithoutmask18, __uSIMD_fastpackwithoutmask19, + __uSIMD_fastpackwithoutmask20, __uSIMD_fastpackwithoutmask21, + __uSIMD_fastpackwithoutmask22, __uSIMD_fastpackwithoutmask23, + __uSIMD_fastpackwithoutmask24, __uSIMD_fastpackwithoutmask25, + __uSIMD_fastpackwithoutmask26, __uSIMD_fastpackwithoutmask27, + __uSIMD_fastpackwithoutmask28, __uSIMD_fastpackwithoutmask29, + __uSIMD_fastpackwithoutmask30, __uSIMD_fastpackwithoutmask31, + __uSIMD_fastpackwithoutmask32}; + +ALWAYS_INLINE +void uSIMDpackwithoutmask(const uint32_t *__restrict__ in, + __m128i *__restrict__ out, const uint32_t bit) { + Upackwithoutmask[bit](in, out); +} +constexpr packingfunction Upack[33] = { + __uSIMD_fastpack0, __uSIMD_fastpack1, __uSIMD_fastpack2, + __uSIMD_fastpack3, __uSIMD_fastpack4, __uSIMD_fastpack5, + __uSIMD_fastpack6, __uSIMD_fastpack7, __uSIMD_fastpack8, + __uSIMD_fastpack9, __uSIMD_fastpack10, __uSIMD_fastpack11, + __uSIMD_fastpack12, __uSIMD_fastpack13, __uSIMD_fastpack14, + __uSIMD_fastpack15, __uSIMD_fastpack16, __uSIMD_fastpack17, + __uSIMD_fastpack18, __uSIMD_fastpack19, __uSIMD_fastpack20, + __uSIMD_fastpack21, __uSIMD_fastpack22, __uSIMD_fastpack23, + __uSIMD_fastpack24, __uSIMD_fastpack25, __uSIMD_fastpack26, + __uSIMD_fastpack27, __uSIMD_fastpack28, __uSIMD_fastpack29, + __uSIMD_fastpack30, __uSIMD_fastpack31, __uSIMD_fastpack32}; + +ALWAYS_INLINE +void uSIMDpack(const uint32_t *__restrict__ in, __m128i *__restrict__ out, + const uint32_t bit) { + Upack[bit](in, out); +} +} + +template struct SIMDBitPackingHelpers { + + static void pack(uint32_t *in, const size_t Qty, uint32_t *out, + const uint32_t bit) { + if (Qty % SIMDBlockSize) { + throw std::logic_error("Incorrect # of entries."); + } + if (SIMDBlockSize % 32) { + throw std::logic_error("Incorrect SIMDBlockSize."); + } + __m128i initoffset = _mm_set1_epi32(0); + + for (size_t k = 0; k < Qty / SIMDBlockSize; ++k) { + __m128i nextoffset = MM_LOAD_SI_128(reinterpret_cast<__m128i *>( + (in + k * SIMDBlockSize + SIMDBlockSize - 4))); + + if (bit < 32) + SIMDDeltaProcessor::runDelta( + initoffset, in + k * SIMDBlockSize); + simdpack(in + k * SIMDBlockSize, + reinterpret_cast<__m128i *>(out + SIMDBlockSize * k * bit / 32), + bit); + initoffset = nextoffset; + } + } + + static void unpack(const uint32_t *in, size_t Qty, uint32_t *out, + const uint32_t bit) { + if (Qty % SIMDBlockSize) { + throw std::logic_error("Incorrect # of entries."); + } + if (Qty % SIMDBlockSize) { + throw std::logic_error("Incorrect # of entries."); + } + __m128i initoffset = _mm_set1_epi32(0); + + for (size_t k = 0; k < Qty / SIMDBlockSize; ++k) { + simdunpack( + reinterpret_cast(in + SIMDBlockSize * k * bit / 32), + out + k * SIMDBlockSize, bit); + if (bit < 32) { + initoffset = + SIMDDeltaProcessor::runPrefixSum( + initoffset, out + k * SIMDBlockSize); + } + } + } + + static void packwithoutmask(uint32_t *in, const size_t Qty, uint32_t *out, + const uint32_t bit) { + if (Qty % SIMDBlockSize) { + throw std::logic_error("Incorrect # of entries."); + } + if (Qty % SIMDBlockSize) { + throw std::logic_error("Incorrect # of entries."); + } + __m128i initoffset = _mm_set1_epi32(0); + + for (size_t k = 0; k < Qty / SIMDBlockSize; ++k) { + __m128i nextoffset = MM_LOAD_SI_128(reinterpret_cast<__m128i *>( + (in + k * 
SIMDBlockSize + SIMDBlockSize - 4))); + if (bit < 32) + SIMDDeltaProcessor::runDelta( + initoffset, in + k * SIMDBlockSize); + simdpackwithoutmask( + in + k * SIMDBlockSize, + reinterpret_cast<__m128i *>(out + SIMDBlockSize * k * bit / 32), bit); + initoffset = nextoffset; + } + } + + static void ipack(const uint32_t *in, const size_t Qty, uint32_t *_out, + const uint32_t bit) { + if (Qty % SIMDBlockSize) { + throw std::logic_error("Incorrect # of entries."); + } + __m128i *out = reinterpret_cast<__m128i *>(_out); + __m128i initoffset = _mm_set1_epi32(0U); + ; + + for (size_t k = 0; k < Qty / SIMDBlockSize; ++k) { + SIMDipack(initoffset, in + k * SIMDBlockSize, out + k * bit, + bit); + initoffset = MM_LOAD_SI_128(reinterpret_cast( + in + k * SIMDBlockSize + SIMDBlockSize - 4)); + // memcpy(&initoffset, (in+k*SIMDBlockSize+SIMDBlockSize - 4), sizeof + // initoffset);// Daniel: memcpy looks like a hack + } + } + + static void ipackwithoutmask(const uint32_t *in, const size_t Qty, + uint32_t *_out, const uint32_t bit) { + if (Qty % SIMDBlockSize) { + throw std::logic_error("Incorrect # of entries."); + } + __m128i *out = reinterpret_cast<__m128i *>(_out); + __m128i initoffset = _mm_set1_epi32(0U); + ; + + for (size_t k = 0; k < Qty / SIMDBlockSize; ++k) { + SIMDipackwithoutmask(initoffset, in + k * SIMDBlockSize, + out + k * bit, bit); + initoffset = MM_LOAD_SI_128(reinterpret_cast( + in + k * SIMDBlockSize + SIMDBlockSize - 4)); + // memcpy(&initoffset, (in+k*SIMDBlockSize+SIMDBlockSize - 4), sizeof + // initoffset);// Daniel: memcpy looks like a hack + } + } + + static void iunpack(const uint32_t *_in, size_t Qty, uint32_t *out, + const uint32_t bit) { + if (Qty % SIMDBlockSize) { + throw std::logic_error("Incorrect # of entries."); + } + const __m128i *in = reinterpret_cast(_in); + + __m128i initoffset = _mm_set1_epi32(0U); + ; + + for (size_t k = 0; k < Qty / SIMDBlockSize; ++k) { + initoffset = SIMDiunpack(initoffset, in + k * bit, + out + k * SIMDBlockSize, bit); + } + } + + // this is not expected to be useful, only for benchmarking + static void ipatchedunpack(const uint32_t *_in, size_t Qty, uint32_t *out, + const uint32_t bit) { + if (Qty % SIMDBlockSize) { + throw std::logic_error("Incorrect # of entries."); + } + const __m128i *in = reinterpret_cast(_in); + + __m128i initoffset = _mm_set1_epi32(0U); + ; + + for (size_t k = 0; k < Qty / SIMDBlockSize; ++k) { + initoffset = SIMDipatchedunpack( + initoffset, in + k * bit, out + k * SIMDBlockSize, + reinterpret_cast(out + k * SIMDBlockSize), bit); + } + } + + static void CheckMaxDiff(const std::vector &refdata, unsigned bit) { + for (size_t i = 4; i < refdata.size(); ++i) { + if (gccbits(refdata[i] - refdata[i - 4]) > bit) + throw std::runtime_error("bug"); + } + } +}; + +} // namespace SIMDCompressionLib + +#endif diff --git a/include/SIMDCompressionAndIntersection/simdfastpfor.h b/include/SIMDCompressionAndIntersection/simdfastpfor.h new file mode 100644 index 0000000..a0d5488 --- /dev/null +++ b/include/SIMDCompressionAndIntersection/simdfastpfor.h @@ -0,0 +1,279 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. 
+ * + * (c) Daniel Lemire, http://lemire.me/en/ + */ +#ifndef SIMDCompressionAndIntersection_SIMDFASTPFOR_H_ +#define SIMDCompressionAndIntersection_SIMDFASTPFOR_H_ + +#include "common.h" +#include "codecs.h" +#include "sortedbitpacking.h" +#include "simdbitpacking.h" +#include "util.h" +#include "delta.h" + +namespace SIMDCompressionLib { + +/** + * SIMDFastPFor + * + * Reference and documentation: + * + * Daniel Lemire and Leonid Boytsov, Decoding billions of integers per second + * through vectorization + * http://arxiv.org/abs/1209.2137 + * + * Note that this implementation is slightly improved compared to the version + * presented + * in the paper. + * + * Designed by D. Lemire with ideas from Leonid Boytsov. This scheme is NOT + * patented. + * + */ +template +class SIMDFastPFor : public IntegerCODEC { +public: + /** + * ps (page size) should be a multiple of BlockSize, any "large" + * value should do. + */ + SIMDFastPFor(uint32_t ps = 65536) + : PageSize(ps), bitsPageSize(gccbits(PageSize)), bpacker(), + bytescontainer(PageSize + 3 * PageSize / BlockSize) { + assert(ps / BlockSize * BlockSize == ps); + assert(gccbits(BlockSizeInUnitsOfPackSize * PACKSIZE - 1) <= 8); + } + enum { + PACKSIZE = 32, + overheadofeachexcept = 8, + overheadduetobits = 8, + overheadduetonmbrexcept = 8, + BlockSize = BlockSizeInUnitsOfPackSize * PACKSIZE + }; + + const uint32_t PageSize; + const uint32_t bitsPageSize; + SortedBitPacker bpacker; + vector bytescontainer; + + const uint32_t *decodeArray(const uint32_t *in, const size_t length, + uint32_t *out, size_t &nvalue) { +#ifdef USE_ALIGNED + if (needPaddingTo128Bits(out) or needPaddingTo128Bits(in)) + throw std::runtime_error( + "alignment issue: pointers should be aligned on 128-bit boundaries"); +#endif + const uint32_t *const initin(in); + const size_t mynvalue = *in; + ++in; + if (mynvalue > nvalue) + throw NotEnoughStorage(mynvalue); + nvalue = mynvalue; + const uint32_t *const finalout(out + nvalue); + __m128i prev = _mm_set1_epi32(0); + while (out != finalout) { + size_t thisnvalue(0); + size_t thissize = static_cast( + finalout > PageSize + out ? PageSize : (finalout - out)); + + __decodeArray(in, thisnvalue, out, thissize, prev); + in += thisnvalue; + out += thissize; + } + assert(initin + length >= in); + bpacker.reset(); // if you don't do this, the codec has a "memory". + return in; + } + + /** + * If you save the output and recover it in memory, you are + * responsible to ensure that the alignment is preserved. + * + * The input size (length) should be a multiple of + * BlockSizeInUnitsOfPackSize * PACKSIZE. (This was done + * to simplify slightly the implementation.) + */ + void encodeArray(uint32_t *in, const size_t length, uint32_t *out, + size_t &nvalue) { +#ifdef USE_ALIGNED + if (needPaddingTo128Bits(out) or needPaddingTo128Bits(in)) + throw std::runtime_error( + "alignment issue: pointers should be aligned on 128-bit boundaries"); +#endif + checkifdivisibleby(length, BlockSize); + const uint32_t *const initout(out); + const uint32_t *const finalin(in + length); + + *out++ = static_cast(length); + const size_t oldnvalue = nvalue; + nvalue = 1; + __m128i prev = _mm_set1_epi32(0); + while (in != finalin) { + size_t thissize = static_cast( + finalin > PageSize + in ? 
PageSize : (finalin - in)); + size_t thisnvalue(0); + __encodeArray(in, thissize, out, thisnvalue, prev); + nvalue += thisnvalue; + out += thisnvalue; + in += thissize; + } + assert(out == nvalue + initout); + if (oldnvalue < nvalue) + std::cerr + << "It is possible we have a buffer overrun. You reported having allocated " + << oldnvalue * sizeof(uint32_t) + << " bytes for the compressed data but we needed " + << nvalue * sizeof(uint32_t) + << " bytes. Please increase the available memory" + " for compressed data or check the value of the last parameter provided " + " to the encodeArray method." << std::endl; + bpacker.reset(); // if you don't do this, the buffer has a memory + } + + void getBestBFromData(const uint32_t *in, uint8_t &bestb, + uint8_t &bestcexcept, uint8_t &maxb) { + uint32_t freqs[33]; + for (uint32_t k = 0; k <= 32; ++k) + freqs[k] = 0; + for (uint32_t k = 0; k < BlockSize; ++k) { + freqs[asmbits(in[k])]++; + } + bestb = 32; + while (freqs[bestb] == 0) + bestb--; + maxb = bestb; + uint32_t bestcost = bestb * BlockSize; + uint32_t cexcept = 0; + bestcexcept = static_cast(cexcept); + for (uint32_t b = bestb - 1; b < 32; --b) { + cexcept += freqs[b + 1]; + uint32_t thiscost = cexcept * overheadofeachexcept + + cexcept * (maxb - b) + b * BlockSize + + 8; // the extra 8 is the cost of storing maxbits + if (bestb - b == 1) + thiscost -= cexcept; + if (thiscost < bestcost) { + bestcost = thiscost; + bestb = static_cast(b); + bestcexcept = static_cast(cexcept); + } + } + } + + void + __encodeArray(uint32_t *in, const size_t length, uint32_t *out, + size_t &nvalue, + __m128i &prev) { // = _mm_set1_epi32 (0);// for delta + uint32_t *const initout = out; // keep track of this + checkifdivisibleby(length, BlockSize); + uint32_t *const headerout = out++; // keep track of this + bpacker.clear(); + uint8_t *bc = bytescontainer.data(); +#ifdef USE_ALIGNED + out = padTo128bits(out); + if (needPaddingTo128Bits(in)) + throw std::runtime_error("alignment bug"); +#endif + for (const uint32_t *const final = in + length; (in + BlockSize <= final); + in += BlockSize) { + uint8_t bestb, bestcexcept, maxb; + + const __m128i nextprev = + MM_LOAD_SI_128(reinterpret_cast(in + BlockSize - 4)); + SIMDDeltaProcessor::runDelta(prev, in); + prev = nextprev; + + getBestBFromData(in, bestb, bestcexcept, maxb); + *bc++ = bestb; + *bc++ = bestcexcept; + if (bestcexcept > 0) { + *bc++ = maxb; + bpacker.ensureCapacity(maxb - bestb - 1, bestcexcept); + const uint32_t maxval = 1U << bestb; + for (uint32_t k = 0; k < BlockSize; ++k) { + if (in[k] >= maxval) { + bpacker.directAppend(maxb - bestb - 1, in[k] >> bestb); + *bc++ = static_cast(k); + } + } + } + for (int k = 0; k < BlockSize; k += 128) { + simdpack(in + k, reinterpret_cast<__m128i *>(out), bestb); + out += 4 * bestb; + } + } + headerout[0] = static_cast(out - headerout); + const uint32_t bytescontainersize = + static_cast(bc - bytescontainer.data()); + *(out++) = bytescontainersize; + memcpy(out, bytescontainer.data(), bytescontainersize); + out += (bytescontainersize + sizeof(uint32_t) - 1) / sizeof(uint32_t); + const uint32_t *const lastout = bpacker.write(out); + nvalue = lastout - initout; + } + + void __decodeArray(const uint32_t *in, size_t &length, uint32_t *out, + const size_t nvalue, __m128i &prev) { + const uint32_t *const initin = in; + const uint32_t *const headerin = in++; + const uint32_t wheremeta = headerin[0]; + const uint32_t *inexcept = headerin + wheremeta; + const uint32_t bytesize = *inexcept++; + const uint8_t *bytep = 
reinterpret_cast(inexcept); + + inexcept += (bytesize + sizeof(uint32_t) - 1) / sizeof(uint32_t); + inexcept = bpacker.read(inexcept); + length = inexcept - initin; + const uint32_t *unpackpointers[32 + 1]; + for (uint32_t k = 1; k <= 32; ++k) { + unpackpointers[k] = bpacker.get(k - 1); + } +#ifdef USE_ALIGNED + in = padTo128bits(in); + assert(!needPaddingTo128Bits(out)); +#endif + for (uint32_t run = 0; run < nvalue / BlockSize; ++run, out += BlockSize) { + const uint8_t b = *bytep++; + const uint8_t cexcept = *bytep++; + for (int k = 0; k < BlockSize; k += 128) { + if (arraydispatch) + simdunpack(reinterpret_cast(in), out + k, b); + else + ArrayDispatch::SIMDunpack(reinterpret_cast(in), + out + k, b); + in += 4 * b; + } + if (cexcept > 0) { + const uint8_t maxbits = *bytep++; + if (maxbits - b == 1) { + for (uint32_t k = 0; k < cexcept; ++k) { + const uint8_t pos = *(bytep++); + out[pos] |= static_cast(1) << b; + } + } else { + const uint32_t *vals = unpackpointers[maxbits - b]; + unpackpointers[maxbits - b] += cexcept; + for (uint32_t k = 0; k < cexcept; ++k) { + const uint8_t pos = *(bytep++); + out[pos] |= vals[k] << b; + } + } + } + prev = + SIMDDeltaProcessor::runPrefixSum(prev, out); + } + + assert(in == headerin + wheremeta); + } + + string name() const { return string("SIMDFastPFor") + DeltaHelper::name(); } +}; + +} // namespace SIMDCompressionLib + +#endif /* SIMDCompressionAndIntersection_SIMDFASTPFOR_H_ */ diff --git a/include/SIMDCompressionAndIntersection/simdintegratedbitpacking.h b/include/SIMDCompressionAndIntersection/simdintegratedbitpacking.h new file mode 100644 index 0000000..9493fae --- /dev/null +++ b/include/SIMDCompressionAndIntersection/simdintegratedbitpacking.h @@ -0,0 +1,895 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. + * + * (c) Leonid Boytsov, Nathan Kurz and Daniel Lemire + */ + +#ifndef SIMDCompressionAndIntersection_SIMD_INTEGRATED_BITPACKING_H +#define SIMDCompressionAndIntersection_SIMD_INTEGRATED_BITPACKING_H + +/** + * To avoid crazy dependencies, this header should not + * include any other header beside delta.h. 
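+ *
+ * What follows declares, for every bit width 0..32, the delta-integrated
+ * kernels iunpackN / ipatchedunpackN / ipackN / ipackwithoutmaskN (each
+ * templated on a DeltaHelper), the IntegratedArrayDispatch function-pointer
+ * tables indexed by bit width, and switch-based wrappers (SIMDiunpack,
+ * SIMDipatchedunpack, SIMDipackwithoutmask, SIMDipack) that select the
+ * kernel for a bit width known only at run time. A rough, hypothetical
+ * usage sketch, where D stands for some DeltaHelper type and in128, out
+ * and bit are placeholders:
+ *
+ *   __m128i offset = _mm_set1_epi32(0);
+ *   offset = SIMDiunpack<D>(offset, in128, out, bit); // one block of integers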
+ */ +#include "deltatemplates.h" + +namespace SIMDCompressionLib { + +template +__m128i iunpack0(__m128i, const __m128i *, uint32_t *); +template +__m128i ipatchedunpack0(__m128i, const __m128i *, uint32_t *, const __m128i *); +template void ipack0(__m128i, const uint32_t *, __m128i *); +template +void ipackwithoutmask0(__m128i, const uint32_t *, __m128i *); + +template +__m128i iunpack1(__m128i, const __m128i *, uint32_t *); +template +__m128i ipatchedunpack1(__m128i, const __m128i *, uint32_t *, const __m128i *); +template void ipack1(__m128i, const uint32_t *, __m128i *); +template +void ipackwithoutmask1(__m128i, const uint32_t *, __m128i *); + +template +__m128i iunpack2(__m128i, const __m128i *, uint32_t *); +template +__m128i ipatchedunpack2(__m128i, const __m128i *, uint32_t *, const __m128i *); +template void ipack2(__m128i, const uint32_t *, __m128i *); +template +void ipackwithoutmask2(__m128i, const uint32_t *, __m128i *); + +template +__m128i iunpack3(__m128i, const __m128i *, uint32_t *); +template +__m128i ipatchedunpack3(__m128i, const __m128i *, uint32_t *, const __m128i *); +template void ipack3(__m128i, const uint32_t *, __m128i *); +template +void ipackwithoutmask3(__m128i, const uint32_t *, __m128i *); + +template +__m128i iunpack4(__m128i, const __m128i *, uint32_t *); +template +__m128i ipatchedunpack4(__m128i, const __m128i *, uint32_t *, const __m128i *); +template void ipack4(__m128i, const uint32_t *, __m128i *); +template +void ipackwithoutmask4(__m128i, const uint32_t *, __m128i *); + +template +__m128i iunpack5(__m128i, const __m128i *, uint32_t *); +template +__m128i ipatchedunpack5(__m128i, const __m128i *, uint32_t *, const __m128i *); +template void ipack5(__m128i, const uint32_t *, __m128i *); +template +void ipackwithoutmask5(__m128i, const uint32_t *, __m128i *); + +template +__m128i iunpack6(__m128i, const __m128i *, uint32_t *); +template +__m128i ipatchedunpack6(__m128i, const __m128i *, uint32_t *, const __m128i *); +template void ipack6(__m128i, const uint32_t *, __m128i *); +template +void ipackwithoutmask6(__m128i, const uint32_t *, __m128i *); + +template +__m128i iunpack7(__m128i, const __m128i *, uint32_t *); +template +__m128i ipatchedunpack7(__m128i, const __m128i *, uint32_t *, const __m128i *); +template void ipack7(__m128i, const uint32_t *, __m128i *); +template +void ipackwithoutmask7(__m128i, const uint32_t *, __m128i *); + +template +__m128i iunpack8(__m128i, const __m128i *, uint32_t *); +template +__m128i ipatchedunpack8(__m128i, const __m128i *, uint32_t *, const __m128i *); +template void ipack8(__m128i, const uint32_t *, __m128i *); +template +void ipackwithoutmask8(__m128i, const uint32_t *, __m128i *); + +template +__m128i iunpack9(__m128i, const __m128i *, uint32_t *); +template +__m128i ipatchedunpack9(__m128i, const __m128i *, uint32_t *, const __m128i *); +template void ipack9(__m128i, const uint32_t *, __m128i *); +template +void ipackwithoutmask9(__m128i, const uint32_t *, __m128i *); + +template +__m128i iunpack10(__m128i, const __m128i *, uint32_t *); +template +__m128i ipatchedunpack10(__m128i, const __m128i *, uint32_t *, const __m128i *); +template void ipack10(__m128i, const uint32_t *, __m128i *); +template +void ipackwithoutmask10(__m128i, const uint32_t *, __m128i *); + +template +__m128i iunpack11(__m128i, const __m128i *, uint32_t *); +template +__m128i ipatchedunpack11(__m128i, const __m128i *, uint32_t *, const __m128i *); +template void ipack11(__m128i, const uint32_t *, __m128i *); +template +void 
ipackwithoutmask11(__m128i, const uint32_t *, __m128i *); + +template +__m128i iunpack12(__m128i, const __m128i *, uint32_t *); +template +__m128i ipatchedunpack12(__m128i, const __m128i *, uint32_t *, const __m128i *); +template void ipack12(__m128i, const uint32_t *, __m128i *); +template +void ipackwithoutmask12(__m128i, const uint32_t *, __m128i *); + +template +__m128i iunpack13(__m128i, const __m128i *, uint32_t *); +template +__m128i ipatchedunpack13(__m128i, const __m128i *, uint32_t *, const __m128i *); +template void ipack13(__m128i, const uint32_t *, __m128i *); +template +void ipackwithoutmask13(__m128i, const uint32_t *, __m128i *); + +template +__m128i iunpack14(__m128i, const __m128i *, uint32_t *); +template +__m128i ipatchedunpack14(__m128i, const __m128i *, uint32_t *, const __m128i *); +template void ipack14(__m128i, const uint32_t *, __m128i *); +template +void ipackwithoutmask14(__m128i, const uint32_t *, __m128i *); + +template +__m128i iunpack15(__m128i, const __m128i *, uint32_t *); +template +__m128i ipatchedunpack15(__m128i, const __m128i *, uint32_t *, const __m128i *); +template void ipack15(__m128i, const uint32_t *, __m128i *); +template +void ipackwithoutmask15(__m128i, const uint32_t *, __m128i *); + +template +__m128i iunpack16(__m128i, const __m128i *, uint32_t *); +template +__m128i ipatchedunpack16(__m128i, const __m128i *, uint32_t *, const __m128i *); +template void ipack16(__m128i, const uint32_t *, __m128i *); +template +void ipackwithoutmask16(__m128i, const uint32_t *, __m128i *); + +template +__m128i iunpack17(__m128i, const __m128i *, uint32_t *); +template +__m128i ipatchedunpack17(__m128i, const __m128i *, uint32_t *, const __m128i *); +template void ipack17(__m128i, const uint32_t *, __m128i *); +template +void ipackwithoutmask17(__m128i, const uint32_t *, __m128i *); + +template +__m128i iunpack18(__m128i, const __m128i *, uint32_t *); +template +__m128i ipatchedunpack18(__m128i, const __m128i *, uint32_t *, const __m128i *); +template void ipack18(__m128i, const uint32_t *, __m128i *); +template +void ipackwithoutmask18(__m128i, const uint32_t *, __m128i *); + +template +__m128i iunpack19(__m128i, const __m128i *, uint32_t *); +template +__m128i ipatchedunpack19(__m128i, const __m128i *, uint32_t *, const __m128i *); +template void ipack19(__m128i, const uint32_t *, __m128i *); +template +void ipackwithoutmask19(__m128i, const uint32_t *, __m128i *); + +template +__m128i iunpack20(__m128i, const __m128i *, uint32_t *); +template +__m128i ipatchedunpack20(__m128i, const __m128i *, uint32_t *, const __m128i *); +template void ipack20(__m128i, const uint32_t *, __m128i *); +template +void ipackwithoutmask20(__m128i, const uint32_t *, __m128i *); + +template +__m128i iunpack21(__m128i, const __m128i *, uint32_t *); +template +__m128i ipatchedunpack21(__m128i, const __m128i *, uint32_t *, const __m128i *); +template void ipack21(__m128i, const uint32_t *, __m128i *); +template +void ipackwithoutmask21(__m128i, const uint32_t *, __m128i *); + +template +__m128i iunpack22(__m128i, const __m128i *, uint32_t *); +template +__m128i ipatchedunpack22(__m128i, const __m128i *, uint32_t *, const __m128i *); +template void ipack22(__m128i, const uint32_t *, __m128i *); +template +void ipackwithoutmask22(__m128i, const uint32_t *, __m128i *); + +template +__m128i iunpack23(__m128i, const __m128i *, uint32_t *); +template +__m128i ipatchedunpack23(__m128i, const __m128i *, uint32_t *, const __m128i *); +template void ipack23(__m128i, const uint32_t *, 
__m128i *); +template +void ipackwithoutmask23(__m128i, const uint32_t *, __m128i *); + +template +__m128i iunpack24(__m128i, const __m128i *, uint32_t *); +template +__m128i ipatchedunpack24(__m128i, const __m128i *, uint32_t *, const __m128i *); +template void ipack24(__m128i, const uint32_t *, __m128i *); +template +void ipackwithoutmask24(__m128i, const uint32_t *, __m128i *); + +template +__m128i iunpack25(__m128i, const __m128i *, uint32_t *); +template +__m128i ipatchedunpack25(__m128i, const __m128i *, uint32_t *, const __m128i *); +template void ipack25(__m128i, const uint32_t *, __m128i *); +template +void ipackwithoutmask25(__m128i, const uint32_t *, __m128i *); + +template +__m128i iunpack26(__m128i, const __m128i *, uint32_t *); +template +__m128i ipatchedunpack26(__m128i, const __m128i *, uint32_t *, const __m128i *); +template void ipack26(__m128i, const uint32_t *, __m128i *); +template +void ipackwithoutmask26(__m128i, const uint32_t *, __m128i *); + +template +__m128i iunpack27(__m128i, const __m128i *, uint32_t *); +template +__m128i ipatchedunpack27(__m128i, const __m128i *, uint32_t *, const __m128i *); +template void ipack27(__m128i, const uint32_t *, __m128i *); +template +void ipackwithoutmask27(__m128i, const uint32_t *, __m128i *); + +template +__m128i iunpack28(__m128i, const __m128i *, uint32_t *); +template +__m128i ipatchedunpack28(__m128i, const __m128i *, uint32_t *, const __m128i *); +template void ipack28(__m128i, const uint32_t *, __m128i *); +template +void ipackwithoutmask28(__m128i, const uint32_t *, __m128i *); + +template +__m128i iunpack29(__m128i, const __m128i *, uint32_t *); +template +__m128i ipatchedunpack29(__m128i, const __m128i *, uint32_t *, const __m128i *); +template void ipack29(__m128i, const uint32_t *, __m128i *); +template +void ipackwithoutmask29(__m128i, const uint32_t *, __m128i *); + +template +__m128i iunpack30(__m128i, const __m128i *, uint32_t *); +template +__m128i ipatchedunpack30(__m128i, const __m128i *, uint32_t *, const __m128i *); +template void ipack30(__m128i, const uint32_t *, __m128i *); +template +void ipackwithoutmask30(__m128i, const uint32_t *, __m128i *); + +template +__m128i iunpack31(__m128i, const __m128i *, uint32_t *); +template +__m128i ipatchedunpack31(__m128i, const __m128i *, uint32_t *, const __m128i *); +template void ipack31(__m128i, const uint32_t *, __m128i *); +template +void ipackwithoutmask31(__m128i, const uint32_t *, __m128i *); + +template +__m128i iunpack32(__m128i, const __m128i *, uint32_t *); +template +__m128i ipatchedunpack32(__m128i, const __m128i *, uint32_t *, const __m128i *); +template void ipack32(__m128i, const uint32_t *, __m128i *); +template +void ipackwithoutmask32(__m128i, const uint32_t *, __m128i *); + +typedef __m128i (*integratedunpackingfunction)(__m128i, const __m128i *, + uint32_t *); +typedef __m128i (*integratedpatchedunpackingfunction)(__m128i, const __m128i *, + uint32_t *, + const __m128i *); + +typedef void (*integratedpackingfunction)(__m128i, const uint32_t *, __m128i *); + +template struct IntegratedArrayDispatch { + static integratedunpackingfunction unpack[33]; + + static inline __m128i SIMDiunpack(__m128i initOffset, const __m128i *in, + uint32_t *out, const uint32_t bit) { + return unpack[bit](initOffset, in, out); + } + static integratedpatchedunpackingfunction patchedunpack[33]; + + static inline __m128i SIMDipatchedunpack(__m128i initOffset, + const __m128i *in, uint32_t *out, + const __m128i *patchedbuffer, + const uint32_t bit) { + return 
patchedunpack[bit](initOffset, in, out, patchedbuffer); + } + static integratedpackingfunction packwithoutmask[33]; + + static inline void SIMDipackwithoutmask(__m128i initOffset, + const uint32_t *in, __m128i *out, + const uint32_t bit) { + packwithoutmask[bit](initOffset, in, out); + } + static integratedpackingfunction pack[33]; + + static inline void SIMDipack(__m128i initOffset, const uint32_t *in, + __m128i *out, const uint32_t bit) { + pack[bit](initOffset, in, out); + } +}; + +template +integratedunpackingfunction IntegratedArrayDispatch::unpack[33] = { + iunpack0, iunpack1, iunpack2, + iunpack3, iunpack4, iunpack5, + iunpack6, iunpack7, iunpack8, + iunpack9, iunpack10, iunpack11, + iunpack12, iunpack13, iunpack14, + iunpack15, iunpack16, iunpack17, + iunpack18, iunpack19, iunpack20, + iunpack21, iunpack22, iunpack23, + iunpack24, iunpack25, iunpack26, + iunpack27, iunpack28, iunpack29, + iunpack30, iunpack31, iunpack32}; + +template +integratedpatchedunpackingfunction + IntegratedArrayDispatch::patchedunpack[33] = { + ipatchedunpack0, ipatchedunpack1, + ipatchedunpack2, ipatchedunpack3, + ipatchedunpack4, ipatchedunpack5, + ipatchedunpack6, ipatchedunpack7, + ipatchedunpack8, ipatchedunpack9, + ipatchedunpack10, ipatchedunpack11, + ipatchedunpack12, ipatchedunpack13, + ipatchedunpack14, ipatchedunpack15, + ipatchedunpack16, ipatchedunpack17, + ipatchedunpack18, ipatchedunpack19, + ipatchedunpack20, ipatchedunpack21, + ipatchedunpack22, ipatchedunpack23, + ipatchedunpack24, ipatchedunpack25, + ipatchedunpack26, ipatchedunpack27, + ipatchedunpack28, ipatchedunpack29, + ipatchedunpack30, ipatchedunpack31, + ipatchedunpack32}; + +template +integratedpackingfunction + IntegratedArrayDispatch::packwithoutmask[33] = { + ipackwithoutmask0, ipackwithoutmask1, + ipackwithoutmask2, ipackwithoutmask3, + ipackwithoutmask4, ipackwithoutmask5, + ipackwithoutmask6, ipackwithoutmask7, + ipackwithoutmask8, ipackwithoutmask9, + ipackwithoutmask10, ipackwithoutmask11, + ipackwithoutmask12, ipackwithoutmask13, + ipackwithoutmask14, ipackwithoutmask15, + ipackwithoutmask16, ipackwithoutmask17, + ipackwithoutmask18, ipackwithoutmask19, + ipackwithoutmask20, ipackwithoutmask21, + ipackwithoutmask22, ipackwithoutmask23, + ipackwithoutmask24, ipackwithoutmask25, + ipackwithoutmask26, ipackwithoutmask27, + ipackwithoutmask28, ipackwithoutmask29, + ipackwithoutmask30, ipackwithoutmask31, + ipackwithoutmask32}; + +template +integratedpackingfunction IntegratedArrayDispatch::pack[33] = { + ipack0, ipack1, ipack2, + ipack3, ipack4, ipack5, + ipack6, ipack7, ipack8, + ipack9, ipack10, ipack11, + ipack12, ipack13, ipack14, + ipack15, ipack16, ipack17, + ipack18, ipack19, ipack20, + ipack21, ipack22, ipack23, + ipack24, ipack25, ipack26, + ipack27, ipack28, ipack29, + ipack30, ipack31, ipack32}; + +template +inline __m128i SIMDiunpack(__m128i initOffset, const __m128i *in, uint32_t *out, + const uint32_t bit) { + switch (bit) { + case 0: + return iunpack0(initOffset, in, out); + + case 1: + return iunpack1(initOffset, in, out); + + case 2: + return iunpack2(initOffset, in, out); + + case 3: + return iunpack3(initOffset, in, out); + + case 4: + return iunpack4(initOffset, in, out); + + case 5: + return iunpack5(initOffset, in, out); + + case 6: + return iunpack6(initOffset, in, out); + + case 7: + return iunpack7(initOffset, in, out); + + case 8: + return iunpack8(initOffset, in, out); + + case 9: + return iunpack9(initOffset, in, out); + + case 10: + return iunpack10(initOffset, in, out); + + case 11: + return 
iunpack11(initOffset, in, out); + + case 12: + return iunpack12(initOffset, in, out); + + case 13: + return iunpack13(initOffset, in, out); + + case 14: + return iunpack14(initOffset, in, out); + + case 15: + return iunpack15(initOffset, in, out); + + case 16: + return iunpack16(initOffset, in, out); + + case 17: + return iunpack17(initOffset, in, out); + + case 18: + return iunpack18(initOffset, in, out); + + case 19: + return iunpack19(initOffset, in, out); + + case 20: + return iunpack20(initOffset, in, out); + + case 21: + return iunpack21(initOffset, in, out); + + case 22: + return iunpack22(initOffset, in, out); + + case 23: + return iunpack23(initOffset, in, out); + + case 24: + return iunpack24(initOffset, in, out); + + case 25: + return iunpack25(initOffset, in, out); + + case 26: + return iunpack26(initOffset, in, out); + + case 27: + return iunpack27(initOffset, in, out); + + case 28: + return iunpack28(initOffset, in, out); + + case 29: + return iunpack29(initOffset, in, out); + + case 30: + return iunpack30(initOffset, in, out); + + case 31: + return iunpack31(initOffset, in, out); + + case 32: + return iunpack32(initOffset, in, out); + + default: + break; + } + throw std::logic_error("number of bits is unsupported"); +} + +template +inline __m128i SIMDipatchedunpack(__m128i initOffset, const __m128i *in, + uint32_t *out, const __m128i *patchedbuffer, + const uint32_t bit) { + switch (bit) { + case 0: + return ipatchedunpack0(initOffset, in, out, patchedbuffer); + + case 1: + return ipatchedunpack1(initOffset, in, out, patchedbuffer); + + case 2: + return ipatchedunpack2(initOffset, in, out, patchedbuffer); + + case 3: + return ipatchedunpack3(initOffset, in, out, patchedbuffer); + + case 4: + return ipatchedunpack4(initOffset, in, out, patchedbuffer); + + case 5: + return ipatchedunpack5(initOffset, in, out, patchedbuffer); + + case 6: + return ipatchedunpack6(initOffset, in, out, patchedbuffer); + + case 7: + return ipatchedunpack7(initOffset, in, out, patchedbuffer); + + case 8: + return ipatchedunpack8(initOffset, in, out, patchedbuffer); + + case 9: + return ipatchedunpack9(initOffset, in, out, patchedbuffer); + + case 10: + return ipatchedunpack10(initOffset, in, out, patchedbuffer); + + case 11: + return ipatchedunpack11(initOffset, in, out, patchedbuffer); + + case 12: + return ipatchedunpack12(initOffset, in, out, patchedbuffer); + + case 13: + return ipatchedunpack13(initOffset, in, out, patchedbuffer); + + case 14: + return ipatchedunpack14(initOffset, in, out, patchedbuffer); + + case 15: + return ipatchedunpack15(initOffset, in, out, patchedbuffer); + + case 16: + return ipatchedunpack16(initOffset, in, out, patchedbuffer); + + case 17: + return ipatchedunpack17(initOffset, in, out, patchedbuffer); + + case 18: + return ipatchedunpack18(initOffset, in, out, patchedbuffer); + + case 19: + return ipatchedunpack19(initOffset, in, out, patchedbuffer); + + case 20: + return ipatchedunpack20(initOffset, in, out, patchedbuffer); + + case 21: + return ipatchedunpack21(initOffset, in, out, patchedbuffer); + + case 22: + return ipatchedunpack22(initOffset, in, out, patchedbuffer); + + case 23: + return ipatchedunpack23(initOffset, in, out, patchedbuffer); + + case 24: + return ipatchedunpack24(initOffset, in, out, patchedbuffer); + + case 25: + return ipatchedunpack25(initOffset, in, out, patchedbuffer); + + case 26: + return ipatchedunpack26(initOffset, in, out, patchedbuffer); + + case 27: + return ipatchedunpack27(initOffset, in, out, patchedbuffer); + + case 28: + 
return ipatchedunpack28(initOffset, in, out, patchedbuffer); + + case 29: + return ipatchedunpack29(initOffset, in, out, patchedbuffer); + + case 30: + return ipatchedunpack30(initOffset, in, out, patchedbuffer); + + case 31: + return ipatchedunpack31(initOffset, in, out, patchedbuffer); + + case 32: + return ipatchedunpack32(initOffset, in, out, patchedbuffer); + + default: + break; + } + throw std::logic_error("number of bits is unsupported"); +} + +/*assumes that integers fit in the prescribed number of bits*/ +template +void SIMDipackwithoutmask(__m128i initOffset, const uint32_t *in, __m128i *out, + const uint32_t bit) { + switch (bit) { + case 0: + return; + + case 1: + ipackwithoutmask1(initOffset, in, out); + return; + + case 2: + ipackwithoutmask2(initOffset, in, out); + return; + + case 3: + ipackwithoutmask3(initOffset, in, out); + return; + + case 4: + ipackwithoutmask4(initOffset, in, out); + return; + + case 5: + ipackwithoutmask5(initOffset, in, out); + return; + + case 6: + ipackwithoutmask6(initOffset, in, out); + return; + + case 7: + ipackwithoutmask7(initOffset, in, out); + return; + + case 8: + ipackwithoutmask8(initOffset, in, out); + return; + + case 9: + ipackwithoutmask9(initOffset, in, out); + return; + + case 10: + ipackwithoutmask10(initOffset, in, out); + return; + + case 11: + ipackwithoutmask11(initOffset, in, out); + return; + + case 12: + ipackwithoutmask12(initOffset, in, out); + return; + + case 13: + ipackwithoutmask13(initOffset, in, out); + return; + + case 14: + ipackwithoutmask14(initOffset, in, out); + return; + + case 15: + ipackwithoutmask15(initOffset, in, out); + return; + + case 16: + ipackwithoutmask16(initOffset, in, out); + return; + + case 17: + ipackwithoutmask17(initOffset, in, out); + return; + + case 18: + ipackwithoutmask18(initOffset, in, out); + return; + + case 19: + ipackwithoutmask19(initOffset, in, out); + return; + + case 20: + ipackwithoutmask20(initOffset, in, out); + return; + + case 21: + ipackwithoutmask21(initOffset, in, out); + return; + + case 22: + ipackwithoutmask22(initOffset, in, out); + return; + + case 23: + ipackwithoutmask23(initOffset, in, out); + return; + + case 24: + ipackwithoutmask24(initOffset, in, out); + return; + + case 25: + ipackwithoutmask25(initOffset, in, out); + return; + + case 26: + ipackwithoutmask26(initOffset, in, out); + return; + + case 27: + ipackwithoutmask27(initOffset, in, out); + return; + + case 28: + ipackwithoutmask28(initOffset, in, out); + return; + + case 29: + ipackwithoutmask29(initOffset, in, out); + return; + + case 30: + ipackwithoutmask30(initOffset, in, out); + return; + + case 31: + ipackwithoutmask31(initOffset, in, out); + return; + + case 32: + ipackwithoutmask32(initOffset, in, out); + return; + + default: + break; + } + throw std::logic_error("number of bits is unsupported"); +} + +template +void SIMDipack(__m128i initOffset, const uint32_t *in, __m128i *out, + const uint32_t bit) { + switch (bit) { + case 0: + return; + + case 1: + ipack1(initOffset, in, out); + return; + + case 2: + ipack2(initOffset, in, out); + return; + + case 3: + ipack3(initOffset, in, out); + return; + + case 4: + ipack4(initOffset, in, out); + return; + + case 5: + ipack5(initOffset, in, out); + return; + + case 6: + ipack6(initOffset, in, out); + return; + + case 7: + ipack7(initOffset, in, out); + return; + + case 8: + ipack8(initOffset, in, out); + return; + + case 9: + ipack9(initOffset, in, out); + return; + + case 10: + ipack10(initOffset, in, out); + return; + + case 11: + 
ipack11(initOffset, in, out); + return; + + case 12: + ipack12(initOffset, in, out); + return; + + case 13: + ipack13(initOffset, in, out); + return; + + case 14: + ipack14(initOffset, in, out); + return; + + case 15: + ipack15(initOffset, in, out); + return; + + case 16: + ipack16(initOffset, in, out); + return; + + case 17: + ipack17(initOffset, in, out); + return; + + case 18: + ipack18(initOffset, in, out); + return; + + case 19: + ipack19(initOffset, in, out); + return; + + case 20: + ipack20(initOffset, in, out); + return; + + case 21: + ipack21(initOffset, in, out); + return; + + case 22: + ipack22(initOffset, in, out); + return; + + case 23: + ipack23(initOffset, in, out); + return; + + case 24: + ipack24(initOffset, in, out); + return; + + case 25: + ipack25(initOffset, in, out); + return; + + case 26: + ipack26(initOffset, in, out); + return; + + case 27: + ipack27(initOffset, in, out); + return; + + case 28: + ipack28(initOffset, in, out); + return; + + case 29: + ipack29(initOffset, in, out); + return; + + case 30: + ipack30(initOffset, in, out); + return; + + case 31: + ipack31(initOffset, in, out); + return; + + case 32: + ipack32(initOffset, in, out); + return; + + default: + break; + } + throw std::logic_error("number of bits is unsupported"); +} + +} // namespace SIMDCompressionLib + +#endif diff --git a/include/SIMDCompressionAndIntersection/simdvariablebyte.h b/include/SIMDCompressionAndIntersection/simdvariablebyte.h new file mode 100644 index 0000000..b575c9e --- /dev/null +++ b/include/SIMDCompressionAndIntersection/simdvariablebyte.h @@ -0,0 +1,493 @@ +/** + * (c) Part of the copyright is to Indeed.com + * Licensed under the Apache License Version 2.0 + */ + +/* + * Based on an initial design by Jeff Plaisance and + * improved by Nathan Kurz. + */ + +#ifndef SIMDCompressionAndIntersection_SIMDVARIABLEBYTE_H_ +#define SIMDCompressionAndIntersection_SIMDVARIABLEBYTE_H_ + +#include "common.h" +#include "codecs.h" +#include "util.h" + +namespace SIMDCompressionLib { + +extern "C" { +size_t masked_vbyte_read_loop(const uint8_t *in, uint32_t *out, + uint64_t length); +size_t masked_vbyte_read_loop_delta(const uint8_t *in, uint32_t *out, + uint64_t length, uint32_t prev); +size_t masked_vbyte_read_loop_fromcompressedsize(const uint8_t *in, + uint32_t *out, + size_t inputsize); +size_t masked_vbyte_read_loop_fromcompressedsize_delta(const uint8_t *in, + uint32_t *out, + size_t inputsize, + uint32_t prev); +// size_t read_ints(const uint8_t* in, uint32_t* out, int length) ; +// size_t read_ints_delta(const uint8_t* in, uint32_t* out, int length, uint32_t +// prev) ; +uint32_t masked_vbyte_select_delta(const uint8_t *in, uint64_t length, + uint32_t prev, size_t slot); +int masked_vbyte_search_delta(const uint8_t *in, uint64_t length, uint32_t prev, + uint32_t key, uint32_t *presult); +} + +/** + * This is a SIMD-accelerated version that is byte-by-byte format compatible + * with + * the VByte codec (that is, standard vbyte). + */ +template class MaskedVByte : public IntegerCODEC { +public: + MaskedVByte() {} + + void encodeArray(uint32_t *in, const size_t length, uint32_t *out, + size_t &nvalue) { + const uint8_t *const initbout = reinterpret_cast(out); + *out = static_cast(length); + out++; + uint8_t *bout = reinterpret_cast(out); + uint32_t prev = 0; + for (size_t k = 0; k < length; ++k) { + + const uint32_t val = delta ? (in[k] - prev) : in[k]; + if (delta) + prev = in[k]; + /** + * Code below could be shorter. 
Whether it could be faster + * depends on your compiler and machine. + */ + if (val < (1U << 7)) { + *bout = val & 0x7F; + ++bout; + } else if (val < (1U << 14)) { + *bout = static_cast((val & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(val >> 7); + ++bout; + } else if (val < (1U << 21)) { + *bout = static_cast((val & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(((val >> 7) & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(val >> 14); + ++bout; + } else if (val < (1U << 28)) { + *bout = static_cast((val & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(((val >> 7) & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(((val >> 14) & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(val >> 21); + ++bout; + } else { + *bout = static_cast((val & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(((val >> 7) & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(((val >> 14) & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(((val >> 21) & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(val >> 28); + ++bout; + } + } + while (needPaddingTo32Bits(bout)) { + *bout++ = 0xff; + } + const size_t storageinbytes = bout - initbout; + nvalue = storageinbytes / 4; + } + + const uint32_t *decodeArray(const uint32_t *in, const size_t length, + uint32_t *out, size_t &nvalue) { + nvalue = *in; + const uint8_t *inbyte = + decodeFromByteArray((const uint8_t *)in, length, out, nvalue); + inbyte = padTo32bits(inbyte); + return reinterpret_cast(inbyte); + } + + // Same as above, but operates on byte arrays (uint8_t *) and avoids + // the padding at the end + const uint8_t *decodeFromByteArray(const uint8_t *in, + const size_t /* length */, uint32_t *out, + size_t &nvalue) { + nvalue = *(uint32_t *)in; + in += sizeof(uint32_t); + if (nvalue == 0) { + return in; // abort + } + if (delta) { + uint32_t prev = 0; + in += masked_vbyte_read_loop_delta(in, out, nvalue, prev); + } else + in += masked_vbyte_read_loop(in, out, nvalue); + return in; + } + + // append a key. Keys must be in sorted order. We assume that there is + // enough room and that delta encoding was used. + // Returns the new size of the compressed array *in bytes* + size_t appendToByteArray(uint8_t *in, size_t bytesize, uint32_t previous_key, + uint32_t key) { + uint32_t num_ints = *(uint32_t *)in; + if (bytesize == 0) + bytesize = 4; + uint8_t *bytein = in + bytesize; + bytein += encodeOneIntegerToByteArray(key - previous_key, bytein); + *(uint32_t *)in = num_ints + 1; + return bytein - in; + } + + // Returns a decompressed value in a delta-encoded array + // only supported for delta encoded data (TODO) + uint32_t select(uint32_t *in, size_t index) { + assert(delta == true); + uint32_t num_ints = *in; + in++; + return (masked_vbyte_select_delta((uint8_t *)in, num_ints, 0, index)); + } + + // Performs a lower bound find in the delta-encoded array. + // Returns the index + // length is the size of the compressed input + // only supported for delta encoded data (TODO) + size_t findLowerBound(const uint32_t *in, const size_t /*length*/, + uint32_t key, uint32_t *presult) { + assert(delta == true); + uint32_t num_ints = *in; + in++; + return ( + masked_vbyte_search_delta((uint8_t *)in, num_ints, 0, key, presult)); + } + // insert the key in sorted order. We assume that there is enough room + // and that delta encoding was used. 
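+ // The length argument and the returned size are both counted in 32-bit
+ // words: the routine drops the current 0xFF padding, splices the new key
+ // into the delta-coded byte stream, re-pads with 0xFF bytes to a 32-bit
+ // boundary, and returns the padded byte size divided by 4.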
+ size_t insert(uint32_t *in, const size_t length, uint32_t key) { + assert(delta); + size_t bytesize = length * 4; + bytesize -= paddingBytes(in, length); + uint8_t *bytein = (uint8_t *)in; + uint8_t *byteininit = bytein; + bytein += insert(bytein, bytesize, key); + + while (needPaddingTo32Bits(bytein)) { + *bytein++ = 0xFF; + } + size_t storageinbytes = bytein - byteininit; + assert((storageinbytes % 4) == 0); + return storageinbytes / 4; + } + + // insert the key in sorted order. We assume that there is enough room and + // that delta encoding was used. + // the new size (in *byte) is returned + size_t insertInByteArray(uint8_t *inbyte, const size_t length, uint32_t key) { + uint32_t prev = 0; + assert(delta); + const uint8_t *const endbyte = + reinterpret_cast(inbyte + length); + // this assumes that there is a value to be read + + while (endbyte > inbyte + 5) { + uint8_t c; + uint32_t v; + + c = inbyte[0]; + v = c & 0x7F; + if (c < 128) { + inbyte += 1; + prev = v + prev; + if (prev >= key) { + return length + + __insert(inbyte, prev - v, key, prev, endbyte - inbyte); + } + continue; + } + + c = inbyte[1]; + v |= (c & 0x7F) << 7; + if (c < 128) { + inbyte += 2; + prev = v + prev; + if (prev >= key) { + return length + + __insert(inbyte, prev - v, key, prev, endbyte - inbyte); + } + continue; + } + + c = inbyte[2]; + v |= (c & 0x7F) << 14; + if (c < 128) { + inbyte += 3; + prev = v + prev; + if (prev >= key) { + return length + + __insert(inbyte, prev - v, key, prev, endbyte - inbyte); + } + continue; + } + + c = inbyte[3]; + v |= (c & 0x7F) << 21; + if (c < 128) { + inbyte += 4; + prev = v + prev; + if (prev >= key) { + return length + + __insert(inbyte, prev - v, key, prev, endbyte - inbyte); + } + continue; + } + + c = inbyte[4]; + inbyte += 5; + v |= (c & 0x0F) << 28; + prev = v + prev; + if (prev >= key) { + return length + __insert(inbyte, prev - v, key, prev, endbyte - inbyte); + } + } + while (endbyte > inbyte) { + unsigned int shift = 0; + for (uint32_t v = 0; endbyte > inbyte; shift += 7) { + uint8_t c = *inbyte++; + v += ((c & 127) << shift); + if ((c < 128)) { + prev = v + prev; + if (prev >= key) { + return length + + __insert(inbyte, prev - v, key, prev, endbyte - inbyte); + } + break; + } + } + } + // if we make it here, then we need to append + assert(key >= prev); + return length + encodeOneIntegerToByteArray(key - prev, inbyte); + } + + std::string name() const { + if (delta) + return "MaskedVByteDelta"; + else + return "MaskedVByte"; + } + +private: + // convenience function used by insert, writes key and newvalue to compressed + // stream, and return + // extra storage used, pointer should be right after where nextvalue is right + // now + size_t __insert(uint8_t *in, uint32_t previous, uint32_t key, + uint32_t nextvalue, size_t followingbytes) { + assert(nextvalue >= key); + assert(key >= previous); + size_t oldstorage = storageCost(nextvalue - previous); + size_t newstorage = + storageCost(nextvalue - key) + storageCost(key - previous); + assert(newstorage >= oldstorage); + if (newstorage > oldstorage) + std::memmove(in + newstorage - oldstorage, in, followingbytes); + uint8_t *newin = in - oldstorage; + newin += encodeOneIntegerToByteArray(key - previous, newin); + newin += encodeOneIntegerToByteArray(nextvalue - key, newin); + assert(newin == in + newstorage - oldstorage); + return newstorage - oldstorage; + } + + // how many bytes are required to store this integer? 
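+ // (Standard VByte sizing: 7 payload bits per byte, so 1 byte below 2^7,
+ // 2 below 2^14, 3 below 2^21, 4 below 2^28, and 5 bytes otherwise.)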
+ int storageCost(uint32_t val) { + if (val < (1U << 7)) { + return 1; + } else if (val < (1U << 14)) { + return 2; + } else if (val < (1U << 21)) { + return 3; + } else if (val < (1U << 28)) { + return 4; + } else { + return 5; + } + } + + template uint8_t extract7bits(const uint32_t val) { + return static_cast((val >> (7 * i)) & ((1U << 7) - 1)); + } + + template uint8_t extract7bitsmaskless(const uint32_t val) { + return static_cast((val >> (7 * i))); + } + + // determine how many padding bytes were used + int paddingBytes(const uint32_t *in, const size_t length) { + if (length == 0) + return 0; + uint32_t lastword = in[length - 1]; + if (lastword < (1U << 8)) { + return 3; + } else if (lastword < (1U << 16)) { + return 2; + } else if (lastword < (1U << 24)) { + return 1; + } + return 0; + } + + // write one compressed integer (without differential coding) + // returns the number of bytes written + size_t encodeOneIntegerToByteArray(uint32_t val, uint8_t *bout) { + const uint8_t *const initbout = bout; + if (val < (1U << 7)) { + *bout = val & 0x7F; + ++bout; + } else if (val < (1U << 14)) { + *bout = static_cast((val & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(val >> 7); + ++bout; + } else if (val < (1U << 21)) { + *bout = static_cast((val & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(((val >> 7) & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(val >> 14); + ++bout; + } else if (val < (1U << 28)) { + *bout = static_cast((val & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(((val >> 7) & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(((val >> 14) & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(val >> 21); + ++bout; + } else { + *bout = static_cast((val & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(((val >> 7) & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(((val >> 14) & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(((val >> 21) & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(val >> 28); + ++bout; + } + return bout - initbout; + } +}; + +// this version differs from MaskedVByte in that it does not write out the +// number of integers compressed as part of a header. +template class HeadlessMaskedVByte : public IntegerCODEC { +public: + HeadlessMaskedVByte() {} + + void encodeArray(uint32_t *in, const size_t length, uint32_t *out, + size_t &nvalue) { + const uint8_t *const initbout = reinterpret_cast(out); + uint8_t *bout = reinterpret_cast(out); + uint32_t prev = 0; + for (size_t k = 0; k < length; ++k) { + const uint32_t val = delta ? (in[k] - prev) : in[k]; + if (delta) + prev = in[k]; + /** + * Code below could be shorter. Whether it could be faster + * depends on your compiler and machine. 
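+ * Each value (or delta) is emitted least-significant 7 bits first; the
+ * high bit of a byte is set when another byte follows, so a 32-bit
+ * integer takes between 1 and 5 bytes.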
+ */ + if (val < (1U << 7)) { + *bout = val & 0x7F; + ++bout; + } else if (val < (1U << 14)) { + *bout = static_cast((val & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(val >> 7); + ++bout; + } else if (val < (1U << 21)) { + *bout = static_cast((val & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(((val >> 7) & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(val >> 14); + ++bout; + } else if (val < (1U << 28)) { + *bout = static_cast((val & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(((val >> 7) & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(((val >> 14) & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(val >> 21); + ++bout; + } else { + *bout = static_cast((val & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(((val >> 7) & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(((val >> 14) & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(((val >> 21) & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(val >> 28); + ++bout; + } + } + while (needPaddingTo32Bits(bout)) { + *bout++ = 0xFFU; + ; + } + const size_t storageinbytes = bout - initbout; + nvalue = storageinbytes / 4; + } + + const uint32_t *decodeArray(const uint32_t *in, const size_t length, + uint32_t *out, size_t &nvalue) { + if (length == 0) { + nvalue = 0; + return in; // abort + } + const uint8_t *inbyte = reinterpret_cast(in); + if (delta) { + uint32_t prev = 0; + nvalue = masked_vbyte_read_loop_fromcompressedsize_delta( + inbyte, out, length * 4, prev); + } else { + nvalue = + masked_vbyte_read_loop_fromcompressedsize(inbyte, out, length * 4); + } + + return in + length; + } + + std::string name() const { + if (delta) + return "HeadlessMaskedVByteDelta"; + else + return "HeadlessMaskedVByte"; + } + +private: + template uint8_t extract7bits(const uint32_t val) { + return static_cast((val >> (7 * i)) & ((1U << 7) - 1)); + } + + template uint8_t extract7bitsmaskless(const uint32_t val) { + return static_cast((val >> (7 * i))); + } +}; + +} // namespace SIMDCompressionLib + +#endif /* SIMDCompressionAndIntersection_SIMDVARIABLEBYTE_H_ */ diff --git a/include/SIMDCompressionAndIntersection/skipping.h b/include/SIMDCompressionAndIntersection/skipping.h new file mode 100644 index 0000000..3df98a4 --- /dev/null +++ b/include/SIMDCompressionAndIntersection/skipping.h @@ -0,0 +1,292 @@ +/* + * This is a simple implementation of a skipping data structure and algorithms similar to + * what is described in + * + * Sanders and Transier, Intersection in Integer Inverted Indices, ALENEX 2007, 2007. + * + * As suggested in their conclusion, we leave the higher-level structure uncompressed. We also + * use differential coding. + * + * To paraphrase Sanders and Transier... + * + * In addition to a delta-encoded compressed list, a top-level data structure stores + * every B-th element of N in t together with its position in the main list (B is a tuning + * parameter). We can now run any search algorithm on t and then scan only the pieces of + * the main list that might contain an element to be located. + * + * In our implementation, we assume that B is a power of two and use 1 << BlockSizeLog as + * the block size. + * + * Sanders and Transier's proposal is similar in spirit to the skipping + * structure proposed in + * + * Moffat, A., Zobel, J.: Self-indexing inverted files for fast text retrieval. + * ACM Transactions on Information Systems 14 (1996). 
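+ *
+ * Illustrative example: with BlockSizeLog = 5 the block size B is 32. For
+ * each block the top-level array stores the block's last (largest) value
+ * together with the byte offset of the block's start in the differentially
+ * coded main buffer, so an intersection probe can skip whole 32-integer
+ * blocks whose last value is below the key and decodes at most one block
+ * otherwise.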
+ * + * + * Author: Daniel Lemire + */ + +#ifndef SIMDCompressionAndIntersection_SKIPPING_H_ +#define SIMDCompressionAndIntersection_SKIPPING_H_ + +#include "common.h" + +namespace SIMDCompressionLib { + +class Skipping { +public: + Skipping(uint32_t BS, const uint32_t *data, uint32_t length) + : BlockSizeLog(BS), mainbuffer(), highbuffer(), Length(0) { + if ((BlockSizeLog == 0) && (BlockSizeLog >= 32)) + throw runtime_error("please use a reasonable BlockSizeLog"); + load(data, length); // cheap constructor + } + + ~Skipping() {} + + size_t storageInBytes() const { + return mainbuffer.size() * sizeof(uint8_t) + + highbuffer.size() * sizeof(higharraypair) + + sizeof(Length); // rough estimates (good enough) + } + + uint32_t decompress(uint32_t *out) const { + const uint8_t *bout = mainbuffer.data(); + uint32_t pos = 0; + + uint32_t val = 0; + for (uint32_t k = 0; k < Length; ++k) { + bout = decode(bout, val); + out[pos++] = val; + } + return pos; + } + + /** + * Intersects the current Skipping structure with a (small) uncompressed array + * and + * writes the answer to out. + */ + uint32_t intersect(const uint32_t *smallarray, uint32_t length, + uint32_t *out) const { + uint32_t intersectsize = 0; + const uint8_t *largemainpointer = mainbuffer.data(); + uint32_t largemainval = 0; + largemainpointer = decode(largemainpointer, largemainval); + uint32_t x = 0; + for (uint32_t k = 0; k < length; ++k) { + uint32_t val = smallarray[k]; + // if the last value of the current block is too small, skip the block + // entirely + if (highbuffer[x >> BlockSizeLog].first < val) { + do { + x = ((x >> BlockSizeLog) + 1) << BlockSizeLog; + if (x >= Length) { + return intersectsize; + } + } while (highbuffer[x >> BlockSizeLog].first < val); + largemainpointer = + mainbuffer.data() + highbuffer[x >> BlockSizeLog].second; + largemainval = highbuffer[(x >> BlockSizeLog) - 1].first; + largemainpointer = decode(largemainpointer, largemainval); + } + // at this point, we have that the last value of the current block is >= + // val + // this means that we shall decode at most one block before giving up + while (largemainval < val) { + ++x; + if (x >= Length) { + return intersectsize; + } + largemainpointer = decode(largemainpointer, largemainval); + } + if (largemainval == val) { + out[intersectsize++] = val; + } + } + return intersectsize; + } + uint32_t intersect(const Skipping &otherlarger, uint32_t *out) const { + // we assume that "this" is the smallest of the two + if (otherlarger.Length < Length) + return otherlarger.intersect(*this, out); + if (Length == 0) + return 0; // special silly case + assert(otherlarger.Length >= Length); + assert(otherlarger.Length > 0); + uint32_t intersectsize = 0; + + const uint8_t *inbyte = mainbuffer.data(); + const uint8_t *const endbyte = mainbuffer.data() + mainbuffer.size(); + const uint8_t *largemainpointer = otherlarger.mainbuffer.data(); + uint32_t largemainval = 0; + largemainpointer = decode(largemainpointer, largemainval); + uint32_t val = 0; // where I put decoded values + uint32_t x = 0; + while (endbyte > inbyte) { + inbyte = decode(inbyte, val); + // if the last value of the current block is too small, skip the block + // entirely + if (otherlarger.highbuffer[x >> otherlarger.BlockSizeLog].first < val) { + do { + x = ((x >> otherlarger.BlockSizeLog) + 1) << otherlarger.BlockSizeLog; + if (x >= otherlarger.Length) { + return intersectsize; + } + } while (otherlarger.highbuffer[x >> otherlarger.BlockSizeLog].first < + val); + largemainpointer = + 
otherlarger.mainbuffer.data() + + otherlarger.highbuffer[x >> otherlarger.BlockSizeLog].second; + largemainval = + otherlarger.highbuffer[(x >> otherlarger.BlockSizeLog) - 1].first; + largemainpointer = decode(largemainpointer, largemainval); + } + // at this point, we have that the last value of the current block is >= + // val + // this means that we shall decode at most one block before giving up + while (largemainval < val) { + ++x; + if (x >= otherlarger.Length) { + return intersectsize; + } + largemainpointer = decode(largemainpointer, largemainval); + } + if (largemainval == val) { + out[intersectsize++] = val; + } + } + return intersectsize; + } + + uint32_t BlockSizeLog; + vector mainbuffer; + typedef pair higharraypair; + + typedef vector higharray; + higharray highbuffer; + uint32_t Length; + + // please don't use the default constructor... + + Skipping() : BlockSizeLog(0), mainbuffer(), highbuffer(), Length(0) {} + +private: + Skipping(const Skipping &); + + // making it private on purpose + Skipping &operator=(const Skipping &); + + void load(const uint32_t *data, uint32_t length); + + template uint8_t extract7bits(const uint32_t val) { + return static_cast((val >> (7 * i)) & ((1U << 7) - 1)); + } + + template uint8_t extract7bitsmaskless(const uint32_t val) { + return static_cast((val >> (7 * i))); + } + static inline const uint8_t *decode(const uint8_t *buffer, uint32_t &prev) { + // manually unrolled for performance + uint32_t v = 0; + uint8_t c = *buffer++; + v += (c & 127); + if ((c & 128)) { + prev += v; + return buffer; + } + c = *buffer++; + v += ((c & 127) << 7); + if ((c & 128)) { + prev += v; + return buffer; + } + c = *buffer++; + v += ((c & 127) << 14); + if ((c & 128)) { + prev += v; + return buffer; + } + c = *buffer++; + v += ((c & 127) << 21); + if ((c & 128)) { + prev += v; + return buffer; + } + c = *buffer++; + v += ((c & 127) << 30); + prev += v; + return buffer; + } +}; + +void Skipping::load(const uint32_t *data, uint32_t len) { + assert(numeric_limits::max() < + (numeric_limits::max() / 5)); // check for overflow + Length = len; + if (Length == 0) + return; // nothing to do + uint32_t BlockNumber = (Length + (1 << BlockSizeLog) - 1) / + (1 << BlockSizeLog); // count full blocks + assert(BlockNumber << BlockSizeLog >= Length); + highbuffer.resize(BlockNumber); + mainbuffer.resize(5 * Length); + uint8_t *bout = mainbuffer.data(); + uint8_t *const boutinit = bout; + uint32_t prev = 0; + for (uint32_t k = 0; k < BlockNumber; ++k) { + const uint32_t howmany = (((k + 1) << BlockSizeLog) > Length) + ? 
Length - (k << BlockSizeLog) + : 1 << BlockSizeLog; + highbuffer[k] = make_pair(data[(k << BlockSizeLog) + howmany - 1], + static_cast(bout - boutinit)); + for (uint32_t x = 0; x < howmany; ++x) { + const uint32_t v = data[x + (k << BlockSizeLog)]; + const uint32_t val = v - prev; + prev = v; + if (val < (1U << 7)) { + *bout = static_cast(val | (1U << 7)); + ++bout; + } else if (val < (1U << 14)) { + *bout = extract7bits<0>(val); + ++bout; + *bout = extract7bitsmaskless<1>(val) | (1U << 7); + ++bout; + } else if (val < (1U << 21)) { + *bout = extract7bits<0>(val); + ++bout; + *bout = extract7bits<1>(val); + ++bout; + *bout = extract7bitsmaskless<2>(val) | (1U << 7); + ++bout; + } else if (val < (1U << 28)) { + *bout = extract7bits<0>(val); + ++bout; + *bout = extract7bits<1>(val); + ++bout; + *bout = extract7bits<2>(val); + ++bout; + *bout = extract7bitsmaskless<3>(val) | (1U << 7); + ++bout; + } else { + *bout = extract7bits<0>(val); + ++bout; + *bout = extract7bits<1>(val); + ++bout; + *bout = extract7bits<2>(val); + ++bout; + *bout = extract7bits<3>(val); + ++bout; + *bout = extract7bitsmaskless<4>(val) | (1U << 7); + ++bout; + } + } + } + mainbuffer.resize(static_cast(bout - boutinit)); + mainbuffer.shrink_to_fit(); +} + +} // namespace SIMDCompressionLib + +#endif /* SIMDCompressionAndIntersection_SKIPPING_H_ */ diff --git a/include/SIMDCompressionAndIntersection/sortedbitpacking.h b/include/SIMDCompressionAndIntersection/sortedbitpacking.h new file mode 100644 index 0000000..2d00e8b --- /dev/null +++ b/include/SIMDCompressionAndIntersection/sortedbitpacking.h @@ -0,0 +1,200 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. + * + * (c) Daniel Lemire, http://lemire.me/en/ + */ + +#ifndef SIMDCompressionAndIntersection_SORTEDBITPACKING_H_ +#define SIMDCompressionAndIntersection_SORTEDBITPACKING_H_ + +#include "common.h" +#include "simdbitpacking.h" +#include "bitpackinghelpers.h" + +namespace SIMDCompressionLib { + +template CONST_FUNCTION static T *padTo128bits(T *inbyte) { + return reinterpret_cast((reinterpret_cast(inbyte) + 15) & + ~15); +} + +/** + * This is a minimalist class that allows you to store data + * in one of 32 "stores". Each store is for + * integers having bit width 1, 2..., 32 respectively. + * + * Design by D. 
Lemire + */ +class BasicSortedBitPacker { +public: + enum { DEFAULTSIZE = 128 }; // should be a multiple of 128 + uint32_t buffer[32]; + + static string name() { return "uBSBP"; } + + BasicSortedBitPacker() { + for (uint32_t i = 0; i < 32; ++i) { + data[i] = new uint32_t[DEFAULTSIZE]; + memset(data[i], 0, DEFAULTSIZE * sizeof(uint32_t)); + actualsizes[i] = DEFAULTSIZE; + } + clear(); + } + + void reset() { + for (uint32_t i = 0; i < 32; ++i) { + delete[] data[i]; + data[i] = new uint32_t[DEFAULTSIZE]; + memset(data[i], 0, DEFAULTSIZE * sizeof(uint32_t)); + actualsizes[i] = DEFAULTSIZE; + } + clear(); + } + + ~BasicSortedBitPacker() { free(); } + void free() { + clear(); + for (uint32_t i = 0; i < 32; ++i) + if (data[i] != NULL) { + delete[] data[i]; + data[i] = NULL; + actualsizes[i] = 0; + } + } + void directAppend(uint32_t i, uint32_t val) { data[i][sizes[i]++] = val; } + + const uint32_t *get(int i) { return data[i]; } + + void ensureCapacity(int i, uint32_t datatoadd) { + if (sizes[i] + datatoadd > actualsizes[i]) { + actualsizes[i] = (sizes[i] + datatoadd + 127) / 128 * 128 * + 2; // so we always get a multiple of 128 + uint32_t *tmp = new uint32_t[actualsizes[i]]; + for (uint32_t j = 0; j < sizes[i]; ++j) + tmp[j] = data[i][j]; + delete[] data[i]; + data[i] = tmp; + } + } + + void clear() { + for (uint32_t i = 0; i < 32; ++i) + sizes[i] = 0; // memset "might" be faster. + } + + uint32_t *write(uint32_t *out) { + uint32_t bitmap = 0; + for (uint32_t k = 1; k < 32; ++k) { + if (sizes[k] != 0) + bitmap |= (1U << k); + } + *(out++) = bitmap; + + for (uint32_t k = 1; k < 32; ++k) { + if (sizes[k] != 0) { + *out = sizes[k]; + out++; + uint32_t j = 0; + for (; j + 128 <= sizes[k]; j += 128) { + usimdpackwithoutmask(&data[k][j], reinterpret_cast<__m128i *>(out), + k + 1); + out += 4 * (k + 1); + } + // falling back on scalar + for (; j < sizes[k]; j += 32) { + BitPackingHelpers::fastpackwithoutmask(&data[k][j], out, k + 1); + out += k + 1; + } + out -= (j - sizes[k]) * (k + 1) / 32; + } + } + return out; + } + const uint32_t *read(const uint32_t *in) { + clear(); + const uint32_t bitmap = *(in++); + + for (uint32_t k = 1; k < 32; ++k) { + if ((bitmap & (1U << k)) != 0) { + sizes[k] = *in++; + if (actualsizes[k] < sizes[k]) { + delete[] data[k]; + actualsizes[k] = (sizes[k] + 127) / 128 * 128; + data[k] = new uint32_t[actualsizes[k]]; + } + uint32_t j = 0; + for (; j + 128 <= sizes[k]; j += 128) { + + usimdunpack(reinterpret_cast(in), &data[k][j], + k + 1); + in += 4 * (k + 1); + } + for (; j + 31 < sizes[k]; j += 32) { + BitPackingHelpers::fastunpack(in, &data[k][j], k + 1); + in += k + 1; + } + uint32_t remaining = sizes[k] - j; + memcpy(buffer, in, (remaining * (k + 1) + 31) / 32 * sizeof(uint32_t)); + uint32_t *bpointer = buffer; + in += ((sizes[k] + 31) / 32 * 32 - j) / 32 * (k + 1); + for (; j < sizes[k]; j += 32) { + BitPackingHelpers::fastunpack(bpointer, &data[k][j], k + 1); + bpointer += k + 1; + } + in -= (j - sizes[k]) * (k + 1) / 32; + } + } + return in; + } + + // for debugging + void sanityCheck() { + for (uint32_t k = 0; k < 32; ++k) { + if (sizes[k] > actualsizes[k]) { + cerr << "overflow at " << k << endl; + throw runtime_error("bug"); + } + if (sizes[k] != 0) { + cout << "k=" << k << endl; + uint32_t mask = 0u; + for (uint32_t j = 0; j < sizes[k]; ++j) { + cout << data[k][j] << " "; + mask |= data[k][j]; + } + cout << endl; + + if (gccbits(mask) > k + 1) { + cerr << "At " << (k + 1) << " we have " << gccbits(mask) << endl; + throw runtime_error("bug"); + } + } + } + } + + 
bool equals(const BasicSortedBitPacker &o) { + for (uint32_t k = 0; k < 32; ++k) { + if (sizes[k] != o.sizes[k]) { + return false; + } + for (uint32_t j = 0; j < sizes[k]; ++j) + if (data[k][j] != o.data[k][j]) { + return false; + } + } + return true; + } + +private: + uint32_t *data[32]; + uint32_t sizes[32]; + uint32_t actualsizes[32]; + + // we don't want anyone to start copying this class + BasicSortedBitPacker(const BasicSortedBitPacker &); + BasicSortedBitPacker &operator=(const BasicSortedBitPacker &); +}; + +} // namespace SIMDCompressionLib + +#endif /* SIMDCompressionAndIntersection_SORTEDBITPACKING_H_ */ diff --git a/include/SIMDCompressionAndIntersection/streamvariablebyte.h b/include/SIMDCompressionAndIntersection/streamvariablebyte.h new file mode 100644 index 0000000..d575688 --- /dev/null +++ b/include/SIMDCompressionAndIntersection/streamvariablebyte.h @@ -0,0 +1,247 @@ +#ifndef SIMDCompressionAndIntersection_STREAMVARIABLEBYTE_ +#define SIMDCompressionAndIntersection_STREAMVARIABLEBYTE_ + +#include "common.h" +#include "codecs.h" + +namespace SIMDCompressionLib { + +/** + * StreamVByte is an integer CODEC invented by Nathan Kurz. + */ + +extern "C" { +uint64_t svb_encode(uint8_t *out, const uint32_t *in, uint32_t count, int delta, + int type); +uint8_t *svb_decode_avx_simple(uint32_t *out, uint8_t *keyPtr, uint8_t *dataPtr, + uint64_t count); +uint8_t *svb_decode_avx_d1_simple(uint32_t *out, uint8_t *keyPtr, + uint8_t *dataPtr, uint64_t count); +uint8_t *svb_decode_scalar_d1_init(uint32_t *outPtr, const uint8_t *keyPtr, + uint8_t *dataPtr, uint32_t count, + uint32_t prev); +uint32_t svb_select_avx_d1_init(uint8_t *keyPtr, uint8_t *dataPtr, + uint64_t count, uint32_t prev, int slot); +int svb_find_avx_d1_init(uint8_t *keyPtr, uint8_t *dataPtr, uint64_t count, + uint32_t prev, uint32_t key, uint32_t *presult); +uint8_t *svb_insert_scalar_d1_init(uint8_t *keyPtr, uint8_t *dataPtr, + size_t dataSize, uint32_t count, + uint32_t prev, uint32_t new_key, + uint32_t *position); +uint8_t *svb_append_scalar_d1(uint8_t *keyPtr, uint8_t *dataPtr, + size_t sizebytes, size_t count, uint32_t delta); +} + +/** + * Regular StreamVByte (no differential coding) + */ +class StreamVByte : public IntegerCODEC { +public: + void encodeArray(uint32_t *in, const size_t count, uint32_t *out, + size_t &nvalue) { + uint64_t bytesWritten = svb_encode( + (uint8_t *)out, in, static_cast(std::min( + count, std::numeric_limits::max())), + 0, 1); + nvalue = static_cast(bytesWritten + 3) / 4; + } + + const uint32_t *decodeArray(const uint32_t *in, const size_t /* count */, + uint32_t *out, size_t &nvalue) { + uint32_t count = *(uint32_t *)in; // first 4 bytes is number of ints + nvalue = count; + if (count == 0) + return 0; + + uint8_t *keyPtr = (uint8_t *)in + 4; // full list of keys is next + uint32_t keyLen = ((count + 3) / 4); // 2-bits per key (rounded up) + uint8_t *dataPtr = keyPtr + keyLen; // data starts at end of keys + nvalue = count; + return reinterpret_cast( + (reinterpret_cast( + svb_decode_avx_simple(out, keyPtr, dataPtr, count)) + + 3) & + ~3); + } + + std::string name() const { return "streamvbyte"; } +}; + +/** + * StreamVByte with integrated differential coding + */ +class StreamVByteD1 : public IntegerCODEC { +public: + void encodeArray(uint32_t *in, const size_t count, uint32_t *out, + size_t &nvalue) { + uint32_t bytesWritten = static_cast( + svb_encode((uint8_t *)(out + 1), in, + static_cast(std::min( + count, std::numeric_limits::max())), + 1, 1)); + *out = 4 + bytesWritten; + nvalue 
= 1 + (bytesWritten + 3) / 4; + } + + void encodeToByteArray(uint32_t *in, const size_t count, uint8_t *out, + size_t &nvalue) { + uint32_t bytesWritten = static_cast( + svb_encode((uint8_t *)(out + 1), in, + static_cast(std::min( + count, std::numeric_limits::max())), + 1, 1)); + *out = 4 + bytesWritten; + nvalue = 4 + bytesWritten; + } + + const uint32_t *decodeArray(const uint32_t *in, const size_t /* count */, + uint32_t *out, size_t &nvalue) { + ++in; // number of encoded bytes + uint32_t count = *(uint32_t *)in; // next 4 bytes is number of ints + nvalue = count; + if (count == 0) + return 0; + + uint8_t *keyPtr = (uint8_t *)in + 4; // full list of keys is next + uint32_t keyLen = ((count + 3) / 4); // 2-bits per key (rounded up) + uint8_t *dataPtr = keyPtr + keyLen; // data starts at end of keys + return reinterpret_cast( + (reinterpret_cast( + svb_decode_avx_d1_simple(out, keyPtr, dataPtr, count)) + + 3) & + ~3); + } + + const uint8_t *decodeFromByteArray(const uint8_t *in, + const size_t /* count */, uint32_t *out, + size_t &nvalue) { + in += 4; // number of encoded bytes + uint32_t count = *(uint32_t *)in; // next 4 bytes is number of ints + nvalue = count; + if (count == 0) + return 0; + + uint8_t *keyPtr = (uint8_t *)in + 4; // full list of keys is next + uint32_t keyLen = ((count + 3) / 4); // 2-bits per key (rounded up) + uint8_t *dataPtr = keyPtr + keyLen; // data starts at end of keys + return svb_decode_avx_d1_simple(out, keyPtr, dataPtr, count); + } + + uint32_t select(const uint32_t *in, int slot) { + ++in; // number of encoded bytes + uint32_t count = *in; // next 4 bytes is number of ints + assert(slot < (int)count); + uint8_t *keyPtr = (uint8_t *)in + 4; // full list of keys is next + uint32_t keyLen = ((count + 3) / 4); // 2-bits per key (rounded up) + uint8_t *dataPtr = keyPtr + keyLen; // data starts at end of keys + return svb_select_avx_d1_init(keyPtr, dataPtr, count, 0, slot); + } + + uint32_t findLowerBound(const uint32_t *in, uint32_t /* count */, + uint32_t key, uint32_t *presult) { + ++in; // skip number of encoded bytes + uint32_t count = *(uint32_t *)in; // next 4 bytes is number of ints + uint8_t *keyPtr = (uint8_t *)in + 4; // full list of keys is next + uint32_t keyLen = ((count + 3) / 4); // 2-bits per key (rounded up) + uint8_t *dataPtr = keyPtr + keyLen; // data starts at end of keys + return (uint32_t)svb_find_avx_d1_init(keyPtr, dataPtr, count, 0, key, + presult); + } + + // append a key. Keys must be in sorted order. We assume that there is + // enough room and that delta encoding was used. + // Returns the new size of the compressed array *in bytes* + size_t appendToByteArray(uint8_t *in, const size_t /* length */, + uint32_t previous_key, uint32_t key) { + uint8_t *initin = in; + size_t size = *(uint32_t *)in; + in += 4; + size_t count = *(uint32_t *)in; + in += 4; + + // if the buffer is not yet initialized: pretend that the first 8 + // bytes are already occupied + if (size == 0) + size = 8; + + uint8_t *keyPtr = (uint8_t *)in; // full list of keys is next + uint32_t keyLen = + static_cast((count + 3) / 4); // 2-bits per key (rounded up) + uint8_t *dataPtr = keyPtr + keyLen; // data starts after the keys + size = svb_append_scalar_d1(keyPtr, dataPtr, size - 8, count, + key - previous_key) - + initin; + + // update 'size' and 'count' at the beginning of the buffer + in = initin; + *(uint32_t *)in = static_cast(size); + in += 4; + *(uint32_t *)in = static_cast(count + 1); + return size; + } + + // Inserts |key| into an encoded sequence. 
|encodedSize| is the total + // allocated size for |in| (in bytes). + // Returns the number of values written. + uint32_t insert(uint32_t *in, uint32_t, uint32_t key) { + uint32_t bytesEncoded = *in; + uint32_t count = *(in + 1); // first 4 bytes is number of ints + uint8_t *keyPtr = (uint8_t *)(in + 2); // full list of keys is next + // keyLen: 2-bits per key (rounded up), but at least 1 byte + uint32_t keyLen = count == 0 ? 1 : ((count + 3) / 4); + uint8_t *dataPtr = keyPtr + keyLen; // data starts at end of keys + uint32_t dataSize = (bytesEncoded - 8) - keyLen; + + // make space for the new key? + if (count > 0 && count % 4 == 0 && keyPtr + keyLen + 1 > dataPtr) { + memmove(dataPtr + 1, dataPtr, dataSize); + dataPtr++; + } + + *(in + 1) = count + 1; + + uint32_t position; + uint32_t bytesWritten = static_cast( + svb_insert_scalar_d1_init(keyPtr, dataPtr, dataSize, count, 0, key, + &position) - + (uint8_t *)in); + *in = bytesWritten; + return (bytesWritten + 3) / 4; + } + + // Inserts |key| into an encoded sequence. |encodedSize| is the total + // allocated size for |in| (in bytes). + // Returns the number of *bytes* written. + size_t insertInByteArray(uint8_t *inbyte, uint32_t, uint32_t key) { + uint32_t *in = (uint32_t *)inbyte; + uint32_t bytesEncoded = *in; + uint32_t count = *(in + 1); // first 4 bytes is number of ints + uint8_t *keyPtr = (uint8_t *)(in + 2); // full list of keys is next + // keyLen: 2-bits per key (rounded up), but at least 1 byte + uint32_t keyLen = count == 0 ? 1 : ((count + 3) / 4); + uint8_t *dataPtr = keyPtr + keyLen; // data starts at end of keys + uint32_t dataSize = (bytesEncoded - 8) - keyLen; + + // make space for the new key? + if (count > 0 && count % 4 == 0 && keyPtr + keyLen + 1 > dataPtr) { + memmove(dataPtr + 1, dataPtr, dataSize); + dataPtr++; + } + + *(in + 1) = count + 1; + + uint32_t position; + uint32_t bytesWritten = static_cast( + svb_insert_scalar_d1_init(keyPtr, dataPtr, dataSize, count, 0, key, + &position) - + (uint8_t *)in); + *in = bytesWritten; + return bytesWritten; + } + + std::string name() const { return "streamvbyte_d1"; } +}; + +} // namespace SIMDCompressionLib + +#endif // STREAMVARIABLEBYTE_ diff --git a/include/SIMDCompressionAndIntersection/synthetic.h b/include/SIMDCompressionAndIntersection/synthetic.h new file mode 100644 index 0000000..2f0a7a5 --- /dev/null +++ b/include/SIMDCompressionAndIntersection/synthetic.h @@ -0,0 +1,389 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. 
+ * + * (c) Daniel Lemire + */ + +#ifndef SIMDCompressionAndIntersection_SYNTHETIC_H_ +#define SIMDCompressionAndIntersection_SYNTHETIC_H_ + +#include "common.h" +#include "util.h" +#include "mersenne.h" +#include "intersection.h" +#include "boolarray.h" + +namespace SIMDCompressionLib { + +using namespace std; + +vector generateArray(uint32_t N, const uint32_t mask = 0xFFFFFFFFU) { + vector ans(N); + for (size_t k = 0; k < N; ++k) + ans[k] = rand() & mask; + return ans; +} + +vector generateArray32(uint32_t N, + const uint32_t mask = 0xFFFFFFFFU) { + vector ans(N); + for (size_t k = 0; k < N; ++k) + ans[k] = rand() & mask; + return ans; +} + +class UniformDataGenerator { +public: + UniformDataGenerator(uint32_t seed = static_cast(time(NULL))) + : rand(seed) {} + + void negate(vector &in, vector &out, uint32_t Max) { + out.resize(Max - in.size()); + in.push_back(Max); + uint32_t i = 0; + size_t c = 0; + for (size_t j = 0; j < in.size(); ++j) { + const uint32_t v = in[j]; + for (; i < v; ++i) + out[c++] = i; + ++i; + } + assert(c == out.size()); + } + + /** + * fill the vector with N numbers uniformly picked from from 0 to Max, not + * including Max + * if it is not possible, an exception is thrown + */ + vector generateUniformHash(uint32_t N, uint32_t Max, + vector &ans) { + if (Max < N) + throw runtime_error( + "can't generate enough distinct elements in small interval"); + ans.clear(); + if (N == 0) + return ans; // nothing to do + ans.reserve(N); + assert(Max >= 1); + unordered_set s; + while (s.size() < N) + s.insert(rand.getValue(Max - 1)); + ans.assign(s.begin(), s.end()); + sort(ans.begin(), ans.end()); + assert(N == ans.size()); + return ans; + } + + void generateUniformBitmap(uint32_t N, uint32_t Max, vector &ans) { + if (Max < N) + throw runtime_error( + "can't generate enough distinct elements in small interval"); + assert(Max >= 1); + BoolArray bs(Max); + uint32_t card = 0; + while (card < N) { + uint32_t v = rand.getValue(Max - 1); + if (!bs.get(v)) { + bs.set(v); + ++card; + } + } + ans.resize(N); + bs.toArray(ans); + } + + void fastgenerateUniform(uint32_t N, uint32_t Max, vector &ans) { + if (2 * N > Max) { + vector buf(N); + fastgenerateUniform(Max - N, Max, buf); + negate(buf, ans, Max); + return; + } + if (N * 1024 > Max) { + generateUniformBitmap(N, Max, ans); + } + generateUniformHash(N, Max, ans); + } + + // Max value is excluded from range + vector generate(uint32_t N, uint32_t Max) { + vector ans; + ans.reserve(N); + fastgenerateUniform(N, Max, ans); + return ans; + } + ZRandom rand; +}; + +/* + * Reference: Vo Ngoc Anh and Alistair Moffat. 2010. Index compression using + * 64-bit words. Softw. Pract. Exper.40, 2 (February 2010), 131-147. 
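+ *
+ * Roughly speaking, the generator below splits the target range at a random
+ * cut point and fills the two halves either uniformly or (more often) by
+ * recursing, so the output contains runs of nearby values rather than a
+ * uniform spread.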
+ */ +class ClusteredDataGenerator { +public: + vector buffer; + UniformDataGenerator unidg; + ClusteredDataGenerator(uint32_t seed = static_cast(time(NULL))) + : buffer(), unidg(seed) {} + + // Max value is excluded from range + template + void fillUniform(iterator begin, iterator end, uint32_t Min, uint32_t Max) { + unidg.fastgenerateUniform(static_cast(end - begin), Max - Min, + buffer); + for (size_t k = 0; k < buffer.size(); ++k) + *(begin + k) = Min + buffer[k]; + } + + // Max value is excluded from range + // throws exception if impossible + template + void fillClustered(iterator begin, iterator end, uint32_t Min, uint32_t Max) { + const uint32_t N = static_cast(end - begin); + const uint32_t range = Max - Min; + if (range < N) + throw runtime_error("can't generate that many in small interval."); + assert(range >= N); + if ((range == N) or (N < 10)) { + fillUniform(begin, end, Min, Max); + return; + } + const uint32_t cut = N / 2 + unidg.rand.getValue(range - N); + assert(cut >= N / 2); + assert(Max - Min - cut >= N - N / 2); + const double p = unidg.rand.getDouble(); + assert(p <= 1); + assert(p >= 0); + if (p <= 0.25) { + fillUniform(begin, begin + N / 2, Min, Min + cut); + fillClustered(begin + N / 2, end, Min + cut, Max); + } else if (p <= 0.5) { + fillClustered(begin, begin + N / 2, Min, Min + cut); + fillUniform(begin + N / 2, end, Min + cut, Max); + } else { + fillClustered(begin, begin + N / 2, Min, Min + cut); + fillClustered(begin + N / 2, end, Min + cut, Max); + } + } + + // Max value is excluded from range + vector generate(uint32_t N, uint32_t Max) { + return generateClustered(N, Max); + } + + // Max value is excluded from range + vector generateClustered(uint32_t N, uint32_t Max) { + vector ans(N); + fillClustered(ans.begin(), ans.end(), 0, Max); + return ans; + } +}; + +class ZipfianGenerator { +public: + uint32_t n; + double zetan, theta; + vector proba; + + ZRandom rand; + ZipfianGenerator(uint32_t seed = static_cast(time(NULL))) + : n(0), zetan(0), theta(0), proba(n), rand(seed) {} + + void init(int _items, double _zipfianconstant = 1.0) { + n = _items; + if (_items == 0) + throw runtime_error("no items?"); + theta = _zipfianconstant; + if (theta > 0) { + zetan = 1 / zeta(n, theta); + proba.clear(); + proba.resize(n, 0); + proba[0] = zetan; + for (uint32_t i = 1; i < n; ++i) + proba[i] = proba[i - 1] + zetan / pow(i + 1, theta); + } else { + proba.resize(n, 1.0 / n); + } + } + + void seed(uint32_t s) { rand.seed(s); } + + ZipfianGenerator(int _items, double _zipfianconstant, + uint32_t seed = static_cast(time(NULL))) + : n(_items), zetan(0), theta(_zipfianconstant), proba(n), rand(seed) { + init(_items, _zipfianconstant); + } + + double zeta(int n, double theta) { + double sum = 0; + for (long i = 0; i < n; i++) { + sum += 1 / (pow(i + 1, theta)); + } + return sum; + } + int nextInt() { + // Map z to the value + const double u = rand.getDouble(); + return static_cast(lower_bound(proba.begin(), proba.end(), u) - + proba.begin()); + } +}; + +vector generateZipfianArray32(uint32_t N, double power, + const uint32_t mask = 0xFFFFFFFFU) { + vector ans(N); + ZipfianGenerator zipf; + const uint32_t MAXVALUE = 1U << 22; + zipf.init(mask > MAXVALUE - 1 ? 
MAXVALUE : mask + 1, power); + for (size_t k = 0; k < N; ++k) + ans[k] = zipf.nextInt(); + return ans; +} + +size_t unite(const uint32_t *set1, const size_t length1, const uint32_t *set2, + const size_t length2, uint32_t *out) { + size_t pos = 0; + size_t k1 = 0, k2 = 0; + if (0 == length1) { + for (size_t k = 0; k < length2; ++k) + out[k] = set2[k]; + return length2; + } + if (0 == length2) { + for (size_t k = 0; k < length1; ++k) + out[k] = set1[k]; + return length1; + } + while (true) { + if (set1[k1] < set2[k2]) { + out[pos++] = set1[k1]; + ++k1; + if (k1 >= length1) { + for (; k2 < length2; ++k2) + out[pos++] = set2[k2]; + break; + } + } else if (set1[k1] == set2[k2]) { + out[pos++] = set1[k1]; + ++k1; + ++k2; + if (k1 >= length1) { + for (; k2 < length2; ++k2) + out[pos++] = set2[k2]; + break; + } + if (k2 >= length2) { + for (; k1 < length1; ++k1) + out[pos++] = set1[k1]; + break; + } + } else { // if (set1[k1]>set2[k2]) { + out[pos++] = set2[k2]; + ++k2; + if (k2 >= length2) { + for (; k1 < length1; ++k1) + out[pos++] = set1[k1]; + break; + } + } + } + return pos; +} + +vector unite(const vector &x, const vector &y) { + vector ans(x.size() + y.size()); + ans.resize(unite(x.data(), x.size(), y.data(), y.size(), ans.data())); + return ans; +} + +size_t classicalintersection(const uint32_t *set1, const size_t length1, + const uint32_t *set2, const size_t length2, + uint32_t *out) { + if ((0 == length1) or (0 == length2)) + return 0; + size_t answer = 0; + size_t k1 = 0, k2 = 0; + while (true) { + if (set1[k1] < set2[k2]) { + ++k1; + if (k1 == length1) + return answer; + } else if (set2[k2] < set1[k1]) { + ++k2; + if (k2 == length2) + return answer; + } else { + // (set2[k2] == set1[k1]) + out[answer++] = set1[k1]; + ++k1; + if (k1 == length1) + break; + ++k2; + if (k2 == length2) + break; + } + } + return answer; +} + +vector intersect(const vector &x, + const vector &y) { + vector ans(x.size() + y.size()); + ans.resize(classicalintersection(x.data(), x.size(), y.data(), y.size(), + ans.data())); + return ans; +} +/** + * Generate a pair of arrays. One small, one larger. 
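+ *
+ * For instance, minlength = 1000, sizeratio = 8 and intersectionratio = 0.25
+ * request arrays of roughly 1000 and 8000 values whose intersection holds
+ * about 250 elements.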
+ * + * minlength: length of the smallest of the two arrays + * Max is the largest possible value + * sizeratio * minlength : length of the largest of the two arrays + * intersectionratio * minlength : length of the intersection + */ +template +pair, vector> +getPair(generator gen, uint32_t minlength, uint32_t Max, float sizeratio, + float intersectionratio) { + if (sizeratio < 1) + throw runtime_error("sizeratio should be larger or equal to 1"); + if (intersectionratio < 0) + throw runtime_error("intersectionratio should be positive"); + if (intersectionratio > 1) + throw runtime_error("intersectionratio cannot be larger than 1"); + const uint32_t maxlenth = + static_cast(round(static_cast(minlength) * sizeratio)); + if (maxlenth > Max) + throw runtime_error( + "I can't generate an array so large in such a small range."); + if (maxlenth < minlength) + throw runtime_error("something went wrong, possibly an overflow."); + // we basically assume that, if we do nothing, intersections are very small + const uint32_t intersize = static_cast( + round(static_cast(minlength) * intersectionratio)); + + vector inter = gen.generate(intersize, Max); + vector smallest = + unite(gen.generate(static_cast(minlength - inter.size()), Max), + inter); + vector largest = unite( + gen.generate(static_cast(maxlenth - inter.size()), Max), inter); + vector intersection = intersect(smallest, largest); + + if (abs(static_cast(intersection.size()) / + static_cast(smallest.size()) - + intersectionratio) > 0.05) + throw runtime_error("Bad intersection ratio. Fix me."); + + if (abs(static_cast(largest.size()) / + static_cast(smallest.size()) - + sizeratio) > 0.05) + throw runtime_error("Bad size ratio. Fix me."); + return pair, vector>(smallest, largest); +} + +} // namespace SIMDCompressionLib + +#endif /* SIMDCompressionAndIntersection_SYNTHETIC_H_ */ diff --git a/include/SIMDCompressionAndIntersection/timer.h b/include/SIMDCompressionAndIntersection/timer.h new file mode 100644 index 0000000..9a46611 --- /dev/null +++ b/include/SIMDCompressionAndIntersection/timer.h @@ -0,0 +1,85 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. 
+ * + */ + +#ifndef SIMDCompressionAndIntersection_TIMER_H_ +#define SIMDCompressionAndIntersection_TIMER_H_ + +#include +#include +#include + +namespace SIMDCompressionLib { + +class WallClockTimer { +public: +#ifdef _WIN32 + typedef qpc_clock clock; +#else + typedef std::chrono::high_resolution_clock clock; +#endif + + std::chrono::time_point t1, t2; + WallClockTimer() : t1(), t2() { + t1 = clock::now(); + t2 = t1; + } + void reset() { + t1 = clock::now(); + t2 = t1; + } + uint64_t elapsed() { + std::chrono::microseconds delta = + std::chrono::duration_cast(t2 - t1); + return delta.count(); + } + uint64_t split() { + t2 = clock::now(); + return elapsed(); + } +}; + +#ifndef _WIN32 + +class CPUTimer { +public: + // clock_t t1, t2; + struct rusage t1, t2; + + CPUTimer() : t1(), t2() { + getrusage(RUSAGE_SELF, &t1); + // t1 = clock(); + t2 = t1; + } + void reset() { + getrusage(RUSAGE_SELF, &t1); + t2 = t1; + } + // proxy for userelapsed + uint64_t elapsed() { return totalelapsed(); } + + uint64_t totalelapsed() { return userelapsed() + systemelapsed(); } + // returns the *user* CPU time in micro seconds (mu s) + uint64_t userelapsed() { + return ((t2.ru_utime.tv_sec - t1.ru_utime.tv_sec) * 1000ULL * 1000ULL) + + ((t2.ru_utime.tv_usec - t1.ru_utime.tv_usec)); + } + + // returns the *system* CPU time in micro seconds (mu s) + uint64_t systemelapsed() { + return ((t2.ru_stime.tv_sec - t1.ru_stime.tv_sec) * 1000ULL * 1000ULL) + + ((t2.ru_stime.tv_usec - t1.ru_stime.tv_usec)); + } + + uint64_t split() { + getrusage(RUSAGE_SELF, &t2); + return elapsed(); + } +}; +#endif + +} // namespace SIMDCompressionLib + +#endif /* SIMDCompressionAndIntersection_TIMER_H_ */ diff --git a/include/SIMDCompressionAndIntersection/usimdbitpacking.h b/include/SIMDCompressionAndIntersection/usimdbitpacking.h new file mode 100644 index 0000000..8822bea --- /dev/null +++ b/include/SIMDCompressionAndIntersection/usimdbitpacking.h @@ -0,0 +1,150 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. 
+ * + * (c) Daniel Lemire + */ +#ifndef SIMDCompressionAndIntersection_USIMDBITPACKING_H_ +#define SIMDCompressionAndIntersection_USIMDBITPACKING_H_ + +#include "common.h" + +namespace SIMDCompressionLib { + +void __uSIMD_fastunpack1(const __m128i *__restrict__, uint32_t *__restrict__); +void __uSIMD_fastunpack2(const __m128i *__restrict__, uint32_t *__restrict__); +void __uSIMD_fastunpack3(const __m128i *__restrict__, uint32_t *__restrict__); +void __uSIMD_fastunpack4(const __m128i *__restrict__, uint32_t *__restrict__); +void __uSIMD_fastunpack5(const __m128i *__restrict__, uint32_t *__restrict__); +void __uSIMD_fastunpack6(const __m128i *__restrict__, uint32_t *__restrict__); +void __uSIMD_fastunpack7(const __m128i *__restrict__, uint32_t *__restrict__); +void __uSIMD_fastunpack8(const __m128i *__restrict__, uint32_t *__restrict__); +void __uSIMD_fastunpack9(const __m128i *__restrict__, uint32_t *__restrict__); +void __uSIMD_fastunpack10(const __m128i *__restrict__, uint32_t *__restrict__); +void __uSIMD_fastunpack11(const __m128i *__restrict__, uint32_t *__restrict__); +void __uSIMD_fastunpack12(const __m128i *__restrict__, uint32_t *__restrict__); +void __uSIMD_fastunpack13(const __m128i *__restrict__, uint32_t *__restrict__); +void __uSIMD_fastunpack14(const __m128i *__restrict__, uint32_t *__restrict__); +void __uSIMD_fastunpack15(const __m128i *__restrict__, uint32_t *__restrict__); +void __uSIMD_fastunpack16(const __m128i *__restrict__, uint32_t *__restrict__); +void __uSIMD_fastunpack17(const __m128i *__restrict__, uint32_t *__restrict__); +void __uSIMD_fastunpack18(const __m128i *__restrict__, uint32_t *__restrict__); +void __uSIMD_fastunpack19(const __m128i *__restrict__, uint32_t *__restrict__); +void __uSIMD_fastunpack20(const __m128i *__restrict__, uint32_t *__restrict__); +void __uSIMD_fastunpack21(const __m128i *__restrict__, uint32_t *__restrict__); +void __uSIMD_fastunpack22(const __m128i *__restrict__, uint32_t *__restrict__); +void __uSIMD_fastunpack23(const __m128i *__restrict__, uint32_t *__restrict__); +void __uSIMD_fastunpack24(const __m128i *__restrict__, uint32_t *__restrict__); +void __uSIMD_fastunpack25(const __m128i *__restrict__, uint32_t *__restrict__); +void __uSIMD_fastunpack26(const __m128i *__restrict__, uint32_t *__restrict__); +void __uSIMD_fastunpack27(const __m128i *__restrict__, uint32_t *__restrict__); +void __uSIMD_fastunpack28(const __m128i *__restrict__, uint32_t *__restrict__); +void __uSIMD_fastunpack29(const __m128i *__restrict__, uint32_t *__restrict__); +void __uSIMD_fastunpack30(const __m128i *__restrict__, uint32_t *__restrict__); +void __uSIMD_fastunpack31(const __m128i *__restrict__, uint32_t *__restrict__); +void __uSIMD_fastunpack32(const __m128i *__restrict__, uint32_t *__restrict__); + +void __uSIMD_fastpackwithoutmask0(const uint32_t *__restrict__, + __m128i *__restrict__); +void __uSIMD_fastpackwithoutmask1(const uint32_t *__restrict__, + __m128i *__restrict__); +void __uSIMD_fastpackwithoutmask2(const uint32_t *__restrict__, + __m128i *__restrict__); +void __uSIMD_fastpackwithoutmask3(const uint32_t *__restrict__, + __m128i *__restrict__); +void __uSIMD_fastpackwithoutmask4(const uint32_t *__restrict__, + __m128i *__restrict__); +void __uSIMD_fastpackwithoutmask5(const uint32_t *__restrict__, + __m128i *__restrict__); +void __uSIMD_fastpackwithoutmask6(const uint32_t *__restrict__, + __m128i *__restrict__); +void __uSIMD_fastpackwithoutmask7(const uint32_t *__restrict__, + __m128i *__restrict__); +void 
__uSIMD_fastpackwithoutmask8(const uint32_t *__restrict__, + __m128i *__restrict__); +void __uSIMD_fastpackwithoutmask9(const uint32_t *__restrict__, + __m128i *__restrict__); +void __uSIMD_fastpackwithoutmask10(const uint32_t *__restrict__, + __m128i *__restrict__); +void __uSIMD_fastpackwithoutmask11(const uint32_t *__restrict__, + __m128i *__restrict__); +void __uSIMD_fastpackwithoutmask12(const uint32_t *__restrict__, + __m128i *__restrict__); +void __uSIMD_fastpackwithoutmask13(const uint32_t *__restrict__, + __m128i *__restrict__); +void __uSIMD_fastpackwithoutmask14(const uint32_t *__restrict__, + __m128i *__restrict__); +void __uSIMD_fastpackwithoutmask15(const uint32_t *__restrict__, + __m128i *__restrict__); +void __uSIMD_fastpackwithoutmask16(const uint32_t *__restrict__, + __m128i *__restrict__); +void __uSIMD_fastpackwithoutmask17(const uint32_t *__restrict__, + __m128i *__restrict__); +void __uSIMD_fastpackwithoutmask18(const uint32_t *__restrict__, + __m128i *__restrict__); +void __uSIMD_fastpackwithoutmask19(const uint32_t *__restrict__, + __m128i *__restrict__); +void __uSIMD_fastpackwithoutmask20(const uint32_t *__restrict__, + __m128i *__restrict__); +void __uSIMD_fastpackwithoutmask21(const uint32_t *__restrict__, + __m128i *__restrict__); +void __uSIMD_fastpackwithoutmask22(const uint32_t *__restrict__, + __m128i *__restrict__); +void __uSIMD_fastpackwithoutmask23(const uint32_t *__restrict__, + __m128i *__restrict__); +void __uSIMD_fastpackwithoutmask24(const uint32_t *__restrict__, + __m128i *__restrict__); +void __uSIMD_fastpackwithoutmask25(const uint32_t *__restrict__, + __m128i *__restrict__); +void __uSIMD_fastpackwithoutmask26(const uint32_t *__restrict__, + __m128i *__restrict__); +void __uSIMD_fastpackwithoutmask27(const uint32_t *__restrict__, + __m128i *__restrict__); +void __uSIMD_fastpackwithoutmask28(const uint32_t *__restrict__, + __m128i *__restrict__); +void __uSIMD_fastpackwithoutmask29(const uint32_t *__restrict__, + __m128i *__restrict__); +void __uSIMD_fastpackwithoutmask30(const uint32_t *__restrict__, + __m128i *__restrict__); +void __uSIMD_fastpackwithoutmask31(const uint32_t *__restrict__, + __m128i *__restrict__); +void __uSIMD_fastpackwithoutmask32(const uint32_t *__restrict__, + __m128i *__restrict__); + +void __uSIMD_fastpack0(const uint32_t *__restrict__, __m128i *__restrict__); +void __uSIMD_fastpack1(const uint32_t *__restrict__, __m128i *__restrict__); +void __uSIMD_fastpack2(const uint32_t *__restrict__, __m128i *__restrict__); +void __uSIMD_fastpack3(const uint32_t *__restrict__, __m128i *__restrict__); +void __uSIMD_fastpack4(const uint32_t *__restrict__, __m128i *__restrict__); +void __uSIMD_fastpack5(const uint32_t *__restrict__, __m128i *__restrict__); +void __uSIMD_fastpack6(const uint32_t *__restrict__, __m128i *__restrict__); +void __uSIMD_fastpack7(const uint32_t *__restrict__, __m128i *__restrict__); +void __uSIMD_fastpack8(const uint32_t *__restrict__, __m128i *__restrict__); +void __uSIMD_fastpack9(const uint32_t *__restrict__, __m128i *__restrict__); +void __uSIMD_fastpack10(const uint32_t *__restrict__, __m128i *__restrict__); +void __uSIMD_fastpack11(const uint32_t *__restrict__, __m128i *__restrict__); +void __uSIMD_fastpack12(const uint32_t *__restrict__, __m128i *__restrict__); +void __uSIMD_fastpack13(const uint32_t *__restrict__, __m128i *__restrict__); +void __uSIMD_fastpack14(const uint32_t *__restrict__, __m128i *__restrict__); +void __uSIMD_fastpack15(const uint32_t *__restrict__, __m128i *__restrict__); +void 
__uSIMD_fastpack16(const uint32_t *__restrict__, __m128i *__restrict__); +void __uSIMD_fastpack17(const uint32_t *__restrict__, __m128i *__restrict__); +void __uSIMD_fastpack18(const uint32_t *__restrict__, __m128i *__restrict__); +void __uSIMD_fastpack19(const uint32_t *__restrict__, __m128i *__restrict__); +void __uSIMD_fastpack20(const uint32_t *__restrict__, __m128i *__restrict__); +void __uSIMD_fastpack21(const uint32_t *__restrict__, __m128i *__restrict__); +void __uSIMD_fastpack22(const uint32_t *__restrict__, __m128i *__restrict__); +void __uSIMD_fastpack23(const uint32_t *__restrict__, __m128i *__restrict__); +void __uSIMD_fastpack24(const uint32_t *__restrict__, __m128i *__restrict__); +void __uSIMD_fastpack25(const uint32_t *__restrict__, __m128i *__restrict__); +void __uSIMD_fastpack26(const uint32_t *__restrict__, __m128i *__restrict__); +void __uSIMD_fastpack27(const uint32_t *__restrict__, __m128i *__restrict__); +void __uSIMD_fastpack28(const uint32_t *__restrict__, __m128i *__restrict__); +void __uSIMD_fastpack29(const uint32_t *__restrict__, __m128i *__restrict__); +void __uSIMD_fastpack30(const uint32_t *__restrict__, __m128i *__restrict__); +void __uSIMD_fastpack31(const uint32_t *__restrict__, __m128i *__restrict__); +void __uSIMD_fastpack32(const uint32_t *__restrict__, __m128i *__restrict__); + +} // namespace SIMDCompressionLib + +#endif /* SIMDBITPACKING_H_ */ diff --git a/include/SIMDCompressionAndIntersection/util.h b/include/SIMDCompressionAndIntersection/util.h new file mode 100644 index 0000000..f94b7dd --- /dev/null +++ b/include/SIMDCompressionAndIntersection/util.h @@ -0,0 +1,126 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. + * + * (c) Daniel Lemire + */ + +#ifndef SIMDCompressionAndIntersection_UTIL_H_ +#define SIMDCompressionAndIntersection_UTIL_H_ + +#include "common.h" + +namespace SIMDCompressionLib { + +inline uint32_t random(int b) { + if (b == 32) + return rand(); + return rand() % (1U << b); +} + +// taken from stackoverflow +#ifndef NDEBUG +#define ASSERT(condition, message) \ + do { \ + if (!(condition)) { \ + std::cerr << "Assertion `" #condition "` failed in " << __FILE__ \ + << " line " << __LINE__ << ": " << message << std::endl; \ + std::exit(EXIT_FAILURE); \ + } \ + } while (false) +#else +#define ASSERT(condition, message) \ + do { \ + } while (false) +#endif + +CONST_FUNCTION +inline uint32_t gccbits(const uint32_t v) { + return v == 0 ? 0 : 32 - __builtin_clz(v); +} + +/** + * Treats __m128i as 4 x 32-bit integers and asks for the max + * number of bits used (integer logarithm). 
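+ * For example, the four lanes {3, 17, 0, 255} give
+ * gccbits(3 | 17 | 0 | 255) = gccbits(255) = 8.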
+ */ +inline uint32_t maxbitas32int(const __m128i accumulator) { + SIMDCOMP_ALIGNED(16) uint32_t tmparray[4]; + MM_STORE_SI_128(reinterpret_cast<__m128i *>(tmparray), accumulator); + return gccbits(tmparray[0] | tmparray[1] | tmparray[2] | tmparray[3]); +} + +static CONST_FUNCTION bool divisibleby(size_t a, uint32_t x) { + return (a % x == 0); +} + +#ifdef __GNUC__ +__attribute__((unused)) +#endif +static void +checkifdivisibleby(size_t a, uint32_t x) { + if (!divisibleby(a, x)) { + std::ostringstream convert; + convert << a << " not divisible by " << x; + throw std::logic_error(convert.str()); + } +} + +template +PURE_FUNCTION uint32_t maxbits(const iterator &begin, const iterator &end) { + uint32_t accumulator = 0; + for (iterator k = begin; k != end; ++k) { + accumulator |= *k; + } + return gccbits(accumulator); +} + +template +CONST_FUNCTION inline bool needPaddingTo128Bits(const T *inbyte) { + return (reinterpret_cast(inbyte) & 15) != 0; +} + +template +CONST_FUNCTION inline bool needPaddingTo32Bits(const T *inbyte) { + return (reinterpret_cast(inbyte) & 3) != 0; +} + +template CONST_FUNCTION T *padTo32bits(T *inbyte) { + return reinterpret_cast((reinterpret_cast(inbyte) + 3) & ~3); +} + +template CONST_FUNCTION const T *padTo32bits(const T *inbyte) { + return reinterpret_cast((reinterpret_cast(inbyte) + 3) & + ~3); +} + +#ifndef _MSC_VER +CONST_FUNCTION +inline uint32_t asmbits(const uint32_t v) { + if (v == 0) + return 0; + uint32_t answer; + __asm__("bsr %1, %0;" : "=r"(answer) : "r"(v)); + return answer + 1; +} +#else +inline uint32_t asmbits(const uint32_t v) { + unsigned long index; + return (v == 0 || _BitScanReverse(&index, v) == 0) ? 0 : (index + 1); +} +#endif + +template +bool is_strictlysorted(iterator first, iterator last) { + iterator next = first; + ++next; + while (next < last) { + if (*first >= *next) + return false; + ++first; + ++next; + } + return true; +} +} // namespace SIMDCompressionLib + +#endif /* SIMDCompressionAndIntersection_UTIL_H_ */ diff --git a/include/SIMDCompressionAndIntersection/variablebyte.h b/include/SIMDCompressionAndIntersection/variablebyte.h new file mode 100644 index 0000000..454000d --- /dev/null +++ b/include/SIMDCompressionAndIntersection/variablebyte.h @@ -0,0 +1,1328 @@ +/** + * This code is released under the + * Apache License Version 2.0 http://www.apache.org/licenses/. + * + * (c) Daniel Lemire, http://lemire.me/en/ + */ + +#ifndef SIMDCompressionAndIntersection_VARIABLEBYTE_H_ +#define SIMDCompressionAndIntersection_VARIABLEBYTE_H_ +#include "common.h" +#include "codecs.h" +#include "util.h" + +namespace SIMDCompressionLib { + +/*** + * VariableByte and VByte are basically identical, except that + * one uses 0..0..0..1 to indicate 4 whereas the other one uses 1..1..1..0. + * The latter is maybe more common. 
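+ *
+ * For example, the value 300 = 0b10'0101100 splits into the 7-bit groups 44
+ * and 2: VariableByte emits the bytes 0x2C 0x82 (the high bit marks the last
+ * byte), while VByte emits 0xAC 0x02 (the high bit marks a continuation, as
+ * in the usual varint convention).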
+ */ + +template class VariableByte : public IntegerCODEC { +public: + void encodeArray(uint32_t *in, const size_t length, uint32_t *out, + size_t &nvalue) { + uint8_t *bout = reinterpret_cast(out); + const uint8_t *const initbout = reinterpret_cast(out); + size_t bytenvalue = nvalue * sizeof(uint32_t); + encodeToByteArray(in, length, bout, bytenvalue); + bout += bytenvalue; + while (needPaddingTo32Bits(bout)) { + *bout++ = 0; + } + const size_t storageinbytes = bout - initbout; + assert((storageinbytes % 4) == 0); + nvalue = storageinbytes / 4; + } + + // write one compressed integer (without differential coding) + // returns the number of bytes written + size_t encodeOneIntegerToByteArray(uint32_t val, uint8_t *bout) { + const uint8_t *const initbout = bout; + if (val < (1U << 7)) { + *bout = static_cast(val | (1U << 7)); + ++bout; + } else if (val < (1U << 14)) { + *bout = extract7bits<0>(val); + ++bout; + *bout = extract7bitsmaskless<1>(val) | (1U << 7); + ++bout; + } else if (val < (1U << 21)) { + *bout = extract7bits<0>(val); + ++bout; + *bout = extract7bits<1>(val); + ++bout; + *bout = extract7bitsmaskless<2>(val) | (1U << 7); + ++bout; + } else if (val < (1U << 28)) { + *bout = extract7bits<0>(val); + ++bout; + *bout = extract7bits<1>(val); + ++bout; + *bout = extract7bits<2>(val); + ++bout; + *bout = extract7bitsmaskless<3>(val) | (1U << 7); + ++bout; + } else { + *bout = extract7bits<0>(val); + ++bout; + *bout = extract7bits<1>(val); + ++bout; + *bout = extract7bits<2>(val); + ++bout; + *bout = extract7bits<3>(val); + ++bout; + *bout = extract7bitsmaskless<4>(val) | (1U << 7); + ++bout; + } + return bout - initbout; + } + + void encodeToByteArray(uint32_t *in, const size_t length, uint8_t *bout, + size_t &nvalue) { + const uint8_t *const initbout = bout; + uint32_t prev = 0; + for (size_t k = 0; k < length; ++k) { + const uint32_t val = delta ? in[k] - prev : in[k]; + if (delta) + prev = in[k]; + /** + * Code below could be shorter. Whether it could be faster + * depends on your compiler and machine. 
+ */ + if (val < (1U << 7)) { + *bout = static_cast(val | (1U << 7)); + ++bout; + } else if (val < (1U << 14)) { + *bout = extract7bits<0>(val); + ++bout; + *bout = extract7bitsmaskless<1>(val) | (1U << 7); + ++bout; + } else if (val < (1U << 21)) { + *bout = extract7bits<0>(val); + ++bout; + *bout = extract7bits<1>(val); + ++bout; + *bout = extract7bitsmaskless<2>(val) | (1U << 7); + ++bout; + } else if (val < (1U << 28)) { + *bout = extract7bits<0>(val); + ++bout; + *bout = extract7bits<1>(val); + ++bout; + *bout = extract7bits<2>(val); + ++bout; + *bout = extract7bitsmaskless<3>(val) | (1U << 7); + ++bout; + } else { + *bout = extract7bits<0>(val); + ++bout; + *bout = extract7bits<1>(val); + ++bout; + *bout = extract7bits<2>(val); + ++bout; + *bout = extract7bits<3>(val); + ++bout; + *bout = extract7bitsmaskless<4>(val) | (1U << 7); + ++bout; + } + } + nvalue = bout - initbout; + } + + const uint32_t *decodeArray(const uint32_t *in, const size_t length, + uint32_t *out, size_t &nvalue) { + decodeFromByteArray((const uint8_t *)in, length * sizeof(uint32_t), out, + nvalue); + return in + length; + } + + // determine how many padding bytes were used + int paddingBytes(const uint32_t *in, const size_t length) { + if (length == 0) + return 0; + uint32_t lastword = in[length - 1]; + if (lastword < (1U << 8)) { + return 3; + } else if (lastword < (1U << 16)) { + return 2; + } else if (lastword < (1U << 24)) { + return 1; + } + return 0; + } + + // how many bytes are required to store this integer? + int storageCost(uint32_t val) { + if (val < (1U << 7)) { + return 1; + } else if (val < (1U << 14)) { + return 2; + } else if (val < (1U << 21)) { + return 3; + } else if (val < (1U << 28)) { + return 4; + } else { + return 5; + } + } + + const uint8_t *decodeFromByteArray(const uint8_t *inbyte, const size_t length, + uint32_t *out, size_t &nvalue) { + uint32_t prev = 0; + if (length == 0) { + nvalue = 0; + return inbyte; // abort + } + const uint8_t *const endbyte = inbyte + length; + const uint32_t *const initout(out); + // this assumes that there is a value to be read + + while (endbyte > inbyte + 5) { + if (delta) { + uint8_t c; + uint32_t v; + + c = inbyte[0]; + v = c & 0x7F; + if (c >= 128) { + inbyte += 1; + *out++ = (prev = v + prev); + continue; + } + + c = inbyte[1]; + v |= (c & 0x7F) << 7; + if (c >= 128) { + inbyte += 2; + *out++ = (prev = v + prev); + continue; + } + + c = inbyte[2]; + v |= (c & 0x7F) << 14; + if (c >= 128) { + inbyte += 3; + *out++ = (prev = v + prev); + continue; + } + + c = inbyte[3]; + v |= (c & 0x7F) << 21; + if (c >= 128) { + inbyte += 4; + *out++ = (prev = v + prev); + continue; + } + + c = inbyte[4]; + inbyte += 5; + v |= (c & 0x0F) << 28; + *out++ = (prev = v + prev); + } else { + uint8_t c; + uint32_t v; + + c = inbyte[0]; + v = c & 0x7F; + if (c >= 128) { + inbyte += 1; + *out++ = v; + continue; + } + + c = inbyte[1]; + v |= (c & 0x7F) << 7; + if (c >= 128) { + inbyte += 2; + *out++ = v; + continue; + } + + c = inbyte[2]; + v |= (c & 0x7F) << 14; + if (c >= 128) { + inbyte += 3; + *out++ = v; + continue; + } + + c = inbyte[3]; + v |= (c & 0x7F) << 21; + if (c >= 128) { + inbyte += 4; + *out++ = v; + continue; + } + + c = inbyte[4]; + inbyte += 5; + v |= (c & 0x0F) << 28; + *out++ = v; + } + } + while (endbyte > inbyte) { + unsigned int shift = 0; + for (uint32_t v = 0; endbyte > inbyte; shift += 7) { + uint8_t c = *inbyte++; + v += ((c & 127) << shift); + if ((c & 128)) { + *out++ = delta ? 
(prev = v + prev) : v; + break; + } + } + } + nvalue = out - initout; + return inbyte; + } + + // Performs a lower bound find in the encoded array. + // length is the size of the compressed input + // Returns the index + size_t findLowerBound(const uint32_t *in, const size_t length, uint32_t key, + uint32_t *presult) { + uint32_t prev = 0; + if (length == 0) { + return 0; // abort + } + const uint8_t *inbyte = reinterpret_cast(in); + const uint8_t *const endbyte = + reinterpret_cast(in + length); + size_t i = 0; + // this assumes that there is a value to be read + + while (endbyte > inbyte + 5) { + if (delta) { + uint8_t c; + uint32_t v; + + c = inbyte[0]; + v = c & 0x7F; + if (c >= 128) { + inbyte += 1; + prev = v + prev; + if (prev >= key) { + *presult = prev; + return i; + } + i++; + continue; + } + + c = inbyte[1]; + v |= (c & 0x7F) << 7; + if (c >= 128) { + inbyte += 2; + prev = v + prev; + if (prev >= key) { + *presult = prev; + return i; + } + i++; + continue; + } + + c = inbyte[2]; + v |= (c & 0x7F) << 14; + if (c >= 128) { + inbyte += 3; + prev = v + prev; + if (prev >= key) { + *presult = prev; + return i; + } + i++; + continue; + } + + c = inbyte[3]; + v |= (c & 0x7F) << 21; + if (c >= 128) { + inbyte += 4; + prev = v + prev; + if (prev >= key) { + *presult = prev; + return i; + } + i++; + continue; + } + + c = inbyte[4]; + inbyte += 5; + v |= (c & 0x0F) << 28; + prev = v + prev; + if (prev >= key) { + *presult = prev; + return i; + } + i++; + } else { + uint8_t c; + uint32_t v; + + c = inbyte[0]; + v = c & 0x7F; + if (c >= 128) { + inbyte += 1; + if (v >= key) { + *presult = v; + return i; + } + i++; + continue; + } + + c = inbyte[1]; + v |= (c & 0x7F) << 7; + if (c >= 128) { + inbyte += 2; + if (v >= key) { + *presult = v; + return i; + } + i++; + continue; + } + + c = inbyte[2]; + v |= (c & 0x7F) << 14; + if (c >= 128) { + inbyte += 3; + if (v >= key) { + *presult = v; + return i; + } + i++; + continue; + } + + c = inbyte[3]; + v |= (c & 0x7F) << 21; + if (c >= 128) { + inbyte += 4; + if (v >= key) { + *presult = v; + return i; + } + i++; + continue; + } + + c = inbyte[4]; + inbyte += 5; + v |= (c & 0x0F) << 28; + if (v >= key) { + *presult = v; + return i; + } + i++; + } + } + while (endbyte > inbyte) { + unsigned int shift = 0; + for (uint32_t v = 0; endbyte > inbyte; shift += 7) { + uint8_t c = *inbyte++; + v += ((c & 127) << shift); + if ((c & 128)) { + if (delta) { + prev = v + prev; + if (prev >= key) { + *presult = prev; + return i; + } + } else { + if (v >= key) { + *presult = v; + return i; + } + } + i++; + break; + } + } + } + return i; + } + + // append a key. Keys must be in sorted order. We assume that there is + // enough room and that delta encoding was used. + // Returns the new size of the compressed array *in bytes* + size_t appendToByteArray(uint8_t *in, const size_t bytesize, + uint32_t previous_key, uint32_t key) { + assert(delta); // no performance impact expected. + uint8_t *byteininit = (uint8_t *)in; + uint8_t *bytein = (uint8_t *)in + bytesize; + bytein += encodeOneIntegerToByteArray(key - previous_key, bytein); + return bytein - byteininit; + } + + // insert the key in sorted order. We assume that there is enough room and + // that delta encoding was used. 
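+  // For example, inserting 12 into the delta-coded sequence {5, 9, 20}
+  // (stored gaps 5, 4, 11) rewrites the gap 11 as the pair 3, 8, so the
+  // decoded sequence becomes {5, 9, 12, 20}.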
+ size_t insert(uint32_t *in, const size_t length, uint32_t key) { + size_t bytesize = length * 4; + bytesize -= paddingBytes(in, length); + uint8_t *bytein = (uint8_t *)in; + uint8_t *byteininit = bytein; + size_t bl = insertInByteArray(bytein, bytesize, key); + bytein += bl; + + while (needPaddingTo32Bits(bytein)) { + *bytein++ = 0; + } + size_t storageinbytes = bytein - byteininit; + assert((storageinbytes % 4) == 0); + return storageinbytes / 4; + } + + // insert the key in sorted order. We assume that there is enough room and + // that delta encoding was used. + // the new size is returned + size_t insertInByteArray(uint8_t *inbyte, const size_t length, uint32_t key) { + uint32_t prev = 0; + assert(delta); + const uint8_t *const endbyte = + reinterpret_cast(inbyte + length); + // this assumes that there is a value to be read + + while (endbyte > inbyte + 5) { + uint8_t c; + uint32_t v; + + c = inbyte[0]; + v = c & 0x7F; + if (c >= 128) { + inbyte += 1; + prev = v + prev; + if (prev >= key) { + return length + + __insert(inbyte, prev - v, key, prev, endbyte - inbyte); + } + continue; + } + + c = inbyte[1]; + v |= (c & 0x7F) << 7; + if (c >= 128) { + inbyte += 2; + prev = v + prev; + if (prev >= key) { + return length + + __insert(inbyte, prev - v, key, prev, endbyte - inbyte); + } + continue; + } + + c = inbyte[2]; + v |= (c & 0x7F) << 14; + if (c >= 128) { + inbyte += 3; + prev = v + prev; + if (prev >= key) { + return length + + __insert(inbyte, prev - v, key, prev, endbyte - inbyte); + } + continue; + } + + c = inbyte[3]; + v |= (c & 0x7F) << 21; + if (c >= 128) { + inbyte += 4; + prev = v + prev; + if (prev >= key) { + return length + + __insert(inbyte, prev - v, key, prev, endbyte - inbyte); + } + continue; + } + + c = inbyte[4]; + inbyte += 5; + v |= (c & 0x0F) << 28; + prev = v + prev; + if (prev >= key) { + return length + __insert(inbyte, prev - v, key, prev, endbyte - inbyte); + } + } + while (endbyte > inbyte) { + unsigned int shift = 0; + for (uint32_t v = 0; endbyte > inbyte; shift += 7) { + uint8_t c = *inbyte++; + v += ((c & 127) << shift); + if ((c & 128)) { + prev = v + prev; + if (prev >= key) { + return length + + __insert(inbyte, prev - v, key, prev, endbyte - inbyte); + } + break; + } + } + } + // if we make it here, then we need to append + assert(key >= prev); + return length + encodeOneIntegerToByteArray(key - prev, inbyte); + } + + // Returns a decompressed value in an encoded array + // could be greatly optimized in the non-differential coding case: currently + // just for delta coding + uint32_t select(uint32_t *in, size_t index) { + assert(delta); + uint32_t prev = 0; + size_t i = 0; + const uint8_t *inbyte = reinterpret_cast(in); + while (i <= index) { + uint8_t c; + uint32_t v; + + c = inbyte[0]; + v = c & 0x7F; + if (c >= 128) { + inbyte += 1; + prev = v + prev; + i++; + continue; + } + + c = inbyte[1]; + v |= (c & 0x7F) << 7; + if (c >= 128) { + inbyte += 2; + prev = v + prev; + i++; + continue; + } + + c = inbyte[2]; + v |= (c & 0x7F) << 14; + if (c >= 128) { + inbyte += 3; + prev = v + prev; + i++; + continue; + } + + c = inbyte[3]; + v |= (c & 0x7F) << 21; + if (c >= 128) { + inbyte += 4; + prev = v + prev; + i++; + continue; + } + + c = inbyte[4]; + inbyte += 5; + v |= (c & 0x0F) << 28; + prev = v + prev; + i++; + } + assert(i == index + 1); + return prev; + } + + string name() const { + if (delta) + return "VariableByteDelta"; + else + return "VariableByte"; + } + +private: + // convenience function used by insert, writes key and newvalue to 
compressed + // stream, and return + // extra storage used, pointer should be right after where nextvalue is right + // now + size_t __insert(uint8_t *in, uint32_t previous, uint32_t key, + uint32_t nextvalue, size_t followingbytes) { + assert(nextvalue >= key); + assert(key >= previous); + size_t oldstorage = storageCost(nextvalue - previous); + size_t newstorage = + storageCost(nextvalue - key) + storageCost(key - previous); + assert(newstorage >= oldstorage); + if (newstorage > oldstorage) + std::memmove(in + newstorage - oldstorage, in, followingbytes); + uint8_t *newin = in - oldstorage; + newin += encodeOneIntegerToByteArray(key - previous, newin); + newin += encodeOneIntegerToByteArray(nextvalue - key, newin); + assert(newin == in + newstorage - oldstorage); + return newstorage - oldstorage; + } + + template uint8_t extract7bits(const uint32_t val) { + return static_cast((val >> (7 * i)) & ((1U << 7) - 1)); + } + + template uint8_t extract7bitsmaskless(const uint32_t val) { + return static_cast((val >> (7 * i))); + } +}; + +template class VByte : public IntegerCODEC { +public: + void encodeArray(uint32_t *in, const size_t length, uint32_t *out, + size_t &nvalue) { + uint8_t *bout = reinterpret_cast(out); + const uint8_t *const initbout = reinterpret_cast(out); + size_t bytenvalue = nvalue * sizeof(uint32_t); + encodeToByteArray(in, length, bout, bytenvalue); + bout += bytenvalue; + while (needPaddingTo32Bits(bout)) { + *bout++ = 0xFF; + } + const size_t storageinbytes = bout - initbout; + assert((storageinbytes % 4) == 0); + nvalue = storageinbytes / 4; + } + + void encodeToByteArray(uint32_t *in, const size_t length, uint8_t *bout, + size_t &nvalue) { + uint32_t prev = 0; + const uint8_t *const initbout = bout; + for (size_t k = 0; k < length; ++k) { + const uint32_t val = delta ? (in[k] - prev) : in[k]; + if (delta) + prev = in[k]; + /** + * Code below could be shorter. Whether it could be faster + * depends on your compiler and machine. 
+ */ + if (val < (1U << 7)) { + *bout = val & 0x7F; + ++bout; + } else if (val < (1U << 14)) { + *bout = static_cast((val & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(val >> 7); + ++bout; + } else if (val < (1U << 21)) { + *bout = static_cast((val & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(((val >> 7) & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(val >> 14); + ++bout; + } else if (val < (1U << 28)) { + *bout = static_cast((val & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(((val >> 7) & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(((val >> 14) & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(val >> 21); + ++bout; + } else { + *bout = static_cast((val & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(((val >> 7) & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(((val >> 14) & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(((val >> 21) & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(val >> 28); + ++bout; + } + } + nvalue = bout - initbout; + } + + // write one compressed integer (without differential coding) + // returns the number of bytes written + size_t encodeOneIntegerToByteArray(uint32_t val, uint8_t *bout) { + const uint8_t *const initbout = bout; + if (val < (1U << 7)) { + *bout = val & 0x7F; + ++bout; + } else if (val < (1U << 14)) { + *bout = static_cast((val & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(val >> 7); + ++bout; + } else if (val < (1U << 21)) { + *bout = static_cast((val & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(((val >> 7) & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(val >> 14); + ++bout; + } else if (val < (1U << 28)) { + *bout = static_cast((val & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(((val >> 7) & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(((val >> 14) & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(val >> 21); + ++bout; + } else { + *bout = static_cast((val & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(((val >> 7) & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(((val >> 14) & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(((val >> 21) & 0x7F) | (1U << 7)); + ++bout; + *bout = static_cast(val >> 28); + ++bout; + } + return bout - initbout; + } + + // determine how many padding bytes were used + int paddingBytes(const uint32_t *in, const size_t length) { + if (length == 0) + return 0; + uint32_t lastword = in[length - 1]; + lastword = ~lastword; + if (lastword < (1U << 8)) { + return 3; + } else if (lastword < (1U << 16)) { + return 2; + } else if (lastword < (1U << 24)) { + return 1; + } + return 0; + } + + const uint32_t *decodeArray(const uint32_t *in, const size_t length, + uint32_t *out, size_t &nvalue) { + decodeFromByteArray((const uint8_t *)in, length * sizeof(uint32_t), out, + nvalue); + return in + length; + } + + // how many bytes are required to store this integer? 
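// ---------------------------------------------------------------------------
// Illustrative, standalone sketch (not part of this header; helper names are
// hypothetical): the branches above emit each uint32_t as 1..5 bytes carrying
// up to 7 payload bits each, with the high bit set on every byte except the
// last one ("more bytes follow"). A minimal encoder/decoder pair using that
// convention, as its own .cc:
#include <cassert>
#include <cstdint>
#include <vector>

// Encode one value, low 7 bits first; high bit marks "continuation".
inline void sketch_vbyte_put(uint32_t v, std::vector<uint8_t> &out) {
  while (v >= 128) {
    out.push_back(static_cast<uint8_t>((v & 0x7F) | 0x80));
    v >>= 7;
  }
  out.push_back(static_cast<uint8_t>(v)); // last byte: high bit clear
}

// Decode one value starting at offset i; advances i past the encoded bytes.
inline uint32_t sketch_vbyte_get(const std::vector<uint8_t> &in, size_t &i) {
  uint32_t v = 0;
  for (unsigned shift = 0;; shift += 7) {
    uint8_t c = in[i++];
    v |= static_cast<uint32_t>(c & 0x7F) << shift;
    if (c < 128) break; // high bit clear: this was the last byte
  }
  return v;
}

int main() {
  std::vector<uint8_t> buf;
  for (uint32_t v : {0u, 127u, 128u, 1u << 20, 0xFFFFFFFFu})
    sketch_vbyte_put(v, buf);
  size_t i = 0;
  for (uint32_t v : {0u, 127u, 128u, 1u << 20, 0xFFFFFFFFu})
    assert(sketch_vbyte_get(buf, i) == v);
  return 0;
}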
+ int storageCost(uint32_t val) { + if (val < (1U << 7)) { + return 1; + } else if (val < (1U << 14)) { + return 2; + } else if (val < (1U << 21)) { + return 3; + } else if (val < (1U << 28)) { + return 4; + } else { + return 5; + } + } + + const uint8_t *decodeFromByteArray(const uint8_t *inbyte, const size_t length, + uint32_t *out, size_t &nvalue) { + uint32_t prev = 0; + if (length == 0) { + nvalue = 0; + return inbyte; // abort + } + const uint8_t *const endbyte = inbyte + length; + const uint32_t *const initout(out); + // this assumes that there is a value to be read + + while (endbyte > inbyte + 5) { + if (delta) { + uint8_t c; + uint32_t v; + + c = inbyte[0]; + v = c & 0x7F; + if (c < 128) { + inbyte += 1; + *out++ = (prev = v + prev); + continue; + } + + c = inbyte[1]; + v |= (c & 0x7F) << 7; + if (c < 128) { + inbyte += 2; + *out++ = (prev = v + prev); + continue; + } + + c = inbyte[2]; + v |= (c & 0x7F) << 14; + if (c < 128) { + inbyte += 3; + *out++ = (prev = v + prev); + continue; + } + + c = inbyte[3]; + v |= (c & 0x7F) << 21; + if (c < 128) { + inbyte += 4; + *out++ = (prev = v + prev); + continue; + } + + c = inbyte[4]; + inbyte += 5; + v |= (c & 0x0F) << 28; + *out++ = (prev = v + prev); + } else { + uint8_t c; + uint32_t v; + + c = inbyte[0]; + v = c & 0x7F; + if (c < 128) { + inbyte += 1; + *out++ = v; + continue; + } + + c = inbyte[1]; + v |= (c & 0x7F) << 7; + if (c < 128) { + inbyte += 2; + *out++ = v; + continue; + } + + c = inbyte[2]; + v |= (c & 0x7F) << 14; + if (c < 128) { + inbyte += 3; + *out++ = v; + continue; + } + + c = inbyte[3]; + v |= (c & 0x7F) << 21; + if (c < 128) { + inbyte += 4; + *out++ = v; + continue; + } + + c = inbyte[4]; + inbyte += 5; + v |= (c & 0x0F) << 28; + *out++ = v; + } + } + while (endbyte > inbyte) { + unsigned int shift = 0; + for (uint32_t v = 0; endbyte > inbyte; shift += 7) { + uint8_t c = *inbyte++; + v += ((c & 127) << shift); + if ((c < 128)) { + *out++ = delta ? (prev = v + prev) : v; + break; + } + } + } + nvalue = out - initout; + return inbyte; + } + + // Performs a lower bound find in the encoded array. 
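// ---------------------------------------------------------------------------
// Illustrative, standalone sketch (assumed names, not part of this header):
// when delta is enabled the stream stores gaps between consecutive sorted
// values, which keeps each varint short, and decodeFromByteArray rebuilds the
// originals as a running prefix sum (prev = v + prev). The idea in isolation:
#include <cassert>
#include <cstdint>
#include <vector>

// sorted ids -> gaps (first gap measured from 0, matching prev = 0 above)
std::vector<uint32_t> sketch_to_gaps(const std::vector<uint32_t> &sorted) {
  std::vector<uint32_t> gaps;
  uint32_t prev = 0;
  for (uint32_t v : sorted) { gaps.push_back(v - prev); prev = v; }
  return gaps;
}

// gaps -> original ids via prefix sum
std::vector<uint32_t> sketch_from_gaps(const std::vector<uint32_t> &gaps) {
  std::vector<uint32_t> out;
  uint32_t prev = 0;
  for (uint32_t g : gaps) { prev += g; out.push_back(prev); }
  return out;
}

int main() {
  std::vector<uint32_t> ids = {3, 9, 10, 40, 1000};
  assert(sketch_from_gaps(sketch_to_gaps(ids)) == ids);
  return 0;
}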
+ // length is the size of the compressed input + // Returns the index and the value found (presult) + size_t findLowerBound(const uint32_t *in, const size_t length, uint32_t key, + uint32_t *presult) { + uint32_t prev = 0; + if (length == 0) { + return 0; // abort + } + size_t i = 0; + const uint8_t *inbyte = reinterpret_cast(in); + const uint8_t *const endbyte = + reinterpret_cast(in + length); + // this assumes that there is a value to be read + + while (endbyte > inbyte + 5) { + if (delta) { + uint8_t c; + uint32_t v; + + c = inbyte[0]; + v = c & 0x7F; + if (c < 128) { + inbyte += 1; + prev = v + prev; + if (prev >= key) { + *presult = prev; + return i; + } + i++; + continue; + } + + c = inbyte[1]; + v |= (c & 0x7F) << 7; + if (c < 128) { + inbyte += 2; + prev = v + prev; + if (prev >= key) { + *presult = prev; + return i; + } + i++; + continue; + } + + c = inbyte[2]; + v |= (c & 0x7F) << 14; + if (c < 128) { + inbyte += 3; + prev = v + prev; + if (prev >= key) { + *presult = prev; + return i; + } + i++; + continue; + } + + c = inbyte[3]; + v |= (c & 0x7F) << 21; + if (c < 128) { + inbyte += 4; + prev = v + prev; + if (prev >= key) { + *presult = prev; + return i; + } + i++; + continue; + } + + c = inbyte[4]; + inbyte += 5; + v |= (c & 0x0F) << 28; + prev = v + prev; + if (prev >= key) { + *presult = prev; + return i; + } + i++; + } else { + uint8_t c; + uint32_t v; + + c = inbyte[0]; + v = c & 0x7F; + if (c < 128) { + inbyte += 1; + if (v >= key) { + *presult = v; + return i; + } + i++; + continue; + } + + c = inbyte[1]; + v |= (c & 0x7F) << 7; + if (c < 128) { + inbyte += 2; + if (v >= key) { + *presult = v; + return i; + } + i++; + continue; + } + + c = inbyte[2]; + v |= (c & 0x7F) << 14; + if (c < 128) { + inbyte += 3; + if (v >= key) { + *presult = v; + return i; + } + i++; + continue; + } + + c = inbyte[3]; + v |= (c & 0x7F) << 21; + if (c < 128) { + inbyte += 4; + if (v >= key) { + *presult = v; + return i; + } + i++; + continue; + } + + c = inbyte[4]; + inbyte += 5; + v |= (c & 0x0F) << 28; + if (v >= key) { + *presult = v; + return i; + } + i++; + } + } + while (endbyte > inbyte) { + unsigned int shift = 0; + for (uint32_t v = 0; endbyte > inbyte; shift += 7) { + uint8_t c = *inbyte++; + v += ((c & 127) << shift); + if ((c < 128)) { + if (delta) { + prev = v + prev; + if (prev >= key) { + *presult = prev; + return i; + } + } else { + if (v >= key) { + *presult = v; + return i; + } + } + i++; + break; + } + } + } + return i; + } + + // append a key. Keys must be in sorted order. We assume that there is + // enough room and that delta encoding was used. + /*size_t append(uint32_t *in, const size_t length, uint32_t previous_key, + uint32_t key) { + size_t bytesize = (length * 4) - paddingBytes(in, length); + uint8_t *byteininit = (uint8_t *)in; + uint8_t *bytein = (uint8_t *)in + bytesize; + bytein += encodeOneIntegerToByteArray(key - previous_key, bytein); + while (needPaddingTo32Bits(bytein)) { + *bytein++ = 0xFF; + } + size_t storageinbytes = bytein - byteininit; + assert((storageinbytes % 4) == 0); + return storageinbytes / 4; + }*/ + + // append a key. Keys must be in sorted order. We assume that there is + // enough room and that delta encoding was used. + // Returns the new size of the compressed array *in bytes* + size_t appendToByteArray(uint8_t *in, const size_t bytesize, + uint32_t previous_key, uint32_t key) { + assert(delta); // no performance impact expected. 
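// ---------------------------------------------------------------------------
// Illustrative sketch (not part of this header; hypothetical helper name):
// findLowerBound above scans the compressed stream, prefix-summing as it goes,
// and stops at the first decoded value >= key, returning its index and writing
// the value to *presult. The same contract on an already-decoded array:
#include <cassert>
#include <cstdint>
#include <vector>

size_t sketch_lower_bound(const std::vector<uint32_t> &sorted, uint32_t key,
                          uint32_t *presult) {
  for (size_t i = 0; i < sorted.size(); ++i) {
    if (sorted[i] >= key) { *presult = sorted[i]; return i; }
  }
  return sorted.size(); // key is larger than everything stored
}

int main() {
  std::vector<uint32_t> ids = {3, 9, 10, 40, 1000};
  uint32_t found = 0;
  assert(sketch_lower_bound(ids, 10, &found) == 2 && found == 10);
  assert(sketch_lower_bound(ids, 11, &found) == 3 && found == 40);
  return 0;
}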
+ uint8_t *byteininit = (uint8_t *)in; + uint8_t *bytein = (uint8_t *)in + bytesize; + bytein += encodeOneIntegerToByteArray(key - previous_key, bytein); + return bytein - byteininit; + } + + // insert the key in sorted order. We assume that there is enough room + // and that delta encoding was used. + size_t insert(uint32_t *in, const size_t length, uint32_t key) { + assert(delta); + size_t bytesize = length * 4; + bytesize -= paddingBytes(in, length); + uint8_t *bytein = (uint8_t *)in; + uint8_t *byteininit = bytein; + bytein += insertInByteArray(bytein, bytesize, key); + + while (needPaddingTo32Bits(bytein)) { + *bytein++ = 0xFF; + } + size_t storageinbytes = bytein - byteininit; + assert((storageinbytes % 4) == 0); + return storageinbytes / 4; + } + + // insert the key in sorted order. We assume that there is enough room and + // that delta encoding was used. + // the new size (in *byte) is returned + size_t insertInByteArray(uint8_t *inbyte, const size_t length, uint32_t key) { + uint32_t prev = 0; + assert(delta); + const uint8_t *const endbyte = + reinterpret_cast(inbyte + length); + // this assumes that there is a value to be read + + while (endbyte > inbyte + 5) { + uint8_t c; + uint32_t v; + + c = inbyte[0]; + v = c & 0x7F; + if (c < 128) { + inbyte += 1; + prev = v + prev; + if (prev >= key) { + return length + + __insert(inbyte, prev - v, key, prev, endbyte - inbyte); + } + continue; + } + + c = inbyte[1]; + v |= (c & 0x7F) << 7; + if (c < 128) { + inbyte += 2; + prev = v + prev; + if (prev >= key) { + return length + + __insert(inbyte, prev - v, key, prev, endbyte - inbyte); + } + continue; + } + + c = inbyte[2]; + v |= (c & 0x7F) << 14; + if (c < 128) { + inbyte += 3; + prev = v + prev; + if (prev >= key) { + return length + + __insert(inbyte, prev - v, key, prev, endbyte - inbyte); + } + continue; + } + + c = inbyte[3]; + v |= (c & 0x7F) << 21; + if (c < 128) { + inbyte += 4; + prev = v + prev; + if (prev >= key) { + return length + + __insert(inbyte, prev - v, key, prev, endbyte - inbyte); + } + continue; + } + + c = inbyte[4]; + inbyte += 5; + v |= (c & 0x0F) << 28; + prev = v + prev; + if (prev >= key) { + return length + __insert(inbyte, prev - v, key, prev, endbyte - inbyte); + } + } + while (endbyte > inbyte) { + unsigned int shift = 0; + for (uint32_t v = 0; endbyte > inbyte; shift += 7) { + uint8_t c = *inbyte++; + v += ((c & 127) << shift); + if ((c < 128)) { + prev = v + prev; + if (prev >= key) { + return length + + __insert(inbyte, prev - v, key, prev, endbyte - inbyte); + } + break; + } + } + } + // if we make it here, then we need to append + assert(key >= prev); + return length + encodeOneIntegerToByteArray(key - prev, inbyte); + } + + // Returns a decompressed value in an encoded array + // could be greatly optimized in the non-differential coding case: currently + // just for delta coding + uint32_t select(uint32_t *in, size_t index) { + assert(delta); + uint32_t prev = 0; + size_t i = 0; + const uint8_t *inbyte = reinterpret_cast(in); + + while (i <= index) { + uint8_t c; + uint32_t v; + + c = inbyte[0]; + v = c & 0x7F; + if (c < 128) { + inbyte += 1; + prev = v + prev; + ++i; + continue; + } + + c = inbyte[1]; + v |= (c & 0x7F) << 7; + if (c < 128) { + inbyte += 2; + prev = v + prev; + ++i; + continue; + } + + c = inbyte[2]; + v |= (c & 0x7F) << 14; + if (c < 128) { + inbyte += 3; + prev = v + prev; + ++i; + continue; + } + + c = inbyte[3]; + v |= (c & 0x7F) << 21; + if (c < 128) { + inbyte += 4; + prev = v + prev; + ++i; + continue; + } + + c = inbyte[4]; 
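// ---------------------------------------------------------------------------
// Illustrative sketch (assumed names, not the library code): insertInByteArray
// and the __insert helper it calls keep the stream sorted by replacing the one
// gap that straddles the new key with two gaps, (key - previous) and
// (next - key); every later gap, and hence every later prefix sum, is left
// untouched. The same idea on an uncompressed gap list:
#include <cassert>
#include <cstdint>
#include <vector>

void sketch_insert_sorted_gap(std::vector<uint32_t> &gaps, uint32_t key) {
  uint32_t prev = 0;
  for (size_t i = 0; i < gaps.size(); ++i) {
    uint32_t next = prev + gaps[i];
    if (next >= key) {                           // first value >= key: split
      gaps[i] = next - key;                      // gap from key up to next
      gaps.insert(gaps.begin() + i, key - prev); // gap from prev up to key
      return;
    }
    prev = next;
  }
  gaps.push_back(key - prev); // larger than everything stored: plain append
}

int main() {
  // gaps for {3, 9, 40}
  std::vector<uint32_t> gaps = {3, 6, 31};
  sketch_insert_sorted_gap(gaps, 10); // now encodes {3, 9, 10, 40}
  assert((gaps == std::vector<uint32_t>{3, 6, 1, 30}));
  return 0;
}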
+ inbyte += 5; + v |= (c & 0x0F) << 28; + prev = v + prev; + ++i; + } + assert(i == index + 1); + return prev; + } + + std::string name() const { + if (delta) + return "VByteDelta"; + else + return "VByte"; + } + +private: + // convenience function used by insert, writes key and newvalue to compressed + // stream, and return + // extra storage used, pointer should be right after where nextvalue is right + // now + size_t __insert(uint8_t *in, uint32_t previous, uint32_t key, + uint32_t nextvalue, size_t followingbytes) { + assert(nextvalue >= key); + assert(key >= previous); + size_t oldstorage = storageCost(nextvalue - previous); + size_t newstorage = + storageCost(nextvalue - key) + storageCost(key - previous); + assert(newstorage >= oldstorage); + if (newstorage > oldstorage) + std::memmove(in + newstorage - oldstorage, in, followingbytes); + uint8_t *newin = in - oldstorage; + newin += encodeOneIntegerToByteArray(key - previous, newin); + newin += encodeOneIntegerToByteArray(nextvalue - key, newin); + assert(newin == in + newstorage - oldstorage); + return newstorage - oldstorage; + } + + template uint8_t extract7bits(const uint32_t val) { + return static_cast((val >> (7 * i)) & ((1U << 7) - 1)); + } + + template uint8_t extract7bitsmaskless(const uint32_t val) { + return static_cast((val >> (7 * i))); + } +}; + +} // namespace SIMDCompressionLib + +#endif /* SIMDCompressionAndIntersection_VARIABLEBYTE_H_ */ diff --git a/include/SIMDCompressionAndIntersection/varintgb.h b/include/SIMDCompressionAndIntersection/varintgb.h new file mode 100644 index 0000000..769b75b --- /dev/null +++ b/include/SIMDCompressionAndIntersection/varintgb.h @@ -0,0 +1,929 @@ +/* + * varintgb.h + * + * Created on: Jul 25, 2013 + * Author: lemire + */ + +#ifndef SIMDCompressionAndIntersection_VARINTGB_H_ +#define SIMDCompressionAndIntersection_VARINTGB_H_ + +#include "common.h" +#include "codecs.h" +#include "variablebyte.h" + +namespace SIMDCompressionLib { + +using namespace std; + +static uint8_t group_size[] = { + 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10, 5, 6, 7, + 8, 6, 7, 8, 9, 7, 8, 9, 10, 8, 9, 10, 11, 6, 7, 8, 9, 7, 8, + 9, 10, 8, 9, 10, 11, 9, 10, 11, 12, 7, 8, 9, 10, 8, 9, 10, 11, 9, + 10, 11, 12, 10, 11, 12, 13, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10, + 8, 9, 10, 11, 6, 7, 8, 9, 7, 8, 9, 10, 8, 9, 10, 11, 9, 10, 11, + 12, 7, 8, 9, 10, 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 8, 9, + 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 11, 12, 13, 14, 6, 7, 8, 9, 7, + 8, 9, 10, 8, 9, 10, 11, 9, 10, 11, 12, 7, 8, 9, 10, 8, 9, 10, 11, + 9, 10, 11, 12, 10, 11, 12, 13, 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, + 13, 11, 12, 13, 14, 9, 10, 11, 12, 10, 11, 12, 13, 11, 12, 13, 14, 12, 13, + 14, 15, 7, 8, 9, 10, 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 8, + 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 11, 12, 13, 14, 9, 10, 11, 12, + 10, 11, 12, 13, 11, 12, 13, 14, 12, 13, 14, 15, 10, 11, 12, 13, 11, 12, 13, + 14, 12, 13, 14, 15, 13, 14, 15, 16}; + +/** + * Group VarInt. + * + * Implemented and designed by D. Lemire based on a talk by Jeff Dean (Google), + * optimized by N. Kurz. 
+ */ + +template class VarIntGB : public IntegerCODEC { +public: + void encodeArray(uint32_t *in, const size_t length, uint32_t *out, + size_t &nvalue) { + uint8_t *bout = reinterpret_cast(out); + const uint8_t *const initbout = reinterpret_cast(out); + *out = static_cast(length); + bout += 4; + bout = headlessEncode(in, length, 0, bout); + + while (needPaddingTo32Bits(bout)) { + *bout++ = 0; + } + const size_t storageinbytes = bout - initbout; + assert((storageinbytes % 4) == 0); + nvalue = storageinbytes / 4; + } + + void encodeToByteArray(uint32_t *in, const size_t length, uint8_t *bout, + size_t &nvalue) { + const uint8_t *const initbout = bout; + *(uint32_t *)bout = static_cast(length); + bout += 4; + bout = headlessEncode(in, length, 0, bout); + nvalue = bout - initbout; + } + + const uint32_t *decodeArray(const uint32_t *in, const size_t length, + uint32_t *out, size_t &nvalue) { + if (length == 0) { + nvalue = 0; + return in; + } + const uint8_t *inbyte = reinterpret_cast(in); + nvalue = *in; + inbyte += 4; + size_t decoded = + headlessDecode(inbyte, (length - 1) * sizeof(uint32_t), 0, out, nvalue); + assert(decoded == nvalue); + return in + length; + } + + const uint8_t *decodeFromByteArray(const uint8_t *inbyte, const size_t length, + uint32_t *out, size_t &nvalue) { + if (length == 0) { + nvalue = 0; + return inbyte; + } + nvalue = *(uint32_t *)inbyte; + inbyte += 4; + size_t decoded = headlessDecode(inbyte, length - 4, 0, out, nvalue); + assert(decoded == nvalue); + return inbyte + length; + } + + // appends a key + // return the new size in bytes + size_t appendToByteArray(uint8_t *in, const size_t length, uint32_t previous, + uint32_t value) { + uint32_t num_ints = *(uint32_t *)in; + uint8_t *bout = reinterpret_cast(in + 4); + uint8_t *bend = in + (length == 0 ? 4 : length); + + if (delta) + value -= previous; + + uint8_t *keyp; + int shift; + + // fast-forward to the last block + if (num_ints % 4 != 0) { + uint32_t size = 0; + do { + bout += size; + size = 1 + group_size[*bout]; + } while (bout + size < bend); + keyp = bout; + bout = bend; + shift = (num_ints % 4) * 2; + } else { + keyp = bend; + bout = keyp + 1; + *keyp = 0; + shift = 0; + } + + if (value < (1U << 8)) { + *bout++ = static_cast(value); + } else if (value < (1U << 16)) { + *bout++ = static_cast(value); + *bout++ = static_cast(value >> 8); + *keyp |= (1 << shift); + } else if (value < (1U << 24)) { + *bout++ = static_cast(value); + *bout++ = static_cast(value >> 8); + *bout++ = static_cast(value >> 16); + *keyp |= (2 << shift); + } else { + // the compiler will do the right thing + *reinterpret_cast(bout) = value; + bout += 4; + *keyp |= (3 << shift); + } + + *(uint32_t *)in = num_ints + 1; + return bout - in; + } + + // insert the key in sorted order. We assume that there is enough room and + // that delta encoding was used. + size_t insert(uint32_t *in, const size_t length, uint32_t key) { + size_t bytesize = length * 4; + uint8_t *bytein = (uint8_t *)in; + uint8_t *byteininit = bytein; + size_t bl = insertInByteArray(bytein, bytesize, key); + bytein += bl; + + while (needPaddingTo32Bits(bytein)) { + *bytein++ = 0; + } + size_t storageinbytes = bytein - byteininit; + assert((storageinbytes % 4) == 0); + return storageinbytes / 4; + } + + // insert the key in sorted order. We assume that there is enough room and + // that delta encoding was used. 
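// ---------------------------------------------------------------------------
// Illustrative, standalone sketch (not the library code): Group VarInt packs
// integers four at a time. One control byte carries four 2-bit size codes
// (code k means k+1 bytes), and the four values follow with exactly that many
// low-order bytes each, as in the encoder above. A minimal single-group
// encoder/decoder:
#include <cassert>
#include <cstdint>
#include <vector>

static uint32_t sketch_bytes_needed(uint32_t v) {
  return v < (1u << 8) ? 1 : v < (1u << 16) ? 2 : v < (1u << 24) ? 3 : 4;
}

void sketch_gvb_encode4(const uint32_t in[4], std::vector<uint8_t> &out) {
  size_t keypos = out.size();
  out.push_back(0); // control byte, filled in below
  for (int k = 0; k < 4; ++k) {
    uint32_t n = sketch_bytes_needed(in[k]);
    out[keypos] |= static_cast<uint8_t>((n - 1) << (2 * k));
    for (uint32_t b = 0; b < n; ++b)
      out.push_back(static_cast<uint8_t>(in[k] >> (8 * b)));
  }
}

size_t sketch_gvb_decode4(const std::vector<uint8_t> &in, size_t pos,
                          uint32_t out[4]) {
  uint8_t key = in[pos++];
  for (int k = 0; k < 4; ++k) {
    uint32_t n = ((key >> (2 * k)) & 3) + 1;
    uint32_t v = 0;
    for (uint32_t b = 0; b < n; ++b)
      v |= static_cast<uint32_t>(in[pos++]) << (8 * b);
    out[k] = v;
  }
  return pos; // offset of the next group
}

int main() {
  const uint32_t vals[4] = {7, 300, 70000, 0xDEADBEEF};
  std::vector<uint8_t> buf;
  sketch_gvb_encode4(vals, buf);
  assert(buf.size() == 1 + 1 + 2 + 3 + 4); // control byte + value bytes
  uint32_t back[4];
  sketch_gvb_decode4(buf, 0, back);
  for (int k = 0; k < 4; ++k) assert(back[k] == vals[k]);
  return 0;
}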
+ // the new size is returned (in bytes) + size_t insertInByteArray(uint8_t *inbyte, size_t length, uint32_t key) { + if (length == 0) { + *((uint32_t *)inbyte) = 0; + length = 4; + } + uint8_t *finalinbyte = inbyte + length; + const uint8_t *const initinbyte = inbyte; + uint32_t nvalue = *((uint32_t *)inbyte); + *((uint32_t *)inbyte) = nvalue + 1; // incrementing + inbyte += 4; // skip nvalue + assert(delta); + uint32_t initial = 0; + size_t i = 0; + while (i + 3 < nvalue) { + uint32_t copyinitial = initial; + const uint8_t *const newinbyte = + scanGroupVarIntDelta(inbyte, ©initial); + if (copyinitial >= key) { + goto finish; + } + inbyte = (uint8_t *)newinbyte; + initial = copyinitial; + i += 4; + } + finish: + assert(finalinbyte >= inbyte); + assert(i <= nvalue); + static const int REASONABLEBUFFER = 256; + if (nvalue - i + 1 > REASONABLEBUFFER) { + if (nvalue == i) { // straight append + const uint8_t *const newfinalinbyte = + headlessEncode(&key, 1, initial, inbyte); + return newfinalinbyte - initinbyte; + } + if (nvalue - i <= 4) { + // easy case + uint32_t tmpbuffer[5]; + tmpbuffer[0] = key; + size_t decoded = headlessDecode(inbyte, finalinbyte - inbyte, initial, + tmpbuffer + 1, nvalue - i); + assert(decoded == nvalue - i); + sortinfirstvalue(tmpbuffer, nvalue - i); + const uint8_t *const newfinalinbyte = + headlessEncode(tmpbuffer, nvalue - i + 1, initial, inbyte); + return newfinalinbyte - initinbyte; + } + // harder case + // this part is a bit complicated since we need to merge in the key + uint32_t readinitial = initial; + uint32_t tmpbuffer[5]; + tmpbuffer[0] = key; + const uint8_t *readinginbyte = + decodeGroupVarIntDelta(inbyte, &readinitial, tmpbuffer + 1); + assert(tmpbuffer[4] >= key); + assert(readinginbyte > inbyte); + + sortinfirstvalue(tmpbuffer, nvalue - i); + i += 4; + + // initialize blocks + + Block b1, b2; + + Block *block1 = &b1; + Block *block2 = &b2; + Block *blocktmp; + + // load block1 + + uint8_t *fb = encodeGroupVarIntDelta(block1->data, initial, tmpbuffer); + + block1->length = static_cast(fb - block1->data); + uint32_t nextval = tmpbuffer[4] - tmpbuffer[3]; + uint32_t newsel = getByteLength(nextval) - 1; + // everything after that is just going to be shifting + while (nvalue - i >= 4) { + + // load block 2 + assert(readinginbyte >= inbyte); + readinginbyte = loadblock(block2, readinginbyte); + i += 4; + // shift in block 1 + shiftin(block2, &nextval, &newsel); + // write block1 + memcpy(inbyte, block1->data, block1->length); + inbyte += block1->length; + // block1 = block2 + blocktmp = block1; + block1 = block2; + block2 = blocktmp; + } + if (nvalue != i) { + readinginbyte = loadblockcarefully(block2, readinginbyte, nvalue - i); + finalshiftin(block2, nextval, newsel, + nvalue - i + 1); // nextval is useless here + memcpy(inbyte, block1->data, block1->length); + inbyte += block1->length; + memcpy(inbyte, block2->data, block2->length); + inbyte += block2->length; + return inbyte - initinbyte; + } else { + memcpy(inbyte, block1->data, block1->length); + inbyte += block1->length; + inbyte[0] = newsel; + inbyte++; + memcpy(inbyte, &nextval, newsel + 1); + inbyte += newsel + 1; + return inbyte - initinbyte; + } + // we are using brute force here, by decoding everything to a buffer and + // then reencoding. 
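// ---------------------------------------------------------------------------
// Illustrative sketch (hypothetical helpers, not the header's code): when the
// remaining tail is small, the simplest correct insert is what the small-tail
// branch here does -- decode the tail into a scratch buffer, place the new key
// at its sorted position, and re-encode the whole tail in place. In plain STL
// terms, on a gap-encoded tail:
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

// starting_from is the prefix-sum value just before the tail ("initial" above).
std::vector<uint32_t> sketch_reencode_insert(std::vector<uint32_t> gaps,
                                             uint32_t starting_from,
                                             uint32_t key) {
  std::vector<uint32_t> vals;
  uint32_t prev = starting_from;
  for (uint32_t g : gaps) { prev += g; vals.push_back(prev); }          // decode
  vals.insert(std::lower_bound(vals.begin(), vals.end(), key), key);    // insert
  std::vector<uint32_t> out;
  prev = starting_from;
  for (uint32_t v : vals) { out.push_back(v - prev); prev = v; }        // re-encode
  return out;
}

int main() {
  // tail encodes {50, 60, 90} relative to starting_from = 40
  std::vector<uint32_t> out = sketch_reencode_insert({10, 10, 30}, 40, 70);
  assert((out == std::vector<uint32_t>{10, 10, 10, 20})); // {50, 60, 70, 90}
  return 0;
}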
+ } else { + uint32_t tmpbuffer[REASONABLEBUFFER]; + assert(tmpbuffer); + tmpbuffer[0] = key; + if (nvalue != i) { + size_t decoded = headlessDecode(inbyte, finalinbyte - inbyte, initial, + tmpbuffer + 1, nvalue - i); + assert(decoded == nvalue - i); + sortinfirstvalue(tmpbuffer, nvalue - i); + } + const uint8_t *const newfinalinbyte = + headlessEncode(tmpbuffer, nvalue - i + 1, initial, inbyte); + return newfinalinbyte - initinbyte; + } + } + + // Performs a lower bound find in the encoded array. + // Returns the index + // assumes delta coding was used + size_t findLowerBound(const uint32_t *in, const size_t length, uint32_t key, + uint32_t *presult) { + const uint8_t *inbyte = reinterpret_cast(in); + uint32_t out[4] = {0}; + assert(delta); + size_t i = 0; + uint32_t initial = 0; + uint32_t nvalue = *in; + + inbyte += 4; // skip nvalue + + const uint8_t *const endbyte = + reinterpret_cast(in + length); + while (i + 3 < nvalue) { + uint32_t gap1, gap2, gap3, gap4; + uint32_t gap12, gap34; + + const uint32_t sel = *inbyte++; + if (sel == 0) { + gap1 = static_cast(inbyte[0]); + gap2 = static_cast(inbyte[1]); + gap12 = gap1 + gap2; + gap3 = static_cast(inbyte[2]); + gap4 = static_cast(inbyte[3]); + gap34 = gap3 + gap4; + inbyte += 4; + } else { + const uint32_t sel1 = (sel & 3); + gap1 = *(reinterpret_cast(inbyte)) & mask[sel1]; + inbyte += sel1 + 1; + const uint32_t sel2 = ((sel >> 2) & 3); + gap2 = *(reinterpret_cast(inbyte)) & mask[sel2]; + gap12 = gap1 + gap2; + inbyte += sel2 + 1; + const uint32_t sel3 = ((sel >> 4) & 3); + gap3 = *(reinterpret_cast(inbyte)) & mask[sel3]; + inbyte += sel3 + 1; + const uint32_t sel4 = (sel >> 6); + gap4 = *(reinterpret_cast(inbyte)) & mask[sel4]; + gap34 = gap3 + gap4; + inbyte += sel4 + 1; + } + initial += gap12 + gap34; + if (key <= initial) { + if (key <= initial - gap34 - gap2) { + *presult = initial - gap34 - gap2; + return (i + 0); + } + if (key <= initial - gap34) { + *presult = initial - gap34; + return (i + 1); + } + if (key <= initial - gap4) { + *presult = initial - gap4; + return (i + 2); + } + *presult = initial; + return (i + 3); + } + i += 4; + } + if (endbyte > inbyte && nvalue > i) { + uint32_t tnvalue = static_cast(nvalue - 1 - i); + inbyte = decodeCarefully(inbyte, &initial, out, tnvalue); + assert(inbyte <= endbyte); + if (key <= out[0]) { + *presult = out[0]; + return (i + 0); + } + if (tnvalue > 0 && key <= out[1]) { + *presult = out[1]; + return (i + 1); + } + if (tnvalue > 1 && key <= out[2]) { + *presult = out[2]; + return (i + 2); + } + if (tnvalue > 2 && key <= out[3]) { + *presult = out[3]; + return (i + 3); + } + } + assert(false); + *presult = key + 1; + return (i); + } + + // Returns a decompressed value in an encoded array + // This code has been optimized for delta-encoded arrays (TODO: optimize for + // the regular case). + uint32_t select(uint32_t *in, size_t index) { + const uint8_t *inbyte = reinterpret_cast(in); + uint32_t out[4]; + out[0] = 0; + out[1] = 0; + out[2] = 0; + out[3] = 0; + size_t i = 0; + uint32_t initial = 0; + uint32_t nvalue = *in; + inbyte += 4; // skip nvalue + if (index + 3 < + nvalue) { // this common case can be done with fewer branches + while (i + 4 <= index) { + inbyte = delta ? scanGroupVarIntDelta(inbyte, &initial) + : scanGroupVarInt(inbyte); // note: delta known at + // compile time: this is not a + // branch + i += 4; + } + inbyte = delta ? 
decodeGroupVarIntDelta(inbyte, &initial, out) + : decodeGroupVarInt(inbyte, out); // note: delta known at + // compile time: this is + // not a branch + return (out[index - i]); + } // else + // we finish with the uncommon case + while (i + 3 < index) { // a single branch will do for this case (bulk of + // the computation) + inbyte = delta ? scanGroupVarIntDelta(inbyte, &initial) + : scanGroupVarInt(inbyte); + i += 4; + } + // lots of branching ahead... + while (i + 3 < nvalue) { + inbyte = delta ? decodeGroupVarIntDelta(inbyte, &initial, out) + : decodeGroupVarInt(inbyte, out); + i += 4; + if (i > index) + return (out[index - (i - 4)]); + } + { + nvalue = static_cast(nvalue - i); + inbyte = decodeCarefully(inbyte, &initial, out, nvalue); + if (index == i) + return (out[0]); + if (nvalue > 1 && index == i + 1) + return (out[1]); + if (nvalue > 2 && index == i + 2) + return (out[2]); + if (nvalue > 3 && index == i + 3) + return (out[3]); + } + assert(false); // we should never get here + return (0); + } + + string name() const { + if (delta) + return "varintgbdelta"; + else + return "varintgb"; + } + + uint8_t *headlessEncode(uint32_t *in, const size_t length, uint32_t prev, + uint8_t *bout) { + size_t k = 0; + for (; k + 3 < length; k += 4) { + uint8_t *keyp = bout++; + *keyp = 0; + { + const uint32_t val = delta ? in[k] - prev : in[k]; + if (delta) + prev = in[k]; + if (val < (1U << 8)) { + *bout++ = static_cast(val); + } else if (val < (1U << 16)) { + *bout++ = static_cast(val); + *bout++ = static_cast(val >> 8); + *keyp = static_cast(1); + } else if (val < (1U << 24)) { + *bout++ = static_cast(val); + *bout++ = static_cast(val >> 8); + *bout++ = static_cast(val >> 16); + *keyp = static_cast(2); + } else { + // the compiler will do the right thing + *reinterpret_cast(bout) = val; + bout += 4; + *keyp = static_cast(3); + } + } + { + const uint32_t val = delta ? in[k + 1] - prev : in[k + 1]; + if (delta) + prev = in[k + 1]; + if (val < (1U << 8)) { + *bout++ = static_cast(val); + } else if (val < (1U << 16)) { + *bout++ = static_cast(val); + *bout++ = static_cast(val >> 8); + *keyp |= static_cast(1 << 2); + } else if (val < (1U << 24)) { + *bout++ = static_cast(val); + *bout++ = static_cast(val >> 8); + *bout++ = static_cast(val >> 16); + *keyp |= static_cast(2 << 2); + } else { + // the compiler will do the right thing + *reinterpret_cast(bout) = val; + bout += 4; + *keyp |= static_cast(3 << 2); + } + } + { + const uint32_t val = delta ? in[k + 2] - prev : in[k + 2]; + if (delta) + prev = in[k + 2]; + if (val < (1U << 8)) { + *bout++ = static_cast(val); + } else if (val < (1U << 16)) { + *bout++ = static_cast(val); + *bout++ = static_cast(val >> 8); + *keyp |= static_cast(1 << 4); + } else if (val < (1U << 24)) { + *bout++ = static_cast(val); + *bout++ = static_cast(val >> 8); + *bout++ = static_cast(val >> 16); + *keyp |= static_cast(2 << 4); + } else { + // the compiler will do the right thing + *reinterpret_cast(bout) = val; + bout += 4; + *keyp |= static_cast(3 << 4); + } + } + { + const uint32_t val = delta ? 
in[k + 3] - prev : in[k + 3]; + if (delta) + prev = in[k + 3]; + if (val < (1U << 8)) { + *bout++ = static_cast(val); + } else if (val < (1U << 16)) { + *bout++ = static_cast(val); + *bout++ = static_cast(val >> 8); + *keyp |= static_cast(1 << 6); + } else if (val < (1U << 24)) { + *bout++ = static_cast(val); + *bout++ = static_cast(val >> 8); + *bout++ = static_cast(val >> 16); + *keyp |= static_cast(2 << 6); + } else { + // the compiler will do the right thing + *reinterpret_cast(bout) = val; + bout += 4; + *keyp |= static_cast(3 << 6); + } + } + } + if (k < length) { + uint8_t *keyp = bout++; + *keyp = 0; + for (int j = 0; k < length && j < 8; j += 2, ++k) { + const uint32_t val = delta ? in[k] - prev : in[k]; + if (delta) + prev = in[k]; + if (val < (1U << 8)) { + *bout++ = static_cast(val); + } else if (val < (1U << 16)) { + *bout++ = static_cast(val); + *bout++ = static_cast(val >> 8); + *keyp |= static_cast(1 << j); + } else if (val < (1U << 24)) { + *bout++ = static_cast(val); + *bout++ = static_cast(val >> 8); + *bout++ = static_cast(val >> 16); + *keyp |= static_cast(2 << j); + } else { + // the compiler will do the right thing + *reinterpret_cast(bout) = val; + bout += 4; + *keyp |= static_cast(3 << j); + } + } + } + return bout; + } + + // returns how many values were decoded to out, will try to decode + // desirednumber + // if input allows. + size_t headlessDecode(const uint8_t *inbyte, const size_t length, + uint32_t prev, uint32_t *out, + const size_t desirednumber) { + + uint32_t *initout = out; + const uint32_t *const endout = out + desirednumber; + const uint8_t *const endbyte = inbyte + length; + uint32_t val; + while ((endbyte > inbyte + 4 * 4)) { //&& (endout > out + 3) + inbyte = delta ? decodeGroupVarIntDelta(inbyte, &prev, out) + : decodeGroupVarInt(inbyte, out); + out += 4; + } + while (endbyte > inbyte + 1) { + uint8_t key = *inbyte++; + // printf("last key is %u \n",key); + for (int k = 0; (k < 4) && (endout > out); k++) { + const uint32_t howmanybyte = key & 3; + // printf("last key is %u howmanybyte = %u \n",key, howmanybyte+1); + + key = static_cast(key >> 2); + val = static_cast(*inbyte++); + if (howmanybyte >= 1) { + val |= (static_cast(*inbyte++) << 8); + if (howmanybyte >= 2) { + val |= (static_cast(*inbyte++) << 16); + if (howmanybyte >= 3) { + val |= (static_cast(*inbyte++) << 24); + } + } + } + // printf("decoded %u\n",val); + prev = (delta ? prev : 0) + val; + // printf("writing %u\n",prev); + + *out++ = prev; + } + assert(inbyte <= endbyte); + } + return out - initout; + } + +private: + const uint32_t mask[4] = {0xFF, 0xFFFF, 0xFFFFFF, 0xFFFFFFFF}; + + void sortinfirstvalue(uint32_t *tmpbuffer, size_t length) { + size_t top = length < 4 ? 
length : 4; + + for (size_t j = 0; j < top; ++j) { + if (tmpbuffer[j] > tmpbuffer[j + 1]) { + uint32_t t = tmpbuffer[j + 1]; + tmpbuffer[j + 1] = tmpbuffer[j]; + tmpbuffer[j] = t; + } + } + } + + const uint8_t *decodeGroupVarInt(const uint8_t *in, uint32_t *out) { + const uint32_t sel = *in++; + if (sel == 0) { + out[0] = static_cast(in[0]); + out[1] = static_cast(in[1]); + out[2] = static_cast(in[2]); + out[3] = static_cast(in[3]); + return in + 4; + } + const uint32_t sel1 = (sel & 3); + *out++ = *((uint32_t *)(in)) & mask[sel1]; + in += sel1 + 1; + const uint32_t sel2 = ((sel >> 2) & 3); + *out++ = *((uint32_t *)(in)) & mask[sel2]; + in += sel2 + 1; + const uint32_t sel3 = ((sel >> 4) & 3); + *out++ = *((uint32_t *)(in)) & mask[sel3]; + in += sel3 + 1; + const uint32_t sel4 = (sel >> 6); + *out++ = *((uint32_t *)(in)) & mask[sel4]; + in += sel4 + 1; + return in; + } + + const uint8_t *decodeGroupVarIntDelta(const uint8_t *in, uint32_t *val, + uint32_t *out) { + const uint32_t sel = *in++; + if (sel == 0) { + out[0] = (*val += static_cast(in[0])); + out[1] = (*val += static_cast(in[1])); + out[2] = (*val += static_cast(in[2])); + out[3] = (*val += static_cast(in[3])); + return in + 4; + } + const uint32_t sel1 = (sel & 3); + *val += *((uint32_t *)(in)) & mask[sel1]; + *out++ = *val; + in += sel1 + 1; + const uint32_t sel2 = ((sel >> 2) & 3); + *val += *((uint32_t *)(in)) & mask[sel2]; + *out++ = *val; + in += sel2 + 1; + const uint32_t sel3 = ((sel >> 4) & 3); + *val += *((uint32_t *)(in)) & mask[sel3]; + *out++ = *val; + in += sel3 + 1; + const uint32_t sel4 = (sel >> 6); + *val += *((uint32_t *)(in)) & mask[sel4]; + *out++ = *val; + in += sel4 + 1; + return in; + } + + const uint8_t *decodeCarefully(const uint8_t *inbyte, uint32_t *initial, + uint32_t *out, uint32_t count) { + uint32_t val; + uint32_t k, key = *inbyte++; + for (k = 0; k < count && k < 4; k++) { + const uint32_t howmanybyte = key & 3; + key = static_cast(key >> 2); + val = static_cast(*inbyte++); + if (howmanybyte >= 1) { + val |= (static_cast(*inbyte++) << 8); + if (howmanybyte >= 2) { + val |= (static_cast(*inbyte++) << 16); + if (howmanybyte >= 3) { + val |= (static_cast(*inbyte++) << 24); + } + } + } + if (delta) { + *initial += val; + *out = *initial; + } else { + *out = val; + } + out++; + } + return (inbyte); + } + + const uint8_t *scanGroupVarIntDelta(const uint8_t *in, uint32_t *val) { + const uint32_t sel = *in++; + if (sel == 0) { + *val += static_cast(in[0]); + *val += static_cast(in[1]); + *val += static_cast(in[2]); + *val += static_cast(in[3]); + return in + 4; + } + const uint32_t sel1 = (sel & 3); + *val += *(reinterpret_cast(in)) & mask[sel1]; + in += sel1 + 1; + const uint32_t sel2 = ((sel >> 2) & 3); + *val += *(reinterpret_cast(in)) & mask[sel2]; + in += sel2 + 1; + const uint32_t sel3 = ((sel >> 4) & 3); + *val += *(reinterpret_cast(in)) & mask[sel3]; + in += sel3 + 1; + const uint32_t sel4 = (sel >> 6); + *val += *(reinterpret_cast(in)) & mask[sel4]; + in += sel4 + 1; + return in; + } + + const uint8_t *scanGroupVarInt(const uint8_t *in) { + const uint32_t sel = *in++; + if (sel == 0) { + return in + 4; + } + const uint32_t sel1 = (sel & 3); + in += sel1 + 1; + const uint32_t sel2 = ((sel >> 2) & 3); + in += sel2 + 1; + const uint32_t sel3 = ((sel >> 4) & 3); + in += sel3 + 1; + const uint32_t sel4 = (sel >> 6); + in += sel4 + 1; + return in; + } + + // encode 4 integers + uint8_t *encodeGroupVarIntDelta(uint8_t *bout, uint32_t prev, uint32_t *in) { + uint8_t *keyp = bout++; + *keyp = 0; + { + 
const uint32_t val = in[0] - prev; + prev = in[0]; + if (val < (1U << 8)) { + *bout++ = static_cast(val); + } else if (val < (1U << 16)) { + *bout++ = static_cast(val); + *bout++ = static_cast(val >> 8); + *keyp = static_cast(1); + } else if (val < (1U << 24)) { + *bout++ = static_cast(val); + *bout++ = static_cast(val >> 8); + *bout++ = static_cast(val >> 16); + *keyp = static_cast(2); + } else { + // the compiler will do the right thing + *reinterpret_cast(bout) = val; + bout += 4; + *keyp = static_cast(3); + } + } + { + const uint32_t val = in[1] - prev; + prev = in[1]; + if (val < (1U << 8)) { + *bout++ = static_cast(val); + } else if (val < (1U << 16)) { + *bout++ = static_cast(val); + *bout++ = static_cast(val >> 8); + *keyp |= static_cast(1 << 2); + } else if (val < (1U << 24)) { + *bout++ = static_cast(val); + *bout++ = static_cast(val >> 8); + *bout++ = static_cast(val >> 16); + *keyp |= static_cast(2 << 2); + } else { + // the compiler will do the right thing + *reinterpret_cast(bout) = val; + bout += 4; + *keyp |= static_cast(3 << 2); + } + } + { + const uint32_t val = in[2] - prev; + prev = in[2]; + if (val < (1U << 8)) { + *bout++ = static_cast(val); + } else if (val < (1U << 16)) { + *bout++ = static_cast(val); + *bout++ = static_cast(val >> 8); + *keyp |= static_cast(1 << 4); + } else if (val < (1U << 24)) { + *bout++ = static_cast(val); + *bout++ = static_cast(val >> 8); + *bout++ = static_cast(val >> 16); + *keyp |= static_cast(2 << 4); + } else { + // the compiler will do the right thing + *reinterpret_cast(bout) = val; + bout += 4; + *keyp |= static_cast(3 << 4); + } + } + { + const uint32_t val = in[3] - prev; + prev = in[3]; + if (val < (1U << 8)) { + *bout++ = static_cast(val); + } else if (val < (1U << 16)) { + *bout++ = static_cast(val); + *bout++ = static_cast(val >> 8); + *keyp |= static_cast(1 << 6); + } else if (val < (1U << 24)) { + *bout++ = static_cast(val); + *bout++ = static_cast(val >> 8); + *bout++ = static_cast(val >> 16); + *keyp |= static_cast(2 << 6); + } else { + // the compiler will do the right thing + *reinterpret_cast(bout) = val; + bout += 4; + *keyp |= static_cast(3 << 6); + } + } + return bout; + } + + uint32_t getByteLength(uint32_t val) { + if (val < (1U << 8)) { + return 1; + } else if (val < (1U << 16)) { + return 2; + } else if (val < (1U << 24)) { + return 3; + } else { + return 4; + } + } + + struct Block { // should fit in two cache lines. 
+ uint8_t data[4 * 4 + 1 + 3]; // the final +3 is a safety buffer + uint32_t length; + }; + + uint8_t lengths[256] = { + 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10, 8, 9, 10, 11, 6, 7, + 8, 9, 7, 8, 9, 10, 8, 9, 10, 11, 9, 10, 11, 12, 7, 8, 9, 10, + 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 8, 9, 10, 11, 9, 10, + 11, 12, 10, 11, 12, 13, 11, 12, 13, 14, 6, 7, 8, 9, 7, 8, 9, 10, + 8, 9, 10, 11, 9, 10, 11, 12, 7, 8, 9, 10, 8, 9, 10, 11, 9, 10, + 11, 12, 10, 11, 12, 13, 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, + 11, 12, 13, 14, 9, 10, 11, 12, 10, 11, 12, 13, 11, 12, 13, 14, 12, 13, + 14, 15, 7, 8, 9, 10, 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, + 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 11, 12, 13, 14, 9, 10, + 11, 12, 10, 11, 12, 13, 11, 12, 13, 14, 12, 13, 14, 15, 10, 11, 12, 13, + 11, 12, 13, 14, 12, 13, 14, 15, 13, 14, 15, 16, 8, 9, 10, 11, 9, 10, + 11, 12, 10, 11, 12, 13, 11, 12, 13, 14, 9, 10, 11, 12, 10, 11, 12, 13, + 11, 12, 13, 14, 12, 13, 14, 15, 10, 11, 12, 13, 11, 12, 13, 14, 12, 13, + 14, 15, 13, 14, 15, 16, 11, 12, 13, 14, 12, 13, 14, 15, 13, 14, 15, 16, + 14, 15, 16, 17}; + + const uint8_t *loadblock(Block *b, const uint8_t *readinginbyte) { + b->length = lengths[readinginbyte[0]]; + memcpy(b->data, readinginbyte, b->length); + return readinginbyte + b->length; + } + + const uint8_t *loadblockcarefully(Block *b, const uint8_t *readinginbyte, + size_t howmanyvals) { + b->length = 1; + for (size_t k = 0; k < howmanyvals; ++k) + b->length += 1 + ((readinginbyte[0] >> (2 * k)) & 3); + memcpy(b->data, readinginbyte, b->length); + return readinginbyte + b->length; + } + + void shiftin(Block *b, uint32_t *nextval, uint32_t *newsel) { + uint32_t offsettolastval = lengths[b->data[0] & 63] - 1; + uint32_t newnextsel = b->data[0] >> 6; + uint32_t newnextval; + memcpy(&newnextval, b->data + offsettolastval, 4); + newnextval &= mask[newnextsel]; + // uint32_t newnextval = *(reinterpret_cast(b->data + + // offsettolastval)) & mask[newnextsel]; + b->data[0] = (b->data[0] << 2) | *newsel; + std::memmove(b->data + 2 + *newsel, b->data + 1, + b->length - 1 - 1 - newnextsel); + b->length = offsettolastval + 1 + *newsel; + std::memcpy(b->data + 1, nextval, *newsel + 1); + *nextval = newnextval; + *newsel = newnextsel; + } + + void finalshiftin(Block *b, uint32_t nextval, uint32_t newsel, + size_t howmany) { + b->data[0] = (b->data[0] << 2) | newsel; + std::memmove(b->data + 2 + newsel, b->data + 1, b->length - 1); + b->length = 1; + for (size_t k = 0; k < howmany; ++k) + b->length += 1 + ((b->data[0] >> (2 * k)) & 3); + std::memcpy(b->data + 1, &nextval, newsel + 1); + } +}; + +} // namespace SIMDCompressionLib + +#endif /* SIMDCompressionAndIntersection_VARINTGB_H_ */ diff --git a/include/bitvector.h b/include/bitvector.h index 67728a2..e69de29 100644 --- a/include/bitvector.h +++ b/include/bitvector.h @@ -1,87 +0,0 @@ -/* - * ============================================================================ - * - * Filename: bitvector.h - * - * Description: - * - * Version: 1.0 - * Created: 2017-10-25 01:59:49 PM - * Revision: none - * Compiler: gcc - * - * Author: Prashant Pandey (), ppandey@cs.stonybrook.edu - * Organization: Stony Brook University - * - * ============================================================================ - */ - -#ifndef _BIT_VECTOR_H_ -#define _BIT_VECTOR_H_ - -#include -#include - -#include - -#include "util.h" -#include "hashutil.h" -#include "sdsl/bit_vectors.hpp" - -class BitVector { - public: - BitVector() : bits(), size(0) {}; - BitVector(uint64_t size); - 
BitVector(const BitVector& bv) : bits(bv.bits), size(bv.size) - {}; - - sdsl::bit_vector get_bits() const { - return bits; - } - - void reset(); - bool operator[](uint64_t idx); - void set(const uint64_t idx); - uint64_t capacity(void) const { return bits.capacity() / 8; } - uint64_t bit_size(void) const { return bits.bit_size(); } - const uint64_t *data(void) const { return bits.data(); } - void resize(const uint64_t len); - uint64_t get_int(uint64_t startP, uint64_t len=64) {return bits.get_int(startP, len);} - bool operator==(const BitVector& b) const { return bits == b.bits; } - - private: - sdsl::bit_vector bits; - uint64_t size; -}; - -class BitVectorRRR { - public: - BitVectorRRR() : rrr_bits(), size(0) {}; - BitVectorRRR(const BitVector& bv) : rrr_bits(bv.get_bits()), - size(bv.bit_size()) {}; - BitVectorRRR(std::string& filename); - - bool operator[](uint64_t idx); - bool serialize(std::string& filename); - uint64_t bit_size(void) const { return rrr_bits.size(); } - uint64_t get_int(uint64_t startP, uint64_t len=64) { - return rrr_bits.get_int(startP, len); - } - - private: - sdsl::rrr_vector<63> rrr_bits; - uint64_t size; -}; - -template -struct sdslhash { - uint64_t operator()(const T& vector) const - { - // Using the same seed as we use in k-mer hashing. - return HashUtil::MurmurHash64A((void*)vector.data(), vector.capacity(), - 2038074743); - } -}; - -#endif - diff --git a/include/canonKmer.h b/include/canonKmer.h new file mode 100644 index 0000000..661d567 --- /dev/null +++ b/include/canonKmer.h @@ -0,0 +1,90 @@ +// +// Created by Fatemeh Almodaresi on 8/21/18. +// + +#ifndef MANTIS_CANONKMER_H +#define MANTIS_CANONKMER_H + +#include +#include +#include +#include + +#define BITMASK(nbits) ((nbits) == 64 ? 0xffffffffffffffff : (1ULL << (nbits)) \ + - 1ULL) + +namespace duplicated_dna { + +/////////////// bases ///////////////// + enum base { + C = 0, A = 1, T = 2, G = 3 + }; + + base operator-(base b); // return the complementary base + extern const base bases[4]; + extern const std::map base_from_char; + extern const std::map base_to_char; + +///////////// kmers ///////////////////// + class kmer { + public: + int len; + uint64_t val; + + kmer(void); + + kmer(base b); + + kmer(int l, uint64_t v); + + kmer(std::string s); + + // Convert to string + operator std::string() const; + }; + + bool operator<(kmer a, kmer b); + + bool operator==(kmer a, kmer b); + + bool operator!=(kmer a, kmer b); + +// Return the reverse complement of k + kmer operator-(kmer k); + + kmer canonicalize(kmer k); + +// Return the kmer of length |a| that results from shifting b into a +// from the right + kmer operator<<(kmer a, kmer b); + +// Return the kmer of length |b| that results from shifting a into b +// from the left + kmer operator>>(kmer a, kmer b); + +// Append two kmers + kmer operator+(kmer a, kmer b); + + kmer suffix(kmer k, int len); + + kmer prefix(kmer k, int len); + +// The purpose of this class is to enable us to declare containers +// as holding canonical kmers, e.g. set. Then all +// inserts/queries/etc will automatically canonicalize their +// arguments. 
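// ---------------------------------------------------------------------------
// Illustrative, standalone sketch (hypothetical names): with the 2-bit
// encoding above (C=0, A=1, T=2, G=3) the complement of a base is 3 minus its
// code, so a reverse complement reverses the 2-bit groups and complements each,
// and the canonical form is the smaller of a k-mer and its reverse complement.
// That is the property containers of canonical_kmer rely on.
#include <cassert>
#include <cstdint>

// reverse complement of a k-mer stored 2 bits per base in the low 2*k bits
uint64_t sketch_revcomp(uint64_t kmer, int k) {
  uint64_t rc = 0;
  for (int i = 0; i < k; ++i) {
    uint64_t b = (kmer >> (2 * i)) & 3; // i-th base from the right
    rc = (rc << 2) | (3 - b);           // complement and push to the left
  }
  return rc;
}

uint64_t sketch_canonicalize(uint64_t kmer, int k) {
  uint64_t rc = sketch_revcomp(kmer, k);
  return rc < kmer ? rc : kmer;
}

int main() {
  // a 3-mer and its reverse complement canonicalize to the same value,
  // which is what lets a set of canonical k-mers treat them as one key
  const int k = 3;
  uint64_t fwd = (1ull << 4) | (2ull << 2) | 3ull; // A T G (arbitrary example)
  uint64_t rc = sketch_revcomp(fwd, k);
  assert(sketch_canonicalize(fwd, k) == sketch_canonicalize(rc, k));
  return 0;
}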
+ class canonical_kmer : public kmer { + public: + canonical_kmer(void); + + canonical_kmer(base b); + + canonical_kmer(int l, uint64_t v); + + canonical_kmer(std::string s); + + canonical_kmer(kmer k); + }; +} + +#endif //MANTIS_CANONKMER_H diff --git a/include/colorEncoder.h b/include/colorEncoder.h new file mode 100644 index 0000000..dde0fd9 --- /dev/null +++ b/include/colorEncoder.h @@ -0,0 +1,119 @@ +// +// Created by Fatemeh Almodaresi on 8/17/18. +// + +#ifndef MANTIS_COLORENCODER_H +#define MANTIS_COLORENCODER_H + +#include +#include +#include "lru/lru.hpp" +#include "deltaManager.h" +#include "sdsl/bit_vectors.hpp" +#include "cqf.h" +#include "canonKmer.h" +#include "hashutil.h" + +using LRUCacheMap = LRU::Cache>; +typedef std::pair node; +constexpr uint64_t zero = 0; + +struct Edge { + uint64_t parent; + uint64_t child; + uint64_t weight; + + Edge() { + parent = 0; child = 0; weight = 0; + } + Edge(uint64_t inParent, uint64_t inChild, uint64_t inWeight) : + parent(inParent), child(inChild), weight(inWeight) {} +}; + +struct pair_hash { + template + std::size_t operator()(const std::pair &p) const { + auto h1 = std::hash{}(p.first); + auto h2 = std::hash{}(p.second); + + // Mainly for demonstration purposes, i.e. works but is overly simple + // In the real world, use sth. like boost.hash_combine + return h1 ^ h2; + } +}; + +class ColorEncoder { +public: + struct Stats { + uint64_t cache_hits{0}; + uint64_t tot_hits{0}; + uint64_t tot_edge_access{0}; + uint64_t tot_edge_access_request{0}; + uint64_t edge_access_for_updateMST{0}; + uint64_t add_edge{0}; + uint64_t parentbv_access_for_updateMST{0}; + }; + Stats stats; + ColorEncoder(std::string prefixIn, + uint64_t numSamplesIn, + CQF &cqfIn, + uint64_t approximateClrClsesIn, + uint64_t approximateDeltaCntPerClrCls = 8) : + prefix(prefixIn), + numSamples(numSamplesIn), + cqf(cqfIn), + bvSize(approximateClrClsesIn), + parentbv(bvSize, 0, ceil(log2((double)bvSize))),//TODO take care of this constant!! 
+ deltaM(numSamplesIn, bvSize, approximateDeltaCntPerClrCls), + colorClsCnt(1), // start with the dummy node + lru_cache(100000) + { + std::string f = prefix+"/weight.lst"; + weightDistFile = new std::ofstream(f); + std::cerr << "\nColorEncoder Constructor: bvSize: " + << bvSize << " parent size: " << parentbv.size() + << " colorClsCnt: " << colorClsCnt << "\n"; + lru_cache.monitor(); + } + + + bool addColorClass(uint64_t kmer, uint64_t eqId, const sdsl::bit_vector &bv); + + bool serialize(); + +private: + uint64_t numSamples; + uint64_t bvSize; + uint64_t colorClsCnt; + uint64_t kmerCntr{0}; + std::ofstream* weightDistFile; + sdsl::int_vector<> parentbv; + DeltaManager deltaM; + CQF &cqf; + int k = 20; + std::string prefix; + std::unordered_map, uint32_t, pair_hash> edges; + LRUCacheMap lru_cache; + + std::vector buildColor(uint64_t eqid); + + std::vector buildColor(const sdsl::bit_vector &bv); + + bool updateMST(uint64_t n1, uint64_t n2, std::vector deltas); + + std::vector hammingDist(uint64_t i, uint64_t j); + + std::unordered_set neighbors(duplicated_dna::canonical_kmer n); + + bool exists(duplicated_dna::canonical_kmer e, uint64_t &eqid); + + std::pair maxWeightsTillLCA(uint64_t n1, uint64_t n2); + + void addEdge(uint64_t i, uint64_t j, uint32_t w); + + bool hasEdge(uint64_t i, uint64_t j); + + uint32_t getEdge(uint64_t i, uint64_t j); +}; + +#endif //MANTIS_COLORENCODER_H diff --git a/include/coloreddbg.h b/include/coloreddbg.h index 8b9f1a4..f285e1f 100644 --- a/include/coloreddbg.h +++ b/include/coloreddbg.h @@ -1,16 +1,8 @@ /* * ============================================================================ * - * Filename: coloreddbg.h - * - * Description: - * - * Version: 1.0 - * Created: 2017-10-24 08:49:22 PM - * Revision: none - * Compiler: gcc - * * Author: Prashant Pandey (), ppandey@cs.stonybrook.edu + * Mike Ferdman (), mferdman@cs.stonybrook.edu * Organization: Stony Brook University * * ============================================================================ @@ -31,39 +23,43 @@ #include "sparsepp/spp.h" #include "tsl/sparse_map.h" #include "sdsl/bit_vectors.hpp" -#include "bitvector.h" -#include "cqf.h" -#include "hashutil.h" +#include "gqf_cpp.h" +#include "gqf/hashutil.h" #include "common_types.h" #include "mantisconfig.hpp" +#include "colorEncoder.h" + +typedef sdsl::bit_vector BitVector; +typedef sdsl::rrr_vector<63> BitVectorRRR; struct hash128 { uint64_t operator()(const __uint128_t& val128) const { __uint128_t val = val128; // Using the same seed as we use in k-mer hashing. 
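// ---------------------------------------------------------------------------
// Illustrative sketch (not part of these headers): the pair_hash in
// colorEncoder.h above XORs the two element hashes, which is symmetric and
// maps (x, x) to 0, so distinct node pairs collide easily. The boost-style
// combiner its comment alludes to mixes the hashes with a multiplier and
// shifts; a drop-in variant (hypothetical name) would look like:
#include <cstddef>
#include <cstdint>
#include <functional>
#include <unordered_map>
#include <utility>

struct sketch_pair_hash {
  template <class T1, class T2>
  std::size_t operator()(const std::pair<T1, T2> &p) const {
    std::size_t seed = std::hash<T1>{}(p.first);
    // 0x9e3779b9 is the golden-ratio constant used by boost::hash_combine
    seed ^= std::hash<T2>{}(p.second) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
    return seed;
  }
};

int main() {
  std::unordered_map<std::pair<uint64_t, uint64_t>, uint32_t, sketch_pair_hash>
      edges;
  edges[{3, 7}] = 42; // e.g. an edge weight keyed by a node pair
  return edges.count({3, 7}) == 1 ? 0 : 1;
}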
- return HashUtil::MurmurHash64A((void*)&val, sizeof(__uint128_t), - 2038074743); + return MurmurHash64A((void*)&val, sizeof(__uint128_t), + 2038074743); } }; template - using cdbg_bv_map_t = spp::sparse_hash_map; +using cdbg_bv_map_t = spp::sparse_hash_map; -using default_cdbg_bv_map_t = cdbg_bv_map_t<__uint128_t, std::pair>; +using default_cdbg_bv_map_t = cdbg_bv_map_t<__uint128_t, + std::pair>; template class ColoredDbg { - public: + public: ColoredDbg(std::string& cqf_file, std::vector& eqclass_files, std::string& sample_file); - ColoredDbg(uint64_t qbits, uint64_t key_bits, uint32_t seed, - std::string& prefix, uint64_t nqf); - + ColoredDbg(uint64_t qbits, uint64_t key_bits, enum qf_hashmode hashmode, + uint32_t seed, std::string& prefix, uint64_t nqf); + void build_sampleid_map(qf_obj *incqfs); - default_cdbg_bv_map_t& + default_cdbg_bv_map_t& construct(qf_obj *incqfs, uint64_t num_kmers); void set_console(spdlog::logger* c) { console = c; } @@ -75,7 +71,7 @@ class ColoredDbg { uint32_t seed(void) const { return dbg.seed(); } uint64_t range(void) const { return dbg.range(); } - std::unordered_map + mantis::QueryResult find_samples(const mantis::QuerySet& kmers); void serialize(); @@ -83,10 +79,12 @@ class ColoredDbg { void set_flush_eqclass_dist(void) { flush_eqclass_dis = true; } private: - // returns true if adding this k-mer increased the number of equivalence classes - // and false otherwise. - bool add_kmer(key_obj& hash, BitVector& vector); - void add_bitvector(BitVector& vector, uint64_t eq_id); + // returns true if adding this k-mer increased the number of equivalence + // classes + // and false otherwise. + bool add_kmer(const typename key_obj::kmer_t& hash, const BitVector& + vector, bool isSampling); + void add_bitvector(const BitVector& vector, uint64_t eq_id); void add_eq_class(BitVector vector, uint64_t id); uint64_t get_next_available_id(void); void bv_buffer_serialize(); @@ -105,19 +103,19 @@ class ColoredDbg { bool flush_eqclass_dis{false}; std::time_t start_time_; spdlog::logger* console; + ColorEncoder* colorEncoder; }; template class SampleObject { public: - SampleObject() : obj(), cutoff(0), sample_id(), id(0) {}; - SampleObject(T o, uint32_t c = 0, std::string& s = std::string(), - uint32_t id = 0) : obj(o), cutoff(c), sample_id(s), id(id) {}; - SampleObject(const SampleObject& o) : obj(o.obj), cutoff(o.cutoff), - sample_id(o.sample_id), id(o.id) {} ; + SampleObject() : obj(), sample_id(), id(0) {} + SampleObject(T o, std::string& s = std::string(), + uint32_t id = 0) : obj(o), sample_id(s), id(id) {} + SampleObject(const SampleObject& o) : obj(o.obj), + sample_id(o.sample_id), id(o.id) {} T obj; - uint32_t cutoff; std::string sample_id; uint32_t id; }; @@ -148,7 +146,7 @@ template uint64_t ColoredDbg::get_num_bitvectors(void) const { uint64_t total = 0; for (uint32_t i = 0; i < num_serializations; i++) - total += eqclasses[i].bit_size(); + total += eqclasses[i].size(); return total / num_samples; } @@ -157,30 +155,31 @@ template void ColoredDbg::reshuffle_bit_vectors(cdbg_bv_map_t<__uint128_t, std::pair>& map) { - BitVector new_bv_buffer(mantis::NUM_BV_BUFFER * num_samples); - for (auto& it_input : map) { - auto it_local = eqclass_map.find(it_input.first); - if (it_local == eqclass_map.end()) { - console->error("Can't find the vector hash during shuffling"); - exit(1); - } else { - assert(it_local->second.first <= mantis::NUM_BV_BUFFER && it_input.second.first - <= mantis::NUM_BV_BUFFER); - uint64_t src_idx = ((it_local->second.first - 1) * num_samples); - 
uint64_t dest_idx = ((it_input.second.first - 1) * num_samples); - for (uint32_t i = 0; i < num_samples; i++, src_idx++, dest_idx++) - if (bv_buffer[src_idx]) - new_bv_buffer.set(dest_idx); - } - } - bv_buffer = new_bv_buffer; -} + BitVector new_bv_buffer(mantis::NUM_BV_BUFFER * num_samples); + for (auto& it_input : map) { + auto it_local = eqclass_map.find(it_input.first); + if (it_local == eqclass_map.end()) { + console->error("Can't find the vector hash during shuffling"); + exit(1); + } else { + assert(it_local->second.first <= mantis::NUM_BV_BUFFER && + it_input.second.first <= mantis::NUM_BV_BUFFER); + uint64_t src_idx = ((it_local->second.first - 1) * num_samples); + uint64_t dest_idx = ((it_input.second.first - 1) * num_samples); + for (uint32_t i = 0; i < num_samples; i++, src_idx++, dest_idx++) + if (bv_buffer[src_idx]) + new_bv_buffer[dest_idx] = 1; + } + } + bv_buffer = new_bv_buffer; + } template void ColoredDbg::reinit(cdbg_bv_map_t<__uint128_t, - std::pair>& map) { + std::pair>& map) { dbg.reset(); - reshuffle_bit_vectors(map); + //todo fatemeh + /*reshuffle_bit_vectors(map); // Check if the current bit vector buffer is full and needs to be serialized. // This happens when the sampling phase fills up the bv buffer. if (get_num_eqclasses() % mantis::NUM_BV_BUFFER == 0) { @@ -188,22 +187,45 @@ void ColoredDbg::reinit(cdbg_bv_map_t<__uint128_t, console->info("Serializing bit vector with {} eq classes.", get_num_eqclasses()); bv_buffer_serialize(); - } + }*/ eqclass_map = map; } template -bool ColoredDbg::add_kmer(key_obj& k, BitVector& - vector) { +bool ColoredDbg::add_kmer(const typename key_obj::kmer_t& key, + const BitVector& vector, + bool isSampling) { + //todo fatemeh // A kmer (hash) is seen only once during the merge process. // So we insert every kmer in the dbg - uint64_t eq_id = 1; - __uint128_t vec_hash = HashUtil::MurmurHash128A((void*)vector.data(), - vector.capacity(), 2038074743, - 2038074751); + uint64_t eq_id; + __uint128_t vec_hash = MurmurHash128A((void*)vector.data(), + vector.capacity()/8, 2038074743, + 2038074751); auto it = eqclass_map.find(vec_hash); - bool added_eq_class{false}; + bool added_eq_class{false}; + // Find if the eqclass of the kmer is already there. + // If it is there then increment the abundance. + // Else create a new eq class. + if (it == eqclass_map.end()) { + // eq class is seen for the first time. + eq_id = get_next_available_id(); + eqclass_map.emplace(std::piecewise_construct, + std::forward_as_tuple(vec_hash), + std::forward_as_tuple(eq_id, 1));added_eq_class = true; + } else { // eq class is seen before so increment the abundance. + eq_id = it->second.first; + // with standard map + it->second.second += 1; // update the abundance. + } + + if (!isSampling) { + colorEncoder->addColorClass(key, eq_id, vector); + } + +/* auto it = eqclass_map.find(vec_hash); + bool added_eq_class{false}; // Find if the eqclass of the kmer is already there. // If it is there then increment the abundance. // Else create a new eq class. @@ -219,44 +241,66 @@ bool ColoredDbg::add_kmer(key_obj& k, BitVector& eq_id = it->second.first; // with standard map it->second.second += 1; // update the abundance. + }*/ + + // we use the count to store the eqclass ids + int ret = dbg.insert(KeyObject(key,0,eq_id), QF_NO_LOCK | QF_KEY_IS_HASH); + if (ret == QF_NO_SPACE) { + // This means that auto_resize failed. + console->error("The CQF is full and auto resize failed. 
Please rerun build with a bigger size."); + exit(1); } - k.count = eq_id; // we use the count to store the eqclass ids - dbg.insert(k); - return added_eq_class; + return added_eq_class; } template -void ColoredDbg::add_bitvector(BitVector& vector, uint64_t - eq_id) { +void ColoredDbg::add_bitvector(const BitVector& vector, + uint64_t eq_id) { uint64_t start_idx = (eq_id % mantis::NUM_BV_BUFFER) * num_samples; - for (uint32_t i = 0; i < num_samples; i++, start_idx++) - if (vector[i]) - bv_buffer.set(start_idx); + for (uint32_t i = 0; i < num_samples/64*64; i+=64) + bv_buffer.set_int(start_idx+i, vector.get_int(i, 64), 64); + if (num_samples%64) + bv_buffer.set_int(start_idx+num_samples/64*64, + vector.get_int(num_samples/64*64, num_samples%64), + num_samples%64); } template void ColoredDbg::bv_buffer_serialize() { BitVector bv_temp(bv_buffer); if (get_num_eqclasses() % mantis::NUM_BV_BUFFER > 0) { - bv_temp.resize((get_num_eqclasses() % mantis::NUM_BV_BUFFER) * num_samples); + bv_temp.resize((get_num_eqclasses() % mantis::NUM_BV_BUFFER) * + num_samples); } BitVectorRRR final_com_bv(bv_temp); std::string bv_file(prefix + std::to_string(num_serializations) + "_" + - mantis::EQCLASS_FILE); - final_com_bv.serialize(bv_file); - bv_buffer.reset(); + mantis::EQCLASS_FILE); + sdsl::store_to_file(final_com_bv, bv_file); + bv_buffer = BitVector(bv_buffer.bit_size()); num_serializations++; } template void ColoredDbg::serialize() { + + //todo fatemeh + // serialize the bv buffer last time if needed + /*if (get_num_eqclasses() % mantis::NUM_BV_BUFFER > 1) + bv_buffer_serialize();*/ + std::cerr << "cache hits: " << colorEncoder->stats.cache_hits << " " + << colorEncoder->stats.tot_hits << " " + << (colorEncoder->stats.cache_hits*100)/colorEncoder->stats.tot_hits << "%\n"; + + colorEncoder->serialize(); + console->info("Done serializing the color class info"); // serialize the CQF + console->info("Serializing the CQF .. "); dbg.serialize(prefix + mantis::CQF_FILE); // serialize the bv buffer last time if needed - if (get_num_eqclasses() % mantis::NUM_BV_BUFFER > 1) + if (get_num_eqclasses() % mantis::NUM_BV_BUFFER > 0) bv_buffer_serialize(); //serialize the eq class id map @@ -269,25 +313,26 @@ void ColoredDbg::serialize() { // dump eq class abundance dist for further analysis. std::ofstream tmpfile(prefix + "eqclass_dist.lst"); for (auto sample : eqclass_map) - tmpfile << sample.second.first << " " << sample.second.second << std::endl; + tmpfile << sample.second.first << " " << sample.second.second << + std::endl; tmpfile.close(); } } template -std::unordered_map +mantis::QueryResult ColoredDbg::find_samples(const mantis::QuerySet& kmers) { // Find a list of eq classes and the number of kmers that belong those eq // classes. 
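// ---------------------------------------------------------------------------
// Illustrative sketch (plain STL, hypothetical names): find_samples first
// tallies how many query k-mers fall in each equivalence class, then expands
// each class's membership bit vector into per-sample counts, so a sample's
// score is the number of query k-mers whose class contains that sample.
#include <cassert>
#include <cstdint>
#include <unordered_map>
#include <vector>

std::vector<uint64_t>
sketch_find_samples(const std::vector<uint64_t> &kmer_eqclasses, // per query k-mer; 0 = absent
                    const std::vector<std::vector<bool>> &eqclass_members,
                    uint64_t num_samples) {
  std::unordered_map<uint64_t, uint64_t> per_class; // eq class -> k-mer count
  for (uint64_t eq : kmer_eqclasses)
    if (eq) per_class[eq] += 1;

  std::vector<uint64_t> per_sample(num_samples, 0);
  for (const auto &kv : per_class)
    for (uint64_t s = 0; s < num_samples; ++s)
      if (eqclass_members[kv.first][s]) per_sample[s] += kv.second;
  return per_sample;
}

int main() {
  // 2 classes over 3 samples: class 1 = {s0, s2}, class 2 = {s1}
  std::vector<std::vector<bool>> members = {
      {}, {true, false, true}, {false, true, false}}; // index 0 unused
  std::vector<uint64_t> hits = {1, 1, 2, 0};          // 4 query k-mers
  std::vector<uint64_t> res = sketch_find_samples(hits, members, 3);
  assert((res == std::vector<uint64_t>{2, 1, 2}));
  return 0;
}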
std::unordered_map query_eqclass_map; for (auto k : kmers) { key_obj key(k, 0, 0); - uint64_t eqclass = dbg.query(key); + uint64_t eqclass = dbg.query(key, 0); if (eqclass) query_eqclass_map[eqclass] += 1; } - std::unordered_map sample_map; + mantis::QueryResult sample_vec(num_samples, 0); for (auto it = query_eqclass_map.begin(); it != query_eqclass_map.end(); ++it) { auto eqclass_id = it->first; @@ -301,101 +346,118 @@ ColoredDbg::find_samples(const mantis::QuerySet& kmers) { uint64_t wrd = eqclasses[bucket_idx].get_int(bucket_offset, len); for (uint32_t i = 0, sCntr = w * 64; i < len; i++, sCntr++) if ((wrd >> i) & 0x01) - sample_map[sCntr] += count; + sample_vec[sCntr] += count; bucket_offset += len; } } - return sample_map; + return sample_vec; } template cdbg_bv_map_t<__uint128_t, std::pair>& ColoredDbg::construct(qf_obj *incqfs, uint64_t num_kmers) { - uint32_t nqf = 0; uint64_t counter = 0; - bool is_sampling = (num_kmers < std::numeric_limits::max()); + bool is_sampling = (num_kmers < std::numeric_limits::max()); - // merge all input CQFs into the final QF - std::vector::Iterator> it_incqfs; - it_incqfs.reserve(num_samples); + struct Iterator { + QFi qfi; + typename key_obj::kmer_t kmer; + uint32_t id; + Iterator(uint32_t id, const QF* cqf): id(id) { + if (qf_iterator_from_position(cqf, &qfi, 0) != QFI_INVALID) + get_key(); + } + void next() { + qfi_next(&qfi); + get_key(); + } + bool end() const { + return qfi_end(&qfi); + } + bool operator>(const Iterator& rhs) const { + return key() > rhs.key(); + } + const typename key_obj::kmer_t& key() const { return kmer; } + private: + void get_key() { + uint64_t value, count; + qfi_get_hash(&qfi, &kmer, &value, &count); + } + }; - // Initialize all iterators with sample specific cutoffs. - for (uint32_t i = 0; i < num_samples; i++) { - it_incqfs.emplace_back(incqfs[i].obj->begin(incqfs[i].cutoff)); - } + struct Minheap_PQ { + void push(const Iterator& obj) { + c.emplace_back(obj); + std::push_heap(c.begin(), c.end(), std::greater()); + } + void pop() { + std::pop_heap(c.begin(), c.end(), std::greater()); + c.pop_back(); + } + void replace_top(const Iterator& obj) { + c.emplace_back(obj); + pop(); + } + Iterator& top() { return c.front(); } + bool empty() const { return c.empty(); } + private: + std::vector c; + }; + Minheap_PQ minheap; - std::priority_queue, - std::vector>, compare> minheap; - // Insert the first key from each CQF in minheap. for (uint32_t i = 0; i < num_samples; i++) { - if (it_incqfs[i].done()) - continue; - KeyObject key = *it_incqfs[i]; - minheap.emplace(key, incqfs[i].cutoff, incqfs[i].sample_id, i); - nqf++; + Iterator qfi(i, incqfs[i].obj->get_cqf()); + if (qfi.end()) continue; + minheap.push(qfi); } while (!minheap.empty()) { - assert(minheap.size() == nqf); - SampleObject cur; BitVector eq_class(num_samples); - // Get the smallest key from minheap and update the eqclass vector - cur = minheap.top(); - eq_class.set(cur.id); - minheap.pop(); - // Keep poping keys from minheap until you see a different key. - // While poping keys build the eq class for cur. - // Increment iterators for all CQFs whose keys are popped. 
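The Iterator/Minheap_PQ pair above is a thin wrapper over the standard heap algorithms: the merge repeatedly takes the stream with the smallest current hash, sets that sample's bit in the color class, advances the stream, and keeps popping while the top of the heap carries the same key. A stripped-down sketch of the same k-way merge over plain sorted vectors (purely illustrative; no CQF involved):

#include <algorithm>
#include <cstdint>
#include <functional>
#include <vector>

struct Cursor {
    const std::vector<uint64_t>* src; // one sorted key stream per sample
    size_t pos;
    uint32_t id;                      // sample id, i.e. which color bit to set
    uint64_t key() const { return (*src)[pos]; }
    bool end() const { return pos >= src->size(); }
    bool operator>(const Cursor& o) const { return key() > o.key(); }
};

// Visit every distinct key once, together with the set of streams that contain it.
template <class Visit>
void kway_merge(std::vector<Cursor> heap, uint32_t num_streams, Visit visit) {
    std::make_heap(heap.begin(), heap.end(), std::greater<Cursor>());
    while (!heap.empty()) {
        std::vector<bool> color(num_streams, false); // stands in for the eq-class BitVector
        uint64_t key = heap.front().key();
        do {
            std::pop_heap(heap.begin(), heap.end(), std::greater<Cursor>());
            Cursor cur = heap.back();
            heap.pop_back();
            color[cur.id] = true;
            cur.pos++;
            if (!cur.end()) {                        // re-insert streams that still have keys
                heap.push_back(cur);
                std::push_heap(heap.begin(), heap.end(), std::greater<Cursor>());
            }
        } while (!heap.empty() && heap.front().key() == key);
        visit(key, color);                           // one call per distinct k-mer hash
    }
}

This mirrors the structure of construct(): the inner do/while builds one color class per distinct hash before add_kmer is invoked.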
- while (!minheap.empty() && cur.obj.key == minheap.top().obj.key) { - uint32_t id = minheap.top().id; - eq_class.set(id); - minheap.pop(); - ++it_incqfs[id]; - if (it_incqfs[id].done()) // If the iterator is done then decrement nqf - nqf--; - else { // Insert the current iterator head in minHeap - KeyObject key = *it_incqfs[id]; - minheap.emplace(key, incqfs[id].cutoff, incqfs[id].sample_id, id); - } - } - // Move the iterator of the smallest key. - ++it_incqfs[cur.id]; - if (it_incqfs[cur.id].done()) // If the iterator is done then decrement nqf - nqf--; - else { // Insert the current iterator head in minHeap - KeyObject key = *it_incqfs[cur.id]; - minheap.emplace(key, incqfs[cur.id].cutoff, incqfs[cur.id].sample_id, cur.id); - } - // Add in the cdbg - bool added_eq_class = add_kmer(cur.obj, eq_class); - counter++; + KeyObject::kmer_t last_key; + do { + Iterator& cur = minheap.top(); + last_key = cur.key(); + eq_class[cur.id] = 1; + cur.next(); + minheap.replace_top(cur); + } while(!minheap.empty() && last_key == minheap.top().key()); + + bool added_eq_class = add_kmer(last_key, eq_class, is_sampling); + ++counter; // Progress tracker static uint64_t last_size = 0; - if (dbg.size() % 10000000 == 0 && - dbg.size() != last_size) { - last_size = dbg.size(); + if (dbg.dist_elts() % 10000000 == 0 && + dbg.dist_elts() != last_size) { + last_size = dbg.dist_elts(); console->info("Kmers merged: {} Num eq classes: {} Total time: {}", - dbg.size(), get_num_eqclasses(), time(nullptr) - start_time_); + dbg.dist_elts(), get_num_eqclasses(), time(nullptr) - + start_time_); } // Check if the bit vector buffer is full and needs to be serialized. - if (added_eq_class and (get_num_eqclasses() % mantis::NUM_BV_BUFFER == 0)) { + if (added_eq_class and (get_num_eqclasses() % mantis::NUM_BV_BUFFER == 0)) + { // Check if the process is in the sampling phase. if (is_sampling) { break; - } else { + } else { + } + //todo fatemeh + /*else { // The bit vector buffer is full. console->info("Serializing bit vector with {} eq classes.", get_num_eqclasses()); bv_buffer_serialize(); - } + }*/ } else if (counter > num_kmers) { - // Check if the sampling phase is finished based on the number of k-mers. + // Check if the sampling phase is finished based on the number of k-mers. 
break; - } + } + + while(!minheap.empty() && minheap.top().end()) minheap.pop(); } return eqclass_map; @@ -411,17 +473,22 @@ void ColoredDbg::build_sampleid_map(qf_obj *incqfs) { template ColoredDbg::ColoredDbg(uint64_t qbits, uint64_t key_bits, + enum qf_hashmode hashmode, uint32_t seed, std::string& prefix, uint64_t nqf) : - dbg(qbits, key_bits, seed), bv_buffer(mantis::NUM_BV_BUFFER * nqf), - prefix(prefix), num_samples(nqf), num_serializations(0), start_time_(std::time(nullptr)) {} + dbg(qbits, key_bits, hashmode, seed), bv_buffer(mantis::NUM_BV_BUFFER * nqf), + prefix(prefix), num_samples(nqf), num_serializations(0), + start_time_(std::time(nullptr)) { + dbg.set_auto_resize(); + colorEncoder = new ColorEncoder(prefix, num_samples, dbg, num_samples*100000, ceil(log2(num_samples))-3); + } template ColoredDbg::ColoredDbg(std::string& cqf_file, std::vector& eqclass_files, std::string& sample_file) - : dbg(cqf_file, false), bv_buffer(), start_time_(std::time(nullptr)) { +: dbg(cqf_file, CQF_FREAD), bv_buffer(), start_time_(std::time(nullptr)) { num_samples = 0; num_serializations = 0; @@ -432,8 +499,10 @@ ColoredDbg::ColoredDbg(std::string& cqf_file, } eqclasses.reserve(sorted_files.size()); + BitVectorRRR bv; for (auto file : sorted_files) { - eqclasses.push_back(BitVectorRRR(file.second)); + sdsl::load_from_file(bv, file.second); + eqclasses.push_back(bv); num_serializations++; } @@ -446,6 +515,7 @@ ColoredDbg::ColoredDbg(std::string& cqf_file, num_samples++; } sampleid.close(); + } #endif diff --git a/include/common_types.h b/include/common_types.h index 63b6040..b1c622c 100644 --- a/include/common_types.h +++ b/include/common_types.h @@ -4,6 +4,7 @@ #include #include #include +#include "tsl/hopscotch_map.h" namespace mantis { using KmerHash = uint64_t; @@ -16,7 +17,7 @@ namespace mantis { }; - using QueryResult = std::unordered_map; + using QueryResult = std::vector;//std::unordered_map;//tsl::hopscotch_map; using QueryResults = std::vector; } diff --git a/include/compressedSetBit.h b/include/compressedSetBit.h new file mode 100644 index 0000000..0fc43d2 --- /dev/null +++ b/include/compressedSetBit.h @@ -0,0 +1,90 @@ +#ifndef __COMPRESSED_SET_BIT_H__ +#define __COMPRESSED_SET_BIT_H__ +#include "codecfactory.h" +#include "intersection.h" + +#include +#include /* for sort, random_shuffle */ + +using namespace SIMDCompressionLib; + +template +class CompressedSetBit { +public: + CompressedSetBit(std::vector idxList) { + nSetInts = idxList.size(); + + std::sort(idxList.begin(), idxList.end()); + //std::cout << "\nAfter sorting:\n"; + //for (size_t i = 0; i < idxList.size(); i++) std::cout << idxList[i] << " "; + //std::cout << "\n"; + // We pick a CODEC + //IntegerCODEC &codec = *CODECFactory::getFromName("s4-bp128-dm"); + IntegerCODEC &codec = *CODECFactory::getFromName("s4-fastpfor-d1"); + + std::vector dat(idxList.size() + 1024); + //deltaCompressedSetList.resize(idxList.size() + 1024); + size_t compressedsize = dat.size(); + codec.encodeArray(idxList.data(), idxList.size(), dat.data(), + compressedsize); + + dat.resize(compressedsize); + dat.shrink_to_fit(); + nCompressedInts = compressedsize; + data_.reset(new uint32_t[compressedsize]); + std::copy(dat.begin(), dat.end(), data_.get()); + } + + void uncompress(std::vector& idxList) { + //IntegerCODEC &codec = *CODECFactory::getFromName("s4-bp128-dm"); + IntegerCODEC &codec = *CODECFactory::getFromName("s4-fastpfor-d1"); + idxList.resize(nSetInts); + size_t compressedSize = nCompressedInts;//deltaCompressedSetList.size(); + size_t 
originalSize = nSetInts; + codec.decodeArray(&data_[0], compressedSize, + idxList.data(), originalSize); + idxList.resize(originalSize); //?? why do we need this? + } + + size_t size_in_bytes() { + //std::cout << " size: " << deltaCompressedSetList.size() << " "; + return sizeof(*this) + (sizeof(uint32_t) * nCompressedInts); + } + + bool serialize(std::ostream& output) { + output.write(reinterpret_cast(&nSetInts), sizeof(nSetInts)); + output.write(reinterpret_cast(&nCompressedInts), sizeof(nCompressedInts)); + output.write(reinterpret_cast(&data_[0]), sizeof(uint32_t) * nCompressedInts); + return true; + } + + bool deserialize(std::istream& input) { + input.read(reinterpret_cast(&nSetInts), sizeof(nSetInts)); + input.read(reinterpret_cast(&nCompressedInts), sizeof(nCompressedInts)); + data_.reset(new uint32_t[nCompressedInts]); + input.read(reinterpret_cast(&data_[0]), sizeof(uint32_t) * nCompressedInts); + return true; + } + +private: + template + friend inline bool operator==(const CompressedSetBit& lhs, const CompressedSetBit& rhs); + template + friend inline bool operator!=(const CompressedSetBit& lhs, const CompressedSetBit& rhs); + //std::vector deltaCompressedSetList; + std::unique_ptr data_; + IndexSizeT nSetInts; + IndexSizeT nCompressedInts; +}; + +template +inline bool operator==(const CompressedSetBit& lhs, const CompressedSetBit& rhs) { + return (lhs.nCompressedInts == rhs.nCompressedInts) ? (std::memcmp(&lhs.data_[0], &rhs.data_[0], lhs.nCompressedInts) == 0) : false; +} + +template +inline bool operator!=(const CompressedSetBit& lhs, const CompressedSetBit& rhs) { + return !(lhs == rhs); +} + +#endif diff --git a/include/cqf.h b/include/cqf.h deleted file mode 100644 index 39fccec..0000000 --- a/include/cqf.h +++ /dev/null @@ -1,344 +0,0 @@ -/* - * ============================================================================ - * - * Filename: cqf.h - * - * Description: - * - * Version: 1.0 - * Created: 2017-10-26 11:50:04 AM - * Revision: none - * Compiler: gcc - * - * Author: Prashant Pandey (), ppandey@cs.stonybrook.edu - * Organization: Stony Brook University - * - * ============================================================================ - */ - -#ifndef _CQF_H_ -#define _CQF_H_ - -#include -#include -#include - -#include -#include -#include -#include - -#include "cqf/gqf.h" -#include "util.h" - -#define NUM_HASH_BITS 28 -#define NUM_Q_BITS 20 -#define PAGE_DROP_GRANULARITY (1ULL << 21) -#define PAGE_BUFFER_SIZE 4096 - -static uint64_t tmp_sum_local; - -template -class CQF { - public: - CQF(); - CQF(uint64_t qbits, uint64_t key_bits, uint32_t seed); - CQF(std::string& filename, bool flag); - CQF(const CQF& copy_cqf); - - void insert(const key_obj& k); - - /* Will return the count. 
*/ - uint64_t query(const key_obj& k); - - uint64_t get_index(const key_obj& k); - - void serialize(std::string filename) { - qf_serialize(&cqf, filename.c_str()); - } - - uint64_t range(void) const { return cqf.metadata->range; } - uint32_t seed(void) const { return cqf.metadata->seed; } - uint32_t keybits(void) const { return cqf.metadata->key_bits; } - uint64_t size(void) const { return cqf.metadata->ndistinct_elts; } - uint64_t capacity() const {return cqf.metadata->nslots; } - //uint64_t set_size(void) const { return set.size(); } - void reset(void) { qf_reset(&cqf); } - - void dump_metadata(void) const { DEBUG_DUMP(&cqf); } - - void drop_pages(uint64_t cur); - - class Iterator { - public: - Iterator(QFi it, uint32_t cutoff, uint64_t end_hash); - ~Iterator(); - key_obj operator*(void) const; - void operator++(void); - bool done(void) const; - - QFi iter; - int64_t last_prefetch_offset; - uint64_t buffer_size; - private: - /* global buffer to perform read ahead */ - unsigned char *buffer; - uint32_t num_pages; - uint32_t cutoff; - uint64_t end_hash; - struct aiocb aiocb; - }; - - Iterator limits(uint64_t start_hash, uint64_t end_hash, uint32_t cutoff) - const; - Iterator begin(uint32_t cutoff) const; - Iterator end(uint32_t cutoff) const; - - private: - QF cqf; - //std::unordered_set set; -}; - -class KeyObject { - public: - KeyObject() : key(0), value(0), count(0) {}; - - KeyObject(uint64_t k, uint64_t v, uint64_t c) : key(k), - value(v), count(c) {}; - - KeyObject(const KeyObject& k) : key(k.key), value(k.value), count(k.count) {}; - - bool operator==(KeyObject k) { return key == k.key; } - - uint64_t key; - uint64_t value; - uint64_t count; -}; - -template -CQF::CQF() { - qf_init(&cqf, 1ULL << NUM_Q_BITS, NUM_HASH_BITS, 0, true, "", 23423); -} - -template -CQF::CQF(uint64_t qbits, uint64_t key_bits, uint32_t seed) { - qf_init(&cqf, 1ULL << qbits, key_bits, 0, true, "", seed); -} - -template -CQF::CQF(std::string& filename, bool flag) { - if (flag) - qf_read(&cqf, filename.c_str()); - else - qf_deserialize(&cqf, filename.c_str()); -} - -template -CQF::CQF(const CQF& copy_cqf) { - memcpy(reinterpret_cast(&cqf), reinterpret_cast(const_cast(©_cqf.cqf)), sizeof(QF)); -} - -template -void CQF::insert(const key_obj& k) { - qf_insert(&cqf, k.key, k.value, k.count, LOCK_AND_SPIN); - // To validate the CQF - //set.insert(k.key); -} - -template -uint64_t CQF::query(const key_obj& k) { - return qf_count_key_value(&cqf, k.key, k.value); -} - -template -uint64_t CQF::get_index(const key_obj& k) { - return get_bucket_index(k.key); -} - -template -CQF::Iterator::Iterator(QFi it, uint32_t cutoff, uint64_t end_hash) - : iter(it), last_prefetch_offset(LLONG_MIN), cutoff(cutoff), - end_hash(end_hash) { - buffer_size = (((it.qf->metadata->size / 2048 - - (rand() % (it.qf->metadata->size / 4096))) - + 4095) / 4096) * 4096; - buffer = (unsigned char*)mmap(NULL, buffer_size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (buffer == MAP_FAILED) { - perror("buffer malloc"); - std::cerr << "Can't allocate buffer space." 
<< std::endl; - exit(1); - } - }; - -template -CQF::Iterator::~Iterator(void) { - struct aiocb *aio_list[1]; - aio_list[0] = &aiocb; - int ret = aio_suspend(aio_list, 1, 0); - if (ret < 0) { - perror("aio_suspend"); - exit(1); - } -} - -template -key_obj CQF::Iterator::operator*(void) const { - uint64_t key = 0, value = 0, count = 0; - qfi_get(&iter, &key, &value, &count); - return key_obj(key, value, count); -} - -// This function read one byte from each page in the iterator buffer. -//void handler_function(union sigval sv); - -template -void CQF::Iterator::operator++(void) { - uint64_t last_read_offset; - qfi_nextx(&iter, &last_read_offset); - - // Read next "buffer_size" bytes from the file offset. - if ((int64_t)last_read_offset >= last_prefetch_offset) { - //DEBUG_CDBG("last_read_offset>last_prefetch_offset for " << iter.qf->mem->fd - //<< " " << last_read_offset << ">" << last_prefetch_offset); - if (aiocb.aio_buf) { - int res = aio_error(&aiocb); - if (res == EINPROGRESS) { - //DEBUG_CDBG("didn't read fast enough for " << aiocb.aio_fildes << - //" at " << last_read_offset << "(until " << last_prefetch_offset << - //" buffer size: "<< buffer_size << ")..."); - const struct aiocb *const aiocb_list[1] = {&aiocb}; - aio_suspend(aiocb_list, 1, NULL); - //DEBUG_CDBG(" finished it"); - } else if (res > 0) { - //DEBUG_CDBG("aio_error() returned " << std::dec << res); - } else if (res == 0) { - //DEBUG_CDBG("prefetch was OK for " << aiocb.aio_fildes << " at " << - //std::hex << aiocb.aio_offset << std::dec); - } else if (res == 0) { - unsigned char *start = (unsigned char*)(iter.qf->metadata) + - last_prefetch_offset; - unsigned char *counter = (unsigned char*)(iter.qf->metadata) + - last_prefetch_offset; - for (;counter < start + buffer_size; counter += 4096) { - tmp_sum_local += *counter; - } - } - } - - if ((last_prefetch_offset - (int64_t)buffer_size) > 0) { - madvise((unsigned char *)(iter.qf->metadata) + last_prefetch_offset - - buffer_size, buffer_size, MADV_DONTNEED); - posix_fadvise(iter.qf->mem->fd, (off_t)(last_prefetch_offset - - (int64_t)buffer_size), - buffer_size, POSIX_FADV_DONTNEED); - } - - memset(&aiocb, 0, sizeof(struct aiocb)); - aiocb.aio_fildes = iter.qf->mem->fd; - aiocb.aio_buf = (volatile void*)buffer; - aiocb.aio_nbytes = buffer_size; - if ((last_prefetch_offset + (int64_t)buffer_size) < - (int64_t)last_read_offset) { - if (last_prefetch_offset != 0) - //DEBUG_CDBG("resetting.. lpo:" << last_prefetch_offset << " lro:" << - //last_read_offset); - last_prefetch_offset = ( last_read_offset & ~(4095ULL) ) + - PAGE_BUFFER_SIZE; - } else { - last_prefetch_offset += buffer_size; - } - aiocb.aio_offset = (__off_t)last_prefetch_offset; - //DEBUG_CDBG("prefetch in " << iter.qf->mem->fd << " from " << std::hex << - //last_prefetch_offset << std::dec << " ... " << " buffer size: " - //<< buffer_size << " into buffer at " << std::hex << - //((uint64_t)buffer) << std::dec); - // to touch each page in the buffer. 
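The deleted branch above is the old iterator's read-ahead: it kept one POSIX AIO request in flight per input CQF, drained it with aio_suspend before touching the buffer again, and used madvise/posix_fadvise to drop pages it had already consumed. The new gqf-based iteration drops all of this. For reference, a bare-bones sketch of that single-outstanding-request pattern (generic POSIX AIO, not Mantis code; link with -lrt on Linux):

#include <aio.h>
#include <cerrno>
#include <cstring>

struct Prefetcher {
    aiocb cb{};              // must stay at a fixed address while the request is in flight
    bool in_flight = false;

    // Enqueue an asynchronous read of `len` bytes at `offset` into `buf`.
    void start(int fd, off_t offset, char* buf, size_t len) {
        wait();              // keep at most one request outstanding
        std::memset(&cb, 0, sizeof(cb));
        cb.aio_fildes = fd;
        cb.aio_offset = offset;
        cb.aio_buf    = buf;
        cb.aio_nbytes = len;
        if (aio_read(&cb) == 0)
            in_flight = true;
    }

    // Block until the outstanding read has completed (or failed).
    void wait() {
        if (!in_flight) return;
        const aiocb* list[1] = { &cb };
        while (aio_error(&cb) == EINPROGRESS)
            aio_suspend(list, 1, nullptr);
        aio_return(&cb);     // reap the result so the control block can be reused
        in_flight = false;
    }
};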
- //aiocb.aio_sigevent.sigev_notify = SIGEV_THREAD; - //aiocb.aio_sigevent.sigev_notify_function = &handler_function; - //aiocb.aio_sigevent.sigev_value.sival_ptr = (void*)this; - - uint32_t ret = aio_read(&aiocb); - //uint32_t ret = posix_fadvise(iter.qf->mem->fd, last_read_offset, - //buffer_size, POSIX_FADV_WILLNEED); - //DEBUG_CDBG("prefetch issued"); - if (ret) { - std::cerr << "aio_read failed at " << iter.current << " total size " << - iter.qf->metadata->nslots << std::endl; - perror("aio_read"); - } - } - - // Skip past the cutoff - do { - uint64_t key = 0, value = 0, count = 0; - qfi_get(&iter, &key, &value, &count); - if (count < cutoff) - qfi_next(&iter); - else - break; - } while(!qfi_end(&iter)); - - // drop pages of the last million slots. - //static uint64_t last_marker = 1; - //if (iter.current / PAGE_DROP_GRANULARITY > last_marker + 1) { - //uint64_t start_idx = last_marker * PAGE_DROP_GRANULARITY; - //uint64_t end_idx = (last_marker + 1) * PAGE_DROP_GRANULARITY; - //qf_drop_pages(iter.qf, start_idx, end_idx); - //last_marker += 1; - //} -} - -/* Currently, the iterator only traverses forward. So, we only need to check - * the right side limit. - */ -template -bool CQF::Iterator::done(void) const { - uint64_t key = 0, value = 0, count = 0; - qfi_get(&iter, &key, &value, &count); - return key >= end_hash || qfi_end(&iter); -} - -template -typename CQF::Iterator CQF::begin(uint32_t cutoff) const { - QFi qfi; - qf_iterator(&this->cqf, &qfi, 0); - // Skip past the cutoff - do { - uint64_t key = 0, value = 0, count = 0; - qfi_get(&qfi, &key, &value, &count); - if (count < cutoff) - qfi_next(&qfi); - else - break; - } while(!qfi_end(&qfi)); - - return Iterator(qfi, cutoff, UINT64_MAX); -} - -template -typename CQF::Iterator CQF::end(uint32_t cutoff) const { - QFi qfi; - qf_iterator(&this->cqf, &qfi, 0xffffffffffffffff); - return Iterator(qfi, cutoff, UINT64_MAX); -} - -template -typename CQF::Iterator CQF::limits(uint64_t start_hash, - uint64_t end_hash, - uint32_t cutoff) const { - QFi qfi; - qf_iterator_hash(&this->cqf, &qfi, start_hash); - // Skip past the cutoff - do { - uint64_t key = 0, value = 0, count = 0; - qfi_get(&qfi, &key, &value, &count); - if (count < cutoff) - qfi_next(&qfi); - else - break; - } while(!qfi_end(&qfi)); - - return Iterator(qfi, cutoff, end_hash); -} -#endif diff --git a/include/cqf/gqf.h b/include/cqf/gqf.h index cc954e1..e69de29 100644 --- a/include/cqf/gqf.h +++ b/include/cqf/gqf.h @@ -1,198 +0,0 @@ -#ifndef QF_H -#define QF_H - -#include -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - - /* Can be - 0 (choose size at run-time), - 8, 16, 32, or 64 (for optimized versions), - or other integer <= 56 (for compile-time-optimized bit-shifting-based versions) - */ -#define BITS_PER_SLOT 0 - -#define BITMASK(nbits) ((nbits) == 64 ? 
0xffffffffffffffff : (1ULL << (nbits)) \ - - 1ULL) - - struct __attribute__ ((__packed__)) qfblock; - typedef struct qfblock qfblock; - - enum lock { - LOCK_NO_SPIN, - LOCK_AND_SPIN, - NO_LOCK - }; - - typedef struct { - uint64_t total_time_single; - uint64_t total_time_spinning; - uint64_t locks_taken; - uint64_t locks_acquired_single_attempt; - } wait_time_data; - - typedef struct quotient_filter_mem { - int fd; - volatile int metadata_lock; - volatile int *locks; - wait_time_data *wait_times; - } quotient_filter_mem; - - typedef quotient_filter_mem qfmem; - - typedef struct quotient_filter_metadata { - uint64_t size; - uint32_t seed; - uint64_t nslots; - uint64_t xnslots; - uint64_t key_bits; - uint64_t value_bits; - uint64_t key_remainder_bits; - uint64_t bits_per_slot; - __uint128_t range; - uint64_t nblocks; - uint64_t nelts; - uint64_t ndistinct_elts; - uint64_t noccupied_slots; - uint64_t num_locks; - } quotient_filter_metadata; - - typedef quotient_filter_metadata qfmetadata; - - typedef struct quotient_filter { - qfmem *mem; - qfmetadata *metadata; - qfblock *blocks; - } quotient_filter; - - typedef quotient_filter QF; - - typedef struct { - uint64_t start_index; - uint16_t length; - } cluster_data; - - typedef struct quotient_filter_iterator { - const QF *qf; - uint64_t run; - uint64_t current; - uint64_t cur_start_index; - uint16_t cur_length; - uint32_t num_clusters; - cluster_data *c_info; - } quotient_filter_iterator; - - typedef quotient_filter_iterator QFi; - - /* Forward declaration for the macro. */ - void qf_dump_metadata(const QF *qf); - -#define DEBUG_CQF(fmt, ...) \ - do { if (PRINT_DEBUG) fprintf(stderr, fmt, __VA_ARGS__); } while (0) - -#define DEBUG_DUMP(qf) \ - do { if (PRINT_DEBUG) qf_dump_metadata(qf); } while (0) - - void qf_init(QF *qf, uint64_t nslots, uint64_t key_bits, uint64_t - value_bits, bool mem, const char *path, uint32_t seed); - - void qf_reset(QF *qf); - - void qf_destroy(QF *qf, bool mem); - - void qf_copy(QF *dest, QF *src); - - /* Increment the counter for this key/value pair by count. */ - bool qf_insert(QF *qf, uint64_t key, uint64_t value, uint64_t count, - enum lock flag); - - /* Remove count instances of this key/value combination. */ - void qf_remove(QF *qf, uint64_t key, uint64_t value, uint64_t count, enum - lock flag); - - /* Remove all instances of this key/value pair. */ - void qf_delete_key_value(QF *qf, uint64_t key, uint64_t value); - - /* Remove all instances of this key. */ - void qf_delete_key(QF *qf, uint64_t key); - - /* Replace the association (key, oldvalue, count) with the association - (key, newvalue, count). If there is already an association (key, - newvalue, count'), then the two associations will be merged and - their counters will be summed, resulting in association (key, - newvalue, count' + count). */ - void qf_replace(QF *qf, uint64_t key, uint64_t oldvalue, uint64_t newvalue); - - /* Lookup the value associated with key. Returns the count of that - key/value pair in the QF. If it returns 0, then, the key is not - present in the QF. Only returns the first value associated with key - in the QF. If you want to see others, use an iterator. */ - uint64_t qf_query(const QF *qf, uint64_t key, uint64_t *value); - - /* Return the number of times key has been inserted, with any value, - into qf. */ - uint64_t qf_count_key(const QF *qf, uint64_t key); - - /* Return the number of times key has been inserted, with the given - value, into qf. 
*/ - uint64_t qf_count_key_value(const QF *qf, uint64_t key, uint64_t value); - - /* Initialize an iterator */ - bool qf_iterator(const QF *qf, QFi *qfi, uint64_t position); - - /* Initialize an iterator and position it at the smallest index containing a - * hash value greater than or equal to "hash". */ - bool qf_iterator_hash(const QF *qf, QFi *qfi, uint64_t hash); - - /* Returns 0 if the iterator is still valid (i.e. has not reached the - end of the QF. */ - int qfi_get(const QFi *qfi, uint64_t *key, uint64_t *value, uint64_t *count); - - /* Advance to next entry. Returns whether or not another entry is - found. */ - int qfi_next(QFi *qfi); - int qfi_nextx(QFi *qfi, uint64_t* read_offset); - - /* Check to see if the if the end of the QF */ - int qfi_end(const QFi *qfi); - - /* For debugging */ - void qf_dump(const QF *); - - /* write data structure of to the disk */ - void qf_serialize(const QF *qf, const char *filename); - - /* read data structure off the disk */ - void qf_deserialize(QF *qf, const char *filename); - - /* mmap the QF from disk. */ - void qf_read(QF *qf, const char *path); - - /* merge two QFs into the third one. */ - void qf_merge(QF *qfa, QF *qfb, QF *qfc, enum lock flag); - - /* merge multiple QFs into the final QF one. */ - void qf_multi_merge(QF *qf_arr[], int nqf, QF *qfr, enum lock flag); - - /* find cosine similarity between two QFs. */ - uint64_t qf_inner_product(QF *qfa, QF *qfb); - - /* magnitude of a QF. */ - uint64_t qf_magnitude(QF *qf); - - /* use madvice to drop pages corresponding to blocks from start_idx to - * last_slot_idx. */ - void qf_drop_pages(const QF *qf, uint64_t start_idx, uint64_t end_idx); - - /* return the addr of the slot in qfblock */ - const unsigned char *qf_get_addr(const QF *qf, uint64_t idx); - -#ifdef __cplusplus -} -#endif - -#endif /* QF_H */ diff --git a/include/deltaManager.h b/include/deltaManager.h new file mode 100644 index 0000000..91ebbf2 --- /dev/null +++ b/include/deltaManager.h @@ -0,0 +1,262 @@ +// +// Created by Fatemeh Almodaresi on 8/17/18. 
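The deltaManager.h header that follows packs a variable-length list of sample deltas for each color class into fixed-width slots of a flat array of 64-bit words, spilling lists that do not fit into a heap allocation. Its core primitive is writing and reading a width-bit value at an arbitrary bit offset, possibly straddling a word boundary. A minimal sketch of that primitive (assuming width <= 64; names are illustrative, not taken from the header):

#include <cstdint>
#include <vector>

// Write the low `width` bits of `val` at bit offset `pos` in `words`.
void put_bits(std::vector<uint64_t>& words, uint64_t pos, uint64_t val, uint64_t width) {
    uint64_t mask = width < 64 ? (((uint64_t)1 << width) - 1) : ~(uint64_t)0;
    val &= mask;
    uint64_t w = pos / 64, off = pos % 64;
    words[w] = (words[w] & ~(mask << off)) | (val << off);
    if (off + width > 64) {                       // the value straddles two words
        uint64_t written = 64 - off;              // bits already placed in words[w]
        words[w + 1] = (words[w + 1] & ~(mask >> written)) | (val >> written);
    }
}

// Read `width` bits starting at bit offset `pos` in `words`.
uint64_t get_bits(const std::vector<uint64_t>& words, uint64_t pos, uint64_t width) {
    uint64_t mask = width < 64 ? (((uint64_t)1 << width) - 1) : ~(uint64_t)0;
    uint64_t w = pos / 64, off = pos % 64;
    uint64_t res = words[w] >> off;
    if (off + width > 64)                         // pull the remaining bits from the next word
        res |= words[w + 1] << (64 - off);
    return res & mask;
}

With ceil(log2(num_samples)) bits per slot, this is what lets DeltaManager store each delta (a sample index) without padding it to a full machine word.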
+// + +#ifndef MANTIS_DELTAMANAGER_H +#define MANTIS_DELTAMANAGER_H + +#include +#include +#include +//#include +#include +#include +#include "sdsl/bit_vectors.hpp" + +struct DeltaManagerException : public std::exception { +private: + std::string message_; +public: + + DeltaManagerException(const std::string& message) : message_(message) {} + const char * what () const throw () { + return message_.c_str(); + } +}; + +class DeltaManager { +public: + + DeltaManager(uint64_t numSamplesIn, + uint64_t approximateColorClsCnt, + uint64_t approximateAvgDeltaCntPerColorCls) : numSamples(numSamplesIn) { + slotWidth = ceil(log2(numSamples)); + if (slotWidth * (numSamples+1) < 64) { + slotsPerColorCls = numSamples + 1; + } else if (slotWidth * (approximateAvgDeltaCntPerColorCls+1) < 64) { + slotsPerColorCls = (64 / slotWidth + 1) + 1; + } else { + slotsPerColorCls = approximateAvgDeltaCntPerColorCls + 1; // 1 for storing count of deltas per index + } + deltas.reserve(approximateColorClsCnt * slotsPerColorCls); + // assumption: count of slots * their width is greater than 64 bits + slotsPerColorClsWithPtrs = ((slotsPerColorCls - 1) * slotWidth - 64) / slotWidth + 1; + colorCnt = 0; + } + + void insertDeltas(uint64_t colorId, const std::vector &dlta) { + + // see if we need to split deltas between main DS and heap + // TODO not an expected behaviour, but we have no choice + //if (colorId > colorCnt) throw DeltaManagerException("colorId > colorCnt"); + + auto startBit = colorId*slotsPerColorCls*slotWidth; + auto nextStartBit = (colorId+1)*slotsPerColorCls*slotWidth; + // We always want to assign slotsPerColorCls slots to each index + // even if deltas in that index are fewer than the avg num of deltas + if (colorId >= colorCnt) { + while (deltas.size() < nextStartBit/64+1) { + deltas.push_back(0); + } + colorCnt = colorId+1; + } else { + // take care of deleting the pointer in case of previously creating one here: + deletePtr(colorId); + } + + // now assuming we've already reserved the space for the new colorId, insert deltas + uint64_t mainDSDeltaCnt = dlta.size() < slotsPerColorCls ? dlta.size() : slotsPerColorClsWithPtrs - 1; + if (dlta.size() > numSamples) { + std::string msg = "number of deltas is greater than num_samples. val:"+ + std::to_string(dlta.size())+ + " num_samples:" +std::to_string(numSamples); + throw DeltaManagerException(msg); + } + insertValIntoDeltaV(startBit, dlta.size(), slotWidth); // insert count of deltas + totDeltaCnt += dlta.size(); + startBit += slotWidth; + for (uint64_t i = 0; i < mainDSDeltaCnt; i++) { // insert values into main DS + if (dlta[i] >= numSamples) { + std::string msg = "delta index is larger than num_samples. 
val:"+ + std::to_string(dlta[i])+ + " num_samples:" +std::to_string(numSamples); + throw DeltaManagerException(msg); + } + insertValIntoDeltaV(startBit, dlta[i], slotWidth); + startBit += slotWidth; + } + if (mainDSDeltaCnt < dlta.size()) { // in case count of deltas exceeds the reserved count + // store the rest in an array in heap + uint64_t *theRest = new uint64_t[(uint64_t)std::ceil((double)((dlta.size() - mainDSDeltaCnt) * slotWidth) / 64.0)](); + insertValIntoDeltaV(startBit, reinterpret_cast(theRest), + 64); // store the pointer to the heap in the main DS + insertValIntoHeap(dlta, mainDSDeltaCnt, theRest, slotWidth); + } + } + + std::vector getDeltas(uint64_t colorId) { + if (colorId > colorCnt) throw DeltaManagerException("colorId > colorCnt"); + std::vector res; + uint64_t startBit = colorId * slotWidth * slotsPerColorCls; // index for next color + uint64_t deltaCnt = getValFromMDeltaV(startBit, slotWidth); + uint64_t mainDSDeltaCnt = deltaCnt < slotsPerColorCls ? deltaCnt : slotsPerColorClsWithPtrs - 1; + startBit += slotWidth; + for (uint64_t i = 0; i < mainDSDeltaCnt; i++) { // insert values into main DS + uint64_t delta = getValFromMDeltaV(startBit, slotWidth); + res.push_back(delta); + startBit += slotWidth; + } + if (mainDSDeltaCnt < deltaCnt) { // in case count of deltas exceeds the reserved count + // fetch the pointer to the heap in the main DS + uint64_t *theRestV = reinterpret_cast(getValFromMDeltaV(startBit, 64)); + getValFromHeap(res, deltaCnt - mainDSDeltaCnt, theRestV, slotWidth); + } + return res; + } + + void swapDeltas(uint64_t colorId1, uint64_t colorId2) { + std::vector c1deltas = getDeltas(colorId1); + std::vector c2deltas = getDeltas(colorId2); + insertDeltas(colorId1, c2deltas); + insertDeltas(colorId2, c1deltas); + } + + bool serialize(std::string prefix) { + std::cerr << "\nSerializing Deltas:\nTotal delta count:" << + totDeltaCnt << " Total color count:" << colorCnt << "\n"; + + std::string deltabv_file = prefix + "/deltas.bv"; + std::string boundarybv_file = prefix + "/boundary.bv"; + + sdsl::int_vector<> deltabv(totDeltaCnt, 0, slotWidth); + sdsl::bit_vector boundarybv(totDeltaCnt, 0); + uint64_t j = 1; + boundarybv[0] = 1; // TODO careful to add an if in case we're gonna change zero to something other than 0 + for (uint64_t i = 1; i < colorCnt; i++) { + auto dltas = getDeltas(i); + for (auto dlt : dltas) { + deltabv[j] = dlt; + j++; + } + boundarybv[j-1] = 1; + } + std::cerr << "\n"; + bool deltabvSuccessfullyStored = sdsl::store_to_file(deltabv, deltabv_file); + bool boundarybvSuccessfullyStored = sdsl::store_to_file(boundarybv, boundarybv_file); + + return deltabvSuccessfullyStored and boundarybvSuccessfullyStored; + } + + uint64_t getDeltaCnt() {return totDeltaCnt;} + +private: + uint64_t numSamples; + uint64_t slotWidth; + uint64_t slotsPerColorCls; + uint64_t slotsPerColorClsWithPtrs; + std::vector deltas; + uint64_t colorCnt; + uint64_t totDeltaCnt{0}; + + void deletePtr(uint64_t colorId) { + uint64_t startBit = colorId * slotWidth * slotsPerColorCls; // index for next color + uint64_t deltaCnt = getValFromMDeltaV(startBit, slotWidth); // get num of deltas in this slot + totDeltaCnt -= deltaCnt; + uint64_t mainDSDeltaCnt = deltaCnt < slotsPerColorCls ? 
deltaCnt : slotsPerColorClsWithPtrs - 1; + if (mainDSDeltaCnt < deltaCnt) { // in case count of deltas exceeds the reserved count + // fetch the pointer to the heap in the main DS and DELETE it + startBit += slotWidth * (mainDSDeltaCnt + 1); + uint64_t *theRestV = reinterpret_cast(getValFromMDeltaV(startBit, 64)); + delete theRestV; + } + } + // width is limited to 64 (word size) + bool insertValIntoDeltaV(uint64_t startBit, uint64_t val, uint64_t width) { + uint64_t mask = width < 64? (((uint64_t)1 << width) - 1) : -1; + if (startBit >= deltas.size() * 64) throw DeltaManagerException("startBit exceeds bit_size"); + uint64_t &wrd = deltas[startBit / 64]; + uint64_t startBitInwrd = startBit % 64; + wrd &= ~(mask << startBitInwrd); + wrd |= (val << startBitInwrd); + uint64_t shiftRight = 64 - startBitInwrd; + if (shiftRight < width) { + if (startBit >= (deltas.size() - 1) * 64) throw DeltaManagerException("startBit exceeds bit_size"); + auto &nextWrd = deltas[startBit/64+1]; + nextWrd &= ~(mask >> shiftRight); + nextWrd |= (val >> shiftRight); + } + + return true; + } + + // width is limited to 64 (word size) + uint64_t getValFromMDeltaV(uint64_t startBit, uint64_t width) { + if (startBit >= deltas.size() * 64) throw DeltaManagerException("startBit exceeds bit_size"); + uint64_t mask = width < 64? (((uint64_t)1 << width) - 1) : -1; + uint64_t res{0}; + uint64_t wrd = deltas[startBit / 64]; + uint64_t startBitInwrd = startBit % 64; + res |= (mask & (wrd >> startBitInwrd)); + + uint64_t shiftLeft = 64 - startBitInwrd; + if (shiftLeft < width) { + if (startBit >= (deltas.size() - 1) * 64) throw DeltaManagerException("startBit exceeds bit_size"); + wrd = deltas[startBit / 64 + 1]; + res |= (mask & (wrd << shiftLeft)); + } + return res; + } + + bool insertValIntoHeap(const std::vector &dlta, + uint64_t startIdx, + uint64_t *vecPtr, + uint64_t width) { + uint64_t startBit{0}; + for (uint64_t i = startIdx; i < dlta.size(); i++) { + uint64_t val = dlta[i]; + if (width == slotWidth and val >= numSamples) { + std::string msg = "delta index is larger than num_samples. val:"+ + std::to_string(val)+ + " num_samples:" +std::to_string(numSamples); + throw DeltaManagerException(msg); + } + + uint64_t &wrd = vecPtr[startBit / 64]; + uint64_t startBitInwrd = startBit % 64; + wrd |= (val << startBitInwrd); + uint64_t shiftRight = 64 - startBitInwrd; + + if (shiftRight < width) { + auto &nextWrd = vecPtr[startBit / 64 + 1]; + nextWrd |= (val >> shiftRight); + } + startBit += width; + } + return true; + } + + bool getValFromHeap(std::vector &dlta, + uint64_t cnt, + uint64_t *vecPtr, + uint64_t width) { + uint64_t startBit{0}; + uint64_t mask = width < 64? 
(((uint64_t)1 << width) - 1) : -1; + for (uint64_t i = 0; i < cnt; i++) { + uint64_t res{0}; + uint64_t wrd = vecPtr[startBit / 64]; + uint64_t startBitInwrd = startBit % 64; + res |= (mask & (wrd >> startBitInwrd)); + uint64_t shiftRight = 64 - startBitInwrd; + if (shiftRight < width) { + wrd = vecPtr[startBit / 64 + 1]; + res |= (mask & (wrd << shiftRight)); + } + startBit += width; + dlta.push_back(res); + } + return true; + } + +}; + +#endif //MANTIS_DELTAMANAGER_H diff --git a/include/gqf/gqf.h b/include/gqf/gqf.h new file mode 100644 index 0000000..8fe0e7a --- /dev/null +++ b/include/gqf/gqf.h @@ -0,0 +1,331 @@ +/* + * ============================================================================ + * + * Authors: Prashant Pandey + * Rob Johnson + * + * ============================================================================ + */ + +#ifndef _GQF_H_ +#define _GQF_H_ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + + typedef struct quotient_filter quotient_filter; + typedef quotient_filter QF; + + /* CQFs support three hashing modes: + + - DEFAULT uses a hash that may introduce false positives, but + this can be useful when inserting large keys that need to be + hashed down to a small fingerprint. With this type of hash, + you can iterate over the hash values of all the keys in the + CQF, but you cannot iterate over the keys themselves. + + - INVERTIBLE has no false positives, but the size of the hash + output must be the same as the size of the hash input, + e.g. 17-bit keys hashed to 17-bit outputs. So this mode is + generally only useful when storing small keys in the CQF. With + this hashing mode, you can use iterators to enumerate both all + the hashes in the CQF, or all the keys. + + - NONE, for when you've done the hashing yourself. WARNING: the + CQF can exhibit very bad performance if you insert a skewed + distribution of intputs. + */ + + enum qf_hashmode { + QF_HASH_DEFAULT, + QF_HASH_INVERTIBLE, + QF_HASH_NONE + }; + + /* The CQF supports concurrent insertions and queries. Only the + portion of the CQF being examined or modified is locked, so it + supports high throughput even with many threads. + + The CQF operations support 3 locking modes: + + - NO_LOCK: for single-threaded applications or applications + that do their own concurrency management. + + - WAIT_FOR_LOCK: Spin until you get the lock, then do the query + or update. + + - TRY_ONCE_LOCK: If you can't grab the lock on the first try, + return with an error code. + */ +#define QF_NO_LOCK (0x01) +#define QF_TRY_ONCE_LOCK (0x02) +#define QF_WAIT_FOR_LOCK (0x04) + + /* It is sometimes useful to insert a key that has already been + hashed. */ +#define QF_KEY_IS_HASH (0x08) + + /****************************************** + The CQF defines low-level constructor and destructor operations + that are designed to enable the application to manage the memory + used by the CQF. + *******************************************/ + + /* + * Create an empty CQF in "buffer". If there is not enough space at + * buffer then it will return the total size needed in bytes to + * initialize the CQF. This function takes ownership of buffer. + */ + uint64_t qf_init(QF *qf, uint64_t nslots, uint64_t key_bits, uint64_t + value_bits, enum qf_hashmode hash, uint32_t seed, void* + buffer, uint64_t buffer_len); + + /* Create a CQF in "buffer". Note that this does not initialize the + contents of bufferss Use this function if you have read a CQF, e.g. 
+ off of disk or network, and want to begin using that stream of + bytes as a CQF. The CQF takes ownership of buffer. */ + uint64_t qf_use(QF* qf, void* buffer, uint64_t buffer_len); + + /* Destroy this CQF. Returns a pointer to the memory that the CQF was + using (i.e. passed into qf_init or qf_use) so that the application + can release that memory. */ + void *qf_destroy(QF *qf); + + /* Allocate a new CQF using "nslots" at "buffer" and copy elements from "qf" + * into it. + * If there is not enough space at buffer then it will return the total size + * needed in bytes to initialize the new CQF. + * */ + uint64_t qf_resize(QF* qf, uint64_t nslots, void* buffer, uint64_t + buffer_len); + + /*********************************** + The following convenience functions create and destroy CQFs by + using malloc/free to obtain and release the memory for the CQF. + ************************************/ + + /* Initialize the CQF and allocate memory for the CQF. */ + bool qf_malloc(QF *qf, uint64_t nslots, uint64_t key_bits, uint64_t + value_bits, enum qf_hashmode hash, uint32_t seed); + + bool qf_free(QF *qf); + + /* Resize the QF to the specified number of slots. Uses malloc() to + * obtain the new memory, and calls free() on the old memory. + * Return value: + * >= 0: number of keys copied during resizing. + * */ + int64_t qf_resize_malloc(QF *qf, uint64_t nslots); + + /* Turn on automatic resizing. Resizing is performed by calling + qf_resize_malloc, so the CQF must meet the requirements of that + function. */ + void qf_set_auto_resize(QF* qf, bool enabled); + + /*********************************** + Functions for modifying the CQF. + ***********************************/ + +#define QF_NO_SPACE (-1) +#define QF_COULDNT_LOCK (-2) +#define QF_DOESNT_EXIST (-3) + + /* Increment the counter for this key/value pair by count. + * Return value: + * >= 0: distance from the home slot to the slot in which the key is + * inserted (or 0 if count == 0). + * == QF_NO_SPACE: the CQF has reached capacity. + * == QF_COULDNT_LOCK: TRY_ONCE_LOCK has failed to acquire the lock. + */ + int qf_insert(QF *qf, uint64_t key, uint64_t value, uint64_t count, uint8_t + flags); + + /* Set the counter for this key/value pair to count. + Return value: Same as qf_insert. + Returns 0 if new count is equal to old count. + */ + int qf_set_count(QF *qf, uint64_t key, uint64_t value, uint64_t count, + uint8_t flags); + + /* Remove up to count instances of this key/value combination. + * If the CQF contains <= count instances, then they will all be + * removed, which is not an error. + * Return value: + * >= 0: number of slots freed. + * == QF_DOESNT_EXIST: Specified item did not exist. + * == QF_COULDNT_LOCK: TRY_ONCE_LOCK has failed to acquire the lock. + */ + int qf_remove(QF *qf, uint64_t key, uint64_t value, uint64_t count, uint8_t + flags); + + /* Remove all instances of this key/value pair. */ + int qf_delete_key_value(QF *qf, uint64_t key, uint64_t value, uint8_t flags); + + /* Remove all instances of this key. */ + /* NOT IMPLEMENTED YET. */ + //void qf_delete_key(QF *qf, uint64_t key); + + /* Replace the association (key, oldvalue, count) with the association + (key, newvalue, count). If there is already an association (key, + newvalue, count'), then the two associations will be merged and + their counters will be summed, resulting in association (key, + newvalue, count' + count). */ + /* NOT IMPLEMENTED YET. 
*/ + //void qf_replace(QF *qf, uint64_t key, uint64_t oldvalue, uint64_t newvalue); + + /**************************************** + Query functions + ****************************************/ + + /* Lookup the value associated with key. Returns the count of that + key/value pair in the QF. If it returns 0, then, the key is not + present in the QF. Only returns the first value associated with key + in the QF. If you want to see others, use an iterator. + May return QF_COULDNT_LOCK if called with QF_TRY_LOCK. */ + uint64_t qf_query(const QF *qf, uint64_t key, uint64_t *value, uint8_t + flags); + + /* Return the number of times key has been inserted, with any value, + into qf. */ + /* NOT IMPLEMENTED YET. */ + //uint64_t qf_count_key(const QF *qf, uint64_t key); + + /* Return the number of times key has been inserted, with the given + value, into qf. + May return QF_COULDNT_LOCK if called with QF_TRY_LOCK. */ + uint64_t qf_count_key_value(const QF *qf, uint64_t key, uint64_t value, + uint8_t flags); + + /* Returns a unique index corresponding to the key in the CQF. Note + that this can change if further modifications are made to the + CQF. + + If the key is not found then returns QF_DOESNT_EXIST. + May return QF_COULDNT_LOCK if called with QF_TRY_LOCK. + */ + int64_t qf_get_unique_index(const QF *qf, uint64_t key, uint64_t value, + uint8_t flags); + + + /**************************************** + Metadata accessors. + ****************************************/ + + /* Hashing info */ + enum qf_hashmode qf_get_hashmode(const QF *qf); + uint64_t qf_get_hash_seed(const QF *qf); + __uint128_t qf_get_hash_range(const QF *qf); + + /* Space usage info. */ + bool qf_is_auto_resize_enabled(const QF *qf); + uint64_t qf_get_total_size_in_bytes(const QF *qf); + uint64_t qf_get_nslots(const QF *qf); + uint64_t qf_get_num_occupied_slots(const QF *qf); + + /* Bit-sizes info. */ + uint64_t qf_get_num_key_bits(const QF *qf); + uint64_t qf_get_num_value_bits(const QF *qf); + uint64_t qf_get_num_key_remainder_bits(const QF *qf); + uint64_t qf_get_bits_per_slot(const QF *qf); + + /* Number of (distinct) key-value pairs. */ + uint64_t qf_get_sum_of_counts(const QF *qf); + uint64_t qf_get_num_distinct_key_value_pairs(const QF *qf); + + /**************************************** + Iterators + *****************************************/ + + typedef struct quotient_filter_iterator quotient_filter_iterator; + typedef quotient_filter_iterator QFi; + +#define QF_INVALID (-4) +#define QFI_INVALID (-5) + + /* Initialize an iterator starting at the given position. + * Return value: + * >= 0: iterator is initialized and positioned at the returned slot. + * = QFI_INVALID: iterator has reached end. + */ + int64_t qf_iterator_from_position(const QF *qf, QFi *qfi, uint64_t position); + + /* Initialize an iterator and position it at the smallest index + * containing a key-value pair whose hash is greater than or equal + * to the specified key-value pair. + * Return value: + * >= 0: iterator is initialized and position at the returned slot. + * = QFI_INVALID: iterator has reached end. + */ + int64_t qf_iterator_from_key_value(const QF *qf, QFi *qfi, uint64_t key, + uint64_t value, uint8_t flags); + + /* Requires that the hash mode of the CQF is INVERTIBLE or NONE. + * If the hash mode is DEFAULT then returns QF_INVALID. + * Return value: + * = 0: Iterator is still valid. + * = QFI_INVALID: iterator has reached end. 
+ * = QF_INVALID: hash mode is QF_DEFAULT_HASH + */ + int qfi_get_key(const QFi *qfi, uint64_t *key, uint64_t *value, uint64_t + *count); + + /* Return value: + * = 0: Iterator is still valid. + * = QFI_INVALID: iterator has reached end. + */ + int qfi_get_hash(const QFi *qfi, uint64_t *hash, uint64_t *value, uint64_t + *count); + + /* Advance to next entry. + * Return value: + * = 0: Iterator is still valid. + * = QFI_INVALID: iterator has reached end. + */ + int qfi_next(QFi *qfi); + + /* Check to see if the if the end of the QF */ + bool qfi_end(const QFi *qfi); + + /************************************ + Miscellaneous convenience functions. + *************************************/ + + /* Reset the CQF to an empty filter. */ + void qf_reset(QF *qf); + + /* The caller should call qf_init on the dest QF using the same + * parameters as the src QF before calling this function. Note: src + * and dest must be exactly the same, including number of slots. */ + void qf_copy(QF *dest, const QF *src); + + /* merge two QFs into the third one. Note: merges with any existing + values in qfc. */ + void qf_merge(const QF *qfa, const QF *qfb, QF *qfc); + + /* merge multiple QFs into the final QF one. */ + void qf_multi_merge(const QF *qf_arr[], int nqf, QF *qfr); + + /* find cosine similarity between two QFs. */ + uint64_t qf_inner_product(const QF *qfa, const QF *qfb); + + /* square of the L_2 norm of a QF (i.e. sum of squares of counts of + all items in the CQF). */ + uint64_t qf_magnitude(const QF *qf); + + /*********************************** + Debugging functions. + ************************************/ + + void qf_dump(const QF *); + void qf_dump_metadata(const QF *qf); + + +#ifdef __cplusplus +} +#endif + +#endif /* _GQF_H_ */ diff --git a/include/gqf/gqf_file.h b/include/gqf/gqf_file.h new file mode 100644 index 0000000..c4fdf87 --- /dev/null +++ b/include/gqf/gqf_file.h @@ -0,0 +1,45 @@ +/* + * ============================================================================ + * + * Authors: Prashant Pandey + * Rob Johnson + * + * ============================================================================ + */ + +#ifndef _GQF_FILE_H_ +#define _GQF_FILE_H_ + +#include +#include +#include + +#include "gqf.h" + +#ifdef __cplusplus +extern "C" { +#endif + + /* Initialize a file-backed CQF at "filename". */ + bool qf_initfile(QF *qf, uint64_t nslots, uint64_t key_bits, uint64_t + value_bits, enum qf_hashmode hash, uint32_t seed, char* + filename, int prot); + + /* Read "filename" into "qf". 
*/ + uint64_t qf_usefile(QF* qf, const char* filename, int prot); + + bool qf_closefile(QF* qf); + + bool qf_deletefile(QF* qf); + + /* write data structure of to the disk */ + uint64_t qf_serialize(const QF *qf, const char *filename); + + /* read data structure off the disk */ + uint64_t qf_deserialize(QF *qf, const char *filename); + +#ifdef __cplusplus +} +#endif + +#endif // _GQF_FILE_H_ diff --git a/include/gqf/gqf_int.h b/include/gqf/gqf_int.h new file mode 100644 index 0000000..ca34e8d --- /dev/null +++ b/include/gqf/gqf_int.h @@ -0,0 +1,133 @@ +/* + * ============================================================================ + * + * Authors: Prashant Pandey + * Rob Johnson + * + * ============================================================================ + */ + +#ifndef _GQF_INT_H_ +#define _GQF_INT_H_ + +#include +#include + +#include "gqf.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define MAGIC_NUMBER 1018874902021329732 + +/* Can be + 0 (choose size at run-time), + 8, 16, 32, or 64 (for optimized versions), + or other integer <= 56 (for compile-time-optimized bit-shifting-based versions) +*/ +#define QF_BITS_PER_SLOT 0 + +/* Must be >= 6. 6 seems fastest. */ +#define QF_BLOCK_OFFSET_BITS (6) + +#define QF_SLOTS_PER_BLOCK (1ULL << QF_BLOCK_OFFSET_BITS) +#define QF_METADATA_WORDS_PER_BLOCK ((QF_SLOTS_PER_BLOCK + 63) / 64) + + typedef struct __attribute__ ((__packed__)) qfblock { + /* Code works with uint16_t, uint32_t, etc, but uint8_t seems just as fast as + * anything else */ + uint8_t offset; + uint64_t occupieds[QF_METADATA_WORDS_PER_BLOCK]; + uint64_t runends[QF_METADATA_WORDS_PER_BLOCK]; + +#if QF_BITS_PER_SLOT == 8 + uint8_t slots[QF_SLOTS_PER_BLOCK]; +#elif QF_BITS_PER_SLOT == 16 + uint16_t slots[QF_SLOTS_PER_BLOCK]; +#elif QF_BITS_PER_SLOT == 32 + uint32_t slots[QF_SLOTS_PER_BLOCK]; +#elif QF_BITS_PER_SLOT == 64 + uint64_t slots[QF_SLOTS_PER_BLOCK]; +#elif QF_BITS_PER_SLOT != 0 + uint8_t slots[QF_SLOTS_PER_BLOCK * QF_BITS_PER_SLOT / 8]; +#else + uint8_t slots[]; +#endif + } qfblock; + + typedef struct file_info { + int fd; + char *filepath; + } file_info; + + // The below struct is used to instrument the code. + // It is not used in normal operations of the CQF. + typedef struct { + uint64_t total_time_single; + uint64_t total_time_spinning; + uint64_t locks_taken; + uint64_t locks_acquired_single_attempt; + } wait_time_data; + + typedef struct quotient_filter_runtime_data { + file_info f_info; + uint64_t num_locks; + volatile int metadata_lock; + volatile int *locks; + wait_time_data *wait_times; + } quotient_filter_runtime_data; + + typedef quotient_filter_runtime_data qfruntime; + + typedef struct quotient_filter_metadata { + uint64_t magic_endian_number; + enum qf_hashmode hash_mode; + uint32_t auto_resize; + uint64_t total_size_in_bytes; + uint32_t seed; + uint64_t nslots; + uint64_t xnslots; + uint64_t key_bits; + uint64_t value_bits; + uint64_t key_remainder_bits; + uint64_t bits_per_slot; + __uint128_t range; + uint64_t nblocks; + uint64_t nelts; + uint64_t ndistinct_elts; + uint64_t noccupied_slots; + } quotient_filter_metadata; + + typedef quotient_filter_metadata qfmetadata; + + typedef struct quotient_filter { + qfruntime *runtimedata; + qfmetadata *metadata; + qfblock *blocks; + } quotient_filter; + + typedef quotient_filter QF; + + // The below struct is used to instrument the code. + // It is not used in normal operations of the CQF. 
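The gqf headers above replace the old monolithic gqf.h with a small C API plus separate file-backed and internal headers. A hedged end-to-end sketch of the calls this patch actually relies on — qf_malloc, qf_insert, qf_count_key_value, and the hash-order iterator used by ColoredDbg::construct(); the sizes, key, and seed below are arbitrary:

#include <cstdint>
#include <cstdio>
#include "gqf/gqf.h"
#include "gqf/gqf_int.h"   // complete QF/QFi definitions, so they can live on the stack

int main() {
    QF cqf;
    // 2^16 slots, 24-bit keys, default (possibly lossy) hashing.
    if (!qf_malloc(&cqf, 1ULL << 16, 24, 0, QF_HASH_DEFAULT, 2038074761))
        return 1;

    qf_insert(&cqf, /*key=*/42, /*value=*/0, /*count=*/3, QF_NO_LOCK);
    uint64_t c = qf_count_key_value(&cqf, 42, 0, QF_NO_LOCK);
    std::printf("count(42) = %llu\n", (unsigned long long)c);

    // Walk the filter in hash order, one entry at a time.
    QFi it;
    if (qf_iterator_from_position(&cqf, &it, 0) != QFI_INVALID) {
        while (!qfi_end(&it)) {
            uint64_t hash, value, count;
            qfi_get_hash(&it, &hash, &value, &count);
            qfi_next(&it);
        }
    }
    qf_free(&cqf);
    return 0;
}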
+ typedef struct { + uint64_t start_index; + uint16_t length; + } cluster_data; + + typedef struct quotient_filter_iterator { + const QF *qf; + uint64_t run; + uint64_t current; + uint64_t cur_start_index; + uint16_t cur_length; + uint32_t num_clusters; + cluster_data *c_info; + } quotient_filter_iterator; + +#ifdef __cplusplus +} +#endif + +#endif /* _GQF_INT_H_ */ diff --git a/include/gqf/hashutil.h b/include/gqf/hashutil.h new file mode 100644 index 0000000..06d1b1a --- /dev/null +++ b/include/gqf/hashutil.h @@ -0,0 +1,42 @@ +/* + * ============================================================================ + * + * Authors: Prashant Pandey + * Rob Johnson + * + * ============================================================================ + */ + +#ifndef _HASHUTIL_H_ +#define _HASHUTIL_H_ + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +// MurmurHash2 +uint32_t MurmurHash(const void *buf, size_t length, uint32_t seed); +uint64_t MurmurHash64B ( const void * key, int len, unsigned int + seed ); +uint64_t MurmurHash64A ( const void * key, int len, unsigned int + seed ); +__uint128_t MurmurHash128A ( const void * key, int len, unsigned + int seed, unsigned int seed2 ); + +// AES hash +uint64_t AES_HASH(uint64_t x); + +uint64_t hash_64(uint64_t key, uint64_t mask); +uint64_t hash_64i(uint64_t key, uint64_t mask); + +#ifdef __cplusplus +} +#endif + +#endif // #ifndef _HASHUTIL_H_ + + diff --git a/include/gqf_cpp.h b/include/gqf_cpp.h new file mode 100644 index 0000000..c64e703 --- /dev/null +++ b/include/gqf_cpp.h @@ -0,0 +1,232 @@ +/* + * ============================================================================ + * + * Authors: Prashant Pandey + * Rob Johnson + * Rob Patro (rob.patro@cs.stonybrook.edu) + * + * ============================================================================ + */ + +#ifndef _CQF_H_ +#define _CQF_H_ + +#include +#include +#include + +#include +#include +#include +#include + +#include "gqf/gqf.h" +#include "gqf/gqf_int.h" +#include "gqf/gqf_file.h" +#include "util.h" + +#define NUM_HASH_BITS 24 +#define NUM_Q_BITS 16 +#define SEED 2038074761 + +enum readmode { + CQF_MMAP, + CQF_FREAD +}; + +template +class CQF { + public: + CQF(); + CQF(uint64_t q_bits, uint64_t key_bits, enum qf_hashmode hash, uint32_t seed); + CQF(std::string& filename, enum readmode flag); + CQF(const CQF& copy_cqf); + + int insert(const key_obj& k, uint8_t flags); + + /* Will return the count. 
*/ + uint64_t query(const key_obj& k, uint8_t flags); + + uint64_t inner_prod(const CQF& in_cqf); + + void serialize(std::string filename) { + qf_serialize(&cqf, filename.c_str()); + } + + void set_auto_resize(void) { qf_set_auto_resize(&cqf, true); } + int64_t get_unique_index(const key_obj& k, uint8_t flags) const { + return qf_get_unique_index(&cqf, k.key, k.value, flags); + } + + bool is_exact(void) const; + enum qf_hashmode hash_mode(void) const { return cqf.metadata->hash_mode; } + bool check_similarity(const CQF *other_cqf) const; + + const QF* get_cqf(void) const { return &cqf; } + uint64_t range(void) const { return cqf.metadata->range; } + uint32_t seed(void) const { return cqf.metadata->seed; } + uint64_t numslots(void) const { return cqf.metadata->nslots; } + uint32_t keybits(void) const { return cqf.metadata->key_bits; } + uint64_t total_elts(void) const { return cqf.metadata->nelts; } + uint64_t dist_elts(void) const { return cqf.metadata->ndistinct_elts; } + //uint64_t set_size(void) const { return set.size(); } + void reset(void) { qf_reset(&cqf); } + + void dump_metadata(void) const { qf_dump_metadata(&cqf); } + + void drop_pages(uint64_t cur); + + class Iterator { + public: + Iterator(QFi it); + key_obj operator*(void) const; + void operator++(void); + bool done(void) const; + + key_obj get_cur_hash(void) const; + + QFi iter; + private: + uint64_t end_hash; + }; + + Iterator begin(void) const; + Iterator end(void) const; + + private: + QF cqf; + //std::unordered_set set; +}; + +class KeyObject { + public: + KeyObject() : key(0), value(0), count(0) {}; + + KeyObject(uint64_t k, uint64_t v, uint64_t c) : key(k), + value(v), count(c) {}; + + KeyObject(const KeyObject& k) : key(k.key), value(k.value), count(k.count) {}; + + bool operator==(KeyObject k) { return key == k.key; } + + typedef uint64_t kmer_t; + kmer_t key; + uint64_t value; + uint64_t count; +}; + +template +CQF::CQF() { + if (!qf_malloc(&cqf, 1ULL << NUM_Q_BITS, NUM_HASH_BITS, 0, QF_HASH_DEFAULT, + SEED)) { + ERROR("Can't allocate the CQF"); + exit(EXIT_FAILURE); + } +} + +template +CQF::CQF(uint64_t q_bits, uint64_t key_bits, enum qf_hashmode hash, + uint32_t seed) { + if (!qf_malloc(&cqf, 1ULL << q_bits, key_bits, 0, hash, SEED)) { + ERROR("Can't allocate the CQF"); + exit(EXIT_FAILURE); + } +} + +template +CQF::CQF(std::string& filename, enum readmode flag) { + uint64_t size = 0; + if (flag == CQF_MMAP) + size = qf_usefile(&cqf, filename.c_str(), PROT_READ); + else + size = qf_deserialize(&cqf, filename.c_str()); + + if (size == 0) { + ERROR("Can't read/deserialize the CQF"); + exit(EXIT_FAILURE); + } +} + +template CQF::CQF(const CQF& copy_cqf) { + memcpy(reinterpret_cast(&cqf), + reinterpret_cast(const_cast(©_cqf.cqf)), sizeof(QF)); +} + +template +int CQF::insert(const key_obj& k, uint8_t flags) { + return qf_insert(&cqf, k.key, k.value, k.count, flags); + // To validate the CQF + //set.insert(k.key); +} + +template +uint64_t CQF::query(const key_obj& k, uint8_t flags) { + return qf_count_key_value(&cqf, k.key, k.value, flags); +} + +template +uint64_t CQF::inner_prod(const CQF& in_cqf) { + return qf_inner_product(&cqf, in_cqf.get_cqf()); +} + +template +bool CQF::is_exact(void) const { + if (cqf.metadata->hash_mode == QF_HASH_INVERTIBLE) + return true; + return false; +} + +template +bool CQF::check_similarity(const CQF *other_cqf) const { + if (hash_mode() != other_cqf->hash_mode() || seed() != other_cqf->seed() || + keybits() != other_cqf->keybits() || range() != other_cqf->range()) + return false; + 
return true;
+}
+
+template
+CQF::Iterator::Iterator(QFi it)
+ : iter(it) {};
+
+template
+key_obj CQF::Iterator::operator*(void) const {
+ uint64_t key = 0, value = 0, count = 0;
+ qfi_get_key(&iter, &key, &value, &count);
+ return key_obj(key, value, count);
+}
+
+template
+key_obj CQF::Iterator::get_cur_hash(void) const {
+ uint64_t key = 0, value = 0, count = 0;
+ qfi_get_hash(&iter, &key, &value, &count);
+ return key_obj(key, value, count);
+}
+
+template
+void CQF::Iterator::operator++(void) {
+ qfi_next(&iter);
+}
+
+/* Currently, the iterator only traverses forward. So, we only need to check
+ * the right side limit.
+ */
+template
+bool CQF::Iterator::done(void) const {
+ return qfi_end(&iter);
+}
+
+template
+typename CQF::Iterator CQF::begin(void) const {
+ QFi qfi;
+ qf_iterator_from_position(&this->cqf, &qfi, 0);
+ return Iterator(qfi);
+}
+
+template
+typename CQF::Iterator CQF::end(void) const {
+ QFi qfi;
+ qf_iterator_from_position(&this->cqf, &qfi, 0xffffffffffffffff);
+ // Iterator has a single-argument constructor; done() detects the end via qfi_end().
+ return Iterator(qfi);
+}
+
+#endif
diff --git a/include/hashutil.h b/include/hashutil.h
deleted file mode 100644
index 472d458..0000000
--- a/include/hashutil.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
-#ifndef _HASHUTIL_H_
-#define _HASHUTIL_H_
-
-#include
-#include
-#include
-#include
-
-class HashUtil {
- public:
-
- // MurmurHash2
- static uint32_t MurmurHash(const void *buf, size_t length, uint32_t seed =
- 0);
- static uint32_t MurmurHash(const std::string &s, uint32_t seed = 0);
- static uint64_t MurmurHash64B ( const void * key, int len, unsigned int
- seed );
- static uint64_t MurmurHash64A ( const void * key, int len, unsigned int
- seed );
- static __uint128_t MurmurHash128A ( const void * key, int len, unsigned
- int seed, unsigned int seed2 );
-
- // AES hash
- static uint64_t AES_HASH(uint64_t x);
-
- static uint64_t hash_64(uint64_t key, uint64_t mask);
- static uint64_t hash_64i(uint64_t key, uint64_t mask);
-
- private:
- HashUtil();
-};
-
-#endif // #ifndef _HASHUTIL_H_
-
-
diff --git a/include/kmer.h b/include/kmer.h
index c7dbdea..97f4bfb 100644
--- a/include/kmer.h
+++ b/include/kmer.h
@@ -22,7 +22,6 @@
 #include
 #include
-#include "hashutil.h"
 #include "common_types.h"
 #define BITMASK(nbits) ((nbits) == 64 ?
0xffffffffffffffff : (1ULL << (nbits)) \ @@ -33,18 +32,15 @@ using namespace std; class Kmer { public: - static inline char map_int(uint8_t base); - static inline uint8_t map_base(char base); - static uint64_t str_to_int(string str); - static string int_to_str(uint64_t kmer, uint64_t kmer_size); - static inline int reverse_complement_base(int x); - static uint64_t reverse_complement(uint64_t kmer, uint64_t kmer_size); - static inline bool compare_kmers(uint64_t kmer, uint64_t kmer_rev); - static inline unsigned __int128 word_reverse_complement(unsigned __int128 w); - static inline int64_t word_reverse_complement(uint64_t w); - static inline uint32_t word_reverse_complement(uint32_t w); + static char map_int(uint8_t base); + static uint8_t map_base(char base); + static __int128_t str_to_int(std::string str); + static std::string int_to_str(__int128_t kmer, uint64_t kmer_size); + static int reverse_complement_base(int x); + static __int128_t reverse_complement(__int128_t kmer, uint64_t kmer_size); + static bool compare_kmers(__int128_t kmer, __int128_t kmer_rev); + static mantis::QuerySets parse_kmers(const char *filename, - uint32_t seed, uint64_t range, uint64_t kmer_size, uint64_t& total_kmers); static std::string generate_random_string(uint64_t len); diff --git a/include/lru/cache-tags.hpp b/include/lru/cache-tags.hpp new file mode 100644 index 0000000..924a669 --- /dev/null +++ b/include/lru/cache-tags.hpp @@ -0,0 +1,40 @@ +/// The MIT License (MIT) +/// Copyright (c) 2016 Peter Goldsborough +/// +/// Permission is hereby granted, free of charge, to any person obtaining a copy +/// of this software and associated documentation files (the "Software"), to +/// deal in the Software without restriction, including without limitation the +/// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +/// sell copies of the Software, and to permit persons to whom the Software is +/// furnished to do so, subject to the following conditions: +/// +/// The above copyright notice and this permission notice shall be included in +/// all copies or substantial portions of the Software. +/// +/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +/// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +/// IN THE SOFTWARE. 
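// A short sketch of the revised Kmer interface introduced in kmer.h above,
// which now carries k-mers in __int128_t (presumably so that k > 32 is
// representable); the sequence and k are illustrative, and the canonical-form
// convention of compare_kmers() is an assumption:
//
//   uint64_t k = 10;
//   __int128_t kmer = Kmer::str_to_int("ACGTACGTAC");
//   __int128_t rc   = Kmer::reverse_complement(kmer, k);
//   bool keep_fwd   = Kmer::compare_kmers(kmer, rc);  // assumed: true if the forward form is canonical
//   std::string s   = Kmer::int_to_str(rc, k);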
+ +#ifndef LRU_CACHE_TAGS_HPP +#define LRU_CACHE_TAGS_HPP + +namespace LRU { +namespace Tag { +struct BasicCache {}; +struct TimedCache {}; +} // namespace Tag + +namespace Lowercase { +namespace tag { +using basic_cache = ::LRU::Tag::BasicCache; +using timed_cache = ::LRU::Tag::TimedCache; +} // namespace tag +} // namespace Lowercase + +} // namespace LRU + +#endif // LRU_CACHE_TAGS_HPP diff --git a/include/lru/cache.hpp b/include/lru/cache.hpp new file mode 100644 index 0000000..7150bfc --- /dev/null +++ b/include/lru/cache.hpp @@ -0,0 +1,207 @@ +/// The MIT License (MIT) +/// Copyright (c) 2016 Peter Goldsborough +/// +/// Permission is hereby granted, free of charge, to any person obtaining a copy +/// of this software and associated documentation files (the "Software"), to +/// deal in the Software without restriction, including without limitation the +/// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +/// sell copies of the Software, and to permit persons to whom the Software is +/// furnished to do so, subject to the following conditions: +/// +/// The above copyright notice and this permission notice shall be included in +/// all copies or substantial portions of the Software. +/// +/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +/// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +/// IN THE SOFTWARE. + +#ifndef LRU_CACHE_HPP +#define LRU_CACHE_HPP + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace LRU { +namespace Internal { +template +using UntimedCacheBase = Internal::BaseCache; +} // namespace Internal + +/// A basic LRU cache implementation. +/// +/// An LRU cache is a fixed-size cache that remembers the order in which +/// elements were inserted into it. When the size of the cache exceeds its +/// capacity, the "least-recently-used" (LRU) element is erased. In our +/// implementation, usage is defined as insertion, but not lookup. That is, +/// looking up an element does not move it to the "front" of the cache (making +/// the operation faster). Only insertions (and erasures) can change the order +/// of elements. The capacity of the cache can be modified at any time. +/// +/// \see LRU::TimedCache +template , + typename KeyEqual = std::equal_to> +class Cache + : public Internal::UntimedCacheBase { + private: + using super = Internal::UntimedCacheBase; + using PRIVATE_BASE_CACHE_MEMBERS; + + public: + using PUBLIC_BASE_CACHE_MEMBERS; + using typename super::size_t; + + /// \copydoc BaseCache::BaseCache(size_t,const HashFunction&,const KeyEqual&) + /// \detailss The capacity defaults to an internal constant, currently 128. 
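// A minimal sketch of how this cache is used, matching the eviction behaviour
// described above (Key = std::string and Value = int are illustrative):
//
//   LRU::Cache<std::string, int> cache(2);   // capacity of 2
//   cache.insert("a", 1);
//   cache.insert("b", 2);
//   cache.insert("c", 3);                    // capacity reached: the least-recently inserted key ("a") is evicted
//   bool has_a = cache.contains("a");        // false
//   int b = cache.lookup("b");               // 2; lookup() throws LRU::Error::KeyNotFound for absent keys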
+ explicit Cache(size_t capacity = Internal::DEFAULT_CAPACITY, + const HashFunction& hash = HashFunction(), + const KeyEqual& equal = KeyEqual()) + : super(capacity, hash, equal) { + } + + /// \copydoc BaseCache(size_t,Iterator,Iterator,const HashFunction&,const + /// KeyEqual&) + template + Cache(size_t capacity, + Iterator begin, + Iterator end, + const HashFunction& hash = HashFunction(), + const KeyEqual& equal = KeyEqual()) + : super(capacity, begin, end, hash, equal) { + } + + /// \copydoc BaseCache(Iterator,Iterator,const HashFunction&,const + /// KeyEqual&) + template + Cache(Iterator begin, + Iterator end, + const HashFunction& hash = HashFunction(), + const KeyEqual& equal = KeyEqual()) + : super(begin, end, hash, equal) { + } + + /// Constructor. + /// + /// \param capacity The capacity of the cache. + /// \param range A range to construct the cache with. + /// \param hash The hash function to use for the internal map. + /// \param key_equal The key equality function to use for the internal map. + template > + Cache(size_t capacity, + Range&& range, + const HashFunction& hash = HashFunction(), + const KeyEqual& equal = KeyEqual()) + : super(capacity, std::forward(range), hash, equal) { + } + + /// Constructor. + /// + /// \param range A range to construct the cache with. + /// \param hash The hash function to use for the internal map. + /// \param key_equal The key equality function to use for the internal map. + template > + explicit Cache(Range&& range, + const HashFunction& hash = HashFunction(), + const KeyEqual& equal = KeyEqual()) + : super(std::forward(range), hash, equal) { + } + + /// \copydoc BaseCache(InitializerList,const HashFunction&,const + /// KeyEqual&) + Cache(InitializerList list, + const HashFunction& hash = HashFunction(), + const KeyEqual& equal = KeyEqual()) // NOLINT(runtime/explicit) + : super(list, hash, equal) { + } + + /// \copydoc BaseCache(size_t,InitializerList,const HashFunction&,const + /// KeyEqual&) + Cache(size_t capacity, + InitializerList list, + const HashFunction& hash = HashFunction(), + const KeyEqual& equal = KeyEqual()) // NOLINT(runtime/explicit) + : super(capacity, list, hash, equal) { + } + + /// \copydoc BaseCache::find(const Key&) + UnorderedIterator find(const Key& key) override { + auto iterator = _map.find(key); + if (iterator != _map.end()) { + _register_hit(key, iterator->second.value); + _move_to_front(iterator->second.order); + _last_accessed = iterator; + } else { + _register_miss(key); + } + + return {*this, iterator}; + } + + /// \copydoc BaseCache::find(const Key&) const + UnorderedConstIterator find(const Key& key) const override { + auto iterator = _map.find(key); + if (iterator != _map.end()) { + _register_hit(key, iterator->second.value); + _move_to_front(iterator->second.order); + _last_accessed = iterator; + } else { + _register_miss(key); + } + + return {*this, iterator}; + } + + /// \returns The most-recently inserted element. + const Key& front() const noexcept { + if (is_empty()) { + throw LRU::Error::EmptyCache("front"); + } else { + // The queue is reversed for natural order of iteration. + return _order.back(); + } + } + + /// \returns The least-recently inserted element. + const Key& back() const noexcept { + if (is_empty()) { + throw LRU::Error::EmptyCache("back"); + } else { + // The queue is reversed for natural order of iteration. 
+ return _order.front(); + } + } +}; + +namespace Lowercase { +template +using cache = Cache; +} // namespace Lowercase + +} // namespace LRU + +#endif // LRU_CACHE_HPP diff --git a/include/lru/entry.hpp b/include/lru/entry.hpp new file mode 100644 index 0000000..425d7eb --- /dev/null +++ b/include/lru/entry.hpp @@ -0,0 +1,134 @@ +/// The MIT License (MIT) +/// Copyright (c) 2016 Peter Goldsborough +/// +/// Permission is hereby granted, free of charge, to any person obtaining a copy +/// of this software and associated documentation files (the "Software"), to +/// deal in the Software without restriction, including without limitation the +/// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +/// sell copies of the Software, and to permit persons to whom the Software is +/// furnished to do so, subject to the following conditions: +/// +/// The above copyright notice and this permission notice shall be included in +/// all copies or substantial portions of the Software. +/// +/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +/// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +/// IN THE SOFTWARE. + +#ifndef LRU_PAIR_HPP +#define LRU_PAIR_HPP + +#include +#include +#include + +namespace LRU { + +/// A entry of references to the key and value of an entry in a cache. +/// +/// Instances of this class are usually the result of dereferencing an iterator. +/// +/// \tparam Key The key type of the pair. +/// \tparam Value The value type of the pair. +template +struct Entry final { + using KeyType = Key; + using ValueType = Value; + using first_type = Key; + using second_type = Value; + + /// Constructor. + /// + /// \param key The key of the entry. + /// \param value The value of the entry. + Entry(const Key& key, Value& value) : first(key), second(value) { + } + + /// Generalized copy constructor. + /// + /// Mainly for conversion from non-const values to const values. + /// + /// \param other The entry to construct from. + template ::value && + std::is_convertible::value)>> + Entry(const Entry& other) + : first(other.first), second(other.second) { + } + + /// Compares two entrys for equality. + /// + /// \param first The first entry to compare. + /// \param second The second entry to compare. + /// \returns True if the firest entry equals the second, else false. + template + friend bool operator==(const Entry& first, const Pair& second) noexcept { + return first.first == second.first && first.second == second.second; + } + + /// Compares two entrys for equality. + /// + /// \param first The first entry to compare. + /// \param second The second entry to compare. + /// \returns True if the first entry equals the second, else false. + template + friend bool operator==(const Pair& first, const Entry& second) noexcept { + return second == first; + } + + /// Compares two entrys for inequality. + /// + /// \param first The first entry to compare. + /// \param second The second entry to compare. + /// \returns True if the first entry does not equal the second, else false. 
+ template + friend bool operator!=(const Entry& first, const Pair& second) noexcept { + return !(first == second); + } + + /// Compares two entrys for inequality. + /// + /// \param first The first entry to compare. + /// \param second The second entry to compare.fdas + /// \returns True if the first entry does not equal the second, else false. + template + friend bool operator!=(const Pair& first, const Entry& second) noexcept { + return second != first; + } + + /// \returns A `std::pair` instance with the key and value of this entry. + operator std::pair() noexcept { + return {first, second}; + } + + /// \returns The key of the entry (`first`). + const Key& key() const noexcept { + return first; + } + + /// \returns The value of the entry (`second`). + Value& value() noexcept { + return second; + } + + /// \returns The value of the entry (`second`). + const Value& value() const noexcept { + return second; + } + + /// The key of the entry. + const Key& first; + + /// The value of the entry. + Value& second; +}; +} // namespace LRU + + +#endif // LRU_PAIR_HPP diff --git a/include/lru/error.hpp b/include/lru/error.hpp new file mode 100644 index 0000000..9d1c865 --- /dev/null +++ b/include/lru/error.hpp @@ -0,0 +1,107 @@ +/// The MIT License (MIT) +/// Copyright (c) 2016 Peter Goldsborough +/// +/// Permission is hereby granted, free of charge, to any person obtaining a copy +/// of this software and associated documentation files (the "Software"), to +/// deal in the Software without restriction, including without limitation the +/// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +/// sell copies of the Software, and to permit persons to whom the Software is +/// furnished to do so, subject to the following conditions: +/// +/// The above copyright notice and this permission notice shall be included in +/// all copies or substantial portions of the Software. +/// +/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +/// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +/// IN THE SOFTWARE. + +#ifndef LRU_INTERNAL_ERRORS_HPP +#define LRU_INTERNAL_ERRORS_HPP + +#include +#include + +namespace LRU { +namespace Error { + +/// Exception thrown when the value of an invalid key was requested. +struct KeyNotFound : public std::runtime_error { + using super = std::runtime_error; + + KeyNotFound() : super("Failed to find key") { + } + + explicit KeyNotFound(const std::string& key) + : super("Failed to find key: " + key) { + } +}; + +/// Exception thrown when the value of an expired key was requested. +struct KeyExpired : public std::runtime_error { + using super = std::runtime_error; + + explicit KeyExpired(const std::string& key) + : super("Key found, but expired: " + key) { + } + + KeyExpired() : super("Key found, but expired") { + } +}; + +/// Exception thrown when requesting the front or end key of an empty cache. 
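// A brief sketch of how callers are expected to react to these errors
// (the cache type and key are illustrative):
//
//   LRU::Cache<std::string, int> cache(8);
//   try {
//     int v = cache.lookup("missing");            // absent key
//   } catch (const LRU::Error::KeyNotFound&) {
//     // handle the miss, e.g. compute the value and insert() it
//   }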
+struct EmptyCache : public std::runtime_error { + using super = std::runtime_error; + explicit EmptyCache(const std::string& what_was_expected) + : super("Requested " + what_was_expected + " of empty cache") { + } +}; + +/// Exception thrown when attempting to convert an invalid unordered iterator to +/// an ordered iterator. +struct InvalidIteratorConversion : public std::runtime_error { + using super = std::runtime_error; + InvalidIteratorConversion() + : super("Cannot convert past-the-end unordered to ordered iterator") { + } +}; + +/// Exception thrown when attempting to erase the past-the-end iterator. +struct InvalidIterator : public std::runtime_error { + using super = std::runtime_error; + InvalidIterator() : super("Past-the-end iterator is invalid here") { + } +}; + +/// Exception thrown when requesting statistics about an unmonitored key. +struct UnmonitoredKey : public std::runtime_error { + using super = std::runtime_error; + UnmonitoredKey() : super("Requested statistics for unmonitored key") { + } +}; + +/// Exception thrown when requesting the statistics object of a cache when none +/// was registered. +struct NotMonitoring : public std::runtime_error { + using super = std::runtime_error; + NotMonitoring() : super("Statistics monitoring not enabled for this cache") { + } +}; + +namespace Lowercase { +using key_not_found = KeyNotFound; +using key_expired = KeyExpired; +using empty_cache = EmptyCache; +using invalid_iterator_conversion = InvalidIteratorConversion; +using invalid_iterator = InvalidIterator; +using unmonitored_key = UnmonitoredKey; +using not_monitoring = NotMonitoring; +} // namespace Lowercase + +} // namespace Error +} // namespace LRU + +#endif // LRU_INTERNAL_ERRORS_HPP diff --git a/include/lru/insertion-result.hpp b/include/lru/insertion-result.hpp new file mode 100644 index 0000000..846bc08 --- /dev/null +++ b/include/lru/insertion-result.hpp @@ -0,0 +1,76 @@ +/// The MIT License (MIT) +/// Copyright (c) 2016 Peter Goldsborough +/// +/// Permission is hereby granted, free of charge, to any person obtaining a copy +/// of this software and associated documentation files (the "Software"), to +/// deal in the Software without restriction, including without limitation the +/// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +/// sell copies of the Software, and to permit persons to whom the Software is +/// furnished to do so, subject to the following conditions: +/// +/// The above copyright notice and this permission notice shall be included in +/// all copies or substantial portions of the Software. +/// +/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +/// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +/// IN THE SOFTWARE. + +#ifndef LRU_INSERTION_RESULT_HPP +#define LRU_INSERTION_RESULT_HPP + +#include +#include +#include + +namespace LRU { + +/// The result of an insertion into a cache. +/// +/// This is a semantically nicer alternative to a generic `std::pair`, as is +/// returned by `std::unordered_map` or so. 
It still has the same static +/// interface as the `std::pair` (with `first` and `second` members), but adds +/// nicer `was_inserted()` and `iterator()` accessors. +/// +/// \tparam Iterator The class of the iterator contained in the result. +template +struct InsertionResult final { + using IteratorType = Iterator; + + /// Constructor. + /// + /// \param result Whether the result was successful. + /// \param iterator The iterator pointing to the inserted or updated key. + InsertionResult(bool result, Iterator iterator) + : first(result), second(iterator) { + } + + /// \returns True if the key was newly inserted, false if it was only updated. + bool was_inserted() const noexcept { + return first; + } + + /// \returns The iterator pointing to the inserted or updated key. + Iterator iterator() const noexcept { + return second; + } + + /// \copydoc was_inserted + explicit operator bool() const noexcept { + return was_inserted(); + } + + /// Whether the result was successful. + bool first; + + /// The iterator pointing to the inserted or updated key. + Iterator second; +}; + +} // namespace LRU + + +#endif // LRU_INSERTION_RESULT_HPP diff --git a/include/lru/internal/base-cache.hpp b/include/lru/internal/base-cache.hpp new file mode 100644 index 0000000..fe49a72 --- /dev/null +++ b/include/lru/internal/base-cache.hpp @@ -0,0 +1,1588 @@ +/// The MIT License (MIT) +/// Copyright (c) 2016 Peter Goldsborough +/// +/// Permission is hereby granted, free of charge, to any person obtaining a copy +/// of this software and associated documentation files (the "Software"), to +/// deal in the Software without restriction, including without limitation the +/// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +/// sell copies of the Software, and to permit persons to whom the Software is +/// furnished to do so, subject to the following conditions: +/// +/// The above copyright notice and this permission notice shall be included in +/// all copies or substantial portions of the Software. +/// +/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +/// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +/// IN THE SOFTWARE. + +#ifndef LRU_INTERNAL_BASE_CACHE_HPP +#define LRU_INTERNAL_BASE_CACHE_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace LRU { +namespace Internal { + +// Macros are bad, but also more readable sometimes: +// Without this macro, it becomes a pain to have a `using` directive for every +// new member we add to the `BaseCache` and rename or remove every such +// directive when we make a change to the `BaseCache`. 
+// With this macro, you can simply do: +// using super = BaseCache; +// using BASE_CACHE_MEMBERS; +#define PUBLIC_BASE_CACHE_MEMBERS \ + super::is_full; \ + using super::is_empty; \ + using super::clear; \ + using super::end; \ + using super::cend; \ + using super::operator=; \ + using typename super::Information; \ + using typename super::UnorderedIterator; \ + using typename super::UnorderedConstIterator; \ + using typename super::OrderedIterator; \ + using typename super::OrderedConstIterator; \ + using typename super::InitializerList; + +#define PRIVATE_BASE_CACHE_MEMBERS \ + super::_map; \ + using typename super::Map; \ + using typename super::MapIterator; \ + using typename super::MapConstIterator; \ + using typename super::Queue; \ + using typename super::QueueIterator; \ + using super::_order; \ + using super::_last_accessed; \ + using super::_capacity; \ + using super::_erase; \ + using super::_erase_lru; \ + using super::_move_to_front; \ + using super::_value_from_result; \ + using super::_last_accessed_is_ok; \ + using super::_register_miss; \ + using super::_register_hit; + +/// The base class for the LRU::Cache and LRU::TimedCache. +/// +/// This base class (base as opposed to abstract, because it is not intended to +/// be used polymorphically) provides the great bulk of the implementation of +/// both the LRU::Cache and the timed version. For example, it builds the +/// `contains()`, `lookup()` and `operator[]()` functions on top of the pure +/// virtual `find()` methods, making the final implementation of the LRU::Cache +/// much less strenuous. +/// +/// This class also defines all concrete iterator classes and provides the main +/// iterator interface of all caches via ordered and unordered iterators and +/// appropriate `begin()`, `end()` and similar methods. +/// +/// Lastly, the `BaseCache` provides a statistics interface to register and +/// access shared or owned statistics. +/// +/// \tparam Key The key type of the cache. +/// \tparam Value The value type of the cache. +/// \tparam InformationType The internal information class to be used. +/// \tparam HashFunction The hash function type for the internal map. +/// \tparam KeyEqual The type of the key equality function for the internal map. +/// \tparam TagType The cache tag type of the concrete derived class. 
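// A sketch of the pattern these macros enable in a derived cache (mirroring
// lru/cache.hpp; `MyCache` and the elided template arguments are illustrative):
//
//   class MyCache : public BaseCache</* Key, Value, information, hash, equality, tag */> {
//    private:
//     using super = BaseCache</* same arguments */>;
//     using PRIVATE_BASE_CACHE_MEMBERS;   // pulls in _map, _order, _capacity, ...
//    public:
//     using PUBLIC_BASE_CACHE_MEMBERS;    // pulls in is_full, is_empty, clear, end, ...
//   };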
+template class InformationType, + typename HashFunction, + typename KeyEqual, + typename TagType> +class BaseCache { + protected: + using Information = InformationType; + using Queue = Internal::Queue; + using QueueIterator = typename Queue::const_iterator; + + using Map = Internal::Map; + using MapIterator = typename Map::iterator; + using MapConstIterator = typename Map::const_iterator; + + using CallbackManagerType = CallbackManager; + using HitCallback = typename CallbackManagerType::HitCallback; + using MissCallback = typename CallbackManagerType::MissCallback; + using AccessCallback = typename CallbackManagerType::AccessCallback; + using HitCallbackContainer = + typename CallbackManagerType::HitCallbackContainer; + using MissCallbackContainer = + typename CallbackManagerType::MissCallbackContainer; + using AccessCallbackContainer = + typename CallbackManagerType::AccessCallbackContainer; + + public: + using Tag = TagType; + using InitializerList = std::initializer_list>; + using StatisticsPointer = std::shared_ptr>; + using size_t = std::size_t; + + static constexpr Tag tag() noexcept { + return {}; + } + + ///////////////////////////////////////////////////////////////////////////// + // ITERATORS CLASSES + ///////////////////////////////////////////////////////////////////////////// + + /// A non-const unordered iterator. + /// + /// Unordered iterators provide faster lookup than ordered iterators because + /// they have direct access to the underlying map. Also, they can convert to + /// ordered iterators cheaply. + struct UnorderedIterator + : public BaseUnorderedIterator { + using super = BaseUnorderedIterator; + friend BaseCache; + + /// Default constructor. + UnorderedIterator() = default; + + /// Constructs a new UnorderedIterator from an unordered base iterator. + /// + /// \param iterator The iterator to initialize this one from. + UnorderedIterator(BaseUnorderedIterator + iterator) // NOLINT(runtime/explicit) + : super(std::move(iterator)) { + // Note that this only works because these derived iterator + // classes dont' have any members of their own. + // It is necessary because the increment operators return base iterators. + } + + /// Constructs a new UnorderedIterator. + /// + /// \param cache The cache this iterator references. + /// \param iterator The underlying map iterator. + UnorderedIterator(BaseCache& cache, + MapIterator iterator) // NOLINT(runtime/explicit) + : super(cache, iterator) { + } + }; + + /// A const unordered iterator. + /// + /// Unordered iterators provide faster lookup than ordered iterators because + /// they have direct access to the underlying map. Also, they can convert to + /// ordered iterators cheaply. + struct UnorderedConstIterator + : public BaseUnorderedIterator { + using super = BaseUnorderedIterator; + friend BaseCache; + + /// Default constructor. + UnorderedConstIterator() = default; + + /// Constructs a new UnorderedConstIterator from any unordered base + /// iterator. + /// + /// \param iterator The iterator to initialize this one from. + template + UnorderedConstIterator( + BaseUnorderedIterator iterator) + : super(std::move(iterator)) { + // Note that this only works because these derived iterator + // classes dont' have any members of their own. + } + + /// Constructs a new UnorderedConstIterator from a non-const iterator. + /// + /// \param iterator The non-const iterator to initialize this one from. 
+ UnorderedConstIterator( + UnorderedIterator iterator) // NOLINT(runtime/explicit) + : super(std::move(iterator)) { + } + + /// Constructs a new UnorderedConstIterator. + /// + /// \param cache The cache this iterator references. + /// \param iterator The underlying map iterator. + UnorderedConstIterator( + const BaseCache& cache, + MapConstIterator iterator) // NOLINT(runtime/explicit) + : super(cache, iterator) { + } + }; + + /// An ordered iterator. + /// + /// Ordered iterators have a performance disadvantage compared to unordered + /// iterators the first time they are dereferenced. However, they may be + /// constructed or assigned from unordered iterators (of compatible + /// qualifiers). + struct OrderedIterator + : public BaseOrderedIterator { + using super = BaseOrderedIterator; + using UnderlyingIterator = typename super::UnderlyingIterator; + friend BaseCache; + + /// Default constructor. + OrderedIterator() = default; + + /// Constructs an ordered iterator from an unordered iterator. + /// + /// \param unordered_iterator The unordered iterator to construct from. + explicit OrderedIterator(UnorderedIterator unordered_iterator) + : super(std::move(unordered_iterator)) { + } + + /// Constructs a new OrderedIterator from an unordered base iterator. + /// + /// \param iterator The iterator to initialize this one from. + OrderedIterator(BaseOrderedIterator + iterator) // NOLINT(runtime/explicit) + : super(std::move(iterator)) { + // Note that this only works because these derived iterator + // classes dont' have any members of their own. + // It is necessary because the increment operators return base iterators. + } + + /// Constructs a new ordered iterator. + /// + /// \param cache The cache this iterator references. + /// \param iterator The underlying iterator. + OrderedIterator(BaseCache& cache, UnderlyingIterator iterator) + : super(cache, iterator) { + } + }; + + /// A const ordered iterator. + /// + /// Ordered iterators have a performance disadvantage compared to unordered + /// iterators the first time they are dereferenced. However, they may be + /// constructed or assigned from unordered iterators (of compatible + /// qualifiers). + struct OrderedConstIterator + : public BaseOrderedIterator { + using super = BaseOrderedIterator; + using UnderlyingIterator = typename super::UnderlyingIterator; + + friend BaseCache; + + /// Default constructor. + OrderedConstIterator() = default; + + /// Constructs a new OrderedConstIterator from a compatible ordered + /// iterator. + /// + /// \param iterator The iterator to initialize this one from. + template + OrderedConstIterator(BaseOrderedIterator + iterator) // NOLINT(runtime/explicit) + : super(iterator) { + // Note that this only works because these derived iterator + // classes dont' have any members of their own. + } + + /// Constructs a new const ordered iterator from a non-const one. + /// + /// \param iterator The non-const ordered iterator to construct from. + OrderedConstIterator(OrderedIterator iterator) // NOLINT(runtime/explicit) + : super(std::move(iterator)) { + } + + /// Constructs a new const ordered iterator from an unordered iterator. + /// + /// \param unordered_iterator The unordered iterator to construct from. + explicit OrderedConstIterator(UnorderedIterator unordered_iterator) + : super(std::move(unordered_iterator)) { + } + + /// Constructs a new const ordered iterator from a const unordered iterator. + /// + /// \param unordered_iterator The unordered iterator to construct from. 
+ explicit OrderedConstIterator( + UnorderedConstIterator unordered_iterator) // NOLINT(runtime/explicit) + : super(std::move(unordered_iterator)) { + } + + /// Constructs a new const ordered iterator. + /// + /// \param cache The cache this iterator references. + /// \param iterator The underlying iterator. + OrderedConstIterator(const BaseCache& cache, UnderlyingIterator iterator) + : super(cache, iterator) { + } + }; + + using InsertionResultType = InsertionResult; + + // Can't put these in LRU::Lowercase because they are nested, unfortunately + using ordered_iterator = OrderedIterator; + using ordered_const_iterator = OrderedConstIterator; + using unordered_iterator = UnorderedIterator; + using unordered_const_iterator = UnorderedConstIterator; + + ///////////////////////////////////////////////////////////////////////////// + // SPECIAL MEMBER FUNCTIONS + ///////////////////////////////////////////////////////////////////////////// + + /// Constructor. + /// + /// \param capacity The capacity of the cache. + /// \param hash The hash function to use for the internal map. + /// \param key_equal The key equality function to use for the internal map. + BaseCache(size_t capacity, + const HashFunction& hash, + const KeyEqual& key_equal) + : _map(0, hash, key_equal), _capacity(capacity), _last_accessed(key_equal) { + } + + /// Constructor. + /// + /// \param capacity The capacity of the cache. + /// \param begin The start of a range to construct the cache with. + /// \param end The end of a range to construct the cache with. + /// \param hash The hash function to use for the internal map. + /// \param key_equal The key equality function to use for the internal map. + template + BaseCache(size_t capacity, + Iterator begin, + Iterator end, + const HashFunction& hash, + const KeyEqual& key_equal) + : BaseCache(capacity, hash, key_equal) { + insert(begin, end); + } + + /// Constructor. + /// + /// The capacity is inferred from the distance between the two iterators and + /// lower-bounded by an internal constant $c_0$, usually 128 (i.e. the actual + /// capacity will be $\max(\text{distance}, c_0)$). + /// This may be expensive for iterators that are not random-access. + /// + /// \param begin The start of a range to construct the cache with. + /// \param end The end of a range to construct the cache with. + /// \param hash The hash function to use for the internal map. + /// \param key_equal The key equality function to use for the internal map. + template + BaseCache(Iterator begin, + Iterator end, + const HashFunction& hash, + const KeyEqual& key_equal) + // This may be expensive + : BaseCache(std::max(std::distance(begin, end), + Internal::DEFAULT_CAPACITY), + begin, + end, + hash, + key_equal) { + } + + /// Constructor. + /// + /// \param capacity The capacity of the cache. + /// \param range A range to construct the cache with. + /// \param hash The hash function to use for the internal map. + /// \param key_equal The key equality function to use for the internal map. + template > + BaseCache(size_t capacity, + Range& range, + const HashFunction& hash, + const KeyEqual& key_equal) + : BaseCache(capacity, hash, key_equal) { + insert(range); + } + + /// Constructor. + /// + /// The capacity is inferred from the distance between the beginning and end + /// of the range. This may be expensive for iterators that are not + /// random-access. + /// + /// \param range A range to construct the cache with. + /// \param hash The hash function to use for the internal map. 
+ /// \param key_equal The key equality function to use for the internal map. + template > + explicit BaseCache(Range& range, + const HashFunction& hash, + const KeyEqual& key_equal) + : BaseCache(std::begin(range), std::end(range), hash, key_equal) { + } + + /// Constructor. + /// + /// Elements of the range will be moved into the cache. + /// + /// \param capacity The capacity of the cache. + /// \param range A range to construct the cache with. + /// \param hash The hash function to use for the internal map. + /// \param key_equal The key equality function to use for the internal map. + template > + BaseCache(size_t capacity, + Range&& range, + const HashFunction& hash, + const KeyEqual& key_equal) + : BaseCache(capacity, hash, key_equal) { + insert(std::move(range)); + } + + /// Constructor. + /// + /// The capacity is inferred from the distance between the beginning and end + /// of the range. This may be expensive for iterators that are not + /// random-access. + /// + /// Elements of the range will be moved into the cache. + /// + /// \param range A range to construct the cache with. + /// \param hash The hash function to use for the internal map. + /// \param key_equal The key equality function to use for the internal map. + template > + explicit BaseCache(Range&& range, + const HashFunction& hash, + const KeyEqual& key_equal) + : BaseCache(std::distance(std::begin(range), std::end(range)), + std::move(range), + hash, + key_equal) { + } + + /// Constructor. + /// + /// \param capacity The capacity of the cache. + /// \param list The initializer list to construct the cache with. + /// \param hash The hash function to use for the internal map. + /// \param key_equal The key equality function to use for the internal map. + BaseCache(size_t capacity, + InitializerList list, + const HashFunction& hash, + const KeyEqual& key_equal) + : BaseCache(capacity, list.begin(), list.end(), hash, key_equal) { + } + + /// Constructor. + /// + /// \param list The initializer list to construct the cache with. + /// \param hash The hash function to use for the internal map. + /// \param key_equal The key equality function to use for the internal map. + BaseCache(InitializerList list, + const HashFunction& hash, + const KeyEqual& key_equal) // NOLINT(runtime/explicit) + : BaseCache(list.size(), list.begin(), list.end(), hash, key_equal) { + } + + /// Copy constructor. + BaseCache(const BaseCache& other) + : _map(other._map) + , _order(other._order) + , _stats(other._stats) + , _last_accessed(other._last_accessed) + , _callback_manager(other._callback_manager) + , _capacity(other._capacity) { + _reassign_references(); + } + + /// Move constructor. + BaseCache(BaseCache&& other) { + // Following the copy-swap idiom. + swap(other); + } + + /// Copy assignment operator. + BaseCache& operator=(const BaseCache& other) noexcept { + if (this != &other) { + _map = other._map; + _order = other._order; + _stats = other._stats; + _last_accessed = other._last_accessed; + _callback_manager = other._callback_manager; + _capacity = other._capacity; + _reassign_references(); + } + + return *this; + } + + /// Move assignment operator. + BaseCache& operator=(BaseCache&& other) noexcept { + // Following the copy-swap idiom. + swap(other); + return *this; + } + + /// Destructor. + virtual ~BaseCache() = default; + + /// Sets the contents of the cache to a range. + /// + /// If the size of the range is greater than the current capacity, + /// the capacity is increased to match the range's size. 
If the size of + /// the range is less than the current capacity, the cache's capacity is *not* + /// changed. + /// + /// \param range A range of pairs to assign to the cache. + /// \returns The cache instance. + template > + BaseCache& operator=(const Range& range) { + _clear_and_increase_capacity(range); + insert(range); + return *this; + } + + /// Sets the contents of the cache to an rvalue range. + /// + /// Pairs of the range are moved into the cache. + /// + /// \param range A range of pairs to assign to the cache. + /// \returns The cache instance. + template > + BaseCache& operator=(Range&& range) { + _clear_and_increase_capacity(range); + insert(std::move(range)); + return *this; + } + + /// Sets the contents of the cache to pairs from a list. + /// + /// \param list The list to assign to the cache. + /// \returns The cache instance. + BaseCache& operator=(InitializerList list) { + return operator=(list); + } + + /// Swaps the contents of the cache with another cache. + /// + /// \param other The other cache to swap with. + virtual void swap(BaseCache& other) noexcept { + using std::swap; + + swap(_order, other._order); + swap(_map, other._map); + swap(_last_accessed, other._last_accessed); + swap(_capacity, other._capacity); + } + + /// Swaps the contents of one cache with another cache. + /// + /// \param first The first cache to swap. + /// \param second The second cache to swap. + friend void swap(BaseCache& first, BaseCache& second) noexcept { + first.swap(second); + } + + /// Compares the cache for equality with another cache. + /// + /// \complexity O(N) + /// \param other The other cache to compare with. + /// \returns True if the keys __and values__ of the cache are identical to the + /// other, else false. + bool operator==(const BaseCache& other) const noexcept { + if (this == &other) return true; + if (this->_map != other._map) return false; + // clang-format off + return std::equal( + this->_order.begin(), + this->_order.end(), + other._order.begin(), + other._order.end(), + [](const auto& first, const auto& second) { + return first.get() == second.get(); + }); + // clang-format on + } + + /// Compares the cache for inequality with another cache. + /// + /// \complexity O(N) + /// \param other The other cache to compare with. + /// \returns True if there is any mismatch in keys __or their values__ + /// betweent + /// the two caches, else false. + bool operator!=(const BaseCache& other) const noexcept { + return !(*this == other); + } + + ///////////////////////////////////////////////////////////////////////////// + // ITERATOR INTERFACE + ///////////////////////////////////////////////////////////////////////////// + + /// \returns An unordered iterator to the beginning of the cache (this need + /// not be the first key inserted). + UnorderedIterator unordered_begin() noexcept { + return {*this, _map.begin()}; + } + + /// \returns A const unordered iterator to the beginning of the cache (this + /// need not be the key least recently inserted). + UnorderedConstIterator unordered_begin() const noexcept { + return unordered_cbegin(); + } + + /// \returns A const unordered iterator to the beginning of the cache (this + /// need not be the key least recently inserted). + UnorderedConstIterator unordered_cbegin() const noexcept { + return {*this, _map.cbegin()}; + } + + /// \returns An unordered iterator to the end of the cache (this + /// need not be one past the key most recently inserted). 
+ UnorderedIterator unordered_end() noexcept { + return {*this, _map.end()}; + } + + /// \returns A const unordered iterator to the end of the cache (this + /// need not be one past the key most recently inserted). + UnorderedConstIterator unordered_end() const noexcept { + return unordered_cend(); + } + + /// \returns A const unordered iterator to the end of the cache (this + /// need not be one past the key most recently inserted). + UnorderedConstIterator unordered_cend() const noexcept { + return {*this, _map.cend()}; + } + + /// \returns An ordered iterator to the beginning of the cache (the key least + /// recently inserted). + OrderedIterator ordered_begin() noexcept { + return {*this, _order.begin()}; + } + + /// \returns A const ordered iterator to the beginning of the cache (the key + /// least recently inserted). + OrderedConstIterator ordered_begin() const noexcept { + return ordered_cbegin(); + } + + /// \returns A const ordered iterator to the beginning of the cache (the key + /// least recently inserted). + OrderedConstIterator ordered_cbegin() const noexcept { + return {*this, _order.cbegin()}; + } + + /// \returns An ordered iterator to the end of the cache (one past the key + /// most recently inserted). + OrderedIterator ordered_end() noexcept { + return {*this, _order.end()}; + } + + /// \returns A const ordered iterator to the end of the cache (one past the + /// key least recently inserted). + OrderedConstIterator ordered_end() const noexcept { + return ordered_cend(); + } + + /// \returns A const ordered iterator to the end of the cache (one past the + /// key least recently inserted). + OrderedConstIterator ordered_cend() const noexcept { + return {*this, _order.cend()}; + } + + /// \copydoc unordered_begin() + UnorderedIterator begin() noexcept { + return unordered_begin(); + } + + /// \copydoc unordered_cbegin() + UnorderedConstIterator begin() const noexcept { + return cbegin(); + } + + /// \copydoc unordered_cbegin() + UnorderedConstIterator cbegin() const noexcept { + return unordered_begin(); + } + + /// \copydoc unordered_end() const + UnorderedIterator end() noexcept { + return unordered_end(); + } + + /// \copydoc unordered_cend() const + UnorderedConstIterator end() const noexcept { + return cend(); + } + + /// \copydoc unordered_cend() const + UnorderedConstIterator cend() const noexcept { + return unordered_cend(); + } + + /// \returns True if the given iterator may be safely dereferenced, else + /// false. + /// \details Behavior is undefined if the iterator does not point into this + /// cache. + /// \param unordered_iterator The iterator to check. + virtual bool is_valid(UnorderedConstIterator unordered_iterator) const + noexcept { + return unordered_iterator != unordered_end(); + } + + /// \returns True if the given iterator may be safely dereferenced, else + /// false. + /// \details Behavior is undefined if the iterator does not point into this + /// cache. + /// \param ordered_iterator The iterator to check. + virtual bool is_valid(OrderedConstIterator ordered_iterator) const noexcept { + return ordered_iterator != ordered_end(); + } + + /// Checks if the given iterator may be dereferencend and throws an exception + /// if not. + /// + /// The exception thrown, if any, depends on the state of the iterator. + /// + /// \param unordered_iterator The iterator to check. + /// \throws LRU::Error::InvalidIterator if the iterator is the end iterator. 
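// A sketch contrasting the two traversal orders exposed above (the contents are
// illustrative; key()/value() accessors are provided by the iterator types):
//
//   LRU::Cache<std::string, int> cache = {{"a", 1}, {"b", 2}, {"c", 3}};
//   for (auto it = cache.ordered_begin(); it != cache.ordered_end(); ++it) {
//     // insertion order: "a", then "b", then "c" (least-recently inserted first)
//   }
//   for (auto it = cache.unordered_begin(); it != cache.unordered_end(); ++it) {
//     // unspecified hash-map order, but cheaper; begin()/end() alias this traversal
//   }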
+ virtual void + throw_if_invalid(UnorderedConstIterator unordered_iterator) const { + if (unordered_iterator == unordered_end()) { + throw LRU::Error::InvalidIterator(); + } + } + + /// Checks if the given iterator may be dereferencend and throws an exception + /// if not. + /// + /// The exception thrown, if any, depends on the state of the iterator. + /// + /// \param ordered_iterator The iterator to check. + /// \throws LRU::Error::InvalidIterator if the iterator is the end iterator. + virtual void throw_if_invalid(OrderedConstIterator ordered_iterator) const { + if (ordered_iterator == ordered_end()) { + throw LRU::Error::InvalidIterator(); + } + } + + ///////////////////////////////////////////////////////////////////////////// + // CACHE INTERFACE + ///////////////////////////////////////////////////////////////////////////// + + /// Tests if the given key is contained in the cache. + /// + /// This function may return false even if the key is actually currently + /// stored in the cache, but the concrete cache class places some additional + /// constraint as to when a key may be accessed (such as a time limit). + /// + /// \complexity O(1) expected and amortized. + /// \param key The key to check for. + /// \returns True if the key's value may be accessed via `lookup()` without an + /// error, else false. + virtual bool contains(const Key& key) const { + if (key == _last_accessed) { + if (_last_accessed_is_ok(key)) { + _register_hit(key, _last_accessed.value()); + // If this is the last accessed key, it's at the front anyway + return true; + } else { + return false; + } + } + + return find(key) != end(); + } + + /// Looks up the value for the given key. + /// + /// If the key is found in the cache, it is moved to the front. Any iterators + /// pointing to that key are still valid, but the subsequent order of + /// iteration may be different from what it was before. + /// + /// \complexity O(1) expected and amortized. + /// \param key The key whose value to look for. + /// \throws LRU::Error::KeyNotFound if the key's value may not be accessed. + /// \returns The value stored in the cache for the given key. + /// \see contains() + virtual const Value& lookup(const Key& key) const { + if (key == _last_accessed) { + auto& value = _value_for_last_accessed(); + _register_hit(key, value); + // If this is the last accessed key, it's at the front anyway + return value; + } + + auto iterator = find(key); + if (iterator == end()) { + throw LRU::Error::KeyNotFound(); + } else { + return iterator.value(); + } + } + + /// Looks up the value for the given key. + /// + /// If the key is found in the cache, it is moved to the front. Any iterators + /// pointing to that key are still valid, but the subsequent order of + /// iteration may be different from what it was before. + /// + /// \complexity O(1) expected and amortized. + /// \param key The key whose value to look for. + /// \throws LRU::Error::KeyNotFound if the key's value may not be accessed. + /// \returns The value stored in the cache for the given key. + /// \see contains() + virtual Value& lookup(const Key& key) { + if (key == _last_accessed) { + auto& value = _value_for_last_accessed(); + _register_hit(key, value); + // If this is the last accessed key, it's at the front anyway + return value; + } + + auto iterator = find(key); + if (iterator == end()) { + throw LRU::Error::KeyNotFound(); + } else { + return iterator.value(); + } + } + + /// Attempts to return an iterator to the given key in the cache. 
+ /// + /// If the key is found in the cache, it is moved to the front. Any iterators + /// pointing to that key are still valid, but the subsequent order of + /// iteration may be different from what it was before. + /// + /// \complexity O(1) expected and amortized. + /// \param key The key whose value to look for. + /// \returns An iterator pointing to the entry with the given key, if one + /// exists, else the end iterator. + virtual UnorderedIterator find(const Key& key) = 0; + + /// Attempts to return a const iterator to the given key in the cache. + /// + /// If the key is found in the cache, it is moved to the front. Any iterators + /// pointing to that key are still valid, but the subsequent order of + /// iteration may be different from what it was before. + /// + /// \complexity O(1) expected and amortized. + /// \param key The key whose value to look for. + /// \returns A const iterator pointing to the entry with the given key, if one + /// exists, else the end iterator. + virtual UnorderedConstIterator find(const Key& key) const = 0; + + /// \copydoc lookup(const Key&) + virtual Value& operator[](const Key& key) { + return lookup(key); + } + + /// \copydoc lookup(const Key&) const + virtual const Value& operator[](const Key& key) const { + return lookup(key); + } + + /// Inserts the given `(key, value)` pair into the cache. + /// + /// If the cache's capacity is reached, the most recently used element will be + /// evicted. Any iterators pointing to that element will be invalidated. + /// Iterators pointing to other elements are not affected. + /// + /// \complexity O(1) expected and amortized. + /// \param key The key to insert. + /// \param value The value to insert with the key. + /// \returns An `InsertionResult`, holding a boolean indicating whether the + /// key was newly inserted (true) or only updated (false) as well as an + /// iterator pointing to the entry for the key. + virtual InsertionResultType insert(const Key& key, const Value& value) { + if (_capacity == 0) return {false, end()}; + + auto iterator = _map.find(key); + + // To insert, we first check if the key is already present in the cache + // and if so, update its value and move its order iterator to the front + // of the queue. Else, we insert the key at the end of the queue and + // possibly pop the front if the cache has reached its capacity. + + if (iterator == _map.end()) { + auto result = _map.emplace(key, Information(value)); + assert(result.second); + auto order = _insert_new_key(result.first->first); + result.first->second.order = order; + + _last_accessed = result.first; + return {true, {*this, result.first}}; + } else { + _move_to_front(iterator, value); + _last_accessed = iterator; + return {false, {*this, iterator}}; + } + } + + /// Inserts a range of `(key, value)` pairs. + /// + /// If, at any point, the cache's capacity is reached, the most recently used + /// element will be evicted. Any iterators pointing to that element will + /// be invalidated. Iterators pointing to other elements are not affected. + /// + /// Note: This operation has no performance benefits over + /// element-wise insertion via `insert()`. + /// + /// \param begin An iterator for the start of the range to insert. + /// \param end An iterator for the end of the range to insert. + /// \returns The number of elements newly inserted (as opposed to only + /// updated). 
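// What the returned InsertionResult conveys, in a short sketch (types and
// values are illustrative):
//
//   LRU::Cache<std::string, int> cache(4);
//   auto first  = cache.insert("x", 1);
//   auto second = cache.insert("x", 2);   // existing key: value is updated, entry moves to the front
//   // first.was_inserted()  == true      (new key)
//   // second.was_inserted() == false     (update only)
//   // second.iterator() points at the entry for "x"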
+ template > + size_t insert(Iterator begin, Iterator end) { + size_t newly_inserted = 0; + for (; begin != end; ++begin) { + const auto result = insert(begin->first, begin->second); + newly_inserted += result.was_inserted(); + } + + return newly_inserted; + } + + /// Inserts a range of `(key, value)` pairs. + /// + /// If, at any point, the cache's capacity is reached, the most recently used + /// element will be evicted. Any iterators pointing to that element will + /// be invalidated. Iterators pointing to other elements are not affected. + /// + /// This operation has no performance benefits over + /// element-wise insertion via `insert()`. + /// + /// \param range The range of `(key, value)` pairs to insert. + /// \returns The number of elements newly inserted (as opposed to only + /// updated). + template > + size_t insert(Range& range) { + using std::begin; + using std::end; + + return insert(begin(range), end(range)); + } + + /// Moves the elements of the range into the cache. + /// + /// If, at any point, the cache's capacity is reached, the most recently used + /// element will be evicted. Any iterators pointing to that element will + /// be invalidated. Iterators pointing to other elements are not affected. + /// + /// \param range The range of `(key, value)` pairs to move into the cache. + /// \returns The number of elements newly inserted (as opposed to only + /// updated). + template > + size_t insert(Range&& range) { + size_t newly_inserted = 0; + for (auto& pair : range) { + const auto result = + emplace(std::move(pair.first), std::move(pair.second)); + newly_inserted += result.was_inserted(); + } + + return newly_inserted; + } + + /// Inserts a list `(key, value)` pairs. + /// + /// If the cache's capacity is reached, the most recently used element will be + /// evicted (one or more times). Any iterators pointing to that element will + /// be invalidated. Iterators pointing to other elements are not affected. + /// + /// This operation has no performance benefits over + /// element-wise insertion via `insert()`. + /// + /// \param list The list of `(key, value)` pairs to insert. + /// \returns The number of elements newly inserted (as opposed to only + /// updated). + virtual size_t insert(InitializerList list) { + return insert(list.begin(), list.end()); + } + + /// Emplaces a new `(key, value)` pair into the cache. + /// + /// This emplacement function allows perfectly forwarding an arbitrary number + /// of arguments to the constructor of both the key and value type, via + /// appropriate tuples. The intended usage is with `std::forward_as_tuple`, + /// for example: + /// \code{.cpp} + /// struct A { A(int, const std::string&) { } }; + /// struct B { B(double) {} }; + /// + /// LRU::Cache cache; + /// + /// cache.emplace( + /// std::piecewise_construct, + /// std::forward_as_tuple(1, "hello"), + /// std::forward_as_tuple(5.0), + /// ); + /// \endcode + /// + /// There is a convenience overload that requires much less overhead, if both + /// constructors expect only a single argument. + /// + /// If the cache's capacity is reached, the most recently used element will be + /// evicted. Any iterators pointing to that element will be invalidated. + /// Iterators pointing to other elements are not affected. + /// + /// \complexity O(1) expected and amortized. + /// \param _ A dummy parameter to work around overload resolution. + /// \param key_arguments A tuple of arguments to construct a key object with. 
+ /// \param value_arguments A tuple of arguments to construct a value object + /// with. + /// \returns An `InsertionResult`, holding a boolean indicating whether the + /// key was newly inserted (true) or only updated (false) as well as an + /// iterator pointing to the entry for the key. + template + InsertionResultType emplace(std::piecewise_construct_t _, + const std::tuple& key_arguments, + const std::tuple& value_arguments) { + if (_capacity == 0) return {false, end()}; + + auto key = Internal::construct_from_tuple(key_arguments); + auto iterator = _map.find(key); + + if (iterator == _map.end()) { + auto result = _map.emplace(std::move(key), Information(value_arguments)); + auto order = _insert_new_key(result.first->first); + result.first->second.order = order; + assert(result.second); + + _last_accessed = result.first; + return {true, {*this, result.first}}; + } else { + auto value = Internal::construct_from_tuple(value_arguments); + _move_to_front(iterator, value); + _last_accessed = iterator; + return {false, {*this, iterator}}; + } + } + + /// Emplaces a `(key, value)` pair. + /// + /// This is a convenience overload removing the necessity for + /// `std::piecewise_construct` and `std::forward_as_tuple` that may be used in + /// the case that both the key and value have constructors expecting only a + /// single argument. + /// + /// If the cache's capacity is reached, the most recently used element will be + /// evicted. Any iterators pointing to that element will be invalidated. + /// Iterators pointing to other elements are not affected. + /// + /// \param key_argument The argument to construct a key object with. + /// \param value_argument The argument to construct a value object with. + /// \returns An `InsertionResult`, holding a boolean indicating whether the + /// key was newly inserted (true) or only updated (false) as well as an + /// iterator pointing to the entry for the key. + template + InsertionResultType emplace(K&& key_argument, V&& value_argument) { + auto key_tuple = std::forward_as_tuple(std::forward(key_argument)); + auto value_tuple = std::forward_as_tuple(std::forward(value_argument)); + return emplace(std::piecewise_construct, key_tuple, value_tuple); + } + + /// Erases the given key from the cache, if it is present. + /// + /// If the key is not present in the cache, this is a no-op. + /// All iterators pointing to the given key are invalidated. + /// Other iterators are not affected. + /// + /// \param key The key to erase. + /// \returns True if the key was erased, else false. + virtual bool erase(const Key& key) { + // No need to use _last_accessed_is_ok here, because even + // if it has expired, it's no problem to erase it anyway + if (_last_accessed == key) { + _erase(_last_accessed.key(), _last_accessed.information()); + return true; + } + + auto iterator = _map.find(key); + if (iterator != _map.end()) { + _erase(iterator); + return true; + } + + return false; + } + + /// Erases the key pointed to by the given iterator. + /// + /// \param iterator The iterator whose key to erase. + /// \throws LRU::Error::InvalidIterator if the iterator is the end iterator. + virtual void erase(UnorderedConstIterator iterator) { + /// We have this overload to avoid the extra conversion-construction from + /// unordered to ordered iterator (and renewed hash lookup) + if (iterator == unordered_cend()) { + throw LRU::Error::InvalidIterator(); + } else { + _erase(iterator._iterator); + } + } + + + /// Erases the key pointed to by the given iterator. 
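// A sketch of both emplace overloads documented above, using standard-library
// key and value types so the piecewise form is easy to follow; the cache type
// and header path are assumed as in the earlier sketches.
#include <string>
#include <tuple>
#include <utility>

void emplace_example() {
  LRU::Cache<std::string, std::pair<int, int>> cache;

  // Piecewise form: ("abcdef", 3) goes to the std::string constructor (the key
  // becomes "abc"), (4, 2) goes to the std::pair constructor, no temporaries.
  cache.emplace(std::piecewise_construct,
                std::forward_as_tuple("abcdef", 3),
                std::forward_as_tuple(4, 2));

  // Convenience overload when each constructor takes a single argument.
  cache.emplace("xy", std::make_pair(1, 1));

  cache.erase("xy");  // true; erasing a key that is not present returns false
}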
+ /// + /// \param iterator The iterator whose key to erase. + /// \throws LRU::Error::InvalidIterator if the iterator is the end iterator. + virtual void erase(OrderedConstIterator iterator) { + if (iterator == ordered_cend()) { + throw LRU::Error::InvalidIterator(); + } else { + _erase(_map.find(iterator.key())); + } + } + + /// Clears the cache entirely. + virtual void clear() { + _map.clear(); + _order.clear(); + _last_accessed.invalidate(); + } + + /// Requests shrinkage of the cache to the given size. + /// + /// If the passed size is 0, this operation is equivalent to `clear()`. If the + /// size is greater than the current size, it is a no-op. Otherwise, the size + /// of the cache is reduzed to the given size by repeatedly removing the least + /// recent element. + /// + /// \param new_size The size to (maybe) shrink to. + virtual void shrink(size_t new_size) { + if (new_size >= size()) return; + if (new_size == 0) { + clear(); + return; + } + + while (size() > new_size) { + _erase_lru(); + } + } + + ///////////////////////////////////////////////////////////////////////////// + // SIZE AND CAPACITY INTERFACE + ///////////////////////////////////////////////////////////////////////////// + + /// \returns The number of keys present in the cache. + virtual size_t size() const noexcept { + return _map.size(); + } + + /// Sets the capacity of the cache to the given value. + /// + /// If the given capacity is less than the current capacity of the cache, + /// the least-recently inserted element is removed repeatedly until the + /// capacity is equal to the given value. + /// + /// \param new_capacity The capacity to shrink or grow to. + virtual void capacity(size_t new_capacity) { + // Pop the front of the cache if we have to resize + while (size() > new_capacity) { + _erase_lru(); + } + _capacity = new_capacity; + } + + /// Returns the current capacity of the cache. + virtual size_t capacity() const noexcept { + return _capacity; + } + + /// \returns the number of slots left in the cache. + /// + /// \details After this number of elements have been inserted, the next one + /// insertion is preceded by an erasure of the least-recently inserted + /// element. + virtual size_t space_left() const noexcept { + return _capacity - size(); + } + + /// \returns True if the cache contains no elements, else false. + virtual bool is_empty() const noexcept { + return size() == 0; + } + + /// \returns True if the cache's size equals its capacity, else false. + /// + /// \details If `is_full()` returns `true`, the next insertion is preceded by + /// an erasure of the least-recently inserted element. + virtual bool is_full() const noexcept { + return size() == _capacity; + } + + /// \returns The function used to hash keys. + virtual HashFunction hash_function() const { + return _map.hash_function(); + } + + /// \returns The function used to compare keys. + virtual KeyEqual key_equal() const { + return _map.key_eq(); + } + + ///////////////////////////////////////////////////////////////////////////// + // STATISTICS INTERFACE + ///////////////////////////////////////////////////////////////////////////// + + /// Registers the given statistics object for monitoring. + /// + /// This method is useful if the statistics object is to + /// be shared between caches. + /// + /// Ownership of the statistics object remains with the user and __not__ with + /// the cache object. Also, behavior is undefined if the lifetime of the cache + /// exceeds that of the registered statistics object. 
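// A sketch of the shrink/capacity semantics described above: shrinking evicts
// least-recent entries until the requested size is reached, while growing the
// capacity never evicts. Assumptions as in the earlier sketches.
#include <string>

void shrink_and_capacity_example() {
  LRU::Cache<std::string, int> cache;
  cache.capacity(3);

  cache.insert("a", 1);
  cache.insert("b", 2);
  cache.insert("c", 3);
  // cache.is_full() == true, cache.space_left() == 0

  cache.shrink(1);     // repeatedly erases the least recent entry; size() == 1
  cache.capacity(8);   // growing only raises the limit; nothing is evicted
}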
+ /// + /// \param statistics The statistics object to register. + virtual void monitor(const StatisticsPointer& statistics) { + _stats = statistics; + } + + /// Registers the given statistics object for monitoring. + /// + /// Ownership of the statistics object is transferred to the cache. + /// + /// \param statistics The statistics object to register. + virtual void monitor(StatisticsPointer&& statistics) { + _stats = std::move(statistics); + } + + /// Constructs a new statistics in-place in the cache. + /// + /// This method is useful if the cache is to have exclusive ownership of the + /// statistics and out-of-place construction and move is inconvenient. + /// + /// \param args Arguments to be forwarded to the constructor of the statistics + /// object. + template >> + void monitor(Args&&... args) { + _stats = std::make_shared>(std::forward(args)...); + } + + /// Stops any monitoring being performed with a statistics object. + /// + /// If the cache is not currently monitoring at all, this is a no-op. + virtual void stop_monitoring() { + _stats.reset(); + } + + /// \returns True if the cache is currently monitoring statistics, else + /// false. + bool is_monitoring() const noexcept { + return _stats.has_stats(); + } + + /// \returns The statistics object currently in use by the cache. + /// \throws LRU::Error::NotMonitoring if the cache is currently not + /// monitoring. + virtual Statistics& stats() { + if (!is_monitoring()) { + throw LRU::Error::NotMonitoring(); + } + return _stats.get(); + } + + /// \returns The statistics object currently in use by the cache. + /// \throws LRU::Error::NotMonitoring if the cache is currently not + /// monitoring. + virtual const Statistics& stats() const { + if (!is_monitoring()) { + throw LRU::Error::NotMonitoring(); + } + return _stats.get(); + } + + /// \returns A `shared_ptr` to the statistics currently in use by the cache. + virtual StatisticsPointer& shared_stats() { + return _stats.shared(); + } + + /// \returns A `shared_ptr` to the statistics currently in use by the cache. + virtual const StatisticsPointer& shared_stats() const { + return _stats.shared(); + } + + ///////////////////////////////////////////////////////////////////////////// + // CALLBACK INTERFACE + ///////////////////////////////////////////////////////////////////////////// + + /// Registers a new hit callback. + /// + /// \param hit_callback The hit callback function to register with the cache. + template > + void hit_callback(Callback&& hit_callback) { + _callback_manager.hit_callback(std::forward(hit_callback)); + } + + /// Registers a new miss callback. + /// + /// \param miss_callback The miss callback function to register with the + /// cache. + template > + void miss_callback(Callback&& miss_callback) { + _callback_manager.miss_callback(std::forward(miss_callback)); + } + + /// Registers a new access callback. + /// + /// \param access_callback The access callback function to register with the + /// cache. + template > + void access_callback(Callback&& access_callback) { + _callback_manager.access_callback(std::forward(access_callback)); + } + + /// Clears all hit callbacks. + void clear_hit_callbacks() { + _callback_manager.clear_hit_callbacks(); + } + + /// Clears all miss callbacks. + void clear_miss_callbacks() { + _callback_manager.clear_miss_callbacks(); + } + + /// Clears all access callbacks. + void clear_access_callbacks() { + _callback_manager.clear_access_callbacks(); + } + + /// Clears all callbacks. 
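// A sketch of the callback interface described above: hit callbacks receive
// the key and value, miss callbacks the key, access callbacks the key and a
// hit/miss flag. It assumes that lookups such as contains() report hits and
// misses to the registered callbacks; cache type and header assumed as before.
#include <iostream>
#include <string>

void callback_example() {
  LRU::Cache<std::string, int> cache;

  cache.hit_callback([](const std::string& key, int value) {
    std::cout << "hit  " << key << " -> " << value << '\n';
  });
  cache.miss_callback([](const std::string& key) {
    std::cout << "miss " << key << '\n';
  });
  cache.access_callback([](const std::string& key, bool was_hit) {
    std::cout << key << (was_hit ? " (hit)\n" : " (miss)\n");
  });

  cache.insert("a", 1);
  cache.contains("a");  // fires the hit and access callbacks
  cache.contains("b");  // fires the miss and access callbacks
}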
+ void clear_all_callbacks() { + _callback_manager.clear(); + } + + /// \returns All hit callbacks. + const HitCallbackContainer& hit_callbacks() const noexcept { + return _callback_manager.hit_callbacks(); + } + + /// \returns All miss callbacks. + const MissCallbackContainer& miss_callbacks() const noexcept { + return _callback_manager.miss_callbacks(); + } + + /// \returns All access callbacks. + const AccessCallbackContainer& access_callbacks() const noexcept { + return _callback_manager.access_callbacks(); + } + + protected: + // The ordered iterators need to perform lookups without changing + // the order of elements or affecting statistics. + template + friend class BaseOrderedIterator; + + using MapInsertionResult = decltype(Map().emplace()); + using LastAccessed = + typename Internal::LastAccessed; + + /// Moves the key pointed to by the iterator to the front of the order. + /// + /// \param iterator The iterator pointing to the key to move. + virtual void _move_to_front(QueueIterator iterator) const { + if (size() == 1) return; + // Extract the current linked-list node and insert (splice it) at the end + // The original iterator is not invalidated and now points to the new + // position (which is still the same node). + _order.splice(_order.end(), _order, iterator); + } + + /// Moves the key pointed to by the iterator to the front of the order and + /// assigns a new value. + /// + /// \param iterator The iterator pointing to the key to move. + /// \param new_value The updated value to move the key with. + virtual void _move_to_front(MapIterator iterator, const Value& new_value) { + // Extract the current linked-list node and insert (splice it) at the end + // The original iterator is not invalidated and now points to the new + // position (which is still the same node). + _move_to_front(iterator->second.order); + iterator->second.value = new_value; + } + + /// Erases the element most recently inserted into the cache. + virtual void _erase_lru() { + _erase(_map.find(_order.front())); + } + + /// Erases the element pointed to by the iterator. + /// + /// \param iterator The iterator pointing to the key to erase. + virtual void _erase(MapConstIterator iterator) { + if (_last_accessed == iterator) { + _last_accessed.invalidate(); + } + + _order.erase(iterator->second.order); + _map.erase(iterator); + } + + /// Erases the given key. + /// + /// This method is useful if the key and information are already present, to + /// avoid an additional hash lookup to get an iterator to the corresponding + /// map entry. + /// + /// \param key The key to erase. + /// \param information The information associated with the key to erase. + virtual void _erase(const Key& key, const Information& information) { + if (key == _last_accessed) { + _last_accessed.invalidate(); + } + + // To be sure, we should do this first, since the order stores a reference + // to the key in the map. + _order.erase(information.order); + + // Requires an additional hash-lookup, whereas erase(iterator) doesn't + _map.erase(key); + } + + /// Convenience methhod to get the value for an insertion result into a map. + /// \returns The value for the given result. + virtual Value& _value_from_result(MapInsertionResult& result) noexcept { + // `result.first` is the map iterator (to a pair), whose `second` member + // is + // the information object, whose `value` member is the value stored. 
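// The protected helpers above all lean on std::list::splice for O(1) recency
// updates. A reduced sketch with plain standard containers (the real cache
// stores reference wrappers and Information objects, not raw keys and values):
#include <list>
#include <string>
#include <unordered_map>
#include <utility>

struct MiniLruSketch {
  using Order = std::list<std::string>;                 // front = least recent
  Order order;
  std::unordered_map<std::string, std::pair<int, Order::iterator>> map;

  // Relink the node at the most-recent end; no iterators are invalidated.
  void touch(Order::iterator node) {
    order.splice(order.end(), order, node);
  }

  // Drop the least recently used entry, as _erase_lru() does above.
  void erase_lru() {
    map.erase(order.front());
    order.pop_front();
  }
};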
+ return result.first->second.value; + } + + /// The main use of this method is that it may be override by a base class + /// if + /// there are any stronger constraints (such as time expiration) as to when + /// the last-accessed object may be used to access a key. + /// + /// \param key The key to compare the last accessed object against. + /// \returns True if the last-accessed object is valid. + virtual bool _last_accessed_is_ok(const Key& key) const noexcept { + return true; + } + + /// \copydoc _value_for_last_accessed() const + virtual Value& _value_for_last_accessed() { + return _last_accessed.value(); + } + + /// Attempts to access the last accessed key's value. + /// \returns The value of the last accessed object. + /// \details This method exists so that derived classes may perform + /// additional + /// checks (and possibly throw exceptions) or perform other operations to + /// retrieve the value. + virtual const Value& _value_for_last_accessed() const { + return _last_accessed.value(); + } + + /// Registers a hit for the key and performs appropriate actions. + /// \param key The key to register a hit for. + /// \param value The value that was found for the key. + virtual void _register_hit(const Key& key, const Value& value) const { + if (is_monitoring()) { + _stats.register_hit(key); + } + + _callback_manager.hit(key, value); + } + + /// Registers a miss for the key and performs appropriate actions. + /// \param key The key to register a miss for. + virtual void _register_miss(const Key& key) const { + if (is_monitoring()) { + _stats.register_miss(key); + } + + _callback_manager.miss(key); + } + + /// The common part of both range assignment operators. + /// + /// \param range The range to assign to. + template + void _clear_and_increase_capacity(const Range& range) { + using std::begin; + using std::end; + + clear(); + + auto distance = std::distance(begin(range), end(range)); + if (distance > _capacity) { + _capacity = distance; + } + } + + /// Looks up each key in the queue and re-assigns it to the proper key in the + /// map. + /// + /// After a copy, the reference (wrappers) in the order queue point + /// to the keys of the other cache's map. Thus we need to re-assign them. + void _reassign_references() noexcept { + for (auto& key_reference : _order) { + key_reference = std::ref(_map.find(key_reference)->first); + } + } + + /// Inserts a new key into the queue. + /// + /// If the cache is full, the LRU node is re-used. + /// Else a node is inserted at the order. + /// + /// \returns The resulting iterator. + QueueIterator _insert_new_key(const Key& key) { + if (_is_too_full()) { + _evict_lru_for(key); + } else { + _order.emplace_back(key); + } + + return std::prev(_order.end()); + } + + /// Evicts the LRU element for the given new key. + /// + /// \param key The new key to insert into the queue. + void _evict_lru_for(const Key& key) { + _map.erase(_order.front()); + _order.front() = std::ref(key); + _move_to_front(_order.begin()); + } + + /// \returns True if the cache is too full and an element must be evicted, + /// else false. + bool _is_too_full() const noexcept { + return size() > _capacity; + } + + /// The map from keys to information objects. + Map _map; + + /// The queue keeping track of the insertion order of elements. + mutable Queue _order; + + /// The object to mutate statistics if any are registered. + mutable StatisticsMutator _stats; + + /// The last-accessed cache object. 
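// _insert_new_key() and _evict_lru_for() above avoid a pop/push pair when the
// cache is full by reusing the least-recent queue node for the new key. A
// standard-library sketch of that node reuse (names are illustrative):
#include <iterator>
#include <list>
#include <string>

// Assumes `order` is non-empty and already at capacity.
std::list<std::string>::iterator reuse_lru_node(std::list<std::string>& order,
                                                const std::string& new_key) {
  order.front() = new_key;                          // overwrite the evicted key
  order.splice(order.end(), order, order.begin());  // move that node to the back
  return std::prev(order.end());                    // iterator to the new entry
}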
+ mutable LastAccessed _last_accessed; + + /// The callback manager to store any callbacks. + mutable CallbackManagerType _callback_manager; + + /// The current capacity of the cache. + size_t _capacity; +}; +} // namespace Internal +} // namespace LRU + +#endif // LRU_INTERNAL_BASE_CACHE_HPP diff --git a/include/lru/internal/base-iterator.hpp b/include/lru/internal/base-iterator.hpp new file mode 100644 index 0000000..b695a72 --- /dev/null +++ b/include/lru/internal/base-iterator.hpp @@ -0,0 +1,216 @@ +/// The MIT License (MIT) +/// Copyright (c) 2016 Peter Goldsborough +/// +/// Permission is hereby granted, free of charge, to any person obtaining a copy +/// of this software and associated documentation files (the "Software"), to +/// deal in the Software without restriction, including without limitation the +/// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +/// sell copies of the Software, and to permit persons to whom the Software is +/// furnished to do so, subject to the following conditions: +/// +/// The above copyright notice and this permission notice shall be included in +/// all copies or substantial portions of the Software. +/// +/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +/// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +/// IN THE SOFTWARE. + +#ifndef LRU_INTERNAL_BASE_ITERATOR_HPP +#define LRU_INTERNAL_BASE_ITERATOR_HPP + +#include +#include + +#include +#include + +#define PUBLIC_BASE_ITERATOR_MEMBERS \ + typename super::Entry; \ + using typename super::KeyType; \ + using typename super::ValueType; + +#define PRIVATE_BASE_ITERATOR_MEMBERS \ + super::_iterator; \ + using super::_entry; \ + using super::_cache; + + +namespace LRU { +namespace Internal { + +/// The base class for all (ordered and unordered) iterators. +/// +/// All iterators over our LRU caches store a reference to the cache they point +/// into, an underlying iterator they adapt (e.g. a map iterator or list +/// iterator) as well as a entry, a reference to which is returned when +/// dereferencing the iterator. +/// +/// \tparam IteratorTag A standard iterator category tag. +/// \tparam Key The key type over which instances of the iterator iterate. +/// \tparam Value The value type over which instances of the iterator iterate. +/// \tparam Cache The type of the cache instances of the iterator point into. +/// \tparam UnderlyingIterator The underlying iterator class used to implement +/// the iterator. +template +class BaseIterator : public std::iterator> { + public: + using KeyType = Key; + using ValueType = + std::conditional_t::value, const Value, Value>; + using Entry = LRU::Entry; + + /// Default constructor. + BaseIterator() noexcept : _cache(nullptr) { + } + + /// Constructor. + /// + /// \param cache The cache this iterator points into. + /// \param iterator The underlying iterator to adapt. + BaseIterator(Cache& cache, const UnderlyingIterator& iterator) noexcept + : _iterator(iterator), _cache(&cache) { + } + + /// Copy constructor. + /// + /// Differs from the default copy constructor in that it does not copy the + /// entry. + /// + /// \param other The base iterator to copy. 
+ BaseIterator(const BaseIterator& other) noexcept + : _iterator(other._iterator), _cache(other._cache) { + // Note: we do not copy the entry, as it would require a new allocation. + // Since iterators are often taken by value, this may incur a high cost. + // As such we delay the retrieval of the entry to the first call to entry(). + } + + /// Copy assignment operator. + /// + /// Differs from the default copy assignment + /// operator in that it does not copy the entry. + /// + /// \param other The base iterator to copy. + /// \return The base iterator instance. + BaseIterator& operator=(const BaseIterator& other) noexcept { + if (this != &other) { + _iterator = other._iterator; + _cache = other._cache; + _entry.reset(); + } + return *this; + } + + /// Move constructor. + BaseIterator(BaseIterator&& other) noexcept = default; + + /// Move assignment operator. + BaseIterator& operator=(BaseIterator&& other) noexcept = default; + + /// Generalized copy constructor. + /// + /// Mainly necessary for non-const to const conversion. + /// + /// \param other The base iterator to copy from. + template + BaseIterator(const BaseIterator& other) + : _iterator(other._iterator), _entry(other._entry), _cache(other._cache) { + } + + /// Generalized move constructor. + /// + /// Mainly necessary for non-const to const conversion. + /// + /// \param other The base iterator to move into this one. + template + BaseIterator(BaseIterator&& other) noexcept + : _iterator(std::move(other._iterator)) + , _entry(std::move(other._entry)) + , _cache(std::move(other._cache)) { + } + + /// Destructor. + virtual ~BaseIterator() = default; + + /// Swaps this base iterator with another one. + /// + /// \param other The other iterator to swap with. + void swap(BaseIterator& other) noexcept { + // Enable ADL + using std::swap; + + swap(_iterator, other._iterator); + swap(_entry, other._entry); + swap(_cache, other._cache); + } + + /// Swaps two base iterator. + /// + /// \param first The first iterator to swap. + /// \param second The second iterator to swap. + friend void swap(BaseIterator& first, BaseIterator& second) noexcept { + first.swap(second); + } + + /// \returns A reference to the current entry pointed to by the iterator. + virtual Entry& operator*() noexcept = 0; + + /// \returns A pointer to the current entry pointed to by the iterator. + Entry* operator->() noexcept { + return &(**this); + } + + /// \copydoc operator*() + virtual Entry& entry() = 0; + + /// \returns A reference to the value of the entry currently pointed to by the + /// iterator. + virtual ValueType& value() = 0; + + /// \returns A reference to the key of the entry currently pointed to by the + /// iterator. + virtual const Key& key() = 0; + + protected: + template + friend class BaseIterator; + + /// The underlying iterator this iterator class adapts. + UnderlyingIterator _iterator; + + /// The entry optionally being stored. + Optional _entry; + + /// A pointer to the cache this iterator points into. + /// Pointer and not reference because it's cheap to copy. + /// Pointer and not `std::reference_wrapper` because the class needs to be + /// default-constructible. 
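// A sketch of the lazy-entry idea used above: the iterator materializes its
// entry only on the first dereference and drops it again when it moves on.
// std::optional (C++17) stands in for the library's internal Optional; all
// names here are illustrative.
#include <optional>
#include <string>
#include <unordered_map>

struct LazyEntryIteratorSketch {
  using Map = std::unordered_map<std::string, int>;
  struct Entry { const std::string* key; int* value; };

  Map::iterator underlying;
  std::optional<Entry> cached;   // empty until the first dereference

  Entry& dereference() {
    if (!cached) cached.emplace(Entry{&underlying->first, &underlying->second});
    return *cached;
  }

  void advance() {
    ++underlying;
    cached.reset();              // the old entry no longer matches
  }
};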
+ Cache* _cache; +}; +} // namespace Internal +} // namespace LRU + +#endif // LRU_INTERNAL_BASE_ITERATOR_HPP diff --git a/include/lru/internal/base-ordered-iterator.hpp b/include/lru/internal/base-ordered-iterator.hpp new file mode 100644 index 0000000..a0ed951 --- /dev/null +++ b/include/lru/internal/base-ordered-iterator.hpp @@ -0,0 +1,338 @@ +/// The MIT License (MIT) +/// Copyright (c) 2016 Peter Goldsborough +/// +/// Permission is hereby granted, free of charge, to any person obtaining a copy +/// of this software and associated documentation files (the "Software"), to +/// deal in the Software without restriction, including without limitation the +/// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +/// sell copies of the Software, and to permit persons to whom the Software is +/// furnished to do so, subject to the following conditions: +/// +/// The above copyright notice and this permission notice shall be included in +/// all copies or substantial portions of the Software. +/// +/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +/// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +/// IN THE SOFTWARE. + +#ifndef BASE_ORDERED_ITERATOR_HPP +#define BASE_ORDERED_ITERATOR_HPP + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace LRU { +namespace Internal { + +template +using BaseForBaseOrderedIterator = + BaseIterator::const_iterator>; + +/// The base class for all const and non-const ordered iterators. +/// +/// Ordered iterators are bidirectional iterators that iterate over the keys of +/// a cache in the order in which they were inserted into the cache. As they +/// only iterate over the keys, they must perform hash lookups to retrieve the +/// value the first time they are dereferenced. This makes them slightly less +/// efficient than unordered iterators. However, they also have the additional +/// property that they may be constructed from unordered iterators, and that +/// they can be decremented. +/// +/// \tparam Key The key type over which instances of the iterator iterate. +/// \tparam Value The value type over which instances of the iterator iterate. +/// \tparam Cache The type of the cache instances of the iterator point into. +template +class BaseOrderedIterator + : public BaseForBaseOrderedIterator { + protected: + using super = BaseForBaseOrderedIterator; + using PRIVATE_BASE_ITERATOR_MEMBERS; + using UnderlyingIterator = typename Queue::const_iterator; + + public: + using Tag = LRU::Tag::OrderedIterator; + using PUBLIC_BASE_ITERATOR_MEMBERS; + + /// Constructor. + BaseOrderedIterator() noexcept = default; + + /// \copydoc BaseIterator::BaseIterator(Cache,UnderlyingIterator) + BaseOrderedIterator(Cache& cache, UnderlyingIterator iterator) + : super(cache, iterator) { + } + + /// Generalized copy constructor. + /// + /// \param other The ordered iterator to contruct from. + template + BaseOrderedIterator( + const BaseOrderedIterator& other) + : super(other) { + } + + /// Generalized move constructor. + /// + /// \param other The ordered iterator to move into this one. 
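// Ordered iteration as described above boils down to walking the insertion-
// order key list and doing one hash lookup per dereference. A plain sketch:
#include <iostream>
#include <list>
#include <string>
#include <unordered_map>

void print_in_insertion_order(const std::list<std::string>& order,
                              const std::unordered_map<std::string, int>& map) {
  for (const auto& key : order) {                       // insertion order
    std::cout << key << " = " << map.at(key) << '\n';   // one lookup per key
  }
}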
+ template + BaseOrderedIterator(BaseOrderedIterator&& other) + : super(std::move(other)) { + } + + /// Generalized conversion copy constructor. + /// + /// \param unordered_iterator The unordered iterator to construct from. + template < + typename AnyCache, + typename UnderlyingIterator, + typename = std::enable_if_t< + std::is_same, std::decay_t>::value>> + BaseOrderedIterator(const BaseUnorderedIterator& + unordered_iterator) { + // Atomicity + _throw_if_at_invalid(unordered_iterator); + _cache = unordered_iterator._cache; + _iterator = unordered_iterator._iterator->second.order; + } + + /// Generalized conversion move constructor. + /// + /// \param unordered_iterator The unordered iterator to move-construct from. + template < + typename AnyCache, + typename UnderlyingIterator, + typename = std::enable_if_t< + std::is_same, std::decay_t>::value>> + BaseOrderedIterator(BaseUnorderedIterator&& + unordered_iterator) { + // Atomicity + _throw_if_at_invalid(unordered_iterator); + _cache = std::move(unordered_iterator._cache); + _entry = std::move(unordered_iterator._entry); + _iterator = std::move(unordered_iterator._iterator->second.order); + } + + /// Copy constructor. + BaseOrderedIterator(const BaseOrderedIterator& other) = default; + + /// Move constructor. + BaseOrderedIterator(BaseOrderedIterator&& other) = default; + + /// Copy assignment operator. + BaseOrderedIterator& operator=(const BaseOrderedIterator& other) = default; + + /// Move assignment operator. + BaseOrderedIterator& operator=(BaseOrderedIterator&& other) = default; + + /// Destructor. + virtual ~BaseOrderedIterator() = default; + + /// Checks for equality between this iterator and another ordered iterator. + /// + /// \param other The other ordered iterator. + /// \returns True if both iterators point to the same entry, else false. + bool operator==(const BaseOrderedIterator& other) const noexcept { + return this->_iterator == other._iterator; + } + + /// Checks for inequality between this iterator another ordered iterator. + /// + /// \param other The other ordered iterator. + /// \returns True if the iterators point to different entries, else false. + bool operator!=(const BaseOrderedIterator& other) const noexcept { + return !(*this == other); + } + + /// Checks for inequality between this iterator and another unordered + /// iterator. + /// + /// \param other The other unordered iterator. + /// \returns True if both iterators point to the end of the same cache, else + /// the result of comparing with the unordered iterator, converted to an + /// ordered iterator. + template + bool operator==( + const BaseUnorderedIterator& other) const + noexcept { + if (this->_cache != other._cache) return false; + + // The past-the-end iterators of the same cache should compare equal. + // This is an exceptional guarantee we make. This is also the reason + // why we can't rely on the conversion from unordered to ordered iterators + // because construction of an ordered iterator from the past-the-end + // unordered iterator will fail (with an InvalidIteratorConversion error) + if (other == other._cache->unordered_end()) { + return *this == this->_cache->ordered_end(); + } + + // Will call the other overload + return *this == static_cast(other); + } + + /// Checks for equality between an unordered iterator and an ordered iterator. + /// + /// \param first The unordered iterator. + /// \param second The ordered iterator. 
+ /// \returns True if both iterators point to the end of the same cache, else + /// the result of comparing with the unordered iterator, converted to an + /// ordered iterator. + template + friend bool operator==( + const BaseUnorderedIterator& first, + const BaseOrderedIterator& second) noexcept { + return second == first; + } + + /// Checks for inequality between an unordered + /// iterator and an ordered iterator. + /// + /// \param first The ordered iterator. + /// \param second The unordered iterator. + /// \returns True if the iterators point to different entries, else false. + template + friend bool + operator!=(const BaseOrderedIterator& first, + const BaseUnorderedIterator& + second) noexcept { + return !(first == second); + } + + /// Checks for inequality between an unordered + /// iterator and an ordered iterator. + /// + /// \param first The unordered iterator. + /// \param second The ordered iterator. + /// \returns True if the iterators point to different entries, else false. + template + friend bool operator!=( + const BaseUnorderedIterator& first, + const BaseOrderedIterator& second) noexcept { + return second != first; + } + + /// Increments the iterator to the next entry. + /// + /// If the iterator already pointed to the end any number of increments + /// before, behavior is undefined. + /// + /// \returns The resulting iterator. + BaseOrderedIterator& operator++() { + ++_iterator; + _entry.reset(); + return *this; + } + + /// Increments the iterator and returns a copy of the previous one. + /// + /// If the iterator already pointed to the end any number of increments + /// before, behavior is undefined. + /// + /// \returns A copy of the previous iterator. + BaseOrderedIterator operator++(int) { + auto previous = *this; + ++*this; + return previous; + } + + /// Decrements the iterator to the previous entry. + /// + /// \returns The resulting iterator. + BaseOrderedIterator& operator--() { + --_iterator; + _entry.reset(); + return *this; + } + + /// Decrements the iterator and returns a copy of the previous entry. + /// + /// \returns The previous iterator. + BaseOrderedIterator operator--(int) { + auto previous = *this; + --*this; + return previous; + } + + Entry& operator*() noexcept override { + return _maybe_lookup(); + } + + /// \returns A reference to the entry the iterator points to. + /// \details If the iterator is invalid, behavior is undefined. + Entry& entry() override { + _cache->throw_if_invalid(*this); + return _maybe_lookup(); + } + + /// \returns A reference to the value the iterator points to. + /// \details If the iterator is invalid, behavior is undefined. + Value& value() override { + return entry().value(); + } + + /// \returns A reference to the key the iterator points to. + /// \details If the iterator is invalid, behavior is undefined. + const Key& key() override { + // No lookup required + _cache->throw_if_invalid(*this); + return *_iterator; + } + + protected: + template + friend class BaseOrderedIterator; + + /// Looks up the entry for a key if this was not done already. + /// + /// \returns The entry, which was possibly newly looked up. + Entry& _maybe_lookup() { + if (!_entry.has_value()) { + _lookup(); + } + + return *_entry; + } + + /// Looks up the entry for a key and sets the internal entry member. + void _lookup() { + auto iterator = _cache->_map.find(*_iterator); + _entry.emplace(iterator->first, iterator->second.value); + } + + private: + /// Throws an exception if the given unordered iterator is invalid. 
+ /// + /// \param unordered_iterator The iterator to check. + /// \throws LRU::Error::InvalidIteratorConversion if the iterator is invalid. + template + void _throw_if_at_invalid(const UnorderedIterator& unordered_iterator) { + // For atomicity of the copy assignment, we assign the cache pointer only + // after this check in the copy/move constructor and use the iterator's + // cache. If an exception is thrown, the state of the ordered iterator is + // unchanged compared to before the assignment. + if (unordered_iterator == unordered_iterator._cache->unordered_end()) { + throw LRU::Error::InvalidIteratorConversion(); + } + } +}; + +} // namespace Internal +} // namespace LRU + +#endif // BASE_ORDERED_ITERATOR_HPP diff --git a/include/lru/internal/base-unordered-iterator.hpp b/include/lru/internal/base-unordered-iterator.hpp new file mode 100644 index 0000000..f294b13 --- /dev/null +++ b/include/lru/internal/base-unordered-iterator.hpp @@ -0,0 +1,216 @@ +/// The MIT License (MIT) +/// Copyright (c) 2016 Peter Goldsborough +/// +/// Permission is hereby granted, free of charge, to any person obtaining a copy +/// of this software and associated documentation files (the "Software"), to +/// deal in the Software without restriction, including without limitation the +/// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +/// sell copies of the Software, and to permit persons to whom the Software is +/// furnished to do so, subject to the following conditions: +/// +/// The above copyright notice and this permission notice shall be included in +/// all copies or substantial portions of the Software. +/// +/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +/// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +/// IN THE SOFTWARE. + +#ifndef BASE_UNORDERED_ITERATOR_HPP +#define BASE_UNORDERED_ITERATOR_HPP + +#include +#include +#include + +#include +#include +#include +#include +#include + + +namespace LRU { + +// Forward declaration. +template +class TimedCache; + +namespace Internal { +template +using BaseForBaseUnorderedIterator = + BaseIteratorfirst), + decltype(UnderlyingIterator()->second.value), + Cache, + UnderlyingIterator>; + +/// The base class for all const and non-const unordered iterators. +/// +/// An unordered iterator is a wrapper around an `unordered_map` iterator with +/// ForwardIterator category. As such, it is (nearly) as fast to access the pair +/// as through the unordered iterator as through the map iterator directly. +/// However, the order of keys is unspecified. For this reason, unordered +/// iterators have the special property that they may be used to construct +/// ordered iterators, after which the order of insertion is respected. +/// +/// \tparam Cache The type of the cache instances of the iterator point into. +/// \tparam UnderlyingIterator The underlying iterator class used to implement +/// the iterator. 
+template +class BaseUnorderedIterator + : public BaseForBaseUnorderedIterator { + protected: + using super = BaseForBaseUnorderedIterator; + using PRIVATE_BASE_ITERATOR_MEMBERS; + // These are the key and value types the BaseIterator extracts + using Key = typename super::KeyType; + using Value = typename super::ValueType; + + public: + using Tag = LRU::Tag::UnorderedIterator; + using PUBLIC_BASE_ITERATOR_MEMBERS; + + /// Constructor. + BaseUnorderedIterator() noexcept = default; + + /// \copydoc BaseIterator::BaseIterator(Cache,UnderlyingIterator) + explicit BaseUnorderedIterator(Cache& cache, + const UnderlyingIterator& iterator) noexcept + : super(cache, iterator) { + } + + /// Generalized copy constructor. + /// + /// Useful mainly for non-const to const conversion. + /// + /// \param other The iterator to copy from. + template + BaseUnorderedIterator( + const BaseUnorderedIterator& + other) noexcept + : super(other) { + } + + /// Copy constructor. + BaseUnorderedIterator(const BaseUnorderedIterator& other) noexcept = default; + + /// Move constructor. + BaseUnorderedIterator(BaseUnorderedIterator&& other) noexcept = default; + + /// Copy assignment operator. + BaseUnorderedIterator& + operator=(const BaseUnorderedIterator& other) noexcept = default; + + /// Move assignment operator. + template + BaseUnorderedIterator& + operator=(BaseUnorderedIterator + unordered_iterator) noexcept { + swap(unordered_iterator); + return *this; + } + + /// Destructor. + virtual ~BaseUnorderedIterator() = default; + + /// Compares this iterator for equality with another unordered iterator. + /// + /// \param other Another unordered iterator. + /// \returns True if both iterators point to the same entry, else false. + template + bool + operator==(const BaseUnorderedIterator& other) const + noexcept { + return this->_iterator == other._iterator; + } + + /// Compares this iterator for inequality with another unordered iterator. + /// + /// \param other Another unordered iterator. + /// \returns True if the iterators point to different entries, else false. + template + bool + operator!=(const BaseUnorderedIterator& other) const + noexcept { + return !(*this == other); + } + + /// Increments the iterator to the next entry. + /// + /// If the iterator already pointed to the end any number of increments + /// before, behavior is undefined. + /// + /// \returns The resulting iterator. + BaseUnorderedIterator& operator++() { + ++_iterator; + _entry.reset(); + return *this; + } + + /// Increments the iterator and returns a copy of the previous one. + /// + /// If the iterator already pointed to the end any number of increments + /// before, behavior is undefined. + /// + /// \returns A copy of the previous iterator. + BaseUnorderedIterator operator++(int) { + auto previous = *this; + ++*this; + return previous; + } + + /// \copydoc BaseIterator::operator* + /// \details If the iterator is invalid, behavior is undefined. No exception + /// handling is performed. + Entry& operator*() noexcept override { + if (!_entry.has_value()) { + _entry.emplace(_iterator->first, _iterator->second.value); + } + + return *_entry; + } + + /// \returns A reference to the entry the iterator points to. + /// \throws LRU::Error::InvalidIterator if the iterator is the end iterator. + /// \throws LRU::Error::KeyExpired if the key pointed to by the iterator has + /// expired. 
+ Entry& entry() override { + if (!_entry.has_value()) { + _entry.emplace(_iterator->first, _iterator->second.value); + } + + _cache->throw_if_invalid(*this); + return *_entry; + } + + /// \returns A reference to the key the iterator points to. + /// \throws LRU::Error::InvalidIterator if the iterator is the end iterator. + /// \throws LRU::Error::KeyExpired if the key pointed to by the iterator has + /// expired. + const Key& key() override { + return entry().key(); + } + + /// \returns A reference to the value the iterator points to. + /// \throws LRU::Error::InvalidIterator if the iterator is the end iterator. + /// \throws LRU::Error::KeyExpired if the key pointed to by the iterator has + /// expired. + Value& value() override { + return entry().value(); + } + + protected: + template + friend class BaseOrderedIterator; + + template + friend class LRU::TimedCache; +}; +} // namespace Internal +} // namespace LRU + +#endif // BASE_UNORDERED_ITERATOR_HPP diff --git a/include/lru/internal/callback-manager.hpp b/include/lru/internal/callback-manager.hpp new file mode 100644 index 0000000..c1fc889 --- /dev/null +++ b/include/lru/internal/callback-manager.hpp @@ -0,0 +1,159 @@ +/// The MIT License (MIT) +/// Copyright (c) 2016 Peter Goldsborough +/// +/// Permission is hereby granted, free of charge, to any person obtaining a copy +/// of this software and associated documentation files (the "Software"), to +/// deal in the Software without restriction, including without limitation the +/// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +/// sell copies of the Software, and to permit persons to whom the Software is +/// furnished to do so, subject to the following conditions: +/// +/// The above copyright notice and this permission notice shall be included in +/// all copies or substantial portions of the Software. +/// +/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +/// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +/// IN THE SOFTWARE. + +#ifndef LRU_INTERNAL_CALLBACK_MANAGER_HPP +#define LRU_INTERNAL_CALLBACK_MANAGER_HPP + +#include +#include + +#include +#include + +namespace LRU { +namespace Internal { + +/// Manages hit, miss and access callbacks for a cache. +/// +/// The callback manager implements the "publisher" of the observer pattern we +/// implement. It stores and calls three kinds of callbacks: +/// 1. Hit callbacks, taking a key and value after a cache hit. +/// 2. Miss callbacks, taking only a key, that was not found in a cache. +/// 3. Access callbacks, taking a key and a boolean indicating a hit or a miss. +/// +/// Callbacks can be added, accessed and cleared. +template +class CallbackManager { + public: + using HitCallback = std::function; + using MissCallback = std::function; + using AccessCallback = std::function; + + using HitCallbackContainer = std::vector; + using MissCallbackContainer = std::vector; + using AccessCallbackContainer = std::vector; + + /// Calls all callbacks registered for a hit, with the given key and value. + /// + /// \param key The key for which a cache hit ocurred. + /// \param value The value that was found for the key. 
+ void hit(const Key& key, const Value& value) { + _call_each(_hit_callbacks, key, value); + _call_each(_access_callbacks, key, true); + } + + /// Calls all callbacks registered for a miss, with the given key. + /// + /// \param key The key for which a cache miss ocurred. + void miss(const Key& key) { + _call_each(_miss_callbacks, key); + _call_each(_access_callbacks, key, false); + } + + /// Registers a new hit callback. + /// + /// \param hit_callback The hit callback function to register with the + /// manager. + template + void hit_callback(Callback&& hit_callback) { + _hit_callbacks.emplace_back(std::forward(hit_callback)); + } + + /// Registers a new miss callback. + /// + /// \param miss_callback The miss callback function to register with the + /// manager. + template + void miss_callback(Callback&& miss_callback) { + _miss_callbacks.emplace_back(std::forward(miss_callback)); + } + + /// Registers a new access callback. + /// + /// \param access_callback The access callback function to register with the + /// manager. + template + void access_callback(Callback&& access_callback) { + _access_callbacks.emplace_back(std::forward(access_callback)); + } + + /// Clears all hit callbacks. + void clear_hit_callbacks() { + _hit_callbacks.clear(); + } + + /// Clears all miss callbacks. + void clear_miss_callbacks() { + _miss_callbacks.clear(); + } + + /// Clears all access callbacks. + void clear_access_callbacks() { + _access_callbacks.clear(); + } + + /// Clears all callbacks. + void clear() { + clear_hit_callbacks(); + clear_miss_callbacks(); + clear_access_callbacks(); + } + + /// \returns All hit callbacks. + const HitCallbackContainer& hit_callbacks() const noexcept { + return _hit_callbacks; + } + + /// \returns All miss callbacks. + const MissCallbackContainer& miss_callbacks() const noexcept { + return _miss_callbacks; + } + + /// \returns All access callbacks. + const AccessCallbackContainer& access_callbacks() const noexcept { + return _access_callbacks; + } + + private: + /// Calls each function in the given container with the given arguments. + /// + /// \param callbacks The container of callbacks to call. + /// \param args The arguments to call the callbacks with. + template + void _call_each(const CallbackContainer& callbacks, Args&&... args) { + for (const auto& callback : callbacks) { + callback(std::forward(args)...); + } + } + + /// The container of hit callbacks registered. + HitCallbackContainer _hit_callbacks; + + /// The container of miss callbacks registered. + MissCallbackContainer _miss_callbacks; + + /// The container of access callbacks registered. 
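// A sketch exercising the callback manager on its own. The template parameters
// are assumed to be the key and value types; everything else follows the
// members shown above.
#include <iostream>
#include <string>

void callback_manager_example() {
  LRU::Internal::CallbackManager<std::string, int> callbacks;

  callbacks.hit_callback([](const std::string& key, int value) {
    std::cout << "hit " << key << " -> " << value << '\n';
  });
  callbacks.access_callback([](const std::string& key, bool was_hit) {
    std::cout << key << (was_hit ? ": hit\n" : ": miss\n");
  });

  callbacks.hit("a", 1);  // hit callbacks, then access callbacks with true
  callbacks.miss("b");    // miss callbacks, then access callbacks with false
}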
+ AccessCallbackContainer _access_callbacks; +}; +} // namespace Internal +} // namespace LRU + +#endif // LRU_INTERNAL_CALLBACK_MANAGER_HPP diff --git a/include/lru/internal/definitions.hpp b/include/lru/internal/definitions.hpp new file mode 100644 index 0000000..cf0d1c7 --- /dev/null +++ b/include/lru/internal/definitions.hpp @@ -0,0 +1,88 @@ +/// The MIT License (MIT) +/// Copyright (c) 2016 Peter Goldsborough +/// +/// Permission is hereby granted, free of charge, to any person obtaining a copy +/// of this software and associated documentation files (the "Software"), to +/// deal in the Software without restriction, including without limitation the +/// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +/// sell copies of the Software, and to permit persons to whom the Software is +/// furnished to do so, subject to the following conditions: +/// +/// The above copyright notice and this permission notice shall be included in +/// all copies or substantial portions of the Software. +/// +/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +/// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +/// IN THE SOFTWARE. + +#ifndef LRU_INTERNAL_DEFINITIONS_HPP +#define LRU_INTERNAL_DEFINITIONS_HPP + +#include +#include +#include +#include +#include +#include + +namespace LRU { +namespace Internal { + +/// The default capacity for all caches. +const std::size_t DEFAULT_CAPACITY = 128; + +/// The reference type use to store keys in the order queue. +template +using Reference = std::reference_wrapper; + +/// Compares two References for equality. +/// +/// This is necessary because `std::reference_wrapper` does not define any +/// operator overloads. We do need them, however (e.g. for container +/// comparison). +/// +/// \param first The first reference to compare. +/// \param second The second reference to compare. +template +bool operator==(const Reference& first, const Reference& second) { + return first.get() == second.get(); +} + +/// Compares two References for inequality. +/// +/// This is necessary because `std::reference_wrapper` does not define any +/// operator overloads. We do need them, however (e.g. for container +/// comparison). +/// +/// \param first The first reference to compare. +/// \param second The second reference to compare. +template +bool operator!=(const Reference& first, const Reference& second) { + return !(first == second); +} + +/// The default queue type used internally. +template +using Queue = std::list>; + +/// The default map type used internally. +template +using Map = std::unordered_map; + +/// The default clock used internally. +using Clock = std::chrono::steady_clock; + +/// The default timestamp (time point) used internally. 
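// The Reference comparison operators above exist so that containers of
// reference wrappers can be compared element-wise. A standard-library sketch
// of the same idea, comparing through .get():
#include <algorithm>
#include <functional>
#include <list>
#include <string>

using KeyRef = std::reference_wrapper<const std::string>;

bool same_keys(const std::list<KeyRef>& a, const std::list<KeyRef>& b) {
  return std::equal(a.begin(), a.end(), b.begin(), b.end(),
                    [](const KeyRef& x, const KeyRef& y) {
                      return x.get() == y.get();
                    });
}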
+using Timestamp = Clock::time_point; +} // namespace Internal +} // namespace LRU + + +#endif // LRU_INTERNAL_DEFINITIONS_HPP diff --git a/include/lru/internal/hash.hpp b/include/lru/internal/hash.hpp new file mode 100644 index 0000000..49dcc0f --- /dev/null +++ b/include/lru/internal/hash.hpp @@ -0,0 +1,62 @@ +/// The MIT License (MIT) +/// Copyright (c) 2016 Peter Goldsborough +/// +/// Permission is hereby granted, free of charge, to any person obtaining a copy +/// of this software and associated documentation files (the "Software"), to +/// deal in the Software without restriction, including without limitation the +/// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +/// sell copies of the Software, and to permit persons to whom the Software is +/// furnished to do so, subject to the following conditions: +/// +/// The above copyright notice and this permission notice shall be included in +/// all copies or substantial portions of the Software. +/// +/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +/// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +/// IN THE SOFTWARE. + +#ifndef LRU_INTERNAL_HASH_HPP +#define LRU_INTERNAL_HASH_HPP + +#include +#include +#include + +/// `std::hash` specialization to allow storing tuples as keys +/// in `std::unordered_map`. +/// +/// Essentially hashes all tuple elements and jumbles the +/// individual hashes together. +namespace std { +template +struct hash> { + using argument_type = std::tuple; + using result_type = std::size_t; + + result_type operator()(const argument_type& argument) const { + return hash_tuple(argument, std::make_index_sequence()); + } + + private: + template + result_type + hash_tuple(const argument_type& tuple, std::index_sequence) const { + auto value = std::get(tuple); + auto current = std::hash{}(value); + auto seed = hash_tuple(tuple, std::index_sequence()); + + // http://www.boost.org/doc/libs/1_35_0/doc/html/boost/hash_combine_id241013.html + return current + 0x9e3779b9 + (seed << 6) + (seed >> 2); + } + + result_type hash_tuple(const argument_type&, std::index_sequence<>) const { + return 0; + } +}; +} // namespace std + +#endif // LRU_INTERNAL_HASH_HPP diff --git a/include/lru/internal/information.hpp b/include/lru/internal/information.hpp new file mode 100644 index 0000000..4a28a98 --- /dev/null +++ b/include/lru/internal/information.hpp @@ -0,0 +1,145 @@ +/// The MIT License (MIT) +/// Copyright (c) 2016 Peter Goldsborough +/// +/// Permission is hereby granted, free of charge, to any person obtaining a copy +/// of this software and associated documentation files (the "Software"), to +/// deal in the Software without restriction, including without limitation the +/// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +/// sell copies of the Software, and to permit persons to whom the Software is +/// furnished to do so, subject to the following conditions: +/// +/// The above copyright notice and this permission notice shall be included in +/// all copies or substantial portions of the Software. 
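// The tuple hash above folds the element hashes together with the boost-style
// combine step shown in hash_tuple(). The same step, written as a free
// function over two pre-computed hashes (illustrative names):
#include <cstddef>
#include <functional>
#include <string>

std::size_t combine_hashes(std::size_t seed, std::size_t current) {
  // http://www.boost.org/doc/libs/1_35_0/doc/html/boost/hash_combine_id241013.html
  return current + 0x9e3779b9 + (seed << 6) + (seed >> 2);
}

std::size_t hash_int_and_string(int a, const std::string& b) {
  std::size_t seed = std::hash<int>{}(a);
  return combine_hashes(seed, std::hash<std::string>{}(b));
}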
+/// +/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +/// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +/// IN THE SOFTWARE. + +#ifndef LRU_INTERNAL_INFORMATION_HPP +#define LRU_INTERNAL_INFORMATION_HPP + +#include +#include +#include + +#include +#include + +namespace LRU { +namespace Internal { + +/// The value type of internal maps, used to store a value and iterator. +/// +/// This information object is the basis of an LRU cache, which must associated +/// a value and such an order iterator with a key, such that the iterator may be +/// moved to the front of the order when the key is updated with a new value. +/// +/// \tparam Key The key type of the information. +/// \tparam Value The value type of the information. +template +struct Information { + using KeyType = Key; + using ValueType = Value; + using QueueIterator = typename Internal::Queue::const_iterator; + + /// Constructor. + /// + /// \param value_ The value for the information. + /// \param order_ The order iterator for the information. + explicit Information(const Value& value_, + QueueIterator order_ = QueueIterator()) + : value(value_), order(order_) { + } + + /// Constructor. + /// + /// \param order_ The order iterator for the information. + /// \param value_arguments Any number of arguments to perfectly forward to the + /// value type's constructor. + // template + // Information(QueueIterator order_, ValueArguments&&... value_arguments) + // : value(std::forward(value_arguments)...), order(order_) { + // } + + /// Constructor. + /// + /// \param order_ The order iterator for the information. + /// \param value_arguments A tuple of arguments to perfectly forward to the + /// value type's constructor. + /// + template + explicit Information(const std::tuple& value_arguments, + QueueIterator order_ = QueueIterator()) + : Information( + order_, value_arguments, Internal::tuple_indices(value_arguments)) { + } + + /// Copy constructor. + Information(const Information& other) = default; + + /// Move constructor. + Information(Information&& other) = default; + + /// Copy assignment operator. + Information& operator=(const Information& other) = default; + + /// Move assignment operator. + Information& operator=(Information&& other) = default; + + /// Destructor. + virtual ~Information() = default; + + /// Compares the information for equality with another information object. + /// + /// \param other The other information object to compare to. + /// \returns True if key and value (not the iterator itself) of the two + /// information objects are equal, else false. + virtual bool operator==(const Information& other) const noexcept { + if (this == &other) return true; + if (this->value != other.value) return false; + // We do not compare the iterator (because otherwise two containers + // holding information would never be equal). We also do not compare + // the key stored in the iterator, because keys will always have been + // compared before this operator is called. + return true; + } + + /// Compares the information for inequality with another information object. + /// + /// \param other The other information object to compare for. 
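// The tuple-taking constructor above delegates to a private overload (shown
// further below) that expands the tuple of value-constructor arguments with an
// index sequence. A self-contained sketch of that technique; the Holder type
// and its member are illustrative:
#include <cstddef>
#include <string>
#include <tuple>
#include <utility>

struct Holder {
  std::string value;

  template <typename... Args>
  explicit Holder(const std::tuple<Args...>& arguments)
      : Holder(arguments, std::index_sequence_for<Args...>{}) {}

 private:
  template <typename... Args, std::size_t... Indices>
  Holder(const std::tuple<Args...>& arguments, std::index_sequence<Indices...>)
      : value(std::get<Indices>(arguments)...) {}
};

// Holder holder(std::forward_as_tuple(3, 'x'));  // value == "xxx"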
+ /// \returns True if key and value (not the iterator itself) of the two + /// information objects are unequal, else false. + virtual bool operator!=(const Information& other) const noexcept { + return !(*this == other); + } + + /// The value of the information. + Value value; + + /// The order iterator of the information. + QueueIterator order; + + private: + /// Implementation for the constructor taking a tuple of arguments for the + /// value. + /// + /// \param order_ The order iterator for the information. + /// \param value_argument The tuple of arguments to perfectly forward to the + /// value type's constructor. + /// \param _ An index sequence to access the elements of the tuple + template + Information(const QueueIterator& order_, + const std::tuple& value_argument, + std::index_sequence _) + : value(std::forward(std::get(value_argument))...) + , order(order_) { + } +}; +} // namespace Internal +} // namespace LRU + +#endif // LRU_INTERNAL_INFORMATION_HPP diff --git a/include/lru/internal/last-accessed.hpp b/include/lru/internal/last-accessed.hpp new file mode 100644 index 0000000..c6bfb25 --- /dev/null +++ b/include/lru/internal/last-accessed.hpp @@ -0,0 +1,254 @@ +/// The MIT License (MIT) +/// Copyright (c) 2016 Peter Goldsborough +/// +/// Permission is hereby granted, free of charge, to any person obtaining a copy +/// of this software and associated documentation files (the "Software"), to +/// deal in the Software without restriction, including without limitation the +/// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +/// sell copies of the Software, and to permit persons to whom the Software is +/// furnished to do so, subject to the following conditions: +/// +/// The above copyright notice and this permission notice shall be included in +/// all copies or substantial portions of the Software. +/// +/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +/// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +/// IN THE SOFTWARE. + +#ifndef LRU_INTERNAL_LAST_ACCESSED_HPP +#define LRU_INTERNAL_LAST_ACCESSED_HPP + +#include +#include +#include + +#include + +namespace LRU { +namespace Internal { + +/// Provides a simple iterator-compatible pointer object for a key and +/// information. +/// +/// The easisest idea for this class, theoretically, would be to just store an s +/// iterator to the internal cache map (i.e. template the class on the iterator +/// type). However, the major trouble with that approach is that this class +/// should be 100% *mutable*, as in "always non-const", so that keys and +/// informations +/// we store for fast access can be (quickly) retrieved as either const or +/// non-const (iterators for example). This is not possible, since the +/// const-ness of `const_iterators` are not the usual idea of const in C++, +/// meaning especially it cannot be cast away with a `const_cast` as is required +/// for the mutability. As such, we *must* store the plain keys and +/// informations. +/// This, however, means that iterators cannot be stored efficiently, since a +/// new hash table lookup would be required to go from a key to its iterator. 
+/// However, since the main use case of this class is to avoid a second lookup +/// in the usual `if (cache.contains(key)) return cache.lookup(key)`, which is +/// not an issue for iterators since they can be compared to the `end` iterator +/// in constant time (equivalent to the call to `contains()`). +/// +/// WARNING: This class stores *pointers* to keys and informations. As such +/// lifetime +/// of the pointed-to objects must be cared for by the user of this class. +/// +/// \tparam Key The type of key being accessed. +/// \tparam InformationType The type of information being accessed. +/// \tparam KeyEqual The type of the key comparison function. +template > +class LastAccessed { + public: + /// Constructor. + /// + /// \param key_equal The function to compare keys with. + explicit LastAccessed(const KeyEqual& key_equal = KeyEqual()) + : _key(nullptr) + , _information(nullptr) + , _is_valid(false) + , _key_equal(key_equal) { + } + + /// Constructor. + /// + /// \param key The key to store a reference to. + /// \param information The information to store a reference to. + /// \param key_equal The function to compare keys with. + LastAccessed(const Key& key, + const InformationType& information, + const KeyEqual& key_equal = KeyEqual()) + : _key(const_cast(&key)) + , _information(const_cast(&information)) + , _is_valid(true) + , _key_equal(key_equal) { + } + + /// Constructor. + /// + /// \param iterator An iterator pointing to a key and information to use for + /// constructing the instance. + /// \param key_equal The function to compare keys with. + template + explicit LastAccessed(Iterator iterator, + const KeyEqual& key_equal = KeyEqual()) + : LastAccessed(iterator->first, iterator->second, key_equal) { + } + + /// Copy assignment operator for iterators. + /// + /// \param iterator An iterator pointing to a key and value to use for the + /// `LastAccessed` instance. + /// \return The resulting `LastAccessed` instance. + template + LastAccessed& operator=(Iterator iterator) { + _key = const_cast(&(iterator->first)); + _information = const_cast(&(iterator->second)); + _is_valid = true; + + return *this; + } + + /// Compares a `LastAccessed` object for equality with a key. + /// + /// \param last_accessed The `LastAccessed` instance to compare. + /// \param key The key instance to compare. + /// \returns True if the key of the `LastAccessed` object's key equals the + /// given key, else false. + friend bool + operator==(const LastAccessed& last_accessed, const Key& key) noexcept { + if (!last_accessed._is_valid) return false; + return last_accessed._key_equal(key, last_accessed.key()); + } + + /// \copydoc operator==(const LastAccessed&,const Key&) + friend bool + operator==(const Key& key, const LastAccessed& last_accessed) noexcept { + return last_accessed == key; + } + + /// Compares a `LastAccessed` object for equality with an iterator. + /// + /// \param last_accessed The `LastAccessed` instance to compare. + /// \param iterator The iterator to compare with. + /// \returns True if the `LastAccessed` object's key equals that of the + /// iterator, else false. 
+ template > + friend bool + operator==(const LastAccessed& last_accessed, Iterator iterator) noexcept { + /// Fast comparisons to an iterator (not relying on implicit conversion) + return last_accessed == iterator->first; + } + + /// \copydoc operator==(const LastAccessed&,Iterator) + template > + friend bool + operator==(Iterator iterator, const LastAccessed& last_accessed) noexcept { + return last_accessed == iterator; + } + + /// Compares a `LastAccessed` object for inequality with something. + /// + /// \param last_accessed The `LastAccessed` instance to compare. + /// \param other Something else to compare to. + /// \returns True if the key of the `LastAccessed` object's key does not equal + /// the given other object's key, else false. + template + friend bool + operator!=(const LastAccessed& last_accessed, const T& other) noexcept { + return !(last_accessed == other); + } + + /// \copydoc operator!=(const LastAccessed&,const T&) + template + friend bool + operator!=(const T& other, const LastAccessed& last_accessed) noexcept { + return !(other == last_accessed); + } + + /// \returns The last accessed key. + Key& key() noexcept { + assert(is_valid()); + return *_key; + } + + /// \returns The last accessed key. + const Key& key() const noexcept { + assert(is_valid()); + return *_key; + } + + /// \returns The last accessed information. + InformationType& information() noexcept { + assert(is_valid()); + return *_information; + } + + /// \returns The last accessed information. + const InformationType& information() const noexcept { + assert(is_valid()); + return *_information; + } + + /// \returns The last accessed information. + auto& iterator() noexcept { + assert(is_valid()); + return _information->order; + } + + /// \returns The last accessed value. + auto& value() noexcept { + assert(is_valid()); + return _information->value; + } + + /// \returns The last accessed value. + const auto& value() const noexcept { + assert(is_valid()); + return _information->value; + } + + /// \returns True if the key and information of the instance may be accessed, + /// else false. + bool is_valid() const noexcept { + return _is_valid; + } + + /// \copydoc is_valid() + explicit operator bool() const noexcept { + return is_valid(); + } + + /// Invalidates the instance. + void invalidate() noexcept { + _is_valid = false; + _key = nullptr; + _information = nullptr; + } + + /// \returns The key comparison function used. + const KeyEqual& key_equal() const noexcept { + return _key_equal; + } + + private: + /// A pointer to the key that was last accessed (if any). + Key* _key; + + /// A pointer to the information that was last accessed (if any). + InformationType* _information; + + /// True if the key and information pointers are valid, else false. + bool _is_valid; + + /// The function used to compare keys. 
+ KeyEqual _key_equal; +}; +} // namespace Internal +} // namespace LRU + +#endif // LRU_INTERNAL_LAST_ACCESSED_HPP diff --git a/include/lru/internal/optional.hpp b/include/lru/internal/optional.hpp new file mode 100644 index 0000000..8336d3b --- /dev/null +++ b/include/lru/internal/optional.hpp @@ -0,0 +1,207 @@ +/// The MIT License (MIT) +/// Copyright (c) 2016 Peter Goldsborough +/// +/// Permission is hereby granted, free of charge, to any person obtaining a copy +/// of this software and associated documentation files (the "Software"), to +/// deal in the Software without restriction, including without limitation the +/// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +/// sell copies of the Software, and to permit persons to whom the Software is +/// furnished to do so, subject to the following conditions: +/// +/// The above copyright notice and this permission notice shall be included in +/// all copies or substantial portions of the Software. +/// +/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +/// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +/// IN THE SOFTWARE. + +#ifndef LRU_INTERNAL_OPTIONAL_HPP +#define LRU_INTERNAL_OPTIONAL_HPP + +#ifndef __has_include +#define USE_LRU_OPTIONAL +#elif __has_include() + +#include + +namespace LRU { +namespace Internal { +template +using Optional = std::optional; +} // namespace Internal +} // namespace LRU + +#else +#define USE_LRU_OPTIONAL +#endif + +#ifdef USE_LRU_OPTIONAL +#include +#include + +namespace LRU { +namespace Internal { + +// A roll-your-own replacement of `std::optional`. +// +// This class is only to be used if `std::optional` is unavailable. It +// implements an optional type simply on top of a `unique_ptr`. It is +// API-compatible with `std::optional`, as required for our purposes. +template +class Optional { + public: + /// Constructor. + Optional() = default; + + /// Copy constructor. + /// + /// \param other The other optional object to copy from. + Optional(const Optional& other) { + if (other) emplace(*other); + } + + /// Generalized copy constructor. + /// + /// \param other The other optional object to copy from. + template ::value>> + Optional(const Optional& other) { + if (other) emplace(*other); + } + + /// Move constructor. + /// + /// \param other The other optional object to move into this one. + Optional(Optional&& other) noexcept { + swap(other); + } + + /// Generalized move constructor. + /// + /// \param other The other optional object to move into this one. + template ::value>> + Optional(Optional&& other) noexcept { + if (other) { + _value = std::make_unique(std::move(*other)); + } + } + + /// Assignment operator. + /// + /// \param other The other object to assign from. + /// \returns The resulting optional instance. + Optional& operator=(Optional other) noexcept { + swap(other); + return *this; + } + + /// Swaps the contents of this optional with another one. + /// + /// \param other The other optional to swap with. + void swap(Optional& other) { + _value.swap(other._value); + } + + /// Swaps the contents of two optionals. + /// + /// \param first The first optional to swap. 
+ /// \param second The second optional to swap. + friend void swap(Optional& first, Optional& second) /* NOLINT */ { + first.swap(second); + } + + /// \returns True if the `Optional` has a value, else false. + bool has_value() const noexcept { + return static_cast(_value); + } + + /// \copydoc has_value() + explicit operator bool() const noexcept { + return has_value(); + } + + /// \returns A pointer to the current value. Behavior is undefined if the + /// optional has no value. + T* operator->() { + return _value.get(); + } + + /// \returns A const pointer to the current value. Behavior is undefined if + /// the `Optional` has no value. + const T* operator->() const { + return _value.get(); + } + + /// \returns A const reference to the current value. Behavior is undefined if + /// the `Optional` has no value. + const T& operator*() const { + return *_value; + } + + /// \returns A reference to the current value. Behavior is undefined if + /// the `Optional` has no value. + T& operator*() { + return *_value; + } + + /// \returns A reference to the current value. + /// \throws std::runtime_error If the `Optional` currently has no value. + T& value() { + if (!has_value()) { + // Actually std::bad_optional_access + throw std::runtime_error("optional has no value"); + } + + return *_value; + } + + /// \returns A const reference to the current value. + /// \throws std::runtime_error If the `Optional` currently has no value. + const T& value() const { + if (!has_value()) { + // Actually std::bad_optional_access + throw std::runtime_error("optional has no value"); + } + + return *_value; + } + + /// \returns The current value, or the given argument if there is no value. + /// \param default_value The value to return if this `Optional` currently has + /// no value. + template + T value_or(U&& default_value) const { + return *this ? **this : static_cast(std::forward(default_value)); + } + + /// Resets the `Optional` to have no value. + void reset() noexcept { + _value.reset(); + } + + /// Constructs the `Optional`'s value with the given arguments. + /// + /// \param args Arguments to perfeclty forward to the value's constructor. + template + void emplace(Args&&... args) { + _value = std::make_unique(std::forward(args)...); + } + + private: + template + friend class Optional; + + /// The value, as we implement it. + std::unique_ptr _value; +}; +} // namespace Internal +} // namespace LRU + +#endif + +#endif // LRU_INTERNAL_OPTIONAL_HPP diff --git a/include/lru/internal/statistics-mutator.hpp b/include/lru/internal/statistics-mutator.hpp new file mode 100644 index 0000000..4dfd52a --- /dev/null +++ b/include/lru/internal/statistics-mutator.hpp @@ -0,0 +1,150 @@ +/// The MIT License (MIT) +/// Copyright (c) 2016 Peter Goldsborough +/// +/// Permission is hereby granted, free of charge, to any person obtaining a copy +/// of this software and associated documentation files (the "Software"), to +/// deal in the Software without restriction, including without limitation the +/// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +/// sell copies of the Software, and to permit persons to whom the Software is +/// furnished to do so, subject to the following conditions: +/// +/// The above copyright notice and this permission notice shall be included in +/// all copies or substantial portions of the Software. 
+/// +/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +/// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +/// IN THE SOFTWARE. + +#ifndef LRU_STATISTICS_MUTATOR_HPP +#define LRU_STATISTICS_MUTATOR_HPP + +#include +#include +#include +#include + +#include +#include + +namespace LRU { +namespace Internal { + +/// A mutable proxy interface to a statistics object. +/// +/// The `StatisticsMutator` allows modification of the members of a statistics +/// object via a narrow interface, available only to internal classes. The point +/// of this is that while we don't want the user to be able to modify the hit or +/// miss count on a statistics object (it is "getter-only" in that sense), it's +/// also not ideal, from an encapsulation standpoint, to make the cache classes +/// (which do need to access and modify the hit and miss counts) friends of the +/// statistics. This is especially true since the caches should only need to +/// register hits or misses and not have to increment the count of total +/// accesses. As such, we really require a "package-level" interface that is not +/// visible to the end user, while at the same time providing an interface to +/// internal classes. The `StatisticsMutator` is a proxy/adapter class that +/// serves exactly this purpose. It is friends with the `Statistics` and can +/// thus access its members. At the same time the interface it defines is narrow +/// and provides only the necessary interface for the cache classes to register +/// hits and misses. +template +class StatisticsMutator { + public: + using StatisticsPointer = std::shared_ptr>; + + /// Constructor. + StatisticsMutator() noexcept = default; + + /// Constructor. + /// + /// \param stats A shared pointer lvalue reference. + StatisticsMutator(const StatisticsPointer& stats) // NOLINT(runtime/explicit) + : _stats(stats) { + } + + /// Constructor. + /// + /// \param stats A shared pointer rvalue reference to move into the + /// mutator. + StatisticsMutator(StatisticsPointer&& stats) // NOLINT(runtime/explicit) + : _stats(std::move(stats)) { + } + + /// Registers a hit for the given key with the internal statistics. + /// + /// \param key The key to register a hit for. + void register_hit(const Key& key) { + assert(has_stats()); + + _stats->_total_accesses += 1; + _stats->_total_hits += 1; + + auto iterator = _stats->_key_map.find(key); + if (iterator != _stats->_key_map.end()) { + iterator->second.hits += 1; + } + } + + /// Registers a miss for the given key with the internal statistics. + /// + /// \param key The key to register a miss for. + void register_miss(const Key& key) { + assert(has_stats()); + + _stats->_total_accesses += 1; + + auto iterator = _stats->_key_map.find(key); + if (iterator != _stats->_key_map.end()) { + iterator->second.misses += 1; + } + } + + /// \returns A reference to the statistics object. + Statistics& get() noexcept { + assert(has_stats()); + return *_stats; + } + + /// \returns A const reference to the statistics object. + const Statistics& get() const noexcept { + assert(has_stats()); + return *_stats; + } + + /// \returns A `shared_ptr` to the statistics object. 
+ StatisticsPointer& shared() noexcept { + return _stats; + } + + /// \returns A const `shared_ptr` to the statistics object. + const StatisticsPointer& shared() const noexcept { + return _stats; + } + + /// \returns True if the mutator has a statistics object, else false. + bool has_stats() const noexcept { + return _stats != nullptr; + } + + /// \copydoc has_stats() + explicit operator bool() const noexcept { + return has_stats(); + } + + /// Resets the internal statistics pointer. + void reset() { + _stats.reset(); + } + + private: + /// A shared pointer to a statistics object. + std::shared_ptr> _stats; +}; + +} // namespace Internal +} // namespace LRU + +#endif // LRU_STATISTICS_MUTATOR_HPP diff --git a/include/lru/internal/timed-information.hpp b/include/lru/internal/timed-information.hpp new file mode 100644 index 0000000..72865a6 --- /dev/null +++ b/include/lru/internal/timed-information.hpp @@ -0,0 +1,116 @@ +/// The MIT License (MIT) +/// Copyright (c) 2016 Peter Goldsborough +/// +/// Permission is hereby granted, free of charge, to any person obtaining a copy +/// of this software and associated documentation files (the "Software"), to +/// deal in the Software without restriction, including without limitation the +/// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +/// sell copies of the Software, and to permit persons to whom the Software is +/// furnished to do so, subject to the following conditions: +/// +/// The above copyright notice and this permission notice shall be included in +/// all copies or substantial portions of the Software. +/// +/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +/// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +/// IN THE SOFTWARE. + +#ifndef LRU_INTERNAL_TIMED_INFORMATION_HPP +#define LRU_INTERNAL_TIMED_INFORMATION_HPP + +#include +#include +#include + +#include +#include +#include + +namespace LRU { +namespace Internal { + +/// The information object for timed caches. +/// +/// TimedInformation differs from plain information only in that it stores the +/// creation time, to know when a key has expired. +/// +/// \tparam Key The key type of the information. +/// \tparam Value The value type of the information. +template +struct TimedInformation : public Information { + using super = Information; + using typename super::QueueIterator; + using Timestamp = Internal::Timestamp; + + /// Constructor. + /// + /// \param value_ The value for the information. + /// \param insertion_time_ The insertion timestamp of the key. + /// \param order_ The order iterator for the information. + TimedInformation(const Value& value_, + const Timestamp& insertion_time_, + QueueIterator order_ = QueueIterator()) + : super(value_, order_), insertion_time(insertion_time_) { + } + + /// Constructor. + /// + /// Uses the current time as the insertion timestamp. + /// + /// \param value_ The value for the information. + /// \param order_ The order iterator for the information. 
+ explicit TimedInformation(const Value& value_, + QueueIterator order_ = QueueIterator()) + : TimedInformation(value_, Internal::Clock::now(), order_) { + } + + /// \copydoc Information::Information(QueueIterator,ValueArguments&&) + template + TimedInformation(QueueIterator order_, ValueArguments&&... value_argument) + : super(std::forward(value_argument)..., order_) + , insertion_time(Internal::Clock::now()) { + } + + /// \copydoc Information::Information(QueueIterator,const + /// std::tuple&) + template + explicit TimedInformation( + const std::tuple& value_arguments, + QueueIterator order_ = QueueIterator()) + : super(value_arguments, order_), insertion_time(Internal::Clock::now()) { + } + + /// Compares this timed information for equality with another one. + /// + /// Additionally to key and value equality, the timed information requires + /// that the insertion timestamps be equal. + /// + /// \param other The other timed information. + /// \returns True if this information equals the other one, else false. + bool operator==(const TimedInformation& other) const noexcept { + if (super::operator!=(other)) return false; + return this->insertion_time == other.insertion_time; + } + + /// Compares this timed information for inequality with another one. + /// + /// \param other The other timed information. + /// \returns True if this information does not equal the other one, else + /// false. + /// \see operator==() + bool operator!=(const TimedInformation& other) const noexcept { + return !(*this == other); + } + + /// The time at which the key of the information was insterted into a cache. + const Timestamp insertion_time; +}; + +} // namespace Internal +} // namespace LRU + +#endif // LRU_INTERNAL_TIMED_INFORMATION_HPP diff --git a/include/lru/internal/utility.hpp b/include/lru/internal/utility.hpp new file mode 100644 index 0000000..2be5a8d --- /dev/null +++ b/include/lru/internal/utility.hpp @@ -0,0 +1,178 @@ +/// The MIT License (MIT) +/// Copyright (c) 2016 Peter Goldsborough +/// +/// Permission is hereby granted, free of charge, to any person obtaining a copy +/// of this software and associated documentation files (the "Software"), to +/// deal in the Software without restriction, including without limitation the +/// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +/// sell copies of the Software, and to permit persons to whom the Software is +/// furnished to do so, subject to the following conditions: +/// +/// The above copyright notice and this permission notice shall be included in +/// all copies or substantial portions of the Software. +/// +/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +/// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +/// IN THE SOFTWARE. + +#ifndef LRU_UTILITY_HPP +#define LRU_UTILITY_HPP + +#include +#include +#include +#include + +namespace LRU { +namespace Internal { + +/// Generates an index sequence for a tuple. +/// +/// \tparam Ts The types of the tuple (to deduce the size). 
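///
/// Hypothetical example (editorial sketch, not part of this diff): for a
/// three-element tuple the generated sequence is std::index_sequence<0, 1, 2>:
///
///   auto t = std::make_tuple(1, 2.0, std::string("x"));
///   auto indices = tuple_indices(t);  // std::index_sequence<0, 1, 2>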
+template +constexpr auto tuple_indices(const std::tuple&) { + return std::make_index_sequence(); +} + +/// Applies (in the functional sense) a tuple to the constructor of a class. +/// +/// \tparam T The type to construct. +/// \tparam Indices The indices into the tuple (generated from an index +/// sequence). +/// \param args The tuple of arguments to construct the object with. +template +constexpr T construct_from_tuple(const std::tuple& arguments, + std::index_sequence) { + return T(std::forward(std::get(arguments))...); +} + +/// Applies (in the functional sense) a tuple to the constructor of a class. +/// +/// \tparam T The type to construct. +/// \param args The tuple of arguments to construct the object with. +template +constexpr T construct_from_tuple(const std::tuple& args) { + return construct_from_tuple(args, tuple_indices(args)); +} + +/// Applies (in the functional sense) a tuple to the constructor of a class. +/// +/// \tparam T The type to construct. +/// \param args The tuple of arguments to construct the object with. +template +constexpr T construct_from_tuple(std::tuple&& args) { + return construct_from_tuple(std::move(args), tuple_indices(args)); +} + +/// A type trait that disables a template overload if a type is not an iterator. +/// +/// \tparam T the type to check. +template +using enable_if_iterator = typename std::iterator_traits::value_type; + +/// A type trait that disables a template overload if a type is not a range. +/// +/// \tparam T the type to check. +template +using enable_if_range = std::pair().begin()), + decltype(std::declval().end())>; + +/// A type trait that disables a template overload if a type is not an iterator +/// over a pair. +/// +/// \tparam T the type to check. +template +using enable_if_iterator_over_pair = + std::pair::value_type::first_type, + typename std::iterator_traits::value_type::first_type>; + + +/// A type trait that disables a template overload if a type is not convertible +/// to a target type. +/// +/// \tparam Target The type one wants to check against. +/// \tparam T The type to check. +template +using enable_if_same = std::enable_if_t::value>; + +/// Base case for `static_all_of` (the neutral element of AND is true). +constexpr bool static_all_of() noexcept { + return true; +} + +/// Checks if all the given parameters evaluate to true. +/// +/// \param head The first expression to check. +/// \param tail The remaining expression to check. +template +constexpr bool static_all_of(Head&& head, Tail&&... tail) { + // Replace with (ts && ...) when the time is right + return std::forward(head) && static_all_of(std::forward(tail)...); +} + +/// Base case for `static_any_of` (the neutral element of OR is false). +constexpr bool static_any_of() noexcept { + return false; +} + +/// Checks if any the given parameters evaluate to true. +/// +/// \param head The first expression to check. +/// \param tail The remaining expression to check. +/// \returns True if any of the given parameters evaluate to true. +template +constexpr bool static_any_of(Head&& head, Tail&&... tail) { + // Replace with (ts || ...) when the time is right + return std::forward(head) || static_any_of(std::forward(tail)...); +} + +/// Checks if none the given parameters evaluate to true. +/// +/// \param ts The expressions to check. +/// \returns True if any of the given parameters evaluate to true. +template +constexpr bool static_none_of(Ts&&... ts) { + // Replace with (!ts && ...) 
when the time is right + return !static_any_of(std::forward(ts)...); +} + +/// Checks if all the given types are convertible to the first type. +/// +/// \tparam T the first type. +/// \tparam Ts The types to check against the first. +template +constexpr bool + all_of_type = static_all_of(std::is_convertible::value...); + +/// Checks if none of the given types are convertible to the first type. +/// +/// \tparam T the first type. +/// \tparam Ts The types to check against the first. +template +constexpr bool + none_of_type = static_none_of(std::is_convertible::value...); + +/// Base case for `for_each`. +template +void for_each(Function) noexcept { +} + +/// Calls a function for each of the given variadic arguments. +/// +/// \param function The function to call for each argument. +/// \param head The first value to call the function with. +/// \param tail The remaining values to call the function with. +template +void for_each(Function function, Head&& head, Tail&&... tail) { + function(std::forward(head)); + for_each(function, std::forward(tail)...); +} + +} // namespace Internal +} // namespace LRU + +#endif // LRU_UTILITY_HPP diff --git a/include/lru/iterator-tags.hpp b/include/lru/iterator-tags.hpp new file mode 100644 index 0000000..d4115ce --- /dev/null +++ b/include/lru/iterator-tags.hpp @@ -0,0 +1,40 @@ +/// The MIT License (MIT) +/// Copyright (c) 2016 Peter Goldsborough +/// +/// Permission is hereby granted, free of charge, to any person obtaining a copy +/// of this software and associated documentation files (the "Software"), to +/// deal in the Software without restriction, including without limitation the +/// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +/// sell copies of the Software, and to permit persons to whom the Software is +/// furnished to do so, subject to the following conditions: +/// +/// The above copyright notice and this permission notice shall be included in +/// all copies or substantial portions of the Software. +/// +/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +/// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +/// IN THE SOFTWARE. 
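// Editorial sketch (not part of this diff): a minimal, self-contained illustration of the
// variadic helpers defined in include/lru/internal/utility.hpp above. The include path and
// the stripped template parameter lists are assumed to match the upstream header.
#include <iostream>
#include <string>

#include <lru/internal/utility.hpp>

int main() {
  using namespace LRU::Internal;

  // Compile-time conjunction / disjunction over a pack of boolean expressions.
  static_assert(static_all_of(true, true, true), "all arguments are true");
  static_assert(!static_any_of(false, false), "no argument is true");

  // all_of_type<T, Ts...> checks that every Ts is convertible to the first type T.
  static_assert(all_of_type<long, int, short, char>, "all convertible to long");

  // for_each applies the callable to each trailing argument in turn.
  for_each([](const auto& x) { std::cout << x << ' '; }, 1, 2.5, std::string("three"));
  std::cout << '\n';
}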
+ +#ifndef LRU_ITERATOR_TAGS_HPP +#define LRU_ITERATOR_TAGS_HPP + +namespace LRU { +namespace Tag { +struct OrderedIterator {}; +struct UnorderedIterator {}; +} // namespace Tag + +namespace Lowercase { +namespace tag { +using ordered_iterator = ::LRU::Tag::OrderedIterator; +using unordered_iterator = ::LRU::Tag::UnorderedIterator; +} // namespace tag +} // namespace Lowercase + +} // namespace LRU + +#endif // LRU_ITERATOR_TAGS_HPP diff --git a/include/lru/key-statistics.hpp b/include/lru/key-statistics.hpp new file mode 100644 index 0000000..6989db0 --- /dev/null +++ b/include/lru/key-statistics.hpp @@ -0,0 +1,67 @@ +/// The MIT License (MIT) +/// Copyright (c) 2016 Peter Goldsborough +/// +/// Permission is hereby granted, free of charge, to any person obtaining a copy +/// of this software and associated documentation files (the "Software"), to +/// deal in the Software without restriction, including without limitation the +/// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +/// sell copies of the Software, and to permit persons to whom the Software is +/// furnished to do so, subject to the following conditions: +/// +/// The above copyright notice and this permission notice shall be included in +/// all copies or substantial portions of the Software. +/// +/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +/// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +/// IN THE SOFTWARE. + + +#ifndef LRU_KEY_STATISTICS_HPP +#define LRU_KEY_STATISTICS_HPP + +#include + +namespace LRU { + +/// Stores statistics for a single key. +/// +/// The statistics stored are the total number of hits and the total number of +/// misses. The total number of acccesses (the sum of hits and misses) may be +/// accessed as well. +struct KeyStatistics { + using size_t = std::size_t; + + /// Constructor. + /// + /// \param hits_ The initial number of hits for the key. + /// \param misses_ The initial number of misses for the key. + explicit KeyStatistics(size_t hits_ = 0, size_t misses_ = 0) + : hits(hits_), misses(misses_) { + } + + /// \returns The total number of accesses made for the key. + /// \details This is the sum of the hits and misses. + size_t accesses() const noexcept { + return hits + misses; + } + + /// Resets the statistics for a key (sets them to zero). + void reset() { + hits = 0; + misses = 0; + } + + /// The number of hits for the key. + size_t hits; + + /// The number of misses for the key. 
+ size_t misses; +}; + +} // namespace LRU + +#endif // LRU_KEY_STATISTICS_HPP diff --git a/include/lru/lowercase.hpp b/include/lru/lowercase.hpp new file mode 100644 index 0000000..1f0c996 --- /dev/null +++ b/include/lru/lowercase.hpp @@ -0,0 +1,34 @@ +/// The MIT License (MIT) +/// Copyright (c) 2016 Peter Goldsborough +/// +/// Permission is hereby granted, free of charge, to any person obtaining a copy +/// of this software and associated documentation files (the "Software"), to +/// deal in the Software without restriction, including without limitation the +/// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +/// sell copies of the Software, and to permit persons to whom the Software is +/// furnished to do so, subject to the following conditions: +/// +/// The above copyright notice and this permission notice shall be included in +/// all copies or substantial portions of the Software. +/// +/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +/// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +/// IN THE SOFTWARE. + +#ifndef LRU_LOWERCASE_HPP +#define LRU_LOWERCASE_HPP + +#include + +namespace LRU { +using namespace Lowercase; // NOLINT(build/namespaces) +} // namespace LRU + +namespace lru = LRU; + + +#endif // LRU_LOWERCASE_HPP diff --git a/include/lru/lru.hpp b/include/lru/lru.hpp new file mode 100644 index 0000000..f3acae0 --- /dev/null +++ b/include/lru/lru.hpp @@ -0,0 +1,33 @@ +/// The MIT License (MIT) +/// Copyright (c) 2016 Peter Goldsborough +/// +/// Permission is hereby granted, free of charge, to any person obtaining a copy +/// of this software and associated documentation files (the "Software"), to +/// deal in the Software without restriction, including without limitation the +/// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +/// sell copies of the Software, and to permit persons to whom the Software is +/// furnished to do so, subject to the following conditions: +/// +/// The above copyright notice and this permission notice shall be included in +/// all copies or substantial portions of the Software. +/// +/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +/// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +/// IN THE SOFTWARE. 
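// Editorial sketch (not part of this diff): driving the LRU::Statistics class defined in
// include/lru/statistics.hpp below together with the Internal::StatisticsMutator from
// include/lru/internal/statistics-mutator.hpp above. In the library the cache registers
// hits and misses itself; doing it by hand here only exercises the read-side API.
// Include paths are assumed.
#include <cassert>
#include <memory>
#include <string>

#include <lru/internal/statistics-mutator.hpp>
#include <lru/statistics.hpp>

int main() {
  auto stats = std::make_shared<LRU::Statistics<std::string>>();
  stats->monitor("answer");  // per-key counters exist only for monitored keys

  LRU::Internal::StatisticsMutator<std::string> mutator(stats);
  mutator.register_hit("answer");
  mutator.register_miss("question");  // counted globally; "question" is unmonitored

  assert(stats->total_accesses() == 2);
  assert(stats->total_hits() == 1);
  assert(stats->hits_for("answer") == 1);
  assert(stats->hit_rate() == 0.5);
}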
+ +#ifndef LRU_HPP +#define LRU_HPP + +#include +#include +#include +#include +#include +#include +#include + +#endif // LRU_HPP diff --git a/include/lru/statistics.hpp b/include/lru/statistics.hpp new file mode 100644 index 0000000..0c97016 --- /dev/null +++ b/include/lru/statistics.hpp @@ -0,0 +1,256 @@ +/// The MIT License (MIT) +/// Copyright (c) 2016 Peter Goldsborough +/// +/// Permission is hereby granted, free of charge, to any person obtaining a copy +/// of this software and associated documentation files (the "Software"), to +/// deal in the Software without restriction, including without limitation the +/// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +/// sell copies of the Software, and to permit persons to whom the Software is +/// furnished to do so, subject to the following conditions: +/// +/// The above copyright notice and this permission notice shall be included in +/// all copies or substantial portions of the Software. +/// +/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +/// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +/// IN THE SOFTWARE. + + +#ifndef LRU_STATISTICS_HPP +#define LRU_STATISTICS_HPP + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace LRU { +namespace Internal { +template +class StatisticsMutator; +} + +/// Stores statistics about LRU cache utilization and efficiency. +/// +/// The statistics object stores the number of misses and hits were recorded for +/// a cache in total. Furthemore, it is possibly to register a number of keys +/// for *monitoring*. For each of these keys, an additional hit and miss count +/// is maintained, that can keep insight into the utiliization of a particular +/// cache. Note that accesses only mean lookups -- insertions or erasures will +/// never signify an "access". +/// +/// \tparam Key The type of the keys being monitored. +template +class Statistics { + public: + using size_t = std::size_t; + using InitializerList = std::initializer_list; + + /// Constructor. + Statistics() noexcept : _total_accesses(0), _total_hits(0) { + } + + /// Constructor. + /// + /// \param keys Any number of keys to monitor. + template >> + explicit Statistics(Keys&&... keys) : Statistics() { + // clang-format off + Internal::for_each([this](auto&& key) { + this->monitor(std::forward(key)); + }, std::forward(keys)...); + // clang-format on + } + + /// Constructor. + /// + /// \param range A range of keys to monitor. + template > + explicit Statistics(const Range& range) + : Statistics(std::begin(range), std::end(range)) { + } + + /// Constructor. + /// + /// \param begin The start iterator of a range of keys to monitor. + /// \param end The end iterator of a range of keys to monitor. + template > + Statistics(Iterator begin, Iterator end) : Statistics() { + for (; begin != end; ++begin) { + monitor(*begin); + } + } + + /// Constructor. + /// + /// \param list A list of keys to monitor. + Statistics(InitializerList list) // NOLINT(runtime/explicit) + : Statistics(list.begin(), list.end()) { + } + + /// \returns The total number of accesses (hits + misses) made to the cache. 
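/// \details For example (hypothetical numbers): after 3 recorded hits and 1
/// recorded miss, total_accesses() == 4, total_misses() == 1 and
/// hit_rate() == 0.75.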
+ size_t total_accesses() const noexcept { + return _total_accesses; + } + + /// \returns The total number of hits made to the cache. + size_t total_hits() const noexcept { + return _total_hits; + } + + /// \returns The total number of misses made to the cache. + size_t total_misses() const noexcept { + return total_accesses() - total_hits(); + } + + /// \returns The ratio of hits ($\in [0, 1]$) relative to all accesses. + double hit_rate() const noexcept { + return static_cast(total_hits()) / total_accesses(); + } + + /// \returns The ratio of misses ($\in [0, 1]$) relative to all accesses. + double miss_rate() const noexcept { + return 1 - hit_rate(); + } + + /// \returns The number of hits for the given key. + /// \param key The key to retrieve the hits for. + /// \throws LRU::UnmonitoredKey if the key was not registered for monitoring. + size_t hits_for(const Key& key) const { + return stats_for(key).hits; + } + + /// \returns The number of misses for the given key. + /// \param key The key to retrieve the misses for. + /// \throws LRU::UnmonitoredKey if the key was not registered for monitoring. + size_t misses_for(const Key& key) const { + return stats_for(key).misses; + } + + /// \returns The number of accesses (hits + misses) for the given key. + /// \param key The key to retrieve the accesses for. + /// \throws LRU::UnmonitoredKey if the key was not registered for monitoring. + size_t accesses_for(const Key& key) const { + return stats_for(key).accesses(); + } + + /// \returns A `KeyStatistics` object for the given key. + /// \param key The key to retrieve the stats for. + /// \throws LRU::UnmonitoredKey if the key was not registered for monitoring. + const KeyStatistics& stats_for(const Key& key) const { + auto iterator = _key_map.find(key); + if (iterator == _key_map.end()) { + throw LRU::Error::UnmonitoredKey(); + } + + return iterator->second; + } + + /// \copydoc stats_for() + const KeyStatistics& operator[](const Key& key) const { + return stats_for(key); + } + + /// Registers the key for monitoring. + /// + /// If the key was already registered, this is a no-op (most importantly, the + /// old statistics are __not__ wiped). + /// + /// \param key The key to register. + void monitor(const Key& key) { + // emplace does nothing if the key is already present + _key_map.emplace(key, KeyStatistics()); + } + + /// Unregisters the given key from monitoring. + /// + /// \param key The key to unregister. + /// \throws LRU::Error::UnmonitoredKey if the key was never registered for + /// monitoring. + void unmonitor(const Key& key) { + auto iterator = _key_map.find(key); + if (iterator == _key_map.end()) { + throw LRU::Error::UnmonitoredKey(); + } else { + _key_map.erase(iterator); + } + } + + /// Unregisters all keys from monitoring. + void unmonitor_all() { + _key_map.clear(); + } + + /// Clears all statistics for the given key, but keeps on monitoring it. + /// + /// \param key The key to reset. + void reset_key(const Key& key) { + auto iterator = _key_map.find(key); + if (iterator == _key_map.end()) { + throw LRU::Error::UnmonitoredKey(); + } else { + iterator->second.reset(); + } + } + + /// Clears the statistics of all keys, but keeps on monitoring it them. + void reset_all() { + for (auto& pair : _key_map) { + _key_map.second.reset(); + } + } + + /// \returns True if the given key is currently registered for monitoring, + /// else false. + /// \param key The key to check for. 
+ bool is_monitoring(const Key& key) const noexcept { + return _key_map.count(key); + } + + /// \returns The number of keys currnetly being monitored. + size_t number_of_monitored_keys() const noexcept { + return _key_map.size(); + } + + /// \returns True if currently any keys at all are being monitored, else + /// false. + bool is_monitoring_keys() const noexcept { + return !_key_map.empty(); + } + + private: + template + friend class Internal::StatisticsMutator; + + using HitMap = std::unordered_map; + + /// The total number of accesses made for any key. + size_t _total_accesses; + + /// The total number of htis made for any key. + size_t _total_hits; + + /// The map to keep track of statistics for monitored keys. + HitMap _key_map; +}; + +namespace Lowercase { +template +using statistics = Statistics; +} // namespace Lowercase + +} // namespace LRU + +#endif // LRU_STATISTICS_HPP diff --git a/include/lru/timed-cache.hpp b/include/lru/timed-cache.hpp new file mode 100644 index 0000000..0038932 --- /dev/null +++ b/include/lru/timed-cache.hpp @@ -0,0 +1,391 @@ +/// The MIT License (MIT) +/// Copyright (c) 2016 Peter Goldsborough +/// +/// Permission is hereby granted, free of charge, to any person obtaining a copy +/// of this software and associated documentation files (the "Software"), to +/// deal in the Software without restriction, including without limitation the +/// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +/// sell copies of the Software, and to permit persons to whom the Software is +/// furnished to do so, subject to the following conditions: +/// +/// The above copyright notice and this permission notice shall be included in +/// all copies or substantial portions of the Software. +/// +/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +/// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +/// IN THE SOFTWARE. + +#ifndef LRU_TIMED_CACHE_HPP +#define LRU_TIMED_CACHE_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace LRU { +namespace Internal { +template +using TimedCacheBase = BaseCache; +} // namespace Internal + + +/// A timed LRU cache. +/// +/// A timed LRU cache behaves like a regular LRU cache, but adds the concept of +/// "expiration". The cache now not only remembers the order of insertion, but +/// also the point in time at which each element was inserted into the cache. +/// The cache then has an additional "time to live" property, which designates +/// the time after which a key in the cache is said to be "expired". Once a key +/// has expired, the cache will behave as if the key were not present in the +/// cache at all and, for example, return false on calls to `contains()` or +/// throw on calls to `lookup()`. 
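///
/// Hypothetical usage (editorial sketch; `emplace` and `contains` belong to the
/// BaseCache interface, which is used elsewhere in this diff but not shown in
/// this excerpt):
///
///   LRU::TimedCache<std::string, int> cache(std::chrono::seconds(10), 128);
///   cache.emplace("answer", 42);
///   cache.contains("answer");  // true while the entry is younger than 10 s
///   // ...more than 10 seconds later...
///   cache.contains("answer");  // false: the key has expired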
+/// +/// \see LRU::Cache +template , + typename HashFunction = std::hash, + typename KeyEqual = std::equal_to> +class TimedCache + : public Internal::TimedCacheBase { + private: + using super = Internal::TimedCacheBase; + using PRIVATE_BASE_CACHE_MEMBERS; + + public: + using Tag = LRU::Tag::TimedCache; + using PUBLIC_BASE_CACHE_MEMBERS; + using super::ordered_end; + using super::unordered_end; + using typename super::size_t; + + /// \param time_to_live The time to live for keys in the cache. + /// \copydoc BaseCache::BaseCache(size_t,const HashFunction&,const KeyEqual&) + template + explicit TimedCache(const AnyDurationType& time_to_live, + size_t capacity = Internal::DEFAULT_CAPACITY, + const HashFunction& hash = HashFunction(), + const KeyEqual& equal = KeyEqual()) + : super(capacity, hash, equal) + , _time_to_live(std::chrono::duration_cast(time_to_live)) { + } + + /// \param time_to_live The time to live for keys in the cache. + /// \copydoc BaseCache::BaseCache(size_t,Iterator,Iterator,const + /// HashFunction&,const + /// KeyEqual&) + template + TimedCache(const AnyDurationType& time_to_live, + size_t capacity, + Iterator begin, + Iterator end, + const HashFunction& hash = HashFunction(), + const KeyEqual& equal = KeyEqual()) + : super(capacity, begin, end, hash, equal) + , _time_to_live(std::chrono::duration_cast(time_to_live)) { + } + + /// \param time_to_live The time to live for keys in the cache. + /// \copydoc BaseCache::BaseCache(Iterator,Iterator,const HashFunction&,const + /// KeyEqual&) + template + TimedCache(const AnyDurationType& time_to_live, + Iterator begin, + Iterator end, + const HashFunction& hash = HashFunction(), + const KeyEqual& equal = KeyEqual()) + : super(begin, end, hash, equal) + , _time_to_live(std::chrono::duration_cast(time_to_live)) { + } + + /// \param time_to_live The time to live for keys in the cache. + /// \copydoc BaseCache::BaseCache(Range,size_t,const HashFunction&,const + /// KeyEqual&) + template > + TimedCache(const AnyDurationType& time_to_live, + size_t capacity, + Range&& range, + const HashFunction& hash = HashFunction(), + const KeyEqual& equal = KeyEqual()) + : super(capacity, std::forward(range), hash, equal) + , _time_to_live(std::chrono::duration_cast(time_to_live)) { + } + + /// \param time_to_live The time to live for keys in the cache. + /// \copydoc BaseCache::BaseCache(Range,const HashFunction&,const + /// KeyEqual&) + template > + explicit TimedCache(const AnyDurationType& time_to_live, + Range&& range, + const HashFunction& hash = HashFunction(), + const KeyEqual& equal = KeyEqual()) + : super(std::forward(range), hash, equal) + , _time_to_live(std::chrono::duration_cast(time_to_live)) { + } + + /// \param time_to_live The time to live for keys in the cache. + /// \copydoc BaseCache::BaseCache(InitializerList,const HashFunction&,const + /// KeyEqual&) + template + TimedCache(const AnyDurationType& time_to_live, + InitializerList list, + const HashFunction& hash = HashFunction(), + const KeyEqual& equal = KeyEqual()) // NOLINT(runtime/explicit) + : super(list, hash, equal), + _time_to_live(std::chrono::duration_cast(time_to_live)) { + } + + /// \param time_to_live The time to live for keys in the cache. 
+ /// \copydoc BaseCache::BaseCache(InitializerList,size_t,const + /// HashFunction&,const + /// KeyEqual&) + template + TimedCache(const AnyDurationType& time_to_live, + size_t capacity, + InitializerList list, + const HashFunction& hash = HashFunction(), + const KeyEqual& equal = KeyEqual()) // NOLINT(runtime/explicit) + : super(capacity, list, hash, equal), + _time_to_live(std::chrono::duration_cast(time_to_live)) { + } + + /// \copydoc BaseCache::swap + void swap(TimedCache& other) noexcept { + using std::swap; + + super::swap(other); + swap(_time_to_live, other._time_to_live); + } + + /// Swaps the contents of one cache with another cache. + /// + /// \param first The first cache to swap. + /// \param second The second cache to swap. + friend void swap(TimedCache& first, TimedCache& second) noexcept { + first.swap(second); + } + + /// \copydoc BaseCache::find(const Key&) + UnorderedIterator find(const Key& key) override { + auto iterator = _map.find(key); + if (iterator != _map.end()) { + if (!_has_expired(iterator->second)) { + _register_hit(key, iterator->second.value); + _move_to_front(iterator->second.order); + _last_accessed = iterator; + return {*this, iterator}; + } + } + + _register_miss(key); + + return end(); + } + + /// \copydoc BaseCache::find(const Key&) const + UnorderedConstIterator find(const Key& key) const override { + auto iterator = _map.find(key); + if (iterator != _map.end()) { + if (!_has_expired(iterator->second)) { + _register_hit(key, iterator->second.value); + _move_to_front(iterator->second.order); + _last_accessed = iterator; + return {*this, iterator}; + } + } + + _register_miss(key); + + return cend(); + } + + // no front() because we may have to erase the + // entire cache if everything happens to be expired + + /// \returns True if all keys in the cache have expired, else false. + bool all_expired() const { + // By the laws of predicate logic, any statement about any empty set is true + if (is_empty()) return true; + + /// If the most-recently inserted key has expired, all others must have too. + auto latest = _map.find(_order.back()); + return _has_expired(latest->second); + } + + /// Erases all expired elements from the cache. + /// + /// \complexity O(N) + /// \returns The number of elements erased. + size_t clear_expired() { + // We have to do a linear search here because linked lists do not + // support O(log N) binary searches given their node-based nature. + // Either way, in the worst case the entire cache has expired and + // we would have to do O(N) erasures. + + if (is_empty()) return 0; + + auto iterator = _order.begin(); + size_t number_of_erasures = 0; + + while (iterator != _order.end()) { + auto map_iterator = _map.find(*iterator); + + // If the current element hasn't expired, also all elements inserted + // after will not have, so we can stop. + if (!_has_expired(map_iterator->second)) break; + + _erase(map_iterator); + + iterator = _order.begin(); + number_of_erasures += 1; + } + + return number_of_erasures; + } + + /// \returns True if the given key is contained in the cache and has expired. + /// \param key The key to test expiration for. + bool has_expired(const Key& key) const noexcept { + auto iterator = _map.find(key); + return iterator != _map.end() && _has_expired(iterator->second); + } + + /// \returns True if the key pointed to by the iterator has expired. + /// \param ordered_iterator The ordered iterator to check. + /// \details If this is the end iterator, this method returns false. 
+ bool has_expired(OrderedConstIterator ordered_iterator) const noexcept { + if (ordered_iterator == ordered_end()) return false; + auto iterator = _map.find(ordered_iterator->key()); + assert(iterator != _map.end()); + + return _has_expired(iterator->second); + } + + /// \returns True if the key pointed to by the iterator has expired. + /// \param unordered_iterator The unordered iterator to check. + /// \details If this is the end iterator, this method returns false. + bool has_expired(UnorderedConstIterator unordered_iterator) const noexcept { + if (unordered_iterator == unordered_end()) return false; + assert(unordered_iterator._iterator != _map.end()); + + return _has_expired(unordered_iterator._iterator->second); + } + + /// \copydoc BaseCache::is_valid(UnorderedConstIterator) + bool is_valid(UnorderedConstIterator unordered_iterator) const + noexcept override { + if (!super::is_valid(unordered_iterator)) return false; + if (has_expired(unordered_iterator)) return false; + return true; + } + + /// \copydoc BaseCache::is_valid(OrderedConstIterator) + bool is_valid(OrderedConstIterator ordered_iterator) const noexcept override { + if (!super::is_valid(ordered_iterator)) return false; + if (has_expired(ordered_iterator)) return false; + return true; + } + + /// \copydoc BaseCache::is_valid(UnorderedConstIterator) + /// \throws LRU::Error::KeyExpired if the key pointed to by the iterator has + /// expired. + void + throw_if_invalid(UnorderedConstIterator unordered_iterator) const override { + super::throw_if_invalid(unordered_iterator); + if (has_expired(unordered_iterator)) { + throw LRU::Error::KeyExpired(); + } + } + + /// \copydoc BaseCache::is_valid(OrderedConstIterator) + /// \throws LRU::Error::KeyExpired if the key pointed to by the iterator has + /// expired. + void throw_if_invalid(OrderedConstIterator ordered_iterator) const override { + super::throw_if_invalid(ordered_iterator); + if (has_expired(ordered_iterator)) { + throw LRU::Error::KeyExpired(); + } + } + + private: + using Clock = Internal::Clock; + + /// \returns True if the last accessed object is valid. + /// \details Next to performing the base cache's action, this method also + /// checks for expiration of the last accessed key. + bool _last_accessed_is_ok(const Key& key) const noexcept override { + if (!super::_last_accessed_is_ok(key)) return false; + return !_has_expired(_last_accessed.information()); + } + + /// \copydoc _value_for_last_accessed() const + Value& _value_for_last_accessed() override { + auto& information = _last_accessed.information(); + if (_has_expired(information)) { + throw LRU::Error::KeyExpired(); + } else { + return information.value; + } + } + + /// Attempts to access the last accessed key's value. + /// \throws LRU::Error::KeyExpired if the key has expired. + /// \returns The value of the last accessed key. + const Value& _value_for_last_accessed() const override { + const auto& information = _last_accessed.information(); + if (_has_expired(information)) { + throw LRU::Error::KeyExpired(); + } else { + return information.value; + } + } + + /// Checks if a key has expired, given its information. + /// + /// \param information The information to check expiration with. + /// \returns True if the key has expired, else false. + bool _has_expired(const Information& information) const noexcept { + auto elapsed = Clock::now() - information.insertion_time; + return std::chrono::duration_cast(elapsed) > _time_to_live; + } + + /// The duration after which a key is said to be expired. 
+ Duration _time_to_live; +}; + +namespace Lowercase { +template +using timed_cache = TimedCache; +} // namespace Lowercase + +} // namespace LRU + +#endif // LRU_TIMED_CACHE_HPP diff --git a/include/lru/wrap.hpp b/include/lru/wrap.hpp new file mode 100644 index 0000000..e384968 --- /dev/null +++ b/include/lru/wrap.hpp @@ -0,0 +1,99 @@ +/// The MIT License (MIT) +/// Copyright (c) 2016 Peter Goldsborough +/// +/// Permission is hereby granted, free of charge, to any person obtaining a copy +/// of this software and associated documentation files (the "Software"), to +/// deal in the Software without restriction, including without limitation the +/// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +/// sell copies of the Software, and to permit persons to whom the Software is +/// furnished to do so, subject to the following conditions: +/// +/// The above copyright notice and this permission notice shall be included in +/// all copies or substantial portions of the Software. +/// +/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +/// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +/// IN THE SOFTWARE. + +#ifndef LRU_WRAP_HPP +#define LRU_WRAP_HPP + +#include +#include +#include +#include + +#include +#include +#include + +namespace LRU { + +/// Wraps a function with a "shallow" LRU cache. +/// +/// Given a function, this function will return a new function, where +/// "top-level" calls are cached. With "top-level" or "shallow", we mean +/// that recursive calls to the same function are not cached, since those +/// will call the original function symbol, not the wrapped one. +/// +/// \tparam CacheType The cache template class to use. +/// \param original_function The function to wrap. +/// \param args Any arguments to forward to the cache. +/// \returns A new function with a shallow LRU cache. +template class CacheType = Cache, + typename... Args> +auto wrap(Function original_function, Args&&... args) { + return [ + original_function, + cache_args = std::forward_as_tuple(std::forward(args)...) + ](auto&&... arguments) mutable { + using Arguments = std::tuple...>; + using ReturnType = decltype( + original_function(std::forward(arguments)...)); + + static_assert(!std::is_void::value, + "Return type of wrapped function must not be void"); + + static auto cache = + Internal::construct_from_tuple>( + cache_args); + + auto key = std::make_tuple(arguments...); + auto iterator = cache.find(key); + + if (iterator != cache.end()) { + return iterator->second; + } + + auto value = + original_function(std::forward(arguments)...); + cache.emplace(key, value); + + return value; + }; +} + +/// Wraps a function with a "shallow" LRU timed cache. +/// +/// Given a function, this function will return a new function, where +/// "top-level" calls are cached. With "top-level" or "shallow", we mean +/// that recursive calls to the same function are not cached, since those +/// will call the original function symbol, not the wrapped one. +/// +/// \param original_function The function to wrap. +/// \param args Any arguments to forward to the cache. +/// \returns A new function with a shallow LRU cache. 
+template +auto timed_wrap(Function original_function, Duration duration, Args&&... args) { + return wrap( + original_function, duration, std::forward(args)...); +} + +} // namespace LRU + +#endif // LRU_WRAP_HPP diff --git a/include/lrucache.hpp b/include/lrucache.hpp new file mode 100644 index 0000000..818d303 --- /dev/null +++ b/include/lrucache.hpp @@ -0,0 +1,74 @@ +/* + * File: lrucache.hpp + * Author: Alexander Ponomarev + * + * Created on June 20, 2013, 5:09 PM + */ + +#ifndef _LRUCACHE_HPP_INCLUDED_ +#define _LRUCACHE_HPP_INCLUDED_ + +//#include +#include "tsl/hopscotch_map.h" +#include +#include +#include + +namespace cache { + + template> +class lru_cache { +public: + typedef typename std::pair key_value_pair_t; + typedef typename std::list::iterator list_iterator_t; + + lru_cache(size_t max_size) : + _max_size(max_size) { + } + + void put(const key_t& key, const value_t& value) { + auto it = _cache_items_map.find(key); + _cache_items_list.push_front(key_value_pair_t(key, value)); + if (it != _cache_items_map.end()) { + _cache_items_list.erase(it->second); + _cache_items_map.erase(it); + } + _cache_items_map[key] = _cache_items_list.begin(); + + if (_cache_items_map.size() > _max_size) { + auto last = _cache_items_list.end(); + last--; + _cache_items_map.erase(last->first); + _cache_items_list.pop_back(); + } + } + + const value_t& get(const key_t& key) { + auto it = _cache_items_map.find(key); + if (it == _cache_items_map.end()) { + throw std::range_error("There is no such key in cache"); + } else { + _cache_items_list.splice(_cache_items_list.begin(), _cache_items_list, it->second); + return it->second->second; + } + } + + bool exists(const key_t& key) const { + return _cache_items_map.find(key) != _cache_items_map.end(); + } + + size_t size() const { + return _cache_items_map.size(); + } + +private: + std::list _cache_items_list; + //std::unordered_map _cache_items_map; + tsl::hopscotch_map _cache_items_map; + size_t _max_size; +}; + +} // namespace cache + +#endif /* _LRUCACHE_HPP_INCLUDED_ */ + diff --git a/include/monochromatic_component_iterator.h b/include/monochromatic_component_iterator.h new file mode 100644 index 0000000..139fa5d --- /dev/null +++ b/include/monochromatic_component_iterator.h @@ -0,0 +1,219 @@ +// +// Created by Fatemeh Almodaresi on 6/4/18. 
+// + +#ifndef MANTIS_MONOCHROME_ITERATOR_H +#define MANTIS_MONOCHROME_ITERATOR_H + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include "sparsepp/spp.h" +#include "tsl/sparse_map.h" +#include "sdsl/bit_vectors.hpp" +#include "bitvector.h" +#include "cqf.h" +#include "hashutil.h" +#include "common_types.h" + +namespace dna { + +/////////////// bases ///////////////// + enum base { + C = 0, A = 1, T = 2, G = 3 + }; + + base operator-(base b); // return the complementary base + extern const base bases[4]; + extern const std::map base_from_char; + extern const std::map base_to_char; + +///////////// kmers ///////////////////// + class kmer { + public: + int len; + uint64_t val; + + kmer(void); + + kmer(base b); + + kmer(int l, uint64_t v); + + kmer(std::string s); + + // Convert to string + operator std::string() const; + }; + + bool operator<(kmer a, kmer b); + + bool operator==(kmer a, kmer b); + + bool operator!=(kmer a, kmer b); + +// Return the reverse complement of k + kmer operator-(kmer k); + + kmer canonicalize(kmer k); + +// Return the kmer of length |a| that results from shifting b into a +// from the right + kmer operator<<(kmer a, kmer b); + +// Return the kmer of length |b| that results from shifting a into b +// from the left + kmer operator>>(kmer a, kmer b); + +// Append two kmers + kmer operator+(kmer a, kmer b); + + kmer suffix(kmer k, int len); + + kmer prefix(kmer k, int len); + +// The purpose of this class is to enable us to declare containers +// as holding canonical kmers, e.g. set. Then all +// inserts/queries/etc will automatically canonicalize their +// arguments. + class canonical_kmer : public kmer { + public: + canonical_kmer(void); + + canonical_kmer(base b); + + canonical_kmer(int l, uint64_t v); + + canonical_kmer(std::string s); + + canonical_kmer(kmer k); + }; +} + +struct Mc_stats { + uint64_t nodeCnt = 0; + uint64_t min_dist = -1; // infinity +}; + +typedef dna::canonical_kmer edge; // k-mer +typedef dna::canonical_kmer node; // (k-1)-mer + +struct hash128 { + uint64_t operator()(const __uint128_t &val128) const { + __uint128_t val = val128; + // Using the same seed as we use in k-mer hashing. 
+ return HashUtil::MurmurHash64A((void *) &val, sizeof(__uint128_t), + 2038074743); + } +}; + +struct bvHash128 { + __uint128_t operator()(const sdsl::bit_vector&bv) const { + return HashUtil::MurmurHash128A((void *) bv.data(), + bv.capacity()/8, 2038074743, + 2038074751); + } +}; + + +struct Edge { + uint32_t n1; + uint32_t n2; + + Edge(uint32_t inN1, uint32_t inN2) : n1(inN1), n2(inN2) {} + + bool operator==(const Edge& e) const { + return n1 == e.n1 && n2 == e.n2; + } +}; + +struct edge_hash { + uint64_t operator() (const Edge& e) const { + uint64_t res = e.n1; + return (res << 32) | (uint64_t)e.n2; + } +}; + +class monochromatic_component_iterator { +public: + class work_item { + public: + node curr; + uint64_t idx; + uint64_t colorid; + + work_item(node currin, uint64_t idxin, uint64_t coloridin) : curr(currin), idx(idxin), colorid(coloridin) {} + + bool operator<(const work_item &item2) const { + return (*this).curr < item2.curr; + } + }; + + bool done(); + + void operator++(void); + + Mc_stats operator*(void); + +//monochromatic_component_iterator(const CQF *g); + monochromatic_component_iterator(const CQF *g, + std::vector> &bvin, + uint64_t num_samplesin = 2586); + + void neighborDist(uint64_t cntrr); + void buildEqGraph(uint64_t cntrr); + + void uniqNeighborDist(uint64_t num_samples); + + uint64_t cntr{0}, isolatedCnt{0}; + std::vector withMax0; + //spp::sparse_hash_map<__uint128_t, uint64_t, hash128> eqclass_map; + spp::sparse_hash_map eqclass_map; + std::unordered_map edges; + + +private: + + uint32_t k; + std::queue work; + std::unordered_set visitedKeys; + const CQF *cqf; + CQF::Iterator it; + //BitVectorRRR &bv; + std::vector> &bvs; + uint64_t num_samples; + sdsl::bit_vector visited; + uint16_t distThreshold = 5; + + + bool exists(edge e, uint64_t &idx, uint64_t &eqid); + + std::set neighbors(monochromatic_component_iterator::work_item n); + + work_item front(std::queue &w); + + + uint64_t manhattanDist(uint64_t eq1, uint64_t eq2); + + __uint128_t manhattanDistBvHash(uint64_t eq1, uint64_t eq2, + uint64_t num_samples); + void manhattanDistBvHash(uint64_t eq1, uint64_t eq2, + sdsl::bit_vector& dist, + uint64_t num_samples); + void buildColor(std::vector &eq, uint64_t eqid); + +}; + + +#endif //MANTIS_MONOCHROME_ITERATOR_H diff --git a/include/nonstd/optional.hpp b/include/nonstd/optional.hpp new file mode 100644 index 0000000..b460791 --- /dev/null +++ b/include/nonstd/optional.hpp @@ -0,0 +1,1214 @@ +// +// Copyright (c) 2014-2018 Martin Moene +// +// https://github.com/martinmoene/optional-lite +// +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#pragma once + +#ifndef NONSTD_OPTIONAL_LITE_HPP +#define NONSTD_OPTIONAL_LITE_HPP + +#define optional_lite_VERSION "3.0.0" + +// Compiler detection (C++20 is speculative): +// Note: MSVC supports C++14 since it supports C++17. 
+ +#ifdef _MSVC_LANG +# define optional_MSVC_LANG _MSVC_LANG +#else +# define optional_MSVC_LANG 0 +#endif + +#define optional_CPP11 (__cplusplus == 201103L ) +#define optional_CPP11_OR_GREATER (__cplusplus >= 201103L || optional_MSVC_LANG >= 201103L ) +#define optional_CPP14_OR_GREATER (__cplusplus >= 201402L || optional_MSVC_LANG >= 201703L ) +#define optional_CPP17_OR_GREATER (__cplusplus >= 201703L || optional_MSVC_LANG >= 201703L ) +#define optional_CPP20_OR_GREATER (__cplusplus >= 202000L || optional_MSVC_LANG >= 202000L ) + +// use C++17 std::optional if available: + +#if defined( __has_include ) +# define optional_HAS_INCLUDE( arg ) __has_include( arg ) +#else +# define optional_HAS_INCLUDE( arg ) 0 +#endif + +#define optional_HAVE_STD_OPTIONAL ( optional_CPP17_OR_GREATER && optional_HAS_INCLUDE( ) ) + +#if optional_HAVE_STD_OPTIONAL + +#include + +namespace nonstd { + + using std::optional; + using std::bad_optional_access; + using std::hash; + + using std::nullopt; + using std::nullopt_t; + using std::in_place; + using std::in_place_type; + using std::in_place_index; + using std::in_place_t; + using std::in_place_type_t; + using std::in_place_index_t; + + using std::operator==; + using std::operator!=; + using std::operator<; + using std::operator<=; + using std::operator>; + using std::operator>=; + using std::make_optional; + using std::swap; +} + +#else // C++17 std::optional + +#include +#include +#include + +// optional-lite alignment configuration: + +#ifndef optional_CONFIG_MAX_ALIGN_HACK +# define optional_CONFIG_MAX_ALIGN_HACK 0 +#endif + +#ifndef optional_CONFIG_ALIGN_AS +// no default, used in #if defined() +#endif + +#ifndef optional_CONFIG_ALIGN_AS_FALLBACK +# define optional_CONFIG_ALIGN_AS_FALLBACK double +#endif + +// Compiler warning suppression: + +#ifdef __clang__ +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wundef" +#elif defined __GNUC__ +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wundef" +#endif + +// half-open range [lo..hi): +#define optional_BETWEEN( v, lo, hi ) ( lo <= v && v < hi ) + +#if defined(_MSC_VER) && !defined(__clang__) +# define optional_COMPILER_MSVC_VERSION (_MSC_VER / 10 - 10 * ( 5 + (_MSC_VER < 1900)) ) +#else +# define optional_COMPILER_MSVC_VERSION 0 +#endif + +#define optional_COMPILER_VERSION( major, minor, patch ) ( 10 * (10 * major + minor ) + patch ) + +#if defined __GNUC__ +# define optional_COMPILER_GNUC_VERSION optional_COMPILER_VERSION(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) +#else +# define optional_COMPILER_GNUC_VERSION 0 +#endif + +#if defined __clang__ +# define optional_COMPILER_CLANG_VERSION optional_COMPILER_VERSION(__clang_major__, __clang_minor__, __clang_patchlevel__) +#else +# define optional_COMPILER_CLANG_VERSION 0 +#endif + +#if optional_BETWEEN(optional_COMPILER_MSVC_VERSION, 70, 140 ) +# pragma warning( push ) +# pragma warning( disable: 4345 ) // initialization behavior changed +#endif + +#if optional_BETWEEN(optional_COMPILER_MSVC_VERSION, 70, 150 ) +# pragma warning( push ) +# pragma warning( disable: 4814 ) // in C++14 'constexpr' will not imply 'const' +#endif + +// Presence of language and library features: + +#define optional_HAVE(FEATURE) ( optional_HAVE_##FEATURE ) + +// Presence of C++11 language features: + +#if optional_CPP11_OR_GREATER || optional_COMPILER_MSVC_VERSION >= 100 +# define optional_HAVE_AUTO 1 +# define optional_HAVE_NULLPTR 1 +# define optional_HAVE_STATIC_ASSERT 1 +#endif + +#if optional_CPP11_OR_GREATER || 
optional_COMPILER_MSVC_VERSION >= 120 +# define optional_HAVE_DEFAULT_FUNCTION_TEMPLATE_ARG 1 +# define optional_HAVE_INITIALIZER_LIST 1 +#endif + +#if optional_CPP11_OR_GREATER || optional_COMPILER_MSVC_VERSION >= 140 +# define optional_HAVE_ALIAS_TEMPLATE 1 +# define optional_HAVE_CONSTEXPR_11 1 +# define optional_HAVE_ENUM_CLASS 1 +# define optional_HAVE_EXPLICIT_CONVERSION 1 +# define optional_HAVE_IS_DEFAULT 1 +# define optional_HAVE_IS_DELETE 1 +# define optional_HAVE_NOEXCEPT 1 +# define optional_HAVE_REF_QUALIFIER 1 +#endif + +// Presence of C++14 language features: + +#if optional_CPP14_OR_GREATER +# define optional_HAVE_CONSTEXPR_14 1 +#endif + +// Presence of C++17 language features: + +#if optional_CPP17_OR_GREATER +# define optional_HAVE_ENUM_CLASS_CONSTRUCTION_FROM_UNDERLYING_TYPE 1 +#endif + +// Presence of C++ library features: + +#if optional_COMPILER_GNUC_VERSION +# define optional_HAVE_TR1_TYPE_TRAITS 1 +# define optional_HAVE_TR1_ADD_POINTER 1 +#endif + +#if optional_CPP11_OR_GREATER || optional_COMPILER_MSVC_VERSION >= 90 +# define optional_HAVE_TYPE_TRAITS 1 +# define optional_HAVE_STD_ADD_POINTER 1 +#endif + +#if optional_CPP11_OR_GREATER || optional_COMPILER_MSVC_VERSION >= 110 +# define optional_HAVE_ARRAY 1 +#endif + +#if optional_CPP11_OR_GREATER || optional_COMPILER_MSVC_VERSION >= 120 +# define optional_HAVE_CONDITIONAL 1 +#endif + +#if optional_CPP11_OR_GREATER || optional_COMPILER_MSVC_VERSION >= 140 || (optional_COMPILER_MSVC_VERSION >= 90 && _HAS_CPP0X) +# define optional_HAVE_CONTAINER_DATA_METHOD 1 +#endif + +#if optional_CPP11_OR_GREATER || optional_COMPILER_MSVC_VERSION >= 120 +# define optional_HAVE_REMOVE_CV 1 +#endif + +#if optional_CPP11_OR_GREATER || optional_COMPILER_MSVC_VERSION >= 140 +# define optional_HAVE_SIZED_TYPES 1 +#endif + +// For the rest, consider VC14 as C++11 for optional-lite: + +#if optional_COMPILER_MSVC_VERSION >= 140 +# undef optional_CPP11_OR_GREATER +# define optional_CPP11_OR_GREATER 1 +#endif + +// C++ feature usage: + +#if optional_HAVE( CONSTEXPR_11 ) +# define optional_constexpr constexpr +#else +# define optional_constexpr /*constexpr*/ +#endif + +#if optional_HAVE( CONSTEXPR_14 ) +# define optional_constexpr14 constexpr +#else +# define optional_constexpr14 /*constexpr*/ +#endif + +#if optional_HAVE( NOEXCEPT ) +# define optional_noexcept noexcept +#else +# define optional_noexcept /*noexcept*/ +#endif + +#if optional_HAVE( NULLPTR ) +# define optional_nullptr nullptr +#else +# define optional_nullptr NULL +#endif + +#if optional_HAVE( REF_QUALIFIER ) +# define optional_ref_qual & +# define optional_refref_qual && +#else +# define optional_ref_qual /*&*/ +# define optional_refref_qual /*&&*/ +#endif + +// additional includes: + +#if optional_CPP11_OR_GREATER +# include +#endif + +#if optional_HAVE( INITIALIZER_LIST ) +# include +#endif + +#if optional_HAVE( TYPE_TRAITS ) +# include +#elif optional_HAVE( TR1_TYPE_TRAITS ) +# include +#endif + +// type traits needed: + +namespace nonstd { namespace optional_lite { namespace detail { + +#if optional_HAVE( CONDITIONAL ) + using std::conditional; +#else + template< bool B, typename T, typename F > struct conditional { typedef T type; }; + template< typename T, typename F > struct conditional { typedef F type; }; +#endif // optional_HAVE_CONDITIONAL + +}}} + +// +// in_place: code duplicated in any-lite, optional-lite, variant-lite: +// + +#ifndef nonstd_lite_HAVE_IN_PLACE_TYPES + +namespace nonstd { + +namespace detail { + +template< class T > +struct in_place_type_tag {}; 
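+
+// A minimal usage sketch (illustrative only, not taken from optional-lite itself):
+// the in_place tag machinery defined in this block is what lets callers construct
+// the contained value directly inside a nonstd::optional, e.g.
+//
+//   nonstd::optional< std::pair<int, int> > p( nonstd::in_place, 1, 2 );
+//   // the pair is built in place in the optional's storage, with no temporary pair
+//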
+ +template< std::size_t I > +struct in_place_index_tag {}; + +} // namespace detail + +struct in_place_t {}; + +template< class T > +inline in_place_t in_place( detail::in_place_type_tag = detail::in_place_type_tag() ) +{ + return in_place_t(); +} + +template< std::size_t I > +inline in_place_t in_place( detail::in_place_index_tag = detail::in_place_index_tag() ) +{ + return in_place_t(); +} + +template< class T > +inline in_place_t in_place_type( detail::in_place_type_tag = detail::in_place_type_tag() ) +{ + return in_place_t(); +} + +template< std::size_t I > +inline in_place_t in_place_index( detail::in_place_index_tag = detail::in_place_index_tag() ) +{ + return in_place_t(); +} + +// mimic templated typedef: + +#define nonstd_lite_in_place_type_t( T) nonstd::in_place_t(&)( nonstd::detail::in_place_type_tag ) +#define nonstd_lite_in_place_index_t(T) nonstd::in_place_t(&)( nonstd::detail::in_place_index_tag ) + +#define nonstd_lite_HAVE_IN_PLACE_TYPES 1 + +} // namespace nonstd + +#endif // nonstd_lite_HAVE_IN_PLACE_TYPES + +// +// optional: +// + +namespace nonstd { namespace optional_lite { + +/// class optional + +template< typename T > +class optional; + +namespace detail { + +// C++11 emulation: + +struct nulltype{}; + +template< typename Head, typename Tail > +struct typelist +{ + typedef Head head; + typedef Tail tail; +}; + +#if optional_CONFIG_MAX_ALIGN_HACK + +// Max align, use most restricted type for alignment: + +#define optional_UNIQUE( name ) optional_UNIQUE2( name, __LINE__ ) +#define optional_UNIQUE2( name, line ) optional_UNIQUE3( name, line ) +#define optional_UNIQUE3( name, line ) name ## line + +#define optional_ALIGN_TYPE( type ) \ + type optional_UNIQUE( _t ); struct_t< type > optional_UNIQUE( _st ) + +template< typename T > +struct struct_t { T _; }; + +union max_align_t +{ + optional_ALIGN_TYPE( char ); + optional_ALIGN_TYPE( short int ); + optional_ALIGN_TYPE( int ); + optional_ALIGN_TYPE( long int ); + optional_ALIGN_TYPE( float ); + optional_ALIGN_TYPE( double ); + optional_ALIGN_TYPE( long double ); + optional_ALIGN_TYPE( char * ); + optional_ALIGN_TYPE( short int * ); + optional_ALIGN_TYPE( int * ); + optional_ALIGN_TYPE( long int * ); + optional_ALIGN_TYPE( float * ); + optional_ALIGN_TYPE( double * ); + optional_ALIGN_TYPE( long double * ); + optional_ALIGN_TYPE( void * ); + +#ifdef HAVE_LONG_LONG + optional_ALIGN_TYPE( long long ); +#endif + + struct Unknown; + + Unknown ( * optional_UNIQUE(_) )( Unknown ); + Unknown * Unknown::* optional_UNIQUE(_); + Unknown ( Unknown::* optional_UNIQUE(_) )( Unknown ); + + struct_t< Unknown ( * )( Unknown) > optional_UNIQUE(_); + struct_t< Unknown * Unknown::* > optional_UNIQUE(_); + struct_t< Unknown ( Unknown::* )(Unknown) > optional_UNIQUE(_); +}; + +#undef optional_UNIQUE +#undef optional_UNIQUE2 +#undef optional_UNIQUE3 + +#undef optional_ALIGN_TYPE + +#elif defined( optional_CONFIG_ALIGN_AS ) // optional_CONFIG_MAX_ALIGN_HACK + +// Use user-specified type for alignment: + +#define optional_ALIGN_AS( unused ) \ + optional_CONFIG_ALIGN_AS + +#else // optional_CONFIG_MAX_ALIGN_HACK + +// Determine POD type to use for alignment: + +#define optional_ALIGN_AS( to_align ) \ + typename type_of_size< alignment_types, alignment_of< to_align >::value >::type + +template +struct alignment_of; + +template +struct alignment_of_hack +{ + char c; + T t; + alignment_of_hack(); +}; + +template +struct alignment_logic +{ + enum { value = A < S ? 
A : S }; +}; + +template< typename T > +struct alignment_of +{ + enum { value = alignment_logic< + sizeof( alignment_of_hack ) - sizeof(T), sizeof(T) >::value, }; +}; + +template< typename List, size_t N > +struct type_of_size +{ + typedef typename conditional< + N == sizeof( typename List::head ), + typename List::head, + typename type_of_size::type >::type type; +}; + +template< size_t N > +struct type_of_size< nulltype, N > +{ + typedef optional_CONFIG_ALIGN_AS_FALLBACK type; +}; + +template< typename T> +struct struct_t { T _; }; + +#define optional_ALIGN_TYPE( type ) \ + typelist< type , typelist< struct_t< type > + +struct Unknown; + +typedef + optional_ALIGN_TYPE( char ), + optional_ALIGN_TYPE( short ), + optional_ALIGN_TYPE( int ), + optional_ALIGN_TYPE( long ), + optional_ALIGN_TYPE( float ), + optional_ALIGN_TYPE( double ), + optional_ALIGN_TYPE( long double ), + + optional_ALIGN_TYPE( char *), + optional_ALIGN_TYPE( short * ), + optional_ALIGN_TYPE( int * ), + optional_ALIGN_TYPE( long * ), + optional_ALIGN_TYPE( float * ), + optional_ALIGN_TYPE( double * ), + optional_ALIGN_TYPE( long double * ), + + optional_ALIGN_TYPE( Unknown ( * )( Unknown ) ), + optional_ALIGN_TYPE( Unknown * Unknown::* ), + optional_ALIGN_TYPE( Unknown ( Unknown::* )( Unknown ) ), + + nulltype + > > > > > > > > > > > > > > + > > > > > > > > > > > > > > + > > > > > > + alignment_types; + +#undef optional_ALIGN_TYPE + +#endif // optional_CONFIG_MAX_ALIGN_HACK + +/// C++03 constructed union to hold value. + +template< typename T > +union storage_t +{ +private: + friend class optional; + + typedef T value_type; + + storage_t() {} + + storage_t( value_type const & v ) + { + construct_value( v ); + } + + void construct_value( value_type const & v ) + { + ::new( value_ptr() ) value_type( v ); + } + +#if optional_CPP11_OR_GREATER + + storage_t( value_type && v ) + { + construct_value( std::move( v ) ); + } + + void construct_value( value_type && v ) + { + ::new( value_ptr() ) value_type( std::move( v ) ); + } + + template< class... Args > + void emplace( Args&&... args ) + { + ::new( value_ptr() ) value_type( std::forward(args)... ); + } + + template< class U, class... Args > + void emplace( std::initializer_list il, Args&&... args ) + { + ::new( value_ptr() ) value_type( il, std::forward(args)... 
); + } + +#endif + + void destruct_value() + { + value_ptr()->~T(); + } + + value_type const * value_ptr() const + { + return as(); + } + + value_type * value_ptr() + { + return as(); + } + + value_type const & value() const optional_ref_qual + { + return * value_ptr(); + } + + value_type & value() optional_ref_qual + { + return * value_ptr(); + } + +#if optional_CPP11_OR_GREATER + + value_type const && value() const optional_refref_qual + { + return * value_ptr(); + } + + value_type && value() optional_refref_qual + { + return * value_ptr(); + } + +#endif + +#if optional_CPP11_OR_GREATER + + using aligned_storage_t = typename std::aligned_storage< sizeof(value_type), alignof(value_type) >::type; + aligned_storage_t data; + +#elif optional_CONFIG_MAX_ALIGN_HACK + + typedef struct { unsigned char data[ sizeof(value_type) ]; } aligned_storage_t; + + max_align_t hack; + aligned_storage_t data; + +#else + typedef optional_ALIGN_AS(value_type) align_as_type; + + typedef struct { align_as_type data[ 1 + ( sizeof(value_type) - 1 ) / sizeof(align_as_type) ]; } aligned_storage_t; + aligned_storage_t data; + +# undef optional_ALIGN_AS + +#endif // optional_CONFIG_MAX_ALIGN_HACK + + void * ptr() optional_noexcept + { + return &data; + } + + void const * ptr() const optional_noexcept + { + return &data; + } + + template + U * as() + { + return reinterpret_cast( ptr() ); + } + + template + U const * as() const + { + return reinterpret_cast( ptr() ); + } +}; + +} // namespace detail + +/// disengaged state tag + +struct nullopt_t +{ + struct init{}; + optional_constexpr nullopt_t( init ) {} +}; + +#if optional_HAVE( CONSTEXPR_11 ) +constexpr nullopt_t nullopt{ nullopt_t::init{} }; +#else +// extra parenthesis to prevent the most vexing parse: +const nullopt_t nullopt(( nullopt_t::init() )); +#endif + +/// optional access error + +class bad_optional_access : public std::logic_error +{ +public: + explicit bad_optional_access() + : logic_error( "bad optional access" ) {} +}; + +/// optional + +template< typename T> +class optional +{ +private: + typedef void (optional::*safe_bool)() const; + +public: + typedef T value_type; + + optional_constexpr optional() optional_noexcept + : has_value_( false ) + , contained() + {} + + optional_constexpr optional( nullopt_t ) optional_noexcept + : has_value_( false ) + , contained() + {} + + optional( optional const & rhs ) + : has_value_( rhs.has_value() ) + { + if ( rhs.has_value() ) + contained.construct_value( rhs.contained.value() ); + } + +#if optional_CPP11_OR_GREATER + optional_constexpr14 optional( optional && rhs ) noexcept( std::is_nothrow_move_constructible::value ) + : has_value_( rhs.has_value() ) + { + if ( rhs.has_value() ) + contained.construct_value( std::move( rhs.contained.value() ) ); + } +#endif + + optional_constexpr optional( value_type const & value ) + : has_value_( true ) + , contained( value ) + {} + +#if optional_CPP11_OR_GREATER + + optional_constexpr optional( value_type && value ) + : has_value_( true ) + , contained( std::move( value ) ) + {} + + template< class... Args > + optional_constexpr explicit optional( nonstd_lite_in_place_type_t(T), Args&&... args ) + : has_value_( true ) + , contained( T( std::forward(args)...) ) + {} + + template< class U, class... Args > + optional_constexpr explicit optional( nonstd_lite_in_place_type_t(T), std::initializer_list il, Args&&... args ) + : has_value_( true ) + , contained( T( il, std::forward(args)...) 
) + {} + +#endif // optional_CPP11_OR_GREATER + + ~optional() + { + if ( has_value() ) + contained.destruct_value(); + } + + // assignment + + optional & operator=( nullopt_t ) optional_noexcept + { + reset(); + return *this; + } + + optional & operator=( optional const & rhs ) +#if optional_CPP11_OR_GREATER + noexcept( std::is_nothrow_move_assignable::value && std::is_nothrow_move_constructible::value ) +#endif + { + if ( has_value() == true && rhs.has_value() == false ) reset(); + else if ( has_value() == false && rhs.has_value() == true ) initialize( *rhs ); + else if ( has_value() == true && rhs.has_value() == true ) contained.value() = *rhs; + return *this; + } + +#if optional_CPP11_OR_GREATER + + optional & operator=( optional && rhs ) noexcept + { + if ( has_value() == true && rhs.has_value() == false ) reset(); + else if ( has_value() == false && rhs.has_value() == true ) initialize( std::move( *rhs ) ); + else if ( has_value() == true && rhs.has_value() == true ) contained.value() = std::move( *rhs ); + return *this; + } + + template< class U, + typename = typename std::enable_if< std::is_same< typename std::decay::type, T>::value >::type > + optional & operator=( U && v ) + { + if ( has_value() ) contained.value() = std::forward( v ); + else initialize( T( std::forward( v ) ) ); + return *this; + } + + template< class... Args > + void emplace( Args&&... args ) + { + *this = nullopt; + contained.emplace( std::forward(args)... ); + has_value_ = true; + } + + + template< class U, class... Args > + void emplace( std::initializer_list il, Args&&... args ) + { + *this = nullopt; + contained.emplace( il, std::forward(args)... ); + has_value_ = true; + } + +#endif // optional_CPP11_OR_GREATER + + // swap + + void swap( optional & rhs ) +#if optional_CPP11_OR_GREATER + noexcept( std::is_nothrow_move_constructible::value && noexcept( std::swap( std::declval(), std::declval() ) ) ) +#endif + { + using std::swap; + if ( has_value() == true && rhs.has_value() == true ) { swap( **this, *rhs ); } + else if ( has_value() == false && rhs.has_value() == true ) { initialize( *rhs ); rhs.reset(); } + else if ( has_value() == true && rhs.has_value() == false ) { rhs.initialize( **this ); reset(); } + } + + // observers + + optional_constexpr value_type const * operator ->() const + { + return assert( has_value() ), + contained.value_ptr(); + } + + optional_constexpr14 value_type * operator ->() + { + return assert( has_value() ), + contained.value_ptr(); + } + + optional_constexpr value_type const & operator *() const optional_ref_qual + { + return assert( has_value() ), + contained.value(); + } + + optional_constexpr14 value_type & operator *() optional_ref_qual + { + return assert( has_value() ), + contained.value(); + } + +#if optional_CPP11_OR_GREATER + + optional_constexpr value_type const && operator *() const optional_refref_qual + { + return assert( has_value() ), + std::move( contained.value() ); + } + + optional_constexpr14 value_type && operator *() optional_refref_qual + { + return assert( has_value() ), + std::move( contained.value() ); + } + +#endif + +#if optional_CPP11_OR_GREATER + optional_constexpr explicit operator bool() const optional_noexcept + { + return has_value(); + } +#else + optional_constexpr operator safe_bool() const optional_noexcept + { + return has_value() ? 
&optional::this_type_does_not_support_comparisons : 0; + } +#endif + + optional_constexpr bool has_value() const optional_noexcept + { + return has_value_; + } + + optional_constexpr14 value_type const & value() const optional_ref_qual + { + if ( ! has_value() ) + throw bad_optional_access(); + + return contained.value(); + } + + optional_constexpr14 value_type & value() optional_ref_qual + { + if ( ! has_value() ) + throw bad_optional_access(); + + return contained.value(); + } + +#if optional_HAVE( REF_QUALIFIER ) + + optional_constexpr14 value_type const && value() const optional_refref_qual + { + if ( ! has_value() ) + throw bad_optional_access(); + + return std::move( contained.value() ); + } + + optional_constexpr14 value_type && value() optional_refref_qual + { + if ( ! has_value() ) + throw bad_optional_access(); + + return std::move( contained.value() ); + } + +#endif + +#if optional_CPP11_OR_GREATER + + template< class U > + optional_constexpr value_type value_or( U && v ) const optional_ref_qual + { + return has_value() ? contained.value() : static_cast(std::forward( v ) ); + } + + template< class U > + optional_constexpr value_type value_or( U && v ) const optional_refref_qual + { + return has_value() ? std::move( contained.value() ) : static_cast(std::forward( v ) ); + } + +#else + + template< class U > + optional_constexpr value_type value_or( U const & v ) const + { + return has_value() ? contained.value() : static_cast( v ); + } + +#endif // optional_CPP11_OR_GREATER + + // modifiers + + void reset() optional_noexcept + { + if ( has_value() ) + contained.destruct_value(); + + has_value_ = false; + } + +private: + void this_type_does_not_support_comparisons() const {} + + template< typename V > + void initialize( V const & value ) + { + assert( ! has_value() ); + contained.construct_value( value ); + has_value_ = true; + } + +#if optional_CPP11_OR_GREATER + template< typename V > + void initialize( V && value ) + { + assert( ! has_value() ); + contained.construct_value( std::move( value ) ); + has_value_ = true; + } + +#endif + +private: + bool has_value_; + detail::storage_t< value_type > contained; + +}; + +// Relational operators + +template< typename T, typename U > +inline optional_constexpr bool operator==( optional const & x, optional const & y ) +{ + return bool(x) != bool(y) ? false : !bool( x ) ? true : *x == *y; +} + +template< typename T, typename U > +inline optional_constexpr bool operator!=( optional const & x, optional const & y ) +{ + return !(x == y); +} + +template< typename T, typename U > +inline optional_constexpr bool operator<( optional const & x, optional const & y ) +{ + return (!y) ? false : (!x) ? 
true : *x < *y; +} + +template< typename T, typename U > +inline optional_constexpr bool operator>( optional const & x, optional const & y ) +{ + return (y < x); +} + +template< typename T, typename U > +inline optional_constexpr bool operator<=( optional const & x, optional const & y ) +{ + return !(y < x); +} + +template< typename T, typename U > +inline optional_constexpr bool operator>=( optional const & x, optional const & y ) +{ + return !(x < y); +} + +// Comparison with nullopt + +template< typename T > +inline optional_constexpr bool operator==( optional const & x, nullopt_t ) optional_noexcept +{ + return (!x); +} + +template< typename T > +inline optional_constexpr bool operator==( nullopt_t, optional const & x ) optional_noexcept +{ + return (!x); +} + +template< typename T > +inline optional_constexpr bool operator!=( optional const & x, nullopt_t ) optional_noexcept +{ + return bool(x); +} + +template< typename T > +inline optional_constexpr bool operator!=( nullopt_t, optional const & x ) optional_noexcept +{ + return bool(x); +} + +template< typename T > +inline optional_constexpr bool operator<( optional const &, nullopt_t ) optional_noexcept +{ + return false; +} + +template< typename T > +inline optional_constexpr bool operator<( nullopt_t, optional const & x ) optional_noexcept +{ + return bool(x); +} + +template< typename T > +inline optional_constexpr bool operator<=( optional const & x, nullopt_t ) optional_noexcept +{ + return (!x); +} + +template< typename T > +inline optional_constexpr bool operator<=( nullopt_t, optional const & ) optional_noexcept +{ + return true; +} + +template< typename T > +inline optional_constexpr bool operator>( optional const & x, nullopt_t ) optional_noexcept +{ + return bool(x); +} + +template< typename T > +inline optional_constexpr bool operator>( nullopt_t, optional const & ) optional_noexcept +{ + return false; +} + +template< typename T > +inline optional_constexpr bool operator>=( optional const &, nullopt_t ) optional_noexcept +{ + return true; +} + +template< typename T > +inline optional_constexpr bool operator>=( nullopt_t, optional const & x ) optional_noexcept +{ + return (!x); +} + +// Comparison with T + +template< typename T, typename U > +inline optional_constexpr bool operator==( optional const & x, U const & v ) +{ + return bool(x) ? *x == v : false; +} + +template< typename T, typename U > +inline optional_constexpr bool operator==( U const & v, optional const & x ) +{ + return bool(x) ? v == *x : false; +} + +template< typename T, typename U > +inline optional_constexpr bool operator!=( optional const & x, U const & v ) +{ + return bool(x) ? *x != v : true; +} + +template< typename T, typename U > +inline optional_constexpr bool operator!=( U const & v, optional const & x ) +{ + return bool(x) ? v != *x : true; +} + +template< typename T, typename U > +inline optional_constexpr bool operator<( optional const & x, U const & v ) +{ + return bool(x) ? *x < v : true; +} + +template< typename T, typename U > +inline optional_constexpr bool operator<( U const & v, optional const & x ) +{ + return bool(x) ? v < *x : false; +} + +template< typename T, typename U > +inline optional_constexpr bool operator<=( optional const & x, U const & v ) +{ + return bool(x) ? *x <= v : true; +} + +template< typename T, typename U > +inline optional_constexpr bool operator<=( U const & v, optional const & x ) +{ + return bool(x) ? 
v <= *x : false; +} + +template< typename T, typename U > +inline optional_constexpr bool operator>( optional const & x, U const & v ) +{ + return bool(x) ? *x > v : false; +} + +template< typename T, typename U > +inline optional_constexpr bool operator>( U const & v, optional const & x ) +{ + return bool(x) ? v > *x : true; +} + +template< typename T, typename U > +inline optional_constexpr bool operator>=( optional const & x, U const & v ) +{ + return bool(x) ? *x >= v : false; +} + +template< typename T, typename U > +inline optional_constexpr bool operator>=( U const & v, optional const & x ) +{ + return bool(x) ? v >= *x : true; +} + +// Specialized algorithms + +template< typename T > +void swap( optional & x, optional & y ) +#if optional_CPP11_OR_GREATER + noexcept( noexcept( x.swap(y) ) ) +#endif +{ + x.swap( y ); +} + +#if optional_CPP11_OR_GREATER + +template< class T > +optional_constexpr optional< typename std::decay::type > make_optional( T && v ) +{ + return optional< typename std::decay::type >( std::forward( v ) ); +} + +template< class T, class...Args > +optional_constexpr optional make_optional( Args&&... args ) +{ + return optional( in_place, std::forward(args)...); +} + +template< class T, class U, class... Args > +optional_constexpr optional make_optional( std::initializer_list il, Args&&... args ) +{ + return optional( in_place, il, std::forward(args)...); +} + +#else + +template< typename T > +optional make_optional( T const & v ) +{ + return optional( v ); +} + +#endif // optional_CPP11_OR_GREATER + +} // namespace optional + +using namespace optional_lite; + +} // namespace nonstd + +#if optional_CPP11_OR_GREATER + +// specialize the std::hash algorithm: + +namespace std { + +template< class T > +struct hash< nonstd::optional > +{ +public: + std::size_t operator()( nonstd::optional const & v ) const optional_noexcept + { + return bool( v ) ? 
hash()( *v ) : 0; + } +}; + +} //namespace std + +#endif // optional_CPP11_OR_GREATER + +#ifdef __clang__ +# pragma clang diagnostic pop +#elif defined __GNUC__ +# pragma GCC diagnostic pop +#endif + +#endif // have C++17 std::optional + +#endif // NONSTD_OPTIONAL_LITE_HPP diff --git a/include/squeakrconfig.h b/include/squeakrconfig.h new file mode 100644 index 0000000..b830e15 --- /dev/null +++ b/include/squeakrconfig.h @@ -0,0 +1,33 @@ +/* + * ============================================================================ + * + * Authors: Prashant Pandey + * Rob Johnson + * Rob Patro (rob.patro@cs.stonybrook.edu) + * + * ============================================================================ + */ + +#ifndef _SQUEAKR_CONFIG_H_ +#define _SQUEAKR_CONFIG_H_ + +#include +#include + +namespace squeakr { + constexpr const uint32_t SQUEAKR_INVALID_VERSION{1}; + constexpr const uint32_t SQUEAKR_INVALID_ENDIAN{2}; + constexpr const uint32_t INDEX_VERSION{2}; + constexpr const uint64_t ENDIANNESS{0x0102030405060708ULL}; + + typedef struct __attribute__ ((__packed__)) squeakrconfig { + uint64_t kmer_size; + uint64_t cutoff; + uint64_t contains_counts; + uint64_t endianness; + uint32_t version; + } squeakrconfig; + + int read_config(std::string squeakr_file, squeakrconfig *config); +} +#endif diff --git a/include/tsl/bhopscotch_map.h b/include/tsl/bhopscotch_map.h new file mode 100644 index 0000000..e5bcbf3 --- /dev/null +++ b/include/tsl/bhopscotch_map.h @@ -0,0 +1,675 @@ +/** + * MIT License + * + * Copyright (c) 2017 Tessil + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef TSL_BHOPSCOTCH_MAP_H +#define TSL_BHOPSCOTCH_MAP_H + + +#include +#include +#include +#include +#include +#include +#include +#include +#include "hopscotch_hash.h" + + +namespace tsl { + + +/** + * Similar to tsl::hopscotch_map but instead of using a list for overflowing elements it uses + * a binary search tree. It thus needs an additional template parameter Compare. Compare should + * be arithmetically coherent with KeyEqual. + * + * The binary search tree allows the map to have a worst-case scenario of O(log n) for search + * and delete, even if the hash function maps all the elements to the same bucket. + * For insert, the amortized worst case is O(log n), but the worst case is O(n) in case of rehash. + * + * This makes the map resistant to DoS attacks (but doesn't preclude you to have a good hash function, + * as an element in the bucket array is faster to retrieve than in the tree). 
+ * + * @copydoc hopscotch_map + */ +template, + class KeyEqual = std::equal_to, + class Compare = std::less, + class Allocator = std::allocator>, + unsigned int NeighborhoodSize = 62, + bool StoreHash = false, + class GrowthPolicy = tsl::hh::power_of_two_growth_policy<2>> +class bhopscotch_map { +private: + template + using has_is_transparent = tsl::detail_hopscotch_hash::has_is_transparent; + + class KeySelect { + public: + using key_type = Key; + + const key_type& operator()(const std::pair& key_value) const { + return key_value.first; + } + + const key_type& operator()(std::pair& key_value) { + return key_value.first; + } + }; + + class ValueSelect { + public: + using value_type = T; + + const value_type& operator()(const std::pair& key_value) const { + return key_value.second; + } + + value_type& operator()(std::pair& key_value) { + return key_value.second; + } + }; + + + // TODO Not optimal as we have to use std::pair as ValueType which forbid + // us to move the key in the bucket array, we have to use copy. Optimize. + using overflow_container_type = std::map; + using ht = detail_hopscotch_hash::hopscotch_hash, KeySelect, ValueSelect, + Hash, KeyEqual, + Allocator, NeighborhoodSize, + StoreHash, GrowthPolicy, + overflow_container_type>; + +public: + using key_type = typename ht::key_type; + using mapped_type = T; + using value_type = typename ht::value_type; + using size_type = typename ht::size_type; + using difference_type = typename ht::difference_type; + using hasher = typename ht::hasher; + using key_equal = typename ht::key_equal; + using key_compare = Compare; + using allocator_type = typename ht::allocator_type; + using reference = typename ht::reference; + using const_reference = typename ht::const_reference; + using pointer = typename ht::pointer; + using const_pointer = typename ht::const_pointer; + using iterator = typename ht::iterator; + using const_iterator = typename ht::const_iterator; + + + /* + * Constructors + */ + bhopscotch_map() : bhopscotch_map(ht::DEFAULT_INIT_BUCKETS_SIZE) { + } + + explicit bhopscotch_map(size_type bucket_count, + const Hash& hash = Hash(), + const KeyEqual& equal = KeyEqual(), + const Allocator& alloc = Allocator(), + const Compare& comp = Compare()) : + m_ht(bucket_count, hash, equal, alloc, ht::DEFAULT_MAX_LOAD_FACTOR, comp) + { + } + + bhopscotch_map(size_type bucket_count, + const Allocator& alloc) : bhopscotch_map(bucket_count, Hash(), KeyEqual(), alloc) + { + } + + bhopscotch_map(size_type bucket_count, + const Hash& hash, + const Allocator& alloc) : bhopscotch_map(bucket_count, hash, KeyEqual(), alloc) + { + } + + explicit bhopscotch_map(const Allocator& alloc) : bhopscotch_map(ht::DEFAULT_INIT_BUCKETS_SIZE, alloc) { + } + + template + bhopscotch_map(InputIt first, InputIt last, + size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, + const Hash& hash = Hash(), + const KeyEqual& equal = KeyEqual(), + const Allocator& alloc = Allocator()) : bhopscotch_map(bucket_count, hash, equal, alloc) + { + insert(first, last); + } + + template + bhopscotch_map(InputIt first, InputIt last, + size_type bucket_count, + const Allocator& alloc) : bhopscotch_map(first, last, bucket_count, Hash(), KeyEqual(), alloc) + { + } + + template + bhopscotch_map(InputIt first, InputIt last, + size_type bucket_count, + const Hash& hash, + const Allocator& alloc) : bhopscotch_map(first, last, bucket_count, hash, KeyEqual(), alloc) + { + } + + bhopscotch_map(std::initializer_list init, + size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, + const Hash& 
hash = Hash(), + const KeyEqual& equal = KeyEqual(), + const Allocator& alloc = Allocator()) : + bhopscotch_map(init.begin(), init.end(), bucket_count, hash, equal, alloc) + { + } + + bhopscotch_map(std::initializer_list init, + size_type bucket_count, + const Allocator& alloc) : + bhopscotch_map(init.begin(), init.end(), bucket_count, Hash(), KeyEqual(), alloc) + { + } + + bhopscotch_map(std::initializer_list init, + size_type bucket_count, + const Hash& hash, + const Allocator& alloc) : + bhopscotch_map(init.begin(), init.end(), bucket_count, hash, KeyEqual(), alloc) + { + } + + + bhopscotch_map& operator=(std::initializer_list ilist) { + m_ht.clear(); + + m_ht.reserve(ilist.size()); + m_ht.insert(ilist.begin(), ilist.end()); + + return *this; + } + + allocator_type get_allocator() const { return m_ht.get_allocator(); } + + + /* + * Iterators + */ + iterator begin() noexcept { return m_ht.begin(); } + const_iterator begin() const noexcept { return m_ht.begin(); } + const_iterator cbegin() const noexcept { return m_ht.cbegin(); } + + iterator end() noexcept { return m_ht.end(); } + const_iterator end() const noexcept { return m_ht.end(); } + const_iterator cend() const noexcept { return m_ht.cend(); } + + + /* + * Capacity + */ + bool empty() const noexcept { return m_ht.empty(); } + size_type size() const noexcept { return m_ht.size(); } + size_type max_size() const noexcept { return m_ht.max_size(); } + + /* + * Modifiers + */ + void clear() noexcept { m_ht.clear(); } + + + + + std::pair insert(const value_type& value) { + return m_ht.insert(value); + } + + template::value>::type* = nullptr> + std::pair insert(P&& value) { + return m_ht.insert(std::forward
<P>(value));
+    }
+
+    std::pair<iterator, bool> insert(value_type&& value) {
+        return m_ht.insert(std::move(value));
+    }
+
+
+    iterator insert(const_iterator hint, const value_type& value) {
+        return m_ht.insert(hint, value);
+    }
+
+    template<class P, typename std::enable_if<std::is_constructible<value_type, P&&>::value>::type* = nullptr>
+    iterator insert(const_iterator hint, P&& value) {
+        return m_ht.insert(hint, std::forward<P>
(value)); + } + + iterator insert(const_iterator hint, value_type&& value) { + return m_ht.insert(hint, std::move(value)); + } + + + template + void insert(InputIt first, InputIt last) { + m_ht.insert(first, last); + } + + void insert(std::initializer_list ilist) { + m_ht.insert(ilist.begin(), ilist.end()); + } + + + + + template + std::pair insert_or_assign(const key_type& k, M&& obj) { + return m_ht.insert_or_assign(k, std::forward(obj)); + } + + template + std::pair insert_or_assign(key_type&& k, M&& obj) { + return m_ht.insert_or_assign(std::move(k), std::forward(obj)); + } + + template + iterator insert_or_assign(const_iterator hint, const key_type& k, M&& obj) { + return m_ht.insert_or_assign(hint, k, std::forward(obj)); + } + + template + iterator insert_or_assign(const_iterator hint, key_type&& k, M&& obj) { + return m_ht.insert_or_assign(hint, std::move(k), std::forward(obj)); + } + + + + /** + * Due to the way elements are stored, emplace will need to move or copy the key-value once. + * The method is equivalent to insert(value_type(std::forward(args)...)); + * + * Mainly here for compatibility with the std::unordered_map interface. + */ + template + std::pair emplace(Args&&... args) { + return m_ht.emplace(std::forward(args)...); + } + + + + + /** + * Due to the way elements are stored, emplace_hint will need to move or copy the key-value once. + * The method is equivalent to insert(hint, value_type(std::forward(args)...)); + * + * Mainly here for compatibility with the std::unordered_map interface. + */ + template + iterator emplace_hint(const_iterator hint, Args&&... args) { + return m_ht.emplace_hint(hint, std::forward(args)...); + } + + + + + template + std::pair try_emplace(const key_type& k, Args&&... args) { + return m_ht.try_emplace(k, std::forward(args)...); + } + + template + std::pair try_emplace(key_type&& k, Args&&... args) { + return m_ht.try_emplace(std::move(k), std::forward(args)...); + } + + template + iterator try_emplace(const_iterator hint, const key_type& k, Args&&... args) { + return m_ht.try_emplace(hint, k, std::forward(args)...); + } + + template + iterator try_emplace(const_iterator hint, key_type&& k, Args&&... args) { + return m_ht.try_emplace(hint, std::move(k), std::forward(args)...); + } + + + + + iterator erase(iterator pos) { return m_ht.erase(pos); } + iterator erase(const_iterator pos) { return m_ht.erase(pos); } + iterator erase(const_iterator first, const_iterator last) { return m_ht.erase(first, last); } + size_type erase(const key_type& key) { return m_ht.erase(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup to the value if you already have the hash. + */ + size_type erase(const key_type& key, std::size_t precalculated_hash) { + return m_ht.erase(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent + * and Compare::is_transparent exist. + * If so, K must be hashable and comparable to Key. + */ + template::value && has_is_transparent::value>::type* = nullptr> + size_type erase(const K& key) { return m_ht.erase(key); } + + /** + * @copydoc erase(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup to the value if you already have the hash. 
+ */ + template::value && has_is_transparent::value>::type* = nullptr> + size_type erase(const K& key, std::size_t precalculated_hash) { return m_ht.erase(key, precalculated_hash); } + + + + + void swap(bhopscotch_map& other) { other.m_ht.swap(m_ht); } + + /* + * Lookup + */ + T& at(const Key& key) { return m_ht.at(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. + */ + T& at(const Key& key, std::size_t precalculated_hash) { return m_ht.at(key, precalculated_hash); } + + const T& at(const Key& key) const { return m_ht.at(key); } + + /** + * @copydoc at(const Key& key, std::size_t precalculated_hash) + */ + const T& at(const Key& key, std::size_t precalculated_hash) const { return m_ht.at(key, precalculated_hash); } + + /** + * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent + * and Compare::is_transparent exist. + * If so, K must be hashable and comparable to Key. + */ + template::value && has_is_transparent::value>::type* = nullptr> + T& at(const K& key) { return m_ht.at(key); } + + /** + * @copydoc at(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. + */ + template::value && has_is_transparent::value>::type* = nullptr> + T& at(const K& key, std::size_t precalculated_hash) { return m_ht.at(key, precalculated_hash); } + + /** + * @copydoc at(const K& key) + */ + template::value && has_is_transparent::value>::type* = nullptr> + const T& at(const K& key) const { return m_ht.at(key); } + + /** + * @copydoc at(const K& key, std::size_t precalculated_hash) + */ + template::value && has_is_transparent::value>::type* = nullptr> + const T& at(const K& key, std::size_t precalculated_hash) const { return m_ht.at(key, precalculated_hash); } + + + + + T& operator[](const Key& key) { return m_ht[key]; } + T& operator[](Key&& key) { return m_ht[std::move(key)]; } + + + + + size_type count(const Key& key) const { return m_ht.count(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. + */ + size_type count(const Key& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); } + + /** + * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent + * and Compare::is_transparent exist. + * If so, K must be hashable and comparable to Key. + */ + template::value && has_is_transparent::value>::type* = nullptr> + size_type count(const K& key) const { return m_ht.count(key); } + + /** + * @copydoc count(const K& key) const + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. + */ + template::value && has_is_transparent::value>::type* = nullptr> + size_type count(const K& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); } + + + + + iterator find(const Key& key) { return m_ht.find(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). 
Usefull to speed-up the lookup if you already have the hash. + */ + iterator find(const Key& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); } + + const_iterator find(const Key& key) const { return m_ht.find(key); } + + /** + * @copydoc find(const Key& key, std::size_t precalculated_hash) + */ + const_iterator find(const Key& key, std::size_t precalculated_hash) const { return m_ht.find(key, precalculated_hash); } + + /** + * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent + * and Compare::is_transparent exist. + * If so, K must be hashable and comparable to Key. + */ + template::value && has_is_transparent::value>::type* = nullptr> + iterator find(const K& key) { return m_ht.find(key); } + + /** + * @copydoc find(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. + */ + template::value && has_is_transparent::value>::type* = nullptr> + iterator find(const K& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); } + + /** + * @copydoc find(const K& key) + */ + template::value && has_is_transparent::value>::type* = nullptr> + const_iterator find(const K& key) const { return m_ht.find(key); } + + /** + * @copydoc find(const K& key, std::size_t precalculated_hash) + */ + template::value && has_is_transparent::value>::type* = nullptr> + const_iterator find(const K& key, std::size_t precalculated_hash) const { return m_ht.find(key, precalculated_hash); } + + + + + std::pair equal_range(const Key& key) { return m_ht.equal_range(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. + */ + std::pair equal_range(const Key& key, std::size_t precalculated_hash) { + return m_ht.equal_range(key, precalculated_hash); + } + + std::pair equal_range(const Key& key) const { return m_ht.equal_range(key); } + + /** + * @copydoc equal_range(const Key& key, std::size_t precalculated_hash) + */ + std::pair equal_range(const Key& key, std::size_t precalculated_hash) const { + return m_ht.equal_range(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent + * and Compare::is_transparent exist. + * If so, K must be hashable and comparable to Key. + */ + template::value && has_is_transparent::value>::type* = nullptr> + std::pair equal_range(const K& key) { return m_ht.equal_range(key); } + + /** + * @copydoc equal_range(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. 
+ */ + template::value && has_is_transparent::value>::type* = nullptr> + std::pair equal_range(const K& key, std::size_t precalculated_hash) { + return m_ht.equal_range(key, precalculated_hash); + } + + /** + * @copydoc equal_range(const K& key) + */ + template::value && has_is_transparent::value>::type* = nullptr> + std::pair equal_range(const K& key) const { return m_ht.equal_range(key); } + + /** + * @copydoc equal_range(const K& key, std::size_t precalculated_hash) + */ + template::value && has_is_transparent::value>::type* = nullptr> + std::pair equal_range(const K& key, std::size_t precalculated_hash) const { + return m_ht.equal_range(key, precalculated_hash); + } + + + + + /* + * Bucket interface + */ + size_type bucket_count() const { return m_ht.bucket_count(); } + size_type max_bucket_count() const { return m_ht.max_bucket_count(); } + + + /* + * Hash policy + */ + float load_factor() const { return m_ht.load_factor(); } + float max_load_factor() const { return m_ht.max_load_factor(); } + void max_load_factor(float ml) { m_ht.max_load_factor(ml); } + + void rehash(size_type count_) { m_ht.rehash(count_); } + void reserve(size_type count_) { m_ht.reserve(count_); } + + + /* + * Observers + */ + hasher hash_function() const { return m_ht.hash_function(); } + key_equal key_eq() const { return m_ht.key_eq(); } + key_compare key_comp() const { return m_ht.key_comp(); } + + /* + * Other + */ + + /** + * Convert a const_iterator to an iterator. + */ + iterator mutable_iterator(const_iterator pos) { + return m_ht.mutable_iterator(pos); + } + + size_type overflow_size() const noexcept { return m_ht.overflow_size(); } + + friend bool operator==(const bhopscotch_map& lhs, const bhopscotch_map& rhs) { + if(lhs.size() != rhs.size()) { + return false; + } + + for(const auto& element_lhs : lhs) { + const auto it_element_rhs = rhs.find(element_lhs.first); + if(it_element_rhs == rhs.cend() || element_lhs.second != it_element_rhs->second) { + return false; + } + } + + return true; + } + + friend bool operator!=(const bhopscotch_map& lhs, const bhopscotch_map& rhs) { + return !operator==(lhs, rhs); + } + + friend void swap(bhopscotch_map& lhs, bhopscotch_map& rhs) { + lhs.swap(rhs); + } + + + +private: + ht m_ht; +}; + + +/** + * Same as `tsl::bhopscotch_map`. + */ +template, + class KeyEqual = std::equal_to, + class Compare = std::less, + class Allocator = std::allocator>, + unsigned int NeighborhoodSize = 62, + bool StoreHash = false> +using bhopscotch_pg_map = bhopscotch_map; + +} // end namespace tsl + +#endif diff --git a/include/tsl/bhopscotch_set.h b/include/tsl/bhopscotch_set.h new file mode 100644 index 0000000..86d563c --- /dev/null +++ b/include/tsl/bhopscotch_set.h @@ -0,0 +1,529 @@ +/** + * MIT License + * + * Copyright (c) 2017 Tessil + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef TSL_BHOPSCOTCH_SET_H +#define TSL_BHOPSCOTCH_SET_H + + +#include +#include +#include +#include +#include +#include +#include +#include +#include "hopscotch_hash.h" + + +namespace tsl { + + +/** + * Similar to tsl::hopscotch_set but instead of using a list for overflowing elements it uses + * a binary search tree. It thus needs an additional template parameter Compare. Compare should + * be arithmetically coherent with KeyEqual. + * + * The binary search tree allows the set to have a worst-case scenario of O(log n) for search + * and delete, even if the hash function maps all the elements to the same bucket. + * For insert, the amortized worst case is O(log n), but the worst case is O(n) in case of rehash. + * + * This makes the set resistant to DoS attacks (but doesn't preclude you to have a good hash function, + * as an element in the bucket array is faster to retrieve than in the tree). + * + * @copydoc hopscotch_set + */ +template, + class KeyEqual = std::equal_to, + class Compare = std::less, + class Allocator = std::allocator, + unsigned int NeighborhoodSize = 62, + bool StoreHash = false, + class GrowthPolicy = tsl::hh::power_of_two_growth_policy<2>> +class bhopscotch_set { +private: + template + using has_is_transparent = tsl::detail_hopscotch_hash::has_is_transparent; + + class KeySelect { + public: + using key_type = Key; + + const key_type& operator()(const Key& key) const { + return key; + } + + key_type& operator()(Key& key) { + return key; + } + }; + + + using overflow_container_type = std::set; + using ht = tsl::detail_hopscotch_hash::hopscotch_hash; + +public: + using key_type = typename ht::key_type; + using value_type = typename ht::value_type; + using size_type = typename ht::size_type; + using difference_type = typename ht::difference_type; + using hasher = typename ht::hasher; + using key_equal = typename ht::key_equal; + using key_compare = Compare; + using allocator_type = typename ht::allocator_type; + using reference = typename ht::reference; + using const_reference = typename ht::const_reference; + using pointer = typename ht::pointer; + using const_pointer = typename ht::const_pointer; + using iterator = typename ht::iterator; + using const_iterator = typename ht::const_iterator; + + + /* + * Constructors + */ + bhopscotch_set() : bhopscotch_set(ht::DEFAULT_INIT_BUCKETS_SIZE) { + } + + explicit bhopscotch_set(size_type bucket_count, + const Hash& hash = Hash(), + const KeyEqual& equal = KeyEqual(), + const Allocator& alloc = Allocator(), + const Compare& comp = Compare()) : + m_ht(bucket_count, hash, equal, alloc, ht::DEFAULT_MAX_LOAD_FACTOR, comp) + { + } + + bhopscotch_set(size_type bucket_count, + const Allocator& alloc) : bhopscotch_set(bucket_count, Hash(), KeyEqual(), alloc) + { + } + + bhopscotch_set(size_type bucket_count, + const Hash& hash, + const Allocator& alloc) : bhopscotch_set(bucket_count, hash, KeyEqual(), alloc) + { + } + + explicit bhopscotch_set(const Allocator& alloc) : bhopscotch_set(ht::DEFAULT_INIT_BUCKETS_SIZE, alloc) { + } + + template + 
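// ---------------------------------------------------------------------------
// [Editor's note] Illustrative sketch, not part of the upstream tsl sources.
// With the default template arguments for std::string keys, Hash/KeyEqual/
// Compare are std::hash/std::equal_to/std::less, which are mutually coherent
// as the class comment requires; overflowing elements then land in a std::set,
// giving the O(log n) worst-case lookups described above. The sample strings,
// function name and include path are assumptions for the example.
#include <string>
#include "tsl/bhopscotch_set.h"

inline bool example_bhopscotch_set_basic_usage() {
    tsl::bhopscotch_set<std::string> samples;
    samples.insert("sample_a");
    samples.emplace("sample_b");
    return samples.count("sample_a") == 1 && samples.size() == 2;
}
// ---------------------------------------------------------------------------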
bhopscotch_set(InputIt first, InputIt last, + size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, + const Hash& hash = Hash(), + const KeyEqual& equal = KeyEqual(), + const Allocator& alloc = Allocator()) : bhopscotch_set(bucket_count, hash, equal, alloc) + { + insert(first, last); + } + + template + bhopscotch_set(InputIt first, InputIt last, + size_type bucket_count, + const Allocator& alloc) : bhopscotch_set(first, last, bucket_count, Hash(), KeyEqual(), alloc) + { + } + + template + bhopscotch_set(InputIt first, InputIt last, + size_type bucket_count, + const Hash& hash, + const Allocator& alloc) : bhopscotch_set(first, last, bucket_count, hash, KeyEqual(), alloc) + { + } + + bhopscotch_set(std::initializer_list init, + size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, + const Hash& hash = Hash(), + const KeyEqual& equal = KeyEqual(), + const Allocator& alloc = Allocator()) : + bhopscotch_set(init.begin(), init.end(), bucket_count, hash, equal, alloc) + { + } + + bhopscotch_set(std::initializer_list init, + size_type bucket_count, + const Allocator& alloc) : + bhopscotch_set(init.begin(), init.end(), bucket_count, Hash(), KeyEqual(), alloc) + { + } + + bhopscotch_set(std::initializer_list init, + size_type bucket_count, + const Hash& hash, + const Allocator& alloc) : + bhopscotch_set(init.begin(), init.end(), bucket_count, hash, KeyEqual(), alloc) + { + } + + + bhopscotch_set& operator=(std::initializer_list ilist) { + m_ht.clear(); + + m_ht.reserve(ilist.size()); + m_ht.insert(ilist.begin(), ilist.end()); + + return *this; + } + + allocator_type get_allocator() const { return m_ht.get_allocator(); } + + + /* + * Iterators + */ + iterator begin() noexcept { return m_ht.begin(); } + const_iterator begin() const noexcept { return m_ht.begin(); } + const_iterator cbegin() const noexcept { return m_ht.cbegin(); } + + iterator end() noexcept { return m_ht.end(); } + const_iterator end() const noexcept { return m_ht.end(); } + const_iterator cend() const noexcept { return m_ht.cend(); } + + + /* + * Capacity + */ + bool empty() const noexcept { return m_ht.empty(); } + size_type size() const noexcept { return m_ht.size(); } + size_type max_size() const noexcept { return m_ht.max_size(); } + + /* + * Modifiers + */ + void clear() noexcept { m_ht.clear(); } + + + + + std::pair insert(const value_type& value) { return m_ht.insert(value); } + std::pair insert(value_type&& value) { return m_ht.insert(std::move(value)); } + + iterator insert(const_iterator hint, const value_type& value) { return m_ht.insert(hint, value); } + iterator insert(const_iterator hint, value_type&& value) { return m_ht.insert(hint, std::move(value)); } + + template + void insert(InputIt first, InputIt last) { m_ht.insert(first, last); } + void insert(std::initializer_list ilist) { m_ht.insert(ilist.begin(), ilist.end()); } + + + + + /** + * Due to the way elements are stored, emplace will need to move or copy the key-value once. + * The method is equivalent to insert(value_type(std::forward(args)...)); + * + * Mainly here for compatibility with the std::unordered_map interface. + */ + template + std::pair emplace(Args&&... args) { return m_ht.emplace(std::forward(args)...); } + + + + + /** + * Due to the way elements are stored, emplace_hint will need to move or copy the key-value once. + * The method is equivalent to insert(hint, value_type(std::forward(args)...)); + * + * Mainly here for compatibility with the std::unordered_map interface. + */ + template + iterator emplace_hint(const_iterator hint, Args&&... 
args) { + return m_ht.emplace_hint(hint, std::forward(args)...); + } + + + + + iterator erase(iterator pos) { return m_ht.erase(pos); } + iterator erase(const_iterator pos) { return m_ht.erase(pos); } + iterator erase(const_iterator first, const_iterator last) { return m_ht.erase(first, last); } + size_type erase(const key_type& key) { return m_ht.erase(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup to the value if you already have the hash. + */ + size_type erase(const key_type& key, std::size_t precalculated_hash) { + return m_ht.erase(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent + * and Compare::is_transparent exist. + * If so, K must be hashable and comparable to Key. + */ + template::value && has_is_transparent::value>::type* = nullptr> + size_type erase(const K& key) { return m_ht.erase(key); } + + /** + * @copydoc erase(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup to the value if you already have the hash. + */ + template::value && has_is_transparent::value>::type* = nullptr> + size_type erase(const K& key, std::size_t precalculated_hash) { return m_ht.erase(key, precalculated_hash); } + + + + + void swap(bhopscotch_set& other) { other.m_ht.swap(m_ht); } + + + /* + * Lookup + */ + size_type count(const Key& key) const { return m_ht.count(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. + */ + size_type count(const Key& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); } + + /** + * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent + * and Compare::is_transparent exist. + * If so, K must be hashable and comparable to Key. + */ + template::value && has_is_transparent::value>::type* = nullptr> + size_type count(const K& key) const { return m_ht.count(key); } + + /** + * @copydoc count(const K& key) const + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. + */ + template::value && has_is_transparent::value>::type* = nullptr> + size_type count(const K& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); } + + + + + iterator find(const Key& key) { return m_ht.find(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. 
+ */ + iterator find(const Key& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); } + + const_iterator find(const Key& key) const { return m_ht.find(key); } + + /** + * @copydoc find(const Key& key, std::size_t precalculated_hash) + */ + const_iterator find(const Key& key, std::size_t precalculated_hash) const { return m_ht.find(key, precalculated_hash); } + + /** + * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent + * and Compare::is_transparent exist. + * If so, K must be hashable and comparable to Key. + */ + template::value && has_is_transparent::value>::type* = nullptr> + iterator find(const K& key) { return m_ht.find(key); } + + /** + * @copydoc find(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. + */ + template::value && has_is_transparent::value>::type* = nullptr> + iterator find(const K& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); } + + /** + * @copydoc find(const K& key) + */ + template::value && has_is_transparent::value>::type* = nullptr> + const_iterator find(const K& key) const { return m_ht.find(key); } + + /** + * @copydoc find(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. + */ + template::value && has_is_transparent::value>::type* = nullptr> + const_iterator find(const K& key, std::size_t precalculated_hash) const { return m_ht.find(key, precalculated_hash); } + + + + + std::pair equal_range(const Key& key) { return m_ht.equal_range(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. + */ + std::pair equal_range(const Key& key, std::size_t precalculated_hash) { + return m_ht.equal_range(key, precalculated_hash); + } + + std::pair equal_range(const Key& key) const { return m_ht.equal_range(key); } + + /** + * @copydoc equal_range(const Key& key, std::size_t precalculated_hash) + */ + std::pair equal_range(const Key& key, std::size_t precalculated_hash) const { + return m_ht.equal_range(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent + * and Compare::is_transparent exist. + * If so, K must be hashable and comparable to Key. + */ + template::value && has_is_transparent::value>::type* = nullptr> + std::pair equal_range(const K& key) { return m_ht.equal_range(key); } + + /** + * @copydoc equal_range(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. 
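// ---------------------------------------------------------------------------
// [Editor's note] Illustrative sketch, not part of the upstream tsl sources.
// It shows the heterogeneous overloads documented above: they are enabled only
// when KeyEqual::is_transparent and Compare::is_transparent exist, and the
// probe type K must also be accepted by the hash functor. std::equal_to<> and
// std::less<> both define is_transparent; the content hash below is an
// assumption, written so that a C string and a std::string with the same
// characters hash identically.
#include <cstddef>
#include <functional>
#include <string>
#include "tsl/bhopscotch_set.h"

struct example_content_hash {
    // FNV-1a over the characters (64-bit constants, truncated if size_t is smaller).
    std::size_t operator()(const char* s) const {
        std::size_t h = static_cast<std::size_t>(1469598103934665603ull);
        for (; *s != '\0'; ++s) {
            h ^= static_cast<unsigned char>(*s);
            h *= static_cast<std::size_t>(1099511628211ull);
        }
        return h;
    }
    std::size_t operator()(const std::string& s) const { return (*this)(s.c_str()); }
};

inline bool example_transparent_lookup() {
    tsl::bhopscotch_set<std::string, example_content_hash,
                        std::equal_to<>, std::less<>> names;
    names.insert("ATCGATCG");
    const char* probe = "ATCGATCG";
    return names.find(probe) != names.end();   // no temporary std::string is built
}
// ---------------------------------------------------------------------------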
+ */ + template::value && has_is_transparent::value>::type* = nullptr> + std::pair equal_range(const K& key, std::size_t precalculated_hash) { + return m_ht.equal_range(key, precalculated_hash); + } + + /** + * @copydoc equal_range(const K& key) + */ + template::value && has_is_transparent::value>::type* = nullptr> + std::pair equal_range(const K& key) const { return m_ht.equal_range(key); } + + /** + * @copydoc equal_range(const K& key, std::size_t precalculated_hash) + */ + template::value && has_is_transparent::value>::type* = nullptr> + std::pair equal_range(const K& key, std::size_t precalculated_hash) const { + return m_ht.equal_range(key, precalculated_hash); + } + + + + + /* + * Bucket interface + */ + size_type bucket_count() const { return m_ht.bucket_count(); } + size_type max_bucket_count() const { return m_ht.max_bucket_count(); } + + + /* + * Hash policy + */ + float load_factor() const { return m_ht.load_factor(); } + float max_load_factor() const { return m_ht.max_load_factor(); } + void max_load_factor(float ml) { m_ht.max_load_factor(ml); } + + void rehash(size_type count_) { m_ht.rehash(count_); } + void reserve(size_type count_) { m_ht.reserve(count_); } + + + /* + * Observers + */ + hasher hash_function() const { return m_ht.hash_function(); } + key_equal key_eq() const { return m_ht.key_eq(); } + key_compare key_comp() const { return m_ht.key_comp(); } + + + /* + * Other + */ + + /** + * Convert a const_iterator to an iterator. + */ + iterator mutable_iterator(const_iterator pos) { + return m_ht.mutable_iterator(pos); + } + + size_type overflow_size() const noexcept { return m_ht.overflow_size(); } + + friend bool operator==(const bhopscotch_set& lhs, const bhopscotch_set& rhs) { + if(lhs.size() != rhs.size()) { + return false; + } + + for(const auto& element_lhs : lhs) { + const auto it_element_rhs = rhs.find(element_lhs); + if(it_element_rhs == rhs.cend()) { + return false; + } + } + + return true; + } + + friend bool operator!=(const bhopscotch_set& lhs, const bhopscotch_set& rhs) { + return !operator==(lhs, rhs); + } + + friend void swap(bhopscotch_set& lhs, bhopscotch_set& rhs) { + lhs.swap(rhs); + } + +private: + ht m_ht; +}; + + +/** + * Same as `tsl::bhopscotch_set`. + */ +template, + class KeyEqual = std::equal_to, + class Compare = std::less, + class Allocator = std::allocator, + unsigned int NeighborhoodSize = 62, + bool StoreHash = false> +using bhopscotch_pg_set = bhopscotch_set; + +} // end namespace tsl + +#endif diff --git a/include/tsl/hopscotch_growth_policy.h b/include/tsl/hopscotch_growth_policy.h new file mode 100644 index 0000000..65b1845 --- /dev/null +++ b/include/tsl/hopscotch_growth_policy.h @@ -0,0 +1,295 @@ +/** + * MIT License + * + * Copyright (c) 2018 Tessil + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef TSL_HOPSCOTCH_GROWTH_POLICY_H +#define TSL_HOPSCOTCH_GROWTH_POLICY_H + + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace tsl { +namespace hh { + +/** + * Grow the hash table by a factor of GrowthFactor keeping the bucket count to a power of two. It allows + * the table to use a mask operation instead of a modulo operation to map a hash to a bucket. + * + * GrowthFactor must be a power of two >= 2. + */ +template +class power_of_two_growth_policy { +public: + /** + * Called on the hash table creation and on rehash. The number of buckets for the table is passed in parameter. + * This number is a minimum, the policy may update this value with a higher value if needed (but not lower). + * + * If 0 is given, min_bucket_count_in_out must still be 0 after the policy creation and + * bucket_for_hash must always return 0 in this case. + */ + explicit power_of_two_growth_policy(std::size_t& min_bucket_count_in_out) { + if(min_bucket_count_in_out > max_bucket_count()) { + throw std::length_error("The hash table exceeds its maxmimum size."); + } + + if(min_bucket_count_in_out > 0) { + min_bucket_count_in_out = round_up_to_power_of_two(min_bucket_count_in_out); + m_mask = min_bucket_count_in_out - 1; + } + else { + m_mask = 0; + } + } + + /** + * Return the bucket [0, bucket_count()) to which the hash belongs. + * If bucket_count() is 0, it must always return 0. + */ + std::size_t bucket_for_hash(std::size_t hash) const noexcept { + return hash & m_mask; + } + + /** + * Return the bucket count to use when the bucket array grows on rehash. + */ + std::size_t next_bucket_count() const { + if((m_mask + 1) > max_bucket_count() / GrowthFactor) { + throw std::length_error("The hash table exceeds its maxmimum size."); + } + + return (m_mask + 1) * GrowthFactor; + } + + /** + * Return the maximum number of buckets supported by the policy. + */ + std::size_t max_bucket_count() const { + // Largest power of two. + return (std::numeric_limits::max() / 2) + 1; + } + + /** + * Reset the growth policy as if it was created with a bucket count of 0. + * After a clear, the policy must always return 0 when bucket_for_hash is called. + */ + void clear() noexcept { + m_mask = 0; + } + +private: + static std::size_t round_up_to_power_of_two(std::size_t value) { + if(is_power_of_two(value)) { + return value; + } + + if(value == 0) { + return 1; + } + + --value; + for(std::size_t i = 1; i < sizeof(std::size_t) * CHAR_BIT; i *= 2) { + value |= value >> i; + } + + return value + 1; + } + + static constexpr bool is_power_of_two(std::size_t value) { + return value != 0 && (value & (value - 1)) == 0; + } + +private: + static_assert(is_power_of_two(GrowthFactor) && GrowthFactor >= 2, "GrowthFactor must be a power of two >= 2."); + + std::size_t m_mask; +}; + + +/** + * Grow the hash table by GrowthFactor::num / GrowthFactor::den and use a modulo to map a hash + * to a bucket. Slower but it can be useful if you want a slower growth. 
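// ---------------------------------------------------------------------------
// [Editor's note] Minimal illustration of the masking trick described above;
// not part of the upstream sources. For a power-of-two bucket count n,
// (hash & (n - 1)) selects the same bucket as (hash % n) but avoids the
// division, which is what bucket_for_hash() does with m_mask.
#include <cassert>
#include <cstddef>

inline void example_power_of_two_mask() {
    const std::size_t bucket_count = 1024;         // a power of two
    const std::size_t mask = bucket_count - 1;     // 0x3FF
    const std::size_t hash = static_cast<std::size_t>(0x9E3779B97F4A7C15ull);
    assert((hash & mask) == (hash % bucket_count));
}
// ---------------------------------------------------------------------------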
+ */ +template> +class mod_growth_policy { +public: + explicit mod_growth_policy(std::size_t& min_bucket_count_in_out) { + if(min_bucket_count_in_out > max_bucket_count()) { + throw std::length_error("The hash table exceeds its maxmimum size."); + } + + if(min_bucket_count_in_out > 0) { + m_mod = min_bucket_count_in_out; + } + else { + m_mod = 1; + } + } + + std::size_t bucket_for_hash(std::size_t hash) const noexcept { + return hash % m_mod; + } + + std::size_t next_bucket_count() const { + if(m_mod == max_bucket_count()) { + throw std::length_error("The hash table exceeds its maxmimum size."); + } + + const double next_bucket_count = std::ceil(double(m_mod) * REHASH_SIZE_MULTIPLICATION_FACTOR); + if(!std::isnormal(next_bucket_count)) { + throw std::length_error("The hash table exceeds its maxmimum size."); + } + + if(next_bucket_count > double(max_bucket_count())) { + return max_bucket_count(); + } + else { + return std::size_t(next_bucket_count); + } + } + + std::size_t max_bucket_count() const { + return MAX_BUCKET_COUNT; + } + + void clear() noexcept { + m_mod = 1; + } + +private: + static constexpr double REHASH_SIZE_MULTIPLICATION_FACTOR = 1.0 * GrowthFactor::num / GrowthFactor::den; + static const std::size_t MAX_BUCKET_COUNT = + std::size_t(double( + std::numeric_limits::max() / REHASH_SIZE_MULTIPLICATION_FACTOR + )); + + static_assert(REHASH_SIZE_MULTIPLICATION_FACTOR >= 1.1, "Growth factor should be >= 1.1."); + + std::size_t m_mod; +}; + + + +namespace detail { + +static constexpr const std::array PRIMES = {{ + 1ul, 5ul, 17ul, 29ul, 37ul, 53ul, 67ul, 79ul, 97ul, 131ul, 193ul, 257ul, 389ul, 521ul, 769ul, 1031ul, + 1543ul, 2053ul, 3079ul, 6151ul, 12289ul, 24593ul, 49157ul, 98317ul, 196613ul, 393241ul, 786433ul, + 1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul, 50331653ul, 100663319ul, 201326611ul, + 402653189ul, 805306457ul, 1610612741ul, 3221225473ul, 4294967291ul +}}; + +template +static constexpr std::size_t mod(std::size_t hash) { return hash % PRIMES[IPrime]; } + +// MOD_PRIME[iprime](hash) returns hash % PRIMES[iprime]. This table allows for faster modulo as the +// compiler can optimize the modulo code better with a constant known at the compilation. +static constexpr const std::array MOD_PRIME = {{ + &mod<0>, &mod<1>, &mod<2>, &mod<3>, &mod<4>, &mod<5>, &mod<6>, &mod<7>, &mod<8>, &mod<9>, &mod<10>, + &mod<11>, &mod<12>, &mod<13>, &mod<14>, &mod<15>, &mod<16>, &mod<17>, &mod<18>, &mod<19>, &mod<20>, + &mod<21>, &mod<22>, &mod<23>, &mod<24>, &mod<25>, &mod<26>, &mod<27>, &mod<28>, &mod<29>, &mod<30>, + &mod<31>, &mod<32>, &mod<33>, &mod<34>, &mod<35>, &mod<36>, &mod<37> , &mod<38>, &mod<39> +}}; + +} + +/** + * Grow the hash table by using prime numbers as bucket count. Slower than tsl::hh::power_of_two_growth_policy in + * general but will probably distribute the values around better in the buckets with a poor hash function. + * + * To allow the compiler to optimize the modulo operation, a lookup table is used with constant primes numbers. + * + * With a switch the code would look like: + * \code + * switch(iprime) { // iprime is the current prime of the hash table + * case 0: hash % 5ul; + * break; + * case 1: hash % 17ul; + * break; + * case 2: hash % 29ul; + * break; + * ... + * } + * \endcode + * + * Due to the constant variable in the modulo the compiler is able to optimize the operation + * by a series of multiplications, substractions and shifts. 
+ * + * The 'hash % 5' could become something like 'hash - (hash * 0xCCCCCCCD) >> 34) * 5' in a 64 bits environement. + */ +class prime_growth_policy { +public: + explicit prime_growth_policy(std::size_t& min_bucket_count_in_out) { + auto it_prime = std::lower_bound(detail::PRIMES.begin(), + detail::PRIMES.end(), min_bucket_count_in_out); + if(it_prime == detail::PRIMES.end()) { + throw std::length_error("The hash table exceeds its maxmimum size."); + } + + m_iprime = static_cast(std::distance(detail::PRIMES.begin(), it_prime)); + if(min_bucket_count_in_out > 0) { + min_bucket_count_in_out = *it_prime; + } + else { + min_bucket_count_in_out = 0; + } + } + + std::size_t bucket_for_hash(std::size_t hash) const noexcept { + return detail::MOD_PRIME[m_iprime](hash); + } + + std::size_t next_bucket_count() const { + if(m_iprime + 1 >= detail::PRIMES.size()) { + throw std::length_error("The hash table exceeds its maxmimum size."); + } + + return detail::PRIMES[m_iprime + 1]; + } + + std::size_t max_bucket_count() const { + return detail::PRIMES.back(); + } + + void clear() noexcept { + m_iprime = 0; + } + +private: + unsigned int m_iprime; + + static_assert(std::numeric_limits::max() >= detail::PRIMES.size(), + "The type of m_iprime is not big enough."); +}; + +} +} + +#endif diff --git a/include/tsl/hopscotch_hash.h b/include/tsl/hopscotch_hash.h new file mode 100644 index 0000000..1031c0c --- /dev/null +++ b/include/tsl/hopscotch_hash.h @@ -0,0 +1,1801 @@ +/** + * MIT License + * + * Copyright (c) 2017 Tessil + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef TSL_HOPSCOTCH_HASH_H +#define TSL_HOPSCOTCH_HASH_H + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "hopscotch_growth_policy.h" + + + +#if (defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9)) +#define TSL_NO_RANGE_ERASE_WITH_CONST_ITERATOR +#endif + + + +/* + * Only activate tsl_assert if TSL_DEBUG is defined. + * This way we avoid the performance hit when NDEBUG is not defined with assert as tsl_assert is used a lot + * (people usually compile with "-O3" and not "-O3 -DNDEBUG"). 
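// ---------------------------------------------------------------------------
// [Editor's note] Not part of the upstream sources. As the comment above
// explains, the internal tsl_assert() checks are compiled out unless TSL_DEBUG
// is defined, so a debugging build could enable them by defining the macro
// before any tsl header is included, e.g.
//
//     #define TSL_DEBUG                 // turns tsl_assert(expr) into assert(expr)
//     #include "tsl/hopscotch_hash.h"
//
// or, equivalently, by passing a -DTSL_DEBUG definition on the compiler
// command line (how that is wired into the build is left as an assumption).
// ---------------------------------------------------------------------------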
+ */ +#ifndef tsl_assert + #ifdef TSL_DEBUG + #define tsl_assert(expr) assert(expr) + #else + #define tsl_assert(expr) (static_cast(0)) + #endif +#endif + +namespace tsl { + +namespace detail_hopscotch_hash { + + +template +struct make_void { + using type = void; +}; + + +template +struct has_is_transparent : std::false_type { +}; + +template +struct has_is_transparent::type> : std::true_type { +}; + + +template +struct has_key_compare : std::false_type { +}; + +template +struct has_key_compare::type> : std::true_type { +}; + + +template +struct is_power_of_two_policy: std::false_type { +}; + +template +struct is_power_of_two_policy>: std::true_type { +}; + + + + + +/* + * smallest_type_for_min_bits::type returns the smallest type that can fit MinBits. + */ +static const std::size_t SMALLEST_TYPE_MAX_BITS_SUPPORTED = 64; +template +class smallest_type_for_min_bits { +}; + +template +class smallest_type_for_min_bits 0) && (MinBits <= 8)>::type> { +public: + using type = std::uint_least8_t; +}; + +template +class smallest_type_for_min_bits 8) && (MinBits <= 16)>::type> { +public: + using type = std::uint_least16_t; +}; + +template +class smallest_type_for_min_bits 16) && (MinBits <= 32)>::type> { +public: + using type = std::uint_least32_t; +}; + +template +class smallest_type_for_min_bits 32) && (MinBits <= 64)>::type> { +public: + using type = std::uint_least64_t; +}; + + + +/* + * Each bucket may store up to three elements: + * - An aligned storage to store a value_type object with placement-new. + * - An (optional) hash of the value in the bucket. + * - An unsigned integer of type neighborhood_bitmap used to tell us which buckets in the neighborhood of the + * current bucket contain a value with a hash belonging to the current bucket. + * + * For a bucket 'bct', a bit 'i' (counting from 0 and from the least significant bit to the most significant) + * set to 1 means that the bucket 'bct + i' contains a value with a hash belonging to bucket 'bct'. + * The bits used for that, start from the third least significant bit. + * The two least significant bits are reserved: + * - The least significant bit is set to 1 if there is a value in the bucket storage. + * - The second least significant bit is set to 1 if there is an overflow. More than NeighborhoodSize values + * give the same hash, all overflow values are stored in the m_overflow_elements list of the map. + * + * Details regarding hopscotch hashing an its implementation can be found here: + * https://tessil.github.io/2016/08/29/hopscotch-hashing.html + */ +static const std::size_t NB_RESERVED_BITS_IN_NEIGHBORHOOD = 2; + + +using truncated_hash_type = std::uint_least32_t; + +/** + * Helper class that store a truncated hash if StoreHash is true and nothing otherwise. 
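// ---------------------------------------------------------------------------
// [Editor's note] Worked illustration of the m_neighborhood_infos layout
// described above; not part of the upstream sources. The value 0b10101 decodes
// as: bit 0 set -> this bucket's own storage holds a value; bit 1 clear -> no
// overflowed values for this hash bucket; neighborhood bits (from bit 2 up)
// 1,0,1 -> buckets 'bct + 0' and 'bct + 2' hold values whose hash maps to 'bct'.
#include <cstdint>

static constexpr std::uint8_t example_neighborhood_infos = 0b10101;
static_assert((example_neighborhood_infos & 1) != 0,
              "bit 0: the bucket storage is occupied");
static_assert((example_neighborhood_infos & 2) == 0,
              "bit 1: no overflow for this hash bucket");
static_assert(((example_neighborhood_infos >> 2) & 0x7) == 0b101,
              "neighbors at offsets 0 and 2, but not 1");
// ---------------------------------------------------------------------------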
+ */ +template +class hopscotch_bucket_hash { +public: + bool bucket_hash_equal(std::size_t /*hash*/) const noexcept { + return true; + } + + truncated_hash_type truncated_bucket_hash() const noexcept { + return 0; + } + +protected: + void copy_hash(const hopscotch_bucket_hash& ) noexcept { + } + + void set_hash(truncated_hash_type /*hash*/) noexcept { + } +}; + +template<> +class hopscotch_bucket_hash { +public: + bool bucket_hash_equal(std::size_t hash) const noexcept { + return m_hash == truncated_hash_type(hash); + } + + truncated_hash_type truncated_bucket_hash() const noexcept { + return m_hash; + } + +protected: + void copy_hash(const hopscotch_bucket_hash& bucket) noexcept { + m_hash = bucket.m_hash; + } + + void set_hash(truncated_hash_type hash) noexcept { + m_hash = hash; + } + +private: + truncated_hash_type m_hash; +}; + + +template +class hopscotch_bucket: public hopscotch_bucket_hash { +private: + static const std::size_t MIN_NEIGHBORHOOD_SIZE = 4; + static const std::size_t MAX_NEIGHBORHOOD_SIZE = SMALLEST_TYPE_MAX_BITS_SUPPORTED - NB_RESERVED_BITS_IN_NEIGHBORHOOD; + + + static_assert(NeighborhoodSize >= 4, "NeighborhoodSize should be >= 4."); + // We can't put a variable in the message, ensure coherence + static_assert(MIN_NEIGHBORHOOD_SIZE == 4, ""); + + static_assert(NeighborhoodSize <= 62, "NeighborhoodSize should be <= 62."); + // We can't put a variable in the message, ensure coherence + static_assert(MAX_NEIGHBORHOOD_SIZE == 62, ""); + + + static_assert(!StoreHash || NeighborhoodSize <= 30, + "NeighborhoodSize should be <= 30 if StoreHash is true."); + // We can't put a variable in the message, ensure coherence + static_assert(MAX_NEIGHBORHOOD_SIZE - 32 == 30, ""); + + using bucket_hash = hopscotch_bucket_hash; + +public: + using value_type = ValueType; + using neighborhood_bitmap = + typename smallest_type_for_min_bits::type; + + + hopscotch_bucket() noexcept: bucket_hash(), m_neighborhood_infos(0) { + tsl_assert(empty()); + } + + + hopscotch_bucket(const hopscotch_bucket& bucket) + noexcept(std::is_nothrow_copy_constructible::value): bucket_hash(bucket), + m_neighborhood_infos(0) + { + if(!bucket.empty()) { + ::new (static_cast(std::addressof(m_value))) value_type(bucket.value()); + } + + m_neighborhood_infos = bucket.m_neighborhood_infos; + } + + hopscotch_bucket(hopscotch_bucket&& bucket) + noexcept(std::is_nothrow_move_constructible::value) : bucket_hash(std::move(bucket)), + m_neighborhood_infos(0) + { + if(!bucket.empty()) { + ::new (static_cast(std::addressof(m_value))) value_type(std::move(bucket.value())); + } + + m_neighborhood_infos = bucket.m_neighborhood_infos; + } + + hopscotch_bucket& operator=(const hopscotch_bucket& bucket) + noexcept(std::is_nothrow_copy_constructible::value) + { + if(this != &bucket) { + remove_value(); + + bucket_hash::operator=(bucket); + if(!bucket.empty()) { + ::new (static_cast(std::addressof(m_value))) value_type(bucket.value()); + } + + m_neighborhood_infos = bucket.m_neighborhood_infos; + } + + return *this; + } + + hopscotch_bucket& operator=(hopscotch_bucket&& ) = delete; + + ~hopscotch_bucket() noexcept { + if(!empty()) { + destroy_value(); + } + } + + neighborhood_bitmap neighborhood_infos() const noexcept { + return neighborhood_bitmap(m_neighborhood_infos >> NB_RESERVED_BITS_IN_NEIGHBORHOOD); + } + + void set_overflow(bool has_overflow) noexcept { + if(has_overflow) { + m_neighborhood_infos = neighborhood_bitmap(m_neighborhood_infos | 2); + } + else { + m_neighborhood_infos = neighborhood_bitmap(m_neighborhood_infos 
& ~2); + } + } + + bool has_overflow() const noexcept { + return (m_neighborhood_infos & 2) != 0; + } + + bool empty() const noexcept { + return (m_neighborhood_infos & 1) == 0; + } + + void toggle_neighbor_presence(std::size_t ineighbor) noexcept { + tsl_assert(ineighbor <= NeighborhoodSize); + m_neighborhood_infos = neighborhood_bitmap( + m_neighborhood_infos ^ (1ull << (ineighbor + NB_RESERVED_BITS_IN_NEIGHBORHOOD))); + } + + bool check_neighbor_presence(std::size_t ineighbor) const noexcept { + tsl_assert(ineighbor <= NeighborhoodSize); + if(((m_neighborhood_infos >> (ineighbor + NB_RESERVED_BITS_IN_NEIGHBORHOOD)) & 1) == 1) { + return true; + } + + return false; + } + + value_type& value() noexcept { + tsl_assert(!empty()); + return *reinterpret_cast(std::addressof(m_value)); + } + + const value_type& value() const noexcept { + tsl_assert(!empty()); + return *reinterpret_cast(std::addressof(m_value)); + } + + template + void set_value_of_empty_bucket(truncated_hash_type hash, Args&&... value_type_args) { + tsl_assert(empty()); + + ::new (static_cast(std::addressof(m_value))) value_type(std::forward(value_type_args)...); + set_empty(false); + this->set_hash(hash); + } + + void swap_value_into_empty_bucket(hopscotch_bucket& empty_bucket) { + tsl_assert(empty_bucket.empty()); + if(!empty()) { + ::new (static_cast(std::addressof(empty_bucket.m_value))) value_type(std::move(value())); + empty_bucket.copy_hash(*this); + empty_bucket.set_empty(false); + + destroy_value(); + set_empty(true); + } + } + + void remove_value() noexcept { + if(!empty()) { + destroy_value(); + set_empty(true); + } + } + + void clear() noexcept { + if(!empty()) { + destroy_value(); + } + + m_neighborhood_infos = 0; + tsl_assert(empty()); + } + + static std::size_t max_size() noexcept { + if(StoreHash) { + return std::numeric_limits::max(); + } + else { + return std::numeric_limits::max(); + } + } + + static truncated_hash_type truncate_hash(std::size_t hash) noexcept { + return truncated_hash_type(hash); + } + +private: + void set_empty(bool is_empty) noexcept { + if(is_empty) { + m_neighborhood_infos = neighborhood_bitmap(m_neighborhood_infos & ~1); + } + else { + m_neighborhood_infos = neighborhood_bitmap(m_neighborhood_infos | 1); + } + } + + void destroy_value() noexcept { + tsl_assert(!empty()); + value().~value_type(); + } + +private: + using storage = typename std::aligned_storage::type; + + neighborhood_bitmap m_neighborhood_infos; + storage m_value; +}; + + +/** + * Internal common class used by (b)hopscotch_map and (b)hopscotch_set. + * + * ValueType is what will be stored by hopscotch_hash (usually std::pair for a map and Key for a set). + * + * KeySelect should be a FunctionObject which takes a ValueType in parameter and returns a reference to the key. + * + * ValueSelect should be a FunctionObject which takes a ValueType in parameter and returns a reference to the value. + * ValueSelect should be void if there is no value (in a set for example). + * + * OverflowContainer will be used as containers for overflown elements. Usually it should be a list + * or a set/map. 
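// ---------------------------------------------------------------------------
// [Editor's note] Not part of the upstream sources; a rough sketch of the kind
// of functors the map wrapper is expected to hand to hopscotch_hash as
// KeySelect/ValueSelect (the set wrapper shown earlier passes only a KeySelect
// and uses void for ValueSelect). The names below are illustrative only.
#include <utility>

template<class Key, class T>
struct example_map_key_select {
    using key_type = Key;
    const key_type& operator()(const std::pair<Key, T>& p) const { return p.first; }
    key_type& operator()(std::pair<Key, T>& p) { return p.first; }
};

template<class Key, class T>
struct example_map_value_select {
    using value_type = T;
    const value_type& operator()(const std::pair<Key, T>& p) const { return p.second; }
    value_type& operator()(std::pair<Key, T>& p) { return p.second; }
};
// ---------------------------------------------------------------------------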
+ */ +template +class hopscotch_hash: private Hash, private KeyEqual, private GrowthPolicy { +private: + template + using has_mapped_type = typename std::integral_constant::value>; + + static_assert(noexcept(std::declval().bucket_for_hash(std::size_t(0))), "GrowthPolicy::bucket_for_hash must be noexcept."); + static_assert(noexcept(std::declval().clear()), "GrowthPolicy::clear must be noexcept."); + +public: + template + class hopscotch_iterator; + + using key_type = typename KeySelect::key_type; + using value_type = ValueType; + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + using hasher = Hash; + using key_equal = KeyEqual; + using allocator_type = Allocator; + using reference = value_type&; + using const_reference = const value_type&; + using pointer = value_type*; + using const_pointer = const value_type*; + using iterator = hopscotch_iterator; + using const_iterator = hopscotch_iterator; + +private: + using hopscotch_bucket = tsl::detail_hopscotch_hash::hopscotch_bucket; + using neighborhood_bitmap = typename hopscotch_bucket::neighborhood_bitmap; + + using buckets_allocator = typename std::allocator_traits::template rebind_alloc; + using buckets_container_type = std::vector; + + using overflow_container_type = OverflowContainer; + + static_assert(std::is_same::value, + "OverflowContainer should have ValueType as type."); + + static_assert(std::is_same::value, + "Invalid allocator, not the same type as the value_type."); + + + using iterator_buckets = typename buckets_container_type::iterator; + using const_iterator_buckets = typename buckets_container_type::const_iterator; + + using iterator_overflow = typename overflow_container_type::iterator; + using const_iterator_overflow = typename overflow_container_type::const_iterator; + +public: + /** + * The `operator*()` and `operator->()` methods return a const reference and const pointer respectively to the + * stored value type. + * + * In case of a map, to get a modifiable reference to the value associated to a key (the `.second` in the + * stored pair), you have to call `value()`. 
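// ---------------------------------------------------------------------------
// [Editor's note] Not part of the upstream sources. As documented above,
// dereferencing an iterator yields a const value_type, so for a map the mapped
// value is updated through value() rather than through it->second. The map
// type, key and include path are assumptions for the example.
#include <cstdint>
#include <string>
#include "tsl/bhopscotch_map.h"

inline void example_update_through_iterator(
        tsl::bhopscotch_map<std::string, std::uint64_t>& m) {
    auto it = m.find("some_key");
    if (it != m.end()) {
        // it->second = 7;   // would not compile: *it is const
        it.value() = 7;      // supported way to modify the mapped value
    }
}
// ---------------------------------------------------------------------------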
+ */ + template + class hopscotch_iterator { + friend class hopscotch_hash; + private: + using iterator_bucket = typename std::conditional::type; + using iterator_overflow = typename std::conditional::type; + + + hopscotch_iterator(iterator_bucket buckets_iterator, iterator_bucket buckets_end_iterator, + iterator_overflow overflow_iterator) noexcept : + m_buckets_iterator(buckets_iterator), m_buckets_end_iterator(buckets_end_iterator), + m_overflow_iterator(overflow_iterator) + { + } + + public: + using iterator_category = std::forward_iterator_tag; + using value_type = const typename hopscotch_hash::value_type; + using difference_type = std::ptrdiff_t; + using reference = value_type&; + using pointer = value_type*; + + + hopscotch_iterator() noexcept { + } + + hopscotch_iterator(const hopscotch_iterator& other) noexcept : + m_buckets_iterator(other.m_buckets_iterator), m_buckets_end_iterator(other.m_buckets_end_iterator), + m_overflow_iterator(other.m_overflow_iterator) + { + } + + const typename hopscotch_hash::key_type& key() const { + if(m_buckets_iterator != m_buckets_end_iterator) { + return KeySelect()(m_buckets_iterator->value()); + } + + return KeySelect()(*m_overflow_iterator); + } + + template::value>::type* = nullptr> + typename std::conditional< + IsConst, + const typename U::value_type&, + typename U::value_type&>::type value() const + { + if(m_buckets_iterator != m_buckets_end_iterator) { + return U()(m_buckets_iterator->value()); + } + + return U()(*m_overflow_iterator); + } + + reference operator*() const { + if(m_buckets_iterator != m_buckets_end_iterator) { + return m_buckets_iterator->value(); + } + + return *m_overflow_iterator; + } + + pointer operator->() const { + if(m_buckets_iterator != m_buckets_end_iterator) { + return std::addressof(m_buckets_iterator->value()); + } + + return std::addressof(*m_overflow_iterator); + } + + hopscotch_iterator& operator++() { + if(m_buckets_iterator == m_buckets_end_iterator) { + ++m_overflow_iterator; + return *this; + } + + do { + ++m_buckets_iterator; + } while(m_buckets_iterator != m_buckets_end_iterator && m_buckets_iterator->empty()); + + return *this; + } + + hopscotch_iterator operator++(int) { + hopscotch_iterator tmp(*this); + ++*this; + + return tmp; + } + + friend bool operator==(const hopscotch_iterator& lhs, const hopscotch_iterator& rhs) { + return lhs.m_buckets_iterator == rhs.m_buckets_iterator && + lhs.m_overflow_iterator == rhs.m_overflow_iterator; + } + + friend bool operator!=(const hopscotch_iterator& lhs, const hopscotch_iterator& rhs) { + return !(lhs == rhs); + } + + private: + iterator_bucket m_buckets_iterator; + iterator_bucket m_buckets_end_iterator; + iterator_overflow m_overflow_iterator; + }; + +public: + template::value>::type* = nullptr> + hopscotch_hash(size_type bucket_count, + const Hash& hash, + const KeyEqual& equal, + const Allocator& alloc, + float max_load_factor) : Hash(hash), + KeyEqual(equal), + GrowthPolicy(bucket_count), + m_buckets(alloc), + m_overflow_elements(alloc), + m_first_or_empty_bucket(static_empty_bucket_ptr()), + m_nb_elements(0) + { + if(bucket_count > max_bucket_count()) { + throw std::length_error("The map exceeds its maxmimum size."); + } + + if(bucket_count > 0) { + static_assert(NeighborhoodSize - 1 > 0, ""); + + // Can't directly construct with the appropriate size in the initializer + // as m_buckets(bucket_count, alloc) is not supported by GCC 4.8 + m_buckets.resize(bucket_count + NeighborhoodSize - 1); + m_first_or_empty_bucket = m_buckets.data(); + } + + + 
this->max_load_factor(max_load_factor); + + + // Check in the constructor instead of outside of a function to avoi compilation issues + // when value_type is not complete. + static_assert(std::is_nothrow_move_constructible::value || + std::is_copy_constructible::value, + "value_type must be either copy constructible or nothrow move constructible."); + } + + template::value>::type* = nullptr> + hopscotch_hash(size_type bucket_count, + const Hash& hash, + const KeyEqual& equal, + const Allocator& alloc, + float max_load_factor, + const typename OC::key_compare& comp) : Hash(hash), + KeyEqual(equal), + GrowthPolicy(bucket_count), + m_buckets(alloc), + m_overflow_elements(comp, alloc), + m_first_or_empty_bucket(static_empty_bucket_ptr()), + m_nb_elements(0) + { + + if(bucket_count > max_bucket_count()) { + throw std::length_error("The map exceeds its maxmimum size."); + } + + if(bucket_count > 0) { + static_assert(NeighborhoodSize - 1 > 0, ""); + + // Can't directly construct with the appropriate size in the initializer + // as m_buckets(bucket_count, alloc) is not supported by GCC 4.8 + m_buckets.resize(bucket_count + NeighborhoodSize - 1); + m_first_or_empty_bucket = m_buckets.data(); + } + + + this->max_load_factor(max_load_factor); + + + // Check in the constructor instead of outside of a function to avoi compilation issues + // when value_type is not complete. + static_assert(std::is_nothrow_move_constructible::value || + std::is_copy_constructible::value, + "value_type must be either copy constructible or nothrow move constructible."); + } + + hopscotch_hash(const hopscotch_hash& other): + Hash(other), + KeyEqual(other), + GrowthPolicy(other), + m_buckets(other.m_buckets), + m_overflow_elements(other.m_overflow_elements), + m_first_or_empty_bucket(m_buckets.empty()?static_empty_bucket_ptr(): + m_buckets.data()), + m_nb_elements(other.m_nb_elements), + m_max_load_factor(other.m_max_load_factor), + m_max_load_threshold_rehash(other.m_max_load_threshold_rehash), + m_min_load_threshold_rehash(other.m_min_load_threshold_rehash) + { + } + + hopscotch_hash(hopscotch_hash&& other) + noexcept( + std::is_nothrow_move_constructible::value && + std::is_nothrow_move_constructible::value && + std::is_nothrow_move_constructible::value && + std::is_nothrow_move_constructible::value && + std::is_nothrow_move_constructible::value + ): + Hash(std::move(static_cast(other))), + KeyEqual(std::move(static_cast(other))), + GrowthPolicy(std::move(static_cast(other))), + m_buckets(std::move(other.m_buckets)), + m_overflow_elements(std::move(other.m_overflow_elements)), + m_first_or_empty_bucket(m_buckets.empty()?static_empty_bucket_ptr(): + m_buckets.data()), + m_nb_elements(other.m_nb_elements), + m_max_load_factor(other.m_max_load_factor), + m_max_load_threshold_rehash(other.m_max_load_threshold_rehash), + m_min_load_threshold_rehash(other.m_min_load_threshold_rehash) + { + other.GrowthPolicy::clear(); + other.m_buckets.clear(); + other.m_overflow_elements.clear(); + other.m_first_or_empty_bucket = static_empty_bucket_ptr(); + other.m_nb_elements = 0; + other.m_max_load_threshold_rehash = 0; + other.m_min_load_threshold_rehash = 0; + } + + hopscotch_hash& operator=(const hopscotch_hash& other) { + if(&other != this) { + Hash::operator=(other); + KeyEqual::operator=(other); + GrowthPolicy::operator=(other); + + m_buckets = other.m_buckets; + m_overflow_elements = other.m_overflow_elements; + m_first_or_empty_bucket = m_buckets.empty()?static_empty_bucket_ptr(): + m_buckets.data(); + m_nb_elements = 
other.m_nb_elements; + m_max_load_factor = other.m_max_load_factor; + m_max_load_threshold_rehash = other.m_max_load_threshold_rehash; + m_min_load_threshold_rehash = other.m_min_load_threshold_rehash; + } + + return *this; + } + + hopscotch_hash& operator=(hopscotch_hash&& other) { + other.swap(*this); + other.clear(); + + return *this; + } + + allocator_type get_allocator() const { + return m_buckets.get_allocator(); + } + + + /* + * Iterators + */ + iterator begin() noexcept { + auto begin = m_buckets.begin(); + while(begin != m_buckets.end() && begin->empty()) { + ++begin; + } + + return iterator(begin, m_buckets.end(), m_overflow_elements.begin()); + } + + const_iterator begin() const noexcept { + return cbegin(); + } + + const_iterator cbegin() const noexcept { + auto begin = m_buckets.cbegin(); + while(begin != m_buckets.cend() && begin->empty()) { + ++begin; + } + + return const_iterator(begin, m_buckets.cend(), m_overflow_elements.cbegin()); + } + + iterator end() noexcept { + return iterator(m_buckets.end(), m_buckets.end(), m_overflow_elements.end()); + } + + const_iterator end() const noexcept { + return cend(); + } + + const_iterator cend() const noexcept { + return const_iterator(m_buckets.cend(), m_buckets.cend(), m_overflow_elements.cend()); + } + + + /* + * Capacity + */ + bool empty() const noexcept { + return m_nb_elements == 0; + } + + size_type size() const noexcept { + return m_nb_elements; + } + + size_type max_size() const noexcept { + return hopscotch_bucket::max_size(); + } + + /* + * Modifiers + */ + void clear() noexcept { + for(auto& bucket: m_buckets) { + bucket.clear(); + } + + m_overflow_elements.clear(); + m_nb_elements = 0; + } + + + std::pair insert(const value_type& value) { + return insert_impl(value); + } + + template::value>::type* = nullptr> + std::pair insert(P&& value) { + return insert_impl(value_type(std::forward

<P>
(value))); + } + + std::pair insert(value_type&& value) { + return insert_impl(std::move(value)); + } + + + iterator insert(const_iterator hint, const value_type& value) { + if(hint != cend() && compare_keys(KeySelect()(*hint), KeySelect()(value))) { + return mutable_iterator(hint); + } + + return insert(value).first; + } + + template::value>::type* = nullptr> + iterator insert(const_iterator hint, P&& value) { + return emplace_hint(hint, std::forward

<P>
(value)); + } + + iterator insert(const_iterator hint, value_type&& value) { + if(hint != cend() && compare_keys(KeySelect()(*hint), KeySelect()(value))) { + return mutable_iterator(hint); + } + + return insert(std::move(value)).first; + } + + + template + void insert(InputIt first, InputIt last) { + if(std::is_base_of::iterator_category>::value) + { + const auto nb_elements_insert = std::distance(first, last); + const std::size_t nb_elements_in_buckets = m_nb_elements - m_overflow_elements.size(); + const std::size_t nb_free_buckets = m_max_load_threshold_rehash - nb_elements_in_buckets; + tsl_assert(m_nb_elements >= m_overflow_elements.size()); + tsl_assert(m_max_load_threshold_rehash >= nb_elements_in_buckets); + + if(nb_elements_insert > 0 && nb_free_buckets < std::size_t(nb_elements_insert)) { + reserve(nb_elements_in_buckets + std::size_t(nb_elements_insert)); + } + } + + for(; first != last; ++first) { + insert(*first); + } + } + + + template + std::pair insert_or_assign(const key_type& k, M&& obj) { + return insert_or_assign_impl(k, std::forward(obj)); + } + + template + std::pair insert_or_assign(key_type&& k, M&& obj) { + return insert_or_assign_impl(std::move(k), std::forward(obj)); + } + + + template + iterator insert_or_assign(const_iterator hint, const key_type& k, M&& obj) { + if(hint != cend() && compare_keys(KeySelect()(*hint), k)) { + auto it = mutable_iterator(hint); + it.value() = std::forward(obj); + + return it; + } + + return insert_or_assign(k, std::forward(obj)).first; + } + + template + iterator insert_or_assign(const_iterator hint, key_type&& k, M&& obj) { + if(hint != cend() && compare_keys(KeySelect()(*hint), k)) { + auto it = mutable_iterator(hint); + it.value() = std::forward(obj); + + return it; + } + + return insert_or_assign(std::move(k), std::forward(obj)).first; + } + + + template + std::pair emplace(Args&&... args) { + return insert(value_type(std::forward(args)...)); + } + + template + iterator emplace_hint(const_iterator hint, Args&&... args) { + return insert(hint, value_type(std::forward(args)...)); + } + + template + std::pair try_emplace(const key_type& k, Args&&... args) { + return try_emplace_impl(k, std::forward(args)...); + } + + template + std::pair try_emplace(key_type&& k, Args&&... args) { + return try_emplace_impl(std::move(k), std::forward(args)...); + } + + template + iterator try_emplace(const_iterator hint, const key_type& k, Args&&... args) { + if(hint != cend() && compare_keys(KeySelect()(*hint), k)) { + return mutable_iterator(hint); + } + + return try_emplace(k, std::forward(args)...).first; + } + + template + iterator try_emplace(const_iterator hint, key_type&& k, Args&&... args) { + if(hint != cend() && compare_keys(KeySelect()(*hint), k)) { + return mutable_iterator(hint); + } + + return try_emplace(std::move(k), std::forward(args)...).first; + } + + + /** + * Here to avoid `template size_type erase(const K& key)` being used when + * we use an iterator instead of a const_iterator. 
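// ---------------------------------------------------------------------------
// [Editor's note] Not part of the upstream sources. A small sketch of the
// difference between the insert_or_assign() and try_emplace() members defined
// above, written against the public map wrapper on the assumption that it
// forwards to these implementations as the upstream library does.
#include <cstdint>
#include <string>
#include "tsl/bhopscotch_map.h"

inline void example_insert_or_assign_vs_try_emplace(
        tsl::bhopscotch_map<std::string, std::uint64_t>& m) {
    m.insert_or_assign("k", 1);   // inserts "k" -> 1
    m.insert_or_assign("k", 2);   // key exists: mapped value overwritten with 2
    m.try_emplace("k", 3);        // key exists: nothing happens, value stays 2
    m.try_emplace("j", 4);        // key absent: constructs "j" -> 4 in place
}
// ---------------------------------------------------------------------------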
+ */ + iterator erase(iterator pos) { + return erase(const_iterator(pos)); + } + + iterator erase(const_iterator pos) { + const std::size_t ibucket_for_hash = bucket_for_hash(hash_key(pos.key())); + + if(pos.m_buckets_iterator != pos.m_buckets_end_iterator) { + auto it_bucket = m_buckets.begin() + std::distance(m_buckets.cbegin(), pos.m_buckets_iterator); + erase_from_bucket(*it_bucket, ibucket_for_hash); + + return ++iterator(it_bucket, m_buckets.end(), m_overflow_elements.begin()); + } + else { + auto it_next_overflow = erase_from_overflow(pos.m_overflow_iterator, ibucket_for_hash); + return iterator(m_buckets.end(), m_buckets.end(), it_next_overflow); + } + } + + iterator erase(const_iterator first, const_iterator last) { + if(first == last) { + return mutable_iterator(first); + } + + auto to_delete = erase(first); + while(to_delete != last) { + to_delete = erase(to_delete); + } + + return to_delete; + } + + template + size_type erase(const K& key) { + return erase(key, hash_key(key)); + } + + template + size_type erase(const K& key, std::size_t hash) { + const std::size_t ibucket_for_hash = bucket_for_hash(hash); + + hopscotch_bucket* bucket_found = find_in_buckets(key, hash, m_first_or_empty_bucket + ibucket_for_hash); + if(bucket_found != nullptr) { + erase_from_bucket(*bucket_found, ibucket_for_hash); + + return 1; + } + + if((m_first_or_empty_bucket + ibucket_for_hash)->has_overflow()) { + auto it_overflow = find_in_overflow(key); + if(it_overflow != m_overflow_elements.end()) { + erase_from_overflow(it_overflow, ibucket_for_hash); + + return 1; + } + } + + return 0; + } + + void swap(hopscotch_hash& other) { + using std::swap; + + swap(static_cast(*this), static_cast(other)); + swap(static_cast(*this), static_cast(other)); + swap(static_cast(*this), static_cast(other)); + swap(m_buckets, other.m_buckets); + swap(m_overflow_elements, other.m_overflow_elements); + swap(m_first_or_empty_bucket, other.m_first_or_empty_bucket); + swap(m_nb_elements, other.m_nb_elements); + swap(m_max_load_factor, other.m_max_load_factor); + swap(m_max_load_threshold_rehash, other.m_max_load_threshold_rehash); + swap(m_min_load_threshold_rehash, other.m_min_load_threshold_rehash); + } + + + /* + * Lookup + */ + template::value>::type* = nullptr> + typename U::value_type& at(const K& key) { + return at(key, hash_key(key)); + } + + template::value>::type* = nullptr> + typename U::value_type& at(const K& key, std::size_t hash) { + return const_cast(static_cast(this)->at(key, hash)); + } + + + template::value>::type* = nullptr> + const typename U::value_type& at(const K& key) const { + return at(key, hash_key(key)); + } + + template::value>::type* = nullptr> + const typename U::value_type& at(const K& key, std::size_t hash) const { + using T = typename U::value_type; + + const T* value = find_value_impl(key, hash, m_first_or_empty_bucket + bucket_for_hash(hash)); + if(value == nullptr) { + throw std::out_of_range("Couldn't find key."); + } + else { + return *value; + } + } + + + template::value>::type* = nullptr> + typename U::value_type& operator[](K&& key) { + using T = typename U::value_type; + + const std::size_t hash = hash_key(key); + const std::size_t ibucket_for_hash = bucket_for_hash(hash); + + T* value = find_value_impl(key, hash, m_first_or_empty_bucket + ibucket_for_hash); + if(value != nullptr) { + return *value; + } + else { + return insert_impl(ibucket_for_hash, hash, std::piecewise_construct, + std::forward_as_tuple(std::forward(key)), + std::forward_as_tuple()).first.value(); + } + } + + + 
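// ---------------------------------------------------------------------------
// [Editor's note] Not part of the upstream sources. The operator[] above is a
// find-or-insert: on a miss the mapped value is value-initialized (zero for an
// integer), exactly as in std::unordered_map. A quick illustration through the
// public map wrapper, whose behaviour is assumed to match upstream:
#include <cstdint>
#include <string>
#include "tsl/bhopscotch_map.h"

inline std::uint64_t example_operator_bracket() {
    tsl::bhopscotch_map<std::string, std::uint64_t> abundance;
    abundance["AAAA"] += 1;   // key absent: inserted as 0, then incremented to 1
    abundance["AAAA"] += 1;   // key present: incremented to 2
    return abundance["AAAA"]; // == 2
}
// ---------------------------------------------------------------------------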
template + size_type count(const K& key) const { + return count(key, hash_key(key)); + } + + template + size_type count(const K& key, std::size_t hash) const { + return count_impl(key, hash, m_first_or_empty_bucket + bucket_for_hash(hash)); + } + + + template + iterator find(const K& key) { + return find(key, hash_key(key)); + } + + template + iterator find(const K& key, std::size_t hash) { + return find_impl(key, hash, m_first_or_empty_bucket + bucket_for_hash(hash)); + } + + + template + const_iterator find(const K& key) const { + return find(key, hash_key(key)); + } + + template + const_iterator find(const K& key, std::size_t hash) const { + return find_impl(key, hash, m_first_or_empty_bucket + bucket_for_hash(hash)); + } + + + template + std::pair equal_range(const K& key) { + return equal_range(key, hash_key(key)); + } + + template + std::pair equal_range(const K& key, std::size_t hash) { + iterator it = find(key, hash); + return std::make_pair(it, (it == end())?it:std::next(it)); + } + + + template + std::pair equal_range(const K& key) const { + return equal_range(key, hash_key(key)); + } + + template + std::pair equal_range(const K& key, std::size_t hash) const { + const_iterator it = find(key, hash); + return std::make_pair(it, (it == cend())?it:std::next(it)); + } + + /* + * Bucket interface + */ + size_type bucket_count() const { + /* + * So that the last bucket can have NeighborhoodSize neighbors, the size of the bucket array is a little + * bigger than the real number of buckets when not empty. + * We could use some of the buckets at the beginning, but it is faster this way as we avoid extra checks. + */ + if(m_buckets.empty()) { + return 0; + } + + return m_buckets.size() - NeighborhoodSize + 1; + } + + size_type max_bucket_count() const { + const std::size_t max_bucket_count = std::min(GrowthPolicy::max_bucket_count(), m_buckets.max_size()); + return max_bucket_count - NeighborhoodSize + 1; + } + + + /* + * Hash policy + */ + float load_factor() const { + if(bucket_count() == 0) { + return 0; + } + + return float(m_nb_elements)/float(bucket_count()); + } + + float max_load_factor() const { + return m_max_load_factor; + } + + void max_load_factor(float ml) { + m_max_load_factor = std::max(0.1f, std::min(ml, 0.95f)); + m_max_load_threshold_rehash = size_type(float(bucket_count())*m_max_load_factor); + m_min_load_threshold_rehash = size_type(float(bucket_count())*MIN_LOAD_FACTOR_FOR_REHASH); + } + + void rehash(size_type count_) { + count_ = std::max(count_, size_type(std::ceil(float(size())/max_load_factor()))); + rehash_impl(count_); + } + + void reserve(size_type count_) { + rehash(size_type(std::ceil(float(count_)/max_load_factor()))); + } + + + /* + * Observers + */ + hasher hash_function() const { + return static_cast(*this); + } + + key_equal key_eq() const { + return static_cast(*this); + } + + /* + * Other + */ + iterator mutable_iterator(const_iterator pos) { + if(pos.m_buckets_iterator != pos.m_buckets_end_iterator) { + // Get a non-const iterator + auto it = m_buckets.begin() + std::distance(m_buckets.cbegin(), pos.m_buckets_iterator); + return iterator(it, m_buckets.end(), m_overflow_elements.begin()); + } + else { + // Get a non-const iterator + auto it = mutable_overflow_iterator(pos.m_overflow_iterator); + return iterator(m_buckets.end(), m_buckets.end(), it); + } + } + + size_type overflow_size() const noexcept { + return m_overflow_elements.size(); + } + + template::value>::type* = nullptr> + typename U::key_compare key_comp() const { + return 
m_overflow_elements.key_comp(); + } + + +private: + template + std::size_t hash_key(const K& key) const { + return Hash::operator()(key); + } + + template + bool compare_keys(const K1& key1, const K2& key2) const { + return KeyEqual::operator()(key1, key2); + } + + std::size_t bucket_for_hash(std::size_t hash) const { + const std::size_t bucket = GrowthPolicy::bucket_for_hash(hash); + tsl_assert(bucket < m_buckets.size() || (bucket == 0 && m_buckets.empty())); + + return bucket; + } + + template::value>::type* = nullptr> + void rehash_impl(size_type count_) { + hopscotch_hash new_map = new_hopscotch_hash(count_); + + if(!m_overflow_elements.empty()) { + new_map.m_overflow_elements.swap(m_overflow_elements); + new_map.m_nb_elements += new_map.m_overflow_elements.size(); + + for(const value_type& value : new_map.m_overflow_elements) { + const std::size_t ibucket_for_hash = new_map.bucket_for_hash(new_map.hash_key(KeySelect()(value))); + new_map.m_buckets[ibucket_for_hash].set_overflow(true); + } + } + + try { + const bool use_stored_hash = USE_STORED_HASH_ON_REHASH(new_map.bucket_count()); + for(auto it_bucket = m_buckets.begin(); it_bucket != m_buckets.end(); ++it_bucket) { + if(it_bucket->empty()) { + continue; + } + + const std::size_t hash = use_stored_hash? + it_bucket->truncated_bucket_hash(): + new_map.hash_key(KeySelect()(it_bucket->value())); + const std::size_t ibucket_for_hash = new_map.bucket_for_hash(hash); + + new_map.insert_impl(ibucket_for_hash, hash, std::move(it_bucket->value())); + + + erase_from_bucket(*it_bucket, bucket_for_hash(hash)); + } + } + /* + * The call to insert_impl may throw an exception if an element is added to the overflow + * list. Rollback the elements in this case. + */ + catch(...) { + m_overflow_elements.swap(new_map.m_overflow_elements); + + const bool use_stored_hash = USE_STORED_HASH_ON_REHASH(new_map.bucket_count()); + for(auto it_bucket = new_map.m_buckets.begin(); it_bucket != new_map.m_buckets.end(); ++it_bucket) { + if(it_bucket->empty()) { + continue; + } + + const std::size_t hash = use_stored_hash? + it_bucket->truncated_bucket_hash(): + hash_key(KeySelect()(it_bucket->value())); + const std::size_t ibucket_for_hash = bucket_for_hash(hash); + + // The elements we insert were not in the overflow list before the switch. + // They will not be go in the overflow list if we rollback the switch. + insert_impl(ibucket_for_hash, hash, std::move(it_bucket->value())); + } + + throw; + } + + new_map.swap(*this); + } + + template::value && + !std::is_nothrow_move_constructible::value>::type* = nullptr> + void rehash_impl(size_type count_) { + hopscotch_hash new_map = new_hopscotch_hash(count_); + + const bool use_stored_hash = USE_STORED_HASH_ON_REHASH(new_map.bucket_count()); + for(const hopscotch_bucket& bucket: m_buckets) { + if(bucket.empty()) { + continue; + } + + const std::size_t hash = use_stored_hash? 
+ bucket.truncated_bucket_hash(): + new_map.hash_key(KeySelect()(bucket.value())); + const std::size_t ibucket_for_hash = new_map.bucket_for_hash(hash); + + new_map.insert_impl(ibucket_for_hash, hash, bucket.value()); + } + + for(const value_type& value: m_overflow_elements) { + const std::size_t hash = new_map.hash_key(KeySelect()(value)); + const std::size_t ibucket_for_hash = new_map.bucket_for_hash(hash); + + new_map.insert_impl(ibucket_for_hash, hash, value); + } + + new_map.swap(*this); + } + +#ifdef TSL_NO_RANGE_ERASE_WITH_CONST_ITERATOR + iterator_overflow mutable_overflow_iterator(const_iterator_overflow it) { + return std::next(m_overflow_elements.begin(), std::distance(m_overflow_elements.cbegin(), it)); + } +#else + iterator_overflow mutable_overflow_iterator(const_iterator_overflow it) { + return m_overflow_elements.erase(it, it); + } +#endif + + // iterator is in overflow list + iterator_overflow erase_from_overflow(const_iterator_overflow pos, std::size_t ibucket_for_hash) { +#ifdef TSL_NO_RANGE_ERASE_WITH_CONST_ITERATOR + auto it_next = m_overflow_elements.erase(mutable_overflow_iterator(pos)); +#else + auto it_next = m_overflow_elements.erase(pos); +#endif + m_nb_elements--; + + + // Check if we can remove the overflow flag + tsl_assert(m_buckets[ibucket_for_hash].has_overflow()); + for(const value_type& value: m_overflow_elements) { + const std::size_t bucket_for_value = bucket_for_hash(hash_key(KeySelect()(value))); + if(bucket_for_value == ibucket_for_hash) { + return it_next; + } + } + + m_buckets[ibucket_for_hash].set_overflow(false); + return it_next; + } + + + /** + * bucket_for_value is the bucket in which the value is. + * ibucket_for_hash is the bucket where the value belongs. + */ + void erase_from_bucket(hopscotch_bucket& bucket_for_value, std::size_t ibucket_for_hash) noexcept { + const std::size_t ibucket_for_value = std::distance(m_buckets.data(), &bucket_for_value); + tsl_assert(ibucket_for_value >= ibucket_for_hash); + + bucket_for_value.remove_value(); + m_buckets[ibucket_for_hash].toggle_neighbor_presence(ibucket_for_value - ibucket_for_hash); + m_nb_elements--; + } + + + + template + std::pair insert_or_assign_impl(K&& key, M&& obj) { + auto it = try_emplace_impl(std::forward(key), std::forward(obj)); + if(!it.second) { + it.first.value() = std::forward(obj); + } + + return it; + } + + template + std::pair try_emplace_impl(P&& key, Args&&... args_value) { + const std::size_t hash = hash_key(key); + const std::size_t ibucket_for_hash = bucket_for_hash(hash); + + // Check if already presents + auto it_find = find_impl(key, hash, m_first_or_empty_bucket + ibucket_for_hash); + if(it_find != end()) { + return std::make_pair(it_find, false); + } + + return insert_impl(ibucket_for_hash, hash, std::piecewise_construct, + std::forward_as_tuple(std::forward
<P>
(key)), + std::forward_as_tuple(std::forward(args_value)...)); + } + + template + std::pair insert_impl(P&& value) { + const std::size_t hash = hash_key(KeySelect()(value)); + const std::size_t ibucket_for_hash = bucket_for_hash(hash); + + // Check if already presents + auto it_find = find_impl(KeySelect()(value), hash, m_first_or_empty_bucket + ibucket_for_hash); + if(it_find != end()) { + return std::make_pair(it_find, false); + } + + + return insert_impl(ibucket_for_hash, hash, std::forward
<P>
(value)); + } + + template + std::pair insert_impl(std::size_t ibucket_for_hash, std::size_t hash, Args&&... value_type_args) { + if((m_nb_elements - m_overflow_elements.size()) >= m_max_load_threshold_rehash) { + rehash(GrowthPolicy::next_bucket_count()); + ibucket_for_hash = bucket_for_hash(hash); + } + + std::size_t ibucket_empty = find_empty_bucket(ibucket_for_hash); + if(ibucket_empty < m_buckets.size()) { + do { + tsl_assert(ibucket_empty >= ibucket_for_hash); + + // Empty bucket is in range of NeighborhoodSize, use it + if(ibucket_empty - ibucket_for_hash < NeighborhoodSize) { + auto it = insert_in_bucket(ibucket_empty, ibucket_for_hash, + hash, std::forward(value_type_args)...); + return std::make_pair(iterator(it, m_buckets.end(), m_overflow_elements.begin()), true); + } + } + // else, try to swap values to get a closer empty bucket + while(swap_empty_bucket_closer(ibucket_empty)); + } + + // Load factor is too low or a rehash will not change the neighborhood, put the value in overflow list + if(size() < m_min_load_threshold_rehash || !will_neighborhood_change_on_rehash(ibucket_for_hash)) { + auto it = insert_in_overflow(ibucket_for_hash, std::forward(value_type_args)...); + return std::make_pair(iterator(m_buckets.end(), m_buckets.end(), it), true); + } + + rehash(GrowthPolicy::next_bucket_count()); + ibucket_for_hash = bucket_for_hash(hash); + + return insert_impl(ibucket_for_hash, hash, std::forward(value_type_args)...); + } + + /* + * Return true if a rehash will change the position of a key-value in the neighborhood of + * ibucket_neighborhood_check. In this case a rehash is needed instead of puting the value in overflow list. + */ + bool will_neighborhood_change_on_rehash(size_t ibucket_neighborhood_check) const { + std::size_t expand_bucket_count = GrowthPolicy::next_bucket_count(); + GrowthPolicy expand_growth_policy(expand_bucket_count); + + const bool use_stored_hash = USE_STORED_HASH_ON_REHASH(expand_bucket_count); + for(size_t ibucket = ibucket_neighborhood_check; + ibucket < m_buckets.size() && (ibucket - ibucket_neighborhood_check) < NeighborhoodSize; + ++ibucket) + { + tsl_assert(!m_buckets[ibucket].empty()); + + const size_t hash = use_stored_hash? + m_buckets[ibucket].truncated_bucket_hash(): + hash_key(KeySelect()(m_buckets[ibucket].value())); + if(bucket_for_hash(hash) != expand_growth_policy.bucket_for_hash(hash)) { + return true; + } + } + + return false; + } + + /* + * Return the index of an empty bucket in m_buckets. + * If none, the returned index equals m_buckets.size() + */ + std::size_t find_empty_bucket(std::size_t ibucket_start) const { + const std::size_t limit = std::min(ibucket_start + MAX_PROBES_FOR_EMPTY_BUCKET, m_buckets.size()); + for(; ibucket_start < limit; ibucket_start++) { + if(m_buckets[ibucket_start].empty()) { + return ibucket_start; + } + } + + return m_buckets.size(); + } + + /* + * Insert value in ibucket_empty where value originally belongs to ibucket_for_hash + * + * Return bucket iterator to ibucket_empty + */ + template + iterator_buckets insert_in_bucket(std::size_t ibucket_empty, std::size_t ibucket_for_hash, + std::size_t hash, Args&&... 
value_type_args) + { + tsl_assert(ibucket_empty >= ibucket_for_hash ); + tsl_assert(m_buckets[ibucket_empty].empty()); + m_buckets[ibucket_empty].set_value_of_empty_bucket(hopscotch_bucket::truncate_hash(hash), std::forward(value_type_args)...); + + tsl_assert(!m_buckets[ibucket_for_hash].empty()); + m_buckets[ibucket_for_hash].toggle_neighbor_presence(ibucket_empty - ibucket_for_hash); + m_nb_elements++; + + return m_buckets.begin() + ibucket_empty; + } + + template::value>::type* = nullptr> + iterator_overflow insert_in_overflow(std::size_t ibucket_for_hash, Args&&... value_type_args) { + auto it = m_overflow_elements.emplace(m_overflow_elements.end(), std::forward(value_type_args)...); + + m_buckets[ibucket_for_hash].set_overflow(true); + m_nb_elements++; + + return it; + } + + template::value>::type* = nullptr> + iterator_overflow insert_in_overflow(std::size_t ibucket_for_hash, Args&&... value_type_args) { + auto it = m_overflow_elements.emplace(std::forward(value_type_args)...).first; + + m_buckets[ibucket_for_hash].set_overflow(true); + m_nb_elements++; + + return it; + } + + /* + * Try to swap the bucket ibucket_empty_in_out with a bucket preceding it while keeping the neighborhood + * conditions correct. + * + * If a swap was possible, the position of ibucket_empty_in_out will be closer to 0 and true will re returned. + */ + bool swap_empty_bucket_closer(std::size_t& ibucket_empty_in_out) { + tsl_assert(ibucket_empty_in_out >= NeighborhoodSize); + const std::size_t neighborhood_start = ibucket_empty_in_out - NeighborhoodSize + 1; + + for(std::size_t to_check = neighborhood_start; to_check < ibucket_empty_in_out; to_check++) { + neighborhood_bitmap neighborhood_infos = m_buckets[to_check].neighborhood_infos(); + std::size_t to_swap = to_check; + + while(neighborhood_infos != 0 && to_swap < ibucket_empty_in_out) { + if((neighborhood_infos & 1) == 1) { + tsl_assert(m_buckets[ibucket_empty_in_out].empty()); + tsl_assert(!m_buckets[to_swap].empty()); + + m_buckets[to_swap].swap_value_into_empty_bucket(m_buckets[ibucket_empty_in_out]); + + tsl_assert(!m_buckets[to_check].check_neighbor_presence(ibucket_empty_in_out - to_check)); + tsl_assert(m_buckets[to_check].check_neighbor_presence(to_swap - to_check)); + + m_buckets[to_check].toggle_neighbor_presence(ibucket_empty_in_out - to_check); + m_buckets[to_check].toggle_neighbor_presence(to_swap - to_check); + + + ibucket_empty_in_out = to_swap; + + return true; + } + + to_swap++; + neighborhood_infos = neighborhood_bitmap(neighborhood_infos >> 1); + } + } + + return false; + } + + + + template::value>::type* = nullptr> + typename U::value_type* find_value_impl(const K& key, std::size_t hash, hopscotch_bucket* bucket_for_hash) { + return const_cast( + static_cast(this)->find_value_impl(key, hash, bucket_for_hash)); + } + + /* + * Avoid the creation of an iterator to just get the value for operator[] and at() in maps. Faster this way. + * + * Return null if no value for the key (TODO use std::optional when available). 
+ */ + template::value>::type* = nullptr> + const typename U::value_type* find_value_impl(const K& key, std::size_t hash, + const hopscotch_bucket* bucket_for_hash) const + { + const hopscotch_bucket* bucket_found = find_in_buckets(key, hash, bucket_for_hash); + if(bucket_found != nullptr) { + return std::addressof(ValueSelect()(bucket_found->value())); + } + + if(bucket_for_hash->has_overflow()) { + auto it_overflow = find_in_overflow(key); + if(it_overflow != m_overflow_elements.end()) { + return std::addressof(ValueSelect()(*it_overflow)); + } + } + + return nullptr; + } + + template + size_type count_impl(const K& key, std::size_t hash, const hopscotch_bucket* bucket_for_hash) const { + if(find_in_buckets(key, hash, bucket_for_hash) != nullptr) { + return 1; + } + else if(bucket_for_hash->has_overflow() && find_in_overflow(key) != m_overflow_elements.cend()) { + return 1; + } + else { + return 0; + } + } + + template + iterator find_impl(const K& key, std::size_t hash, hopscotch_bucket* bucket_for_hash) { + hopscotch_bucket* bucket_found = find_in_buckets(key, hash, bucket_for_hash); + if(bucket_found != nullptr) { + return iterator(m_buckets.begin() + std::distance(m_buckets.data(), bucket_found), + m_buckets.end(), m_overflow_elements.begin()); + } + + if(!bucket_for_hash->has_overflow()) { + return end(); + } + + return iterator(m_buckets.end(), m_buckets.end(), find_in_overflow(key)); + } + + template + const_iterator find_impl(const K& key, std::size_t hash, const hopscotch_bucket* bucket_for_hash) const { + const hopscotch_bucket* bucket_found = find_in_buckets(key, hash, bucket_for_hash); + if(bucket_found != nullptr) { + return const_iterator(m_buckets.cbegin() + std::distance(m_buckets.data(), bucket_found), + m_buckets.cend(), m_overflow_elements.cbegin()); + } + + if(!bucket_for_hash->has_overflow()) { + return cend(); + } + + + return const_iterator(m_buckets.cend(), m_buckets.cend(), find_in_overflow(key)); + } + + template + hopscotch_bucket* find_in_buckets(const K& key, std::size_t hash, hopscotch_bucket* bucket_for_hash) { + const hopscotch_bucket* bucket_found = + static_cast(this)->find_in_buckets(key, hash, bucket_for_hash); + return const_cast(bucket_found); + } + + + /** + * Return a pointer to the bucket which has the value, nullptr otherwise. + */ + template + const hopscotch_bucket* find_in_buckets(const K& key, std::size_t hash, const hopscotch_bucket* bucket_for_hash) const { + (void) hash; // Avoid warning of unused variable when StoreHash is false; + + // TODO Try to optimize the function. + // I tried to use ffs and __builtin_ffs functions but I could not reduce the time the function + // takes with -march=native + + neighborhood_bitmap neighborhood_infos = bucket_for_hash->neighborhood_infos(); + while(neighborhood_infos != 0) { + if((neighborhood_infos & 1) == 1) { + // Check StoreHash before calling bucket_hash_equal. Functionally it doesn't change anythin. + // If StoreHash is false, bucket_hash_equal is a no-op. Avoiding the call is there to help + // GCC optimizes `hash` parameter away, it seems to not be able to do without this hint. 
+ if((!StoreHash || bucket_for_hash->bucket_hash_equal(hash)) && + compare_keys(KeySelect()(bucket_for_hash->value()), key)) + { + return bucket_for_hash; + } + } + + ++bucket_for_hash; + neighborhood_infos = neighborhood_bitmap(neighborhood_infos >> 1); + } + + return nullptr; + } + + + + template::value>::type* = nullptr> + iterator_overflow find_in_overflow(const K& key) { + return std::find_if(m_overflow_elements.begin(), m_overflow_elements.end(), + [&](const value_type& value) { + return compare_keys(key, KeySelect()(value)); + }); + } + + template::value>::type* = nullptr> + const_iterator_overflow find_in_overflow(const K& key) const { + return std::find_if(m_overflow_elements.cbegin(), m_overflow_elements.cend(), + [&](const value_type& value) { + return compare_keys(key, KeySelect()(value)); + }); + } + + template::value>::type* = nullptr> + iterator_overflow find_in_overflow(const K& key) { + return m_overflow_elements.find(key); + } + + template::value>::type* = nullptr> + const_iterator_overflow find_in_overflow(const K& key) const { + return m_overflow_elements.find(key); + } + + + + template::value>::type* = nullptr> + hopscotch_hash new_hopscotch_hash(size_type bucket_count) { + return hopscotch_hash(bucket_count, static_cast(*this), static_cast(*this), + get_allocator(), m_max_load_factor); + } + + template::value>::type* = nullptr> + hopscotch_hash new_hopscotch_hash(size_type bucket_count) { + return hopscotch_hash(bucket_count, static_cast(*this), static_cast(*this), + get_allocator(), m_max_load_factor, m_overflow_elements.key_comp()); + } + +public: + static const size_type DEFAULT_INIT_BUCKETS_SIZE = 16; + static constexpr float DEFAULT_MAX_LOAD_FACTOR = (NeighborhoodSize <= 30)?0.8f:0.9f; + +private: + static const std::size_t MAX_PROBES_FOR_EMPTY_BUCKET = 12*NeighborhoodSize; + static constexpr float MIN_LOAD_FACTOR_FOR_REHASH = 0.1f; + + static bool USE_STORED_HASH_ON_REHASH(size_type bucket_count) { + (void) bucket_count; + if(StoreHash && is_power_of_two_policy::value) { + tsl_assert(bucket_count > 0); + return (bucket_count - 1) <= std::numeric_limits::max(); + } + else { + return false; + } + } + + /** + * Return an always valid pointer to an static empty hopscotch_bucket. + */ + hopscotch_bucket* static_empty_bucket_ptr() { + static hopscotch_bucket empty_bucket; + return &empty_bucket; + } + +private: + buckets_container_type m_buckets; + overflow_container_type m_overflow_elements; + + /** + * Points to m_buckets.data() if !m_buckets.empty() otherwise points to static_empty_bucket_ptr. + * This variable is useful to avoid the cost of checking if m_buckets is empty when trying + * to find an element. + */ + hopscotch_bucket* m_first_or_empty_bucket; + + size_type m_nb_elements; + + float m_max_load_factor; + + /** + * Max size of the hash table before a rehash occurs automatically to grow the table. + */ + size_type m_max_load_threshold_rehash; + + /** + * Min size of the hash table before a rehash can occurs automatically (except if m_max_load_threshold_rehash os reached). + * If the neighborhood of a bucket is full before the min is reacher, the elements are put into m_overflow_elements. 
+ */ + size_type m_min_load_threshold_rehash; +}; + +} // end namespace detail_hopscotch_hash + + +} // end namespace tsl + +#endif diff --git a/include/tsl/hopscotch_map.h b/include/tsl/hopscotch_map.h new file mode 100644 index 0000000..acd7a79 --- /dev/null +++ b/include/tsl/hopscotch_map.h @@ -0,0 +1,679 @@ +/** + * MIT License + * + * Copyright (c) 2017 Tessil + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef TSL_HOPSCOTCH_MAP_H +#define TSL_HOPSCOTCH_MAP_H + + +#include +#include +#include +#include +#include +#include +#include +#include +#include "hopscotch_hash.h" + + +namespace tsl { + +/** + * Implementation of a hash map using the hopscotch hashing algorithm. + * + * The Key and the value T must be either nothrow move-constructible, copy-constuctible or both. + * + * The size of the neighborhood (NeighborhoodSize) must be > 0 and <= 62 if StoreHash is false. + * When StoreHash is true, 32-bits of the hash will be stored alongside the neighborhood limiting + * the NeighborhoodSize to <= 30. There is no memory usage difference between + * 'NeighborhoodSize 62; StoreHash false' and 'NeighborhoodSize 30; StoreHash true'. + * + * Storing the hash may improve performance on insert during the rehash process if the hash takes time + * to compute. It may also improve read performance if the KeyEqual function takes time (or incurs a cache-miss). + * If used with simple Hash and KeyEqual it may slow things down. + * + * StoreHash can only be set if the GrowthPolicy is set to tsl::power_of_two_growth_policy. + * + * GrowthPolicy defines how the map grows and consequently how a hash value is mapped to a bucket. + * By default the map uses tsl::power_of_two_growth_policy. This policy keeps the number of buckets + * to a power of two and uses a mask to map the hash to a bucket instead of the slow modulo. + * You may define your own growth policy, check tsl::power_of_two_growth_policy for the interface. + * + * If the destructors of Key or T throw an exception, behaviour of the class is undefined. + * + * Iterators invalidation: + * - clear, operator=, reserve, rehash: always invalidate the iterators. + * - insert, emplace, emplace_hint, operator[]: if there is an effective insert, invalidate the iterators + * if a displacement is needed to resolve a collision (which mean that most of the time, + * insert will invalidate the iterators). Or if there is a rehash. + * - erase: iterator on the erased element is the only one which become invalid. 
+ */ +template, + class KeyEqual = std::equal_to, + class Allocator = std::allocator>, + unsigned int NeighborhoodSize = 62, + bool StoreHash = false, + class GrowthPolicy = tsl::hh::power_of_two_growth_policy<2>> +class hopscotch_map { +private: + template + using has_is_transparent = tsl::detail_hopscotch_hash::has_is_transparent; + + class KeySelect { + public: + using key_type = Key; + + const key_type& operator()(const std::pair& key_value) const { + return key_value.first; + } + + key_type& operator()(std::pair& key_value) { + return key_value.first; + } + }; + + class ValueSelect { + public: + using value_type = T; + + const value_type& operator()(const std::pair& key_value) const { + return key_value.second; + } + + value_type& operator()(std::pair& key_value) { + return key_value.second; + } + }; + + + using overflow_container_type = std::list, Allocator>; + using ht = detail_hopscotch_hash::hopscotch_hash, KeySelect, ValueSelect, + Hash, KeyEqual, + Allocator, NeighborhoodSize, + StoreHash, GrowthPolicy, + overflow_container_type>; + +public: + using key_type = typename ht::key_type; + using mapped_type = T; + using value_type = typename ht::value_type; + using size_type = typename ht::size_type; + using difference_type = typename ht::difference_type; + using hasher = typename ht::hasher; + using key_equal = typename ht::key_equal; + using allocator_type = typename ht::allocator_type; + using reference = typename ht::reference; + using const_reference = typename ht::const_reference; + using pointer = typename ht::pointer; + using const_pointer = typename ht::const_pointer; + using iterator = typename ht::iterator; + using const_iterator = typename ht::const_iterator; + + + + /* + * Constructors + */ + hopscotch_map() : hopscotch_map(ht::DEFAULT_INIT_BUCKETS_SIZE) { + } + + explicit hopscotch_map(size_type bucket_count, + const Hash& hash = Hash(), + const KeyEqual& equal = KeyEqual(), + const Allocator& alloc = Allocator()) : + m_ht(bucket_count, hash, equal, alloc, ht::DEFAULT_MAX_LOAD_FACTOR) + { + } + + hopscotch_map(size_type bucket_count, + const Allocator& alloc) : hopscotch_map(bucket_count, Hash(), KeyEqual(), alloc) + { + } + + hopscotch_map(size_type bucket_count, + const Hash& hash, + const Allocator& alloc) : hopscotch_map(bucket_count, hash, KeyEqual(), alloc) + { + } + + explicit hopscotch_map(const Allocator& alloc) : hopscotch_map(ht::DEFAULT_INIT_BUCKETS_SIZE, alloc) { + } + + template + hopscotch_map(InputIt first, InputIt last, + size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, + const Hash& hash = Hash(), + const KeyEqual& equal = KeyEqual(), + const Allocator& alloc = Allocator()) : hopscotch_map(bucket_count, hash, equal, alloc) + { + insert(first, last); + } + + template + hopscotch_map(InputIt first, InputIt last, + size_type bucket_count, + const Allocator& alloc) : hopscotch_map(first, last, bucket_count, Hash(), KeyEqual(), alloc) + { + } + + template + hopscotch_map(InputIt first, InputIt last, + size_type bucket_count, + const Hash& hash, + const Allocator& alloc) : hopscotch_map(first, last, bucket_count, hash, KeyEqual(), alloc) + { + } + + hopscotch_map(std::initializer_list init, + size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, + const Hash& hash = Hash(), + const KeyEqual& equal = KeyEqual(), + const Allocator& alloc = Allocator()) : + hopscotch_map(init.begin(), init.end(), bucket_count, hash, equal, alloc) + { + } + + hopscotch_map(std::initializer_list init, + size_type bucket_count, + const Allocator& alloc) : + 
hopscotch_map(init.begin(), init.end(), bucket_count, Hash(), KeyEqual(), alloc) + { + } + + hopscotch_map(std::initializer_list init, + size_type bucket_count, + const Hash& hash, + const Allocator& alloc) : + hopscotch_map(init.begin(), init.end(), bucket_count, hash, KeyEqual(), alloc) + { + } + + + hopscotch_map& operator=(std::initializer_list ilist) { + m_ht.clear(); + + m_ht.reserve(ilist.size()); + m_ht.insert(ilist.begin(), ilist.end()); + + return *this; + } + + allocator_type get_allocator() const { return m_ht.get_allocator(); } + + + /* + * Iterators + */ + iterator begin() noexcept { return m_ht.begin(); } + const_iterator begin() const noexcept { return m_ht.begin(); } + const_iterator cbegin() const noexcept { return m_ht.cbegin(); } + + iterator end() noexcept { return m_ht.end(); } + const_iterator end() const noexcept { return m_ht.end(); } + const_iterator cend() const noexcept { return m_ht.cend(); } + + + /* + * Capacity + */ + bool empty() const noexcept { return m_ht.empty(); } + size_type size() const noexcept { return m_ht.size(); } + size_type max_size() const noexcept { return m_ht.max_size(); } + + /* + * Modifiers + */ + void clear() noexcept { m_ht.clear(); } + + + + + std::pair insert(const value_type& value) { + return m_ht.insert(value); + } + + template::value>::type* = nullptr> + std::pair insert(P&& value) { + return m_ht.insert(std::forward
<P>
(value)); + } + + std::pair insert(value_type&& value) { + return m_ht.insert(std::move(value)); + } + + + iterator insert(const_iterator hint, const value_type& value) { + return m_ht.insert(hint, value); + } + + template::value>::type* = nullptr> + iterator insert(const_iterator hint, P&& value) { + return m_ht.insert(hint, std::forward
<P>
(value)); + } + + iterator insert(const_iterator hint, value_type&& value) { + return m_ht.insert(hint, std::move(value)); + } + + + template + void insert(InputIt first, InputIt last) { + m_ht.insert(first, last); + } + + void insert(std::initializer_list ilist) { + m_ht.insert(ilist.begin(), ilist.end()); + } + + + + + template + std::pair insert_or_assign(const key_type& k, M&& obj) { + return m_ht.insert_or_assign(k, std::forward(obj)); + } + + template + std::pair insert_or_assign(key_type&& k, M&& obj) { + return m_ht.insert_or_assign(std::move(k), std::forward(obj)); + } + + template + iterator insert_or_assign(const_iterator hint, const key_type& k, M&& obj) { + return m_ht.insert_or_assign(hint, k, std::forward(obj)); + } + + template + iterator insert_or_assign(const_iterator hint, key_type&& k, M&& obj) { + return m_ht.insert_or_assign(hint, std::move(k), std::forward(obj)); + } + + + + + /** + * Due to the way elements are stored, emplace will need to move or copy the key-value once. + * The method is equivalent to insert(value_type(std::forward(args)...)); + * + * Mainly here for compatibility with the std::unordered_map interface. + */ + template + std::pair emplace(Args&&... args) { + return m_ht.emplace(std::forward(args)...); + } + + + + + /** + * Due to the way elements are stored, emplace_hint will need to move or copy the key-value once. + * The method is equivalent to insert(hint, value_type(std::forward(args)...)); + * + * Mainly here for compatibility with the std::unordered_map interface. + */ + template + iterator emplace_hint(const_iterator hint, Args&&... args) { + return m_ht.emplace_hint(hint, std::forward(args)...); + } + + + + + template + std::pair try_emplace(const key_type& k, Args&&... args) { + return m_ht.try_emplace(k, std::forward(args)...); + } + + template + std::pair try_emplace(key_type&& k, Args&&... args) { + return m_ht.try_emplace(std::move(k), std::forward(args)...); + } + + template + iterator try_emplace(const_iterator hint, const key_type& k, Args&&... args) { + return m_ht.try_emplace(hint, k, std::forward(args)...); + } + + template + iterator try_emplace(const_iterator hint, key_type&& k, Args&&... args) { + return m_ht.try_emplace(hint, std::move(k), std::forward(args)...); + } + + + + + iterator erase(iterator pos) { return m_ht.erase(pos); } + iterator erase(const_iterator pos) { return m_ht.erase(pos); } + iterator erase(const_iterator first, const_iterator last) { return m_ht.erase(first, last); } + size_type erase(const key_type& key) { return m_ht.erase(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup to the value if you already have the hash. + */ + size_type erase(const key_type& key, std::size_t precalculated_hash) { + return m_ht.erase(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. + * If so, K must be hashable and comparable to Key. + */ + template::value>::type* = nullptr> + size_type erase(const K& key) { return m_ht.erase(key); } + + /** + * @copydoc erase(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup to the value if you already have the hash. 
+ */ + template::value>::type* = nullptr> + size_type erase(const K& key, std::size_t precalculated_hash) { + return m_ht.erase(key, precalculated_hash); + } + + + + + void swap(hopscotch_map& other) { other.m_ht.swap(m_ht); } + + /* + * Lookup + */ + T& at(const Key& key) { return m_ht.at(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. + */ + T& at(const Key& key, std::size_t precalculated_hash) { return m_ht.at(key, precalculated_hash); } + + + const T& at(const Key& key) const { return m_ht.at(key); } + + /** + * @copydoc at(const Key& key, std::size_t precalculated_hash) + */ + const T& at(const Key& key, std::size_t precalculated_hash) const { return m_ht.at(key, precalculated_hash); } + + + /** + * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. + * If so, K must be hashable and comparable to Key. + */ + template::value>::type* = nullptr> + T& at(const K& key) { return m_ht.at(key); } + + /** + * @copydoc at(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. + */ + template::value>::type* = nullptr> + T& at(const K& key, std::size_t precalculated_hash) { return m_ht.at(key, precalculated_hash); } + + + /** + * @copydoc at(const K& key) + */ + template::value>::type* = nullptr> + const T& at(const K& key) const { return m_ht.at(key); } + + /** + * @copydoc at(const K& key, std::size_t precalculated_hash) + */ + template::value>::type* = nullptr> + const T& at(const K& key, std::size_t precalculated_hash) const { return m_ht.at(key, precalculated_hash); } + + + + + T& operator[](const Key& key) { return m_ht[key]; } + T& operator[](Key&& key) { return m_ht[std::move(key)]; } + + + + + size_type count(const Key& key) const { return m_ht.count(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. + */ + size_type count(const Key& key, std::size_t precalculated_hash) const { + return m_ht.count(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. + * If so, K must be hashable and comparable to Key. + */ + template::value>::type* = nullptr> + size_type count(const K& key) const { return m_ht.count(key); } + + /** + * @copydoc count(const K& key) const + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. + */ + template::value>::type* = nullptr> + size_type count(const K& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); } + + + + + iterator find(const Key& key) { return m_ht.find(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. 
+ */ + iterator find(const Key& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); } + + const_iterator find(const Key& key) const { return m_ht.find(key); } + + /** + * @copydoc find(const Key& key, std::size_t precalculated_hash) + */ + const_iterator find(const Key& key, std::size_t precalculated_hash) const { + return m_ht.find(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. + * If so, K must be hashable and comparable to Key. + */ + template::value>::type* = nullptr> + iterator find(const K& key) { return m_ht.find(key); } + + /** + * @copydoc find(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. + */ + template::value>::type* = nullptr> + iterator find(const K& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); } + + /** + * @copydoc find(const K& key) + */ + template::value>::type* = nullptr> + const_iterator find(const K& key) const { return m_ht.find(key); } + + /** + * @copydoc find(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. + */ + template::value>::type* = nullptr> + const_iterator find(const K& key, std::size_t precalculated_hash) const { + return m_ht.find(key, precalculated_hash); + } + + + + + std::pair equal_range(const Key& key) { return m_ht.equal_range(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. + */ + std::pair equal_range(const Key& key, std::size_t precalculated_hash) { + return m_ht.equal_range(key, precalculated_hash); + } + + std::pair equal_range(const Key& key) const { return m_ht.equal_range(key); } + + /** + * @copydoc equal_range(const Key& key, std::size_t precalculated_hash) + */ + std::pair equal_range(const Key& key, std::size_t precalculated_hash) const { + return m_ht.equal_range(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. + * If so, K must be hashable and comparable to Key. + */ + template::value>::type* = nullptr> + std::pair equal_range(const K& key) { return m_ht.equal_range(key); } + + + /** + * @copydoc equal_range(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. 
+ */ + template::value>::type* = nullptr> + std::pair equal_range(const K& key, std::size_t precalculated_hash) { + return m_ht.equal_range(key, precalculated_hash); + } + + /** + * @copydoc equal_range(const K& key) + */ + template::value>::type* = nullptr> + std::pair equal_range(const K& key) const { return m_ht.equal_range(key); } + + /** + * @copydoc equal_range(const K& key, std::size_t precalculated_hash) + */ + template::value>::type* = nullptr> + std::pair equal_range(const K& key, std::size_t precalculated_hash) const { + return m_ht.equal_range(key, precalculated_hash); + } + + + + + /* + * Bucket interface + */ + size_type bucket_count() const { return m_ht.bucket_count(); } + size_type max_bucket_count() const { return m_ht.max_bucket_count(); } + + + /* + * Hash policy + */ + float load_factor() const { return m_ht.load_factor(); } + float max_load_factor() const { return m_ht.max_load_factor(); } + void max_load_factor(float ml) { m_ht.max_load_factor(ml); } + + void rehash(size_type count_) { m_ht.rehash(count_); } + void reserve(size_type count_) { m_ht.reserve(count_); } + + + /* + * Observers + */ + hasher hash_function() const { return m_ht.hash_function(); } + key_equal key_eq() const { return m_ht.key_eq(); } + + /* + * Other + */ + + /** + * Convert a const_iterator to an iterator. + */ + iterator mutable_iterator(const_iterator pos) { + return m_ht.mutable_iterator(pos); + } + + size_type overflow_size() const noexcept { return m_ht.overflow_size(); } + + friend bool operator==(const hopscotch_map& lhs, const hopscotch_map& rhs) { + if(lhs.size() != rhs.size()) { + return false; + } + + for(const auto& element_lhs : lhs) { + const auto it_element_rhs = rhs.find(element_lhs.first); + if(it_element_rhs == rhs.cend() || element_lhs.second != it_element_rhs->second) { + return false; + } + } + + return true; + } + + friend bool operator!=(const hopscotch_map& lhs, const hopscotch_map& rhs) { + return !operator==(lhs, rhs); + } + + friend void swap(hopscotch_map& lhs, hopscotch_map& rhs) { + lhs.swap(rhs); + } + + + +private: + ht m_ht; +}; + + +/** + * Same as `tsl::hopscotch_map`. + */ +template, + class KeyEqual = std::equal_to, + class Allocator = std::allocator>, + unsigned int NeighborhoodSize = 62, + bool StoreHash = false> +using hopscotch_pg_map = hopscotch_map; + +} // end namespace tsl + +#endif diff --git a/include/tsl/hopscotch_set.h b/include/tsl/hopscotch_set.h new file mode 100644 index 0000000..4013d33 --- /dev/null +++ b/include/tsl/hopscotch_set.h @@ -0,0 +1,525 @@ +/** + * MIT License + * + * Copyright (c) 2017 Tessil + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef TSL_HOPSCOTCH_SET_H +#define TSL_HOPSCOTCH_SET_H + + +#include +#include +#include +#include +#include +#include +#include +#include +#include "hopscotch_hash.h" + + +namespace tsl { + +/** + * Implementation of a hash set using the hopscotch hashing algorithm. + * + * The Key must be either nothrow move-constructible, copy-constuctible or both. + * + * The size of the neighborhood (NeighborhoodSize) must be > 0 and <= 62 if StoreHash is false. + * When StoreHash is true, 32-bits of the hash will be stored alongside the neighborhood limiting + * the NeighborhoodSize to <= 30. There is no memory usage difference between + * 'NeighborhoodSize 62; StoreHash false' and 'NeighborhoodSize 30; StoreHash true'. + * + * Storing the hash may improve performance on insert during the rehash process if the hash takes time + * to compute. It may also improve read performance if the KeyEqual function takes time (or incurs a cache-miss). + * If used with simple Hash and KeyEqual it may slow things down. + * + * StoreHash can only be set if the GrowthPolicy is set to tsl::power_of_two_growth_policy. + * + * GrowthPolicy defines how the set grows and consequently how a hash value is mapped to a bucket. + * By default the set uses tsl::power_of_two_growth_policy. This policy keeps the number of buckets + * to a power of two and uses a mask to set the hash to a bucket instead of the slow modulo. + * You may define your own growth policy, check tsl::power_of_two_growth_policy for the interface. + * + * If the destructor of Key throws an exception, behaviour of the class is undefined. + * + * Iterators invalidation: + * - clear, operator=, reserve, rehash: always invalidate the iterators. + * - insert, emplace, emplace_hint, operator[]: if there is an effective insert, invalidate the iterators + * if a displacement is needed to resolve a collision (which mean that most of the time, + * insert will invalidate the iterators). Or if there is a rehash. + * - erase: iterator on the erased element is the only one which become invalid. 
+ */ +template, + class KeyEqual = std::equal_to, + class Allocator = std::allocator, + unsigned int NeighborhoodSize = 62, + bool StoreHash = false, + class GrowthPolicy = tsl::hh::power_of_two_growth_policy<2>> +class hopscotch_set { +private: + template + using has_is_transparent = tsl::detail_hopscotch_hash::has_is_transparent; + + class KeySelect { + public: + using key_type = Key; + + const key_type& operator()(const Key& key) const { + return key; + } + + key_type& operator()(Key& key) { + return key; + } + }; + + + using overflow_container_type = std::list; + using ht = detail_hopscotch_hash::hopscotch_hash; + +public: + using key_type = typename ht::key_type; + using value_type = typename ht::value_type; + using size_type = typename ht::size_type; + using difference_type = typename ht::difference_type; + using hasher = typename ht::hasher; + using key_equal = typename ht::key_equal; + using allocator_type = typename ht::allocator_type; + using reference = typename ht::reference; + using const_reference = typename ht::const_reference; + using pointer = typename ht::pointer; + using const_pointer = typename ht::const_pointer; + using iterator = typename ht::iterator; + using const_iterator = typename ht::const_iterator; + + + /* + * Constructors + */ + hopscotch_set() : hopscotch_set(ht::DEFAULT_INIT_BUCKETS_SIZE) { + } + + explicit hopscotch_set(size_type bucket_count, + const Hash& hash = Hash(), + const KeyEqual& equal = KeyEqual(), + const Allocator& alloc = Allocator()) : + m_ht(bucket_count, hash, equal, alloc, ht::DEFAULT_MAX_LOAD_FACTOR) + { + } + + hopscotch_set(size_type bucket_count, + const Allocator& alloc) : hopscotch_set(bucket_count, Hash(), KeyEqual(), alloc) + { + } + + hopscotch_set(size_type bucket_count, + const Hash& hash, + const Allocator& alloc) : hopscotch_set(bucket_count, hash, KeyEqual(), alloc) + { + } + + explicit hopscotch_set(const Allocator& alloc) : hopscotch_set(ht::DEFAULT_INIT_BUCKETS_SIZE, alloc) { + } + + template + hopscotch_set(InputIt first, InputIt last, + size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, + const Hash& hash = Hash(), + const KeyEqual& equal = KeyEqual(), + const Allocator& alloc = Allocator()) : hopscotch_set(bucket_count, hash, equal, alloc) + { + insert(first, last); + } + + template + hopscotch_set(InputIt first, InputIt last, + size_type bucket_count, + const Allocator& alloc) : hopscotch_set(first, last, bucket_count, Hash(), KeyEqual(), alloc) + { + } + + template + hopscotch_set(InputIt first, InputIt last, + size_type bucket_count, + const Hash& hash, + const Allocator& alloc) : hopscotch_set(first, last, bucket_count, hash, KeyEqual(), alloc) + { + } + + hopscotch_set(std::initializer_list init, + size_type bucket_count = ht::DEFAULT_INIT_BUCKETS_SIZE, + const Hash& hash = Hash(), + const KeyEqual& equal = KeyEqual(), + const Allocator& alloc = Allocator()) : + hopscotch_set(init.begin(), init.end(), bucket_count, hash, equal, alloc) + { + } + + hopscotch_set(std::initializer_list init, + size_type bucket_count, + const Allocator& alloc) : + hopscotch_set(init.begin(), init.end(), bucket_count, Hash(), KeyEqual(), alloc) + { + } + + hopscotch_set(std::initializer_list init, + size_type bucket_count, + const Hash& hash, + const Allocator& alloc) : + hopscotch_set(init.begin(), init.end(), bucket_count, hash, KeyEqual(), alloc) + { + } + + + hopscotch_set& operator=(std::initializer_list ilist) { + m_ht.clear(); + + m_ht.reserve(ilist.size()); + m_ht.insert(ilist.begin(), ilist.end()); + + return *this; + } + 
+ allocator_type get_allocator() const { return m_ht.get_allocator(); } + + + /* + * Iterators + */ + iterator begin() noexcept { return m_ht.begin(); } + const_iterator begin() const noexcept { return m_ht.begin(); } + const_iterator cbegin() const noexcept { return m_ht.cbegin(); } + + iterator end() noexcept { return m_ht.end(); } + const_iterator end() const noexcept { return m_ht.end(); } + const_iterator cend() const noexcept { return m_ht.cend(); } + + + /* + * Capacity + */ + bool empty() const noexcept { return m_ht.empty(); } + size_type size() const noexcept { return m_ht.size(); } + size_type max_size() const noexcept { return m_ht.max_size(); } + + /* + * Modifiers + */ + void clear() noexcept { m_ht.clear(); } + + + + + std::pair insert(const value_type& value) { return m_ht.insert(value); } + std::pair insert(value_type&& value) { return m_ht.insert(std::move(value)); } + + iterator insert(const_iterator hint, const value_type& value) { return m_ht.insert(hint, value); } + iterator insert(const_iterator hint, value_type&& value) { return m_ht.insert(hint, std::move(value)); } + + template + void insert(InputIt first, InputIt last) { m_ht.insert(first, last); } + void insert(std::initializer_list ilist) { m_ht.insert(ilist.begin(), ilist.end()); } + + + + + /** + * Due to the way elements are stored, emplace will need to move or copy the key-value once. + * The method is equivalent to insert(value_type(std::forward(args)...)); + * + * Mainly here for compatibility with the std::unordered_map interface. + */ + template + std::pair emplace(Args&&... args) { return m_ht.emplace(std::forward(args)...); } + + + + + /** + * Due to the way elements are stored, emplace_hint will need to move or copy the key-value once. + * The method is equivalent to insert(hint, value_type(std::forward(args)...)); + * + * Mainly here for compatibility with the std::unordered_map interface. + */ + template + iterator emplace_hint(const_iterator hint, Args&&... args) { + return m_ht.emplace_hint(hint, std::forward(args)...); + } + + + + + iterator erase(iterator pos) { return m_ht.erase(pos); } + iterator erase(const_iterator pos) { return m_ht.erase(pos); } + iterator erase(const_iterator first, const_iterator last) { return m_ht.erase(first, last); } + size_type erase(const key_type& key) { return m_ht.erase(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup to the value if you already have the hash. + */ + size_type erase(const key_type& key, std::size_t precalculated_hash) { + return m_ht.erase(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. + * If so, K must be hashable and comparable to Key. + */ + template::value>::type* = nullptr> + size_type erase(const K& key) { return m_ht.erase(key); } + + /** + * @copydoc erase(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup to the value if you already have the hash. 
+ */ + template::value>::type* = nullptr> + size_type erase(const K& key, std::size_t precalculated_hash) { + return m_ht.erase(key, precalculated_hash); + } + + + + + void swap(hopscotch_set& other) { other.m_ht.swap(m_ht); } + + + /* + * Lookup + */ + size_type count(const Key& key) const { return m_ht.count(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. + */ + size_type count(const Key& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); } + + /** + * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. + * If so, K must be hashable and comparable to Key. + */ + template::value>::type* = nullptr> + size_type count(const K& key) const { return m_ht.count(key); } + + /** + * @copydoc count(const K& key) const + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. + */ + template::value>::type* = nullptr> + size_type count(const K& key, std::size_t precalculated_hash) const { return m_ht.count(key, precalculated_hash); } + + + + + iterator find(const Key& key) { return m_ht.find(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. + */ + iterator find(const Key& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); } + + const_iterator find(const Key& key) const { return m_ht.find(key); } + + /** + * @copydoc find(const Key& key, std::size_t precalculated_hash) + */ + const_iterator find(const Key& key, std::size_t precalculated_hash) const { return m_ht.find(key, precalculated_hash); } + + /** + * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. + * If so, K must be hashable and comparable to Key. + */ + template::value>::type* = nullptr> + iterator find(const K& key) { return m_ht.find(key); } + + /** + * @copydoc find(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. + */ + template::value>::type* = nullptr> + iterator find(const K& key, std::size_t precalculated_hash) { return m_ht.find(key, precalculated_hash); } + + /** + * @copydoc find(const K& key) + */ + template::value>::type* = nullptr> + const_iterator find(const K& key) const { return m_ht.find(key); } + + /** + * @copydoc find(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. + */ + template::value>::type* = nullptr> + const_iterator find(const K& key, std::size_t precalculated_hash) const { return m_ht.find(key, precalculated_hash); } + + + + + std::pair equal_range(const Key& key) { return m_ht.equal_range(key); } + + /** + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. 
+ */ + std::pair equal_range(const Key& key, std::size_t precalculated_hash) { + return m_ht.equal_range(key, precalculated_hash); + } + + std::pair equal_range(const Key& key) const { return m_ht.equal_range(key); } + + /** + * @copydoc equal_range(const Key& key, std::size_t precalculated_hash) + */ + std::pair equal_range(const Key& key, std::size_t precalculated_hash) const { + return m_ht.equal_range(key, precalculated_hash); + } + + /** + * This overload only participates in the overload resolution if the typedef KeyEqual::is_transparent exists. + * If so, K must be hashable and comparable to Key. + */ + template::value>::type* = nullptr> + std::pair equal_range(const K& key) { return m_ht.equal_range(key); } + + /** + * @copydoc equal_range(const K& key) + * + * Use the hash value 'precalculated_hash' instead of hashing the key. The hash value should be the same + * as hash_function()(key). Usefull to speed-up the lookup if you already have the hash. + */ + template::value>::type* = nullptr> + std::pair equal_range(const K& key, std::size_t precalculated_hash) { + return m_ht.equal_range(key, precalculated_hash); + } + + /** + * @copydoc equal_range(const K& key) + */ + template::value>::type* = nullptr> + std::pair equal_range(const K& key) const { return m_ht.equal_range(key); } + + /** + * @copydoc equal_range(const K& key, std::size_t precalculated_hash) + */ + template::value>::type* = nullptr> + std::pair equal_range(const K& key, std::size_t precalculated_hash) const { + return m_ht.equal_range(key, precalculated_hash); + } + + + + + /* + * Bucket interface + */ + size_type bucket_count() const { return m_ht.bucket_count(); } + size_type max_bucket_count() const { return m_ht.max_bucket_count(); } + + + /* + * Hash policy + */ + float load_factor() const { return m_ht.load_factor(); } + float max_load_factor() const { return m_ht.max_load_factor(); } + void max_load_factor(float ml) { m_ht.max_load_factor(ml); } + + void rehash(size_type count_) { m_ht.rehash(count_); } + void reserve(size_type count_) { m_ht.reserve(count_); } + + + /* + * Observers + */ + hasher hash_function() const { return m_ht.hash_function(); } + key_equal key_eq() const { return m_ht.key_eq(); } + + + /* + * Other + */ + + /** + * Convert a const_iterator to an iterator. + */ + iterator mutable_iterator(const_iterator pos) { + return m_ht.mutable_iterator(pos); + } + + size_type overflow_size() const noexcept { return m_ht.overflow_size(); } + + friend bool operator==(const hopscotch_set& lhs, const hopscotch_set& rhs) { + if(lhs.size() != rhs.size()) { + return false; + } + + for(const auto& element_lhs : lhs) { + const auto it_element_rhs = rhs.find(element_lhs); + if(it_element_rhs == rhs.cend()) { + return false; + } + } + + return true; + } + + friend bool operator!=(const hopscotch_set& lhs, const hopscotch_set& rhs) { + return !operator==(lhs, rhs); + } + + friend void swap(hopscotch_set& lhs, hopscotch_set& rhs) { + lhs.swap(rhs); + } + +private: + ht m_ht; +}; + + +/** + * Same as `tsl::hopscotch_set`. 
+ */ +template, + class KeyEqual = std::equal_to, + class Allocator = std::allocator, + unsigned int NeighborhoodSize = 62, + bool StoreHash = false> +using hopscotch_pg_set = hopscotch_set; + +} // end namespace tsl + +#endif diff --git a/include/util.h b/include/util.h index 396184a..fce583b 100644 --- a/include/util.h +++ b/include/util.h @@ -1,15 +1,6 @@ /* * ============================================================================ * - * Filename: util.h - * - * Description: - * - * Version: 1.0 - * Created: 2017-09-21 12:39:52 PM - * Revision: none - * Compiler: gcc - * * Author: Prashant Pandey (), ppandey@cs.stonybrook.edu * Organization: Stony Brook University * @@ -33,6 +24,18 @@ #define PRINT_DEBUG 0 #endif +#define DEBUG(x) do { \ + if (PRINT_DEBUG) { std::cerr << x << std::endl; } \ +} while (0) + +#define ERROR(x) do { \ + { std::cerr << x << std::endl; } \ +} while (0) + +#define PRINT(x) do { \ + { std::cout << x << std::endl; } \ +} while (0) + #define DEBUG_CDBG(x) do { \ if (PRINT_DEBUG) { std::cerr << x << std::endl; } \ } while (0) diff --git a/raw/incqfs.lst b/raw/incqfs.lst index 03ea24c..98570b2 100644 --- a/raw/incqfs.lst +++ b/raw/incqfs.lst @@ -1,2 +1,2 @@ -data/SRR191411_exact.ser 1 -data/SRR191403_exact.ser 1 +data/SRR191403-k20-Cut1.squeakr +data/SRR191411-k20-Cut1.squeakr diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 44ae727..c5919ed 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -3,19 +3,22 @@ # most of the relevant API add_library(mantis_core STATIC kmer.cc - hashutil.cc query.cc util.cc - bitvector.cc validatemantis.cc coloreddbg.cc MantisFS.cc - cqf/gqf.c) + squeakrconfig.cc + gqf/gqf.c + gqf/gqf_file.c + gqf/hashutil.c, + colorEncoder.cc + canonKmer.cc) set(MANTIS_DEBUG_CFLAGS "${MANTIS_C_FLAGS};-g") set(MANTIS_DEBUG_CXXFLAGS "${MANTIS_CXX_FLAGS};-g") -set(MANTIS_RELEASE_CFLAGS "${MANTIS_C_FLAGS};-O3") -set(MANTIS_RELEASE_CXXFLAGS "${MANTIS_CXX_FLAGS};-O3") +set(MANTIS_RELEASE_CFLAGS "${MANTIS_C_FLAGS};-O3;-march=native") +set(MANTIS_RELEASE_CXXFLAGS "${MANTIS_CXX_FLAGS};-O3;-march=native") target_include_directories(mantis_core PUBLIC $) target_compile_options(mantis_core PUBLIC "$<$,$>:${MANTIS_DEBUG_CFLAGS}>") @@ -23,6 +26,7 @@ target_compile_options(mantis_core PUBLIC "$<$,$,$>:${MANTIS_RELEASE_CFLAGS}>") target_compile_options(mantis_core PUBLIC "$<$,$>:${MANTIS_RELEASE_CXXFLAGS}>") target_compile_definitions(mantis_core PUBLIC "${ARCH_DEFS}") +set_property(TARGET mantis_core PROPERTY INTERPROCEDURAL_OPTIMIZATION True) # link libmantis_core with the required libraries target_link_libraries(mantis_core @@ -42,6 +46,17 @@ target_compile_options(mantis PUBLIC "$<$,$,$>:${MANTIS_RELEASE_CFLAGS}>") target_compile_options(mantis PUBLIC "$<$,$>:${MANTIS_RELEASE_CXXFLAGS}>") target_compile_definitions(mantis PUBLIC "${ARCH_DEFS}") +set_property(TARGET mantis PROPERTY INTERPROCEDURAL_OPTIMIZATION True) + +add_executable(build_eq_graph build_eq_graph.cc) +target_include_directories(build_eq_graph PUBLIC $) +target_link_libraries(build_eq_graph mantis_core) +target_compile_options(build_eq_graph PUBLIC "$<$,$>:${MANTIS_DEBUG_CFLAGS}>") +target_compile_options(build_eq_graph PUBLIC "$<$,$>:${MANTIS_DEBUG_CXXFLAGS}>") +target_compile_options(build_eq_graph PUBLIC "$<$,$>:${MANTIS_RELEASE_CFLAGS}>") +target_compile_options(build_eq_graph PUBLIC "$<$,$>:${MANTIS_RELEASE_CXXFLAGS}>") +target_compile_definitions(build_eq_graph PUBLIC "${ARCH_DEFS}") +set_property(TARGET build_eq_graph PROPERTY INTERPROCEDURAL_OPTIMIZATION True) # TODO: 
look more into why this is necessary if (SDSL_INSTALL_PATH) @@ -51,3 +66,48 @@ endif() install(TARGETS mantis RUNTIME DESTINATION bin) + +add_executable(msf MSF.cc) +target_include_directories(msf PUBLIC + $) +target_link_libraries(msf + mantis_core) +set_property(TARGET msf PROPERTY INTERPROCEDURAL_OPTIMIZATION True) + +add_executable(walkMSF walkMSF.cc) +target_include_directories(walkMSF PUBLIC + $) +target_link_libraries(walkMSF + mantis_core) +set_property(TARGET walkMSF PROPERTY INTERPROCEDURAL_OPTIMIZATION True) + +add_executable(mstBoost MST_boost.cc) + target_include_directories(mstBoost PUBLIC + $) +target_link_libraries(mstBoost mantis_core) +set_property(TARGET mstBoost PROPERTY INTERPROCEDURAL_OPTIMIZATION True) + +add_executable(walkCqf walkCqf.cc) +target_include_directories(walkCqf PUBLIC + $) +target_link_libraries(walkCqf + mantis_core) +set_property(TARGET walkCqf PROPERTY INTERPROCEDURAL_OPTIMIZATION True) + + + +#add_executable(walkEqcls walkEqcls.cc) +#target_include_directories(walkEqcls PUBLIC +# $ $) +#find_library(compression_library NAMES SIMDCompressionAndIntersection HINTS "${CMAKE_SOURCE_DIR}/lib") +#target_link_libraries(walkEqcls +# mantis_core ${compression_library}) + + +#add_executable(monochromatic_component_iterator +# monochromatic_component_iterator.cc) +#target_include_directories(monochromatic_component_iterator PUBLIC +# $) +#target_link_libraries(monochromatic_component_iterator +# mantis_core) +#set_property(TARGET monochromatic_component_iterator PROPERTY INTERPROCEDURAL_OPTIMIZATION True) diff --git a/src/MSF.cc b/src/MSF.cc new file mode 100644 index 0000000..990fefd --- /dev/null +++ b/src/MSF.cc @@ -0,0 +1,338 @@ +// +// Created by Fatemeh Almodaresi on 7/11/18. +// +// The algorithm's basic implementation taken from +// https://www.geeksforgeeks.org/kruskals-minimum-spanning-tree-using-stl-in-c/ +// + +#include +#include +#include +#include "MSF.h" +#include "sparsepp/spp.h" + +struct Hop { + uint32_t id; + uint32_t dist; + uint16_t level; + + Hop(uint32_t id, uint32_t dist, uint16_t level) : id(id), dist(dist), level(level) {} + + bool operator<(const Hop &rhs) const { + return level < rhs.level; + } + +}; + +struct Opts { + std::string filename; + uint64_t numNodes; // = std::stoull(argv[2]); + uint16_t numSamples; + std::string eqClsListFile; + std::string outputDir; + uint16_t hops; +}; + + +int main(int argc, char *argv[]) { + /* Let us create above shown weighted + and undirected graph */ + using namespace clipp; + enum class mode { + build, fillGraph, help + }; + mode selected = mode::help; + Opts opt; + + auto build_mode = ( + command("build").set(selected, mode::build), + required("-e", "--edge-filename") & + value("edge_filename", opt.filename) % "File containing list of eq. class edges.", + required("-n", "--eqCls-cnt") & + value("equivalenceClass_count", opt.numNodes) % "Total number of equivalence (color) classes.", + required("-s", "--numSamples") & + value("numSamples", opt.numSamples) % "Total number of experiments (samples).", + required("-c", "--eqCls-lst") & + value("eqCls_list_filename", opt.eqClsListFile) % + "File containing list of equivalence (color) classes.", + required("-o", "--output_dir") & + value("output directory", opt.outputDir) % "Directory that all the int_vectors will be stored in." + ); + + auto fillGraph_mode = ( + command("fillGraph").set(selected, mode::fillGraph), + required("-e", "--edge-filename") & + value("edge_filename", opt.filename) % "File containing list of eq. 
class edges.", + required("-s", "--numSamples") & + value("numSamples", opt.numSamples) % "Total number of experiments (samples).", + required("-n", "--eqCls-cnt") & + value("equivalenceClass_count", opt.numNodes) % "Total number of equivalence (color) classes.", + required("-c", "--eqCls-lst") & + value("eqCls_list_filename", opt.eqClsListFile) % + "File containing list of equivalence (color) classes.", + required("-o", "--output_dir") & + value("output directory", opt.outputDir) % "Directory that all the int_vectors will be stored in.", + required("-h", "--hops") & + value("hops", opt.hops) % "# of hops to search for each node for a direct link with smaller weight." + ); + + auto cli = ( + (build_mode | fillGraph_mode | command("help").set(selected, mode::help) + ) + ); + + decltype(parse(argc, argv, cli)) res; + try { + res = parse(argc, argv, cli); + } catch (std::exception &e) { + std::cout << "\n\nParsing command line failed with exception: " << e.what() << "\n"; + std::cout << "\n\n"; + std::cout << make_man_page(cli, "MSF"); + return 1; + } + if (!res) { + std::cerr << "Cannot parse the input arguments\n"; + std::exit(1); + } + if (selected == mode::help) { + std::cerr << make_man_page(cli, "MSF"); + std::exit(1); + } + //explore_options_verbose(res); + + std::cerr << "here are the inputs: \n" + << opt.filename << "\n" + << opt.numNodes << "\n" + << opt.numSamples << "\n"; + uint64_t numWrds = (uint64_t) std::ceil((double) opt.numSamples / 64.0); + opt.numNodes++; // number of nodes is one more than input including the zero node + + std::cerr << "Loading all the equivalence classes first .. \n"; + std::vector> eqs; + eqs.reserve(20); + loadEqs(opt.eqClsListFile, eqs); + std::cerr << "Done loading list of equivalence class buckets\n"; + + ifstream file(opt.filename); + + uint16_t w_; + uint32_t n1, n2, edgeCntr{0}, zero{(uint32_t) opt.numNodes - 1}; + if (selected == mode::build) { + Graph g(opt.numSamples); + + { + std::cerr << "Adding edges from 0 to each node with number of set bits in the node as weight .. \n"; + for (uint32_t i = 0; i < opt.numNodes; i++) { + uint16_t ones = sum1s(eqs, i, opt.numSamples, numWrds); + g.addEdge(zero, i, ones); + } + std::cerr << "Done adding 0-end edges.\n"; + std::cerr << "Adding edges between color classes .. \n"; + while (file.good()) { + file >> n1 >> n2 >> w_; + g.addEdge(n1, n2, w_); + edgeCntr++; + } + //file.clear(); + //file.seekg(0, ios::beg); + file.close(); + std::cerr << "Done adding edges between color classes .. 
\n"; + + std::cerr << "\n# of edges: " << edgeCntr + << "\n"; +// nodes.clear(); + } + g.V = opt.numNodes; + g.mst.resize(opt.numNodes); + //g.V = numNodes; + //ifstream file(filename); + + DisjointSets ds = g.kruskalMSF(opt.numSamples); + std::queue q; + + std::vector degrees(g.mst.size()); + std::vector visited(g.mst.size(), false); + for (auto e = 0; e < g.mst.size(); e++) { + // put all the mst leaves into the queue + degrees[e] = g.mst[e].size(); // size of list of neighbors of each node is the degree of the node + if (degrees[e] == 1) { + q.push(e); // e is a leaf + } + } + // now go run the algorithm to find the root of the tree and all the edge directions + std::cerr << "Creating parentBV...\n"; + sdsl::int_vector<> parentbv(opt.numNodes, 0, ceil(log2(opt.numNodes))); + + bool check = false; + uint64_t nodeCntr{0}; + while (!q.empty()) { + uint32_t node = q.front(); + q.pop(); + visited[node] = true; + if (degrees[node] == 0) { + // this node is the root + // and this should be the end of the loop + // just as a validation, I'll continue and expect the while to end here + if (check) { + std::cerr << "A TERRIBLE BUG!!!\n" + << "finding a node with no edges should only happen once at the root\n"; + std::exit(1); + } + parentbv[node] = node; + check = true; + // Update the total weight to contain the delta of root from bv of zero + uint16_t ones = 1; // If the root is zero itself + if (node != zero) // otherwise + ones = sum1s(eqs, node, opt.numSamples, numWrds); + g.mst_totalWeight += ones; + continue; + } + // what ever is in q (node) is a leaf and has only one edge left + // fetch the pointer to that edge (bucket+idx) + // fetch the two ends of the edge and based on the node id decide which one is the src/dest + // Destination is the one that represents current node which has been selected as leaf sooner than the other end + uint64_t src, dest, weight; + for (auto& n : g.mst[node]) { + auto &e = g.edges[n.bucket][n.idx]; + if (!visited[e.n1] or !visited[e.n2]) { + src = e.n1; + dest = e.n2; + if (!visited[e.n2]) { + std::swap(src, dest); + } + weight = e.weight; + break; + } + } + if (dest == zero) { // we break the tree from node zero + parentbv[zero] = zero; + // we're breaking the tree, so should remove the edge weight from total weights + // But instead we're gonna spend one slot to store 0 as the delta of the zero node from zero node + g.mst_totalWeight = g.mst_totalWeight - weight + 1; + } else { + parentbv[dest] = src; + } + // decrease the degrees for src and dest and add the src if it is now a leaf (no children left) + degrees[dest]--; + degrees[src]--; + if (degrees[src] == 1) { + q.push(src); + } + nodeCntr++; // just a counter for the log + if (nodeCntr % 10000000 == 0) { + std::cerr << nodeCntr << " nodes processed toward root\n"; + } + } + + // create the data structures + std::cerr << "Sum of MST weights: " << g.mst_totalWeight << "\n"; + std::cerr << "Creating deltaBV and bBV...\n"; + sdsl::int_vector<> deltabv(g.mst_totalWeight, 0, ceil(log2(opt.numSamples))); + sdsl::bit_vector bbv(g.mst_totalWeight, 0); + + uint64_t deltaOffset{0}; + for (uint64_t i = 0; i < parentbv.size(); i++) { + std::vector deltas; + if (i == zero) { + deltaOffset++; + } else if (parentbv[i] == zero || parentbv[i] == i) { + deltas = getDeltaList(eqs, i, opt.numSamples, numWrds); + } else { + deltas = getDeltaList(eqs, parentbv[i], i, opt.numSamples, numWrds); + } + for (auto &v : deltas) { + deltabv[deltaOffset] = v; + deltaOffset++; + } + bbv[deltaOffset - 1] = 1; + if (i % 10000000 == 0) { 
+ std::cerr << i << " nodes processed\n"; + } + } + + + std::cerr << "Sum of MST weights: " << g.mst_totalWeight << "\n"; + + sdsl::store_to_file(parentbv, opt.outputDir + "/parents.bv"); + sdsl::store_to_file(deltabv, opt.outputDir + "/deltas.bv"); + sdsl::store_to_file(bbv, opt.outputDir + "/boundary.bv"); + } else if (selected == mode::fillGraph) { + //spp::sparse_hash_map>> nodes; + std::vector>> nodes(opt.numNodes); + std::cerr << "Adding edges between color classes .. \n"; + while (file.good()) { + file >> n1 >> n2 >> w_; + nodes[n1].push_back(std::make_pair(n2, w_)); + nodes[n2].push_back(std::make_pair(n1, w_)); + edgeCntr++; + if (edgeCntr % 10000000 == 0) { + std::cerr << edgeCntr << " passed\n"; + } + } + file.close(); + std::ofstream of(opt.outputDir + "/extraEdges.lst"); + std::cerr << "Done adding edges between color classes .. \n"; + std::cerr << "Total # of nodes : " << nodes.size() << "\n"; + std::cerr << "Hops required: " << opt.hops << "\n"; + uint32_t nodeCntr{0}; + uint64_t eqWrds{static_cast(((opt.numSamples - 1) / 64) + 1)}; + //std::vector> nodeEqs(nodes.size()); + std::cerr << "initialized\n"; + for (auto &neis : nodes) { + std::unordered_set immediateNeighbors; + immediateNeighbors.insert(nodeCntr); + std::priority_queue hops; + std::vector eq1(eqWrds); + buildColor(eqs, eq1, nodeCntr, opt.numSamples); + + //std::cerr << "n " << nodeCntr << " " << neis.size() << "\n"; + uint64_t maxNei{100}; + if (neis.size() > maxNei) continue; + for (auto &nei : neis) { + immediateNeighbors.insert(nei.first); + Hop nh(nei.first, nei.second, 1); + hops.push(nh); + } + + std::unordered_map visited; + while (!hops.empty() && hops.top().level != opt.hops) { + auto nei = hops.top(); + hops.pop(); + //std::cerr << nei.level << " " << nei.id << " " << nei.dist << " " << nodes[nei.id].size() << "\n"; + if (nodes[nei.id].size() > maxNei) continue; + for (auto &neinei : nodes[nei.id]) { + //std::cerr << " " << neinei.first << "\n"; + if (immediateNeighbors.find(neinei.first) == immediateNeighbors.end()) { + Hop nh(neinei.first, neinei.second + nei.dist, nei.level + 1); + if (visited.find(nh.id) == visited.end()) { + visited[nh.id] = nh.dist; + } else if (visited[nh.id] > nh.dist) { + visited[nh.id] = nh.dist; + } + hops.push(nh); + } + } + } + for (auto &kv : visited) { + if (nodeCntr < kv.first) { + std::vector eq2(eqWrds); + buildColor(eqs, eq2, kv.first, opt.numSamples); + uint64_t directDist = 0;//hammingDist(eqs, id, nh.id, opt.numSamples); + for (uint64_t i = 0; i < eq1.size(); i++) { + if (eq1[i] != eq2[i]) + directDist += sdsl::bits::cnt(eq1[i] ^ eq2[i]); + } + if (directDist < kv.second) { + of << nodeCntr << "\t" << kv.first << "\t" << directDist << "\n"; + } + } + } + nodeCntr++; + if (nodeCntr % 1000000 == 0) { + std::cerr << nodeCntr << " passed\n"; + } + } + } + return 0; +} + diff --git a/src/MST_boost.cc b/src/MST_boost.cc new file mode 100644 index 0000000..5cff8b2 --- /dev/null +++ b/src/MST_boost.cc @@ -0,0 +1,131 @@ +// +// Created by Fatemeh Almodaresi on 7/19/18. 
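// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the patch itself): the direct-distance
// check above reduces to a word-wise Hamming distance between two packed
// color vectors. A minimal standalone version; sdsl::bits::cnt is the same
// 64-bit popcount the loop above relies on.
#include <cstddef>
#include <cstdint>
#include <vector>
#include <sdsl/bits.hpp>

static uint64_t hammingDistWords(const std::vector<uint64_t>& eq1,
                                 const std::vector<uint64_t>& eq2) {
    // both vectors are assumed to hold the same number of 64-bit words
    uint64_t dist = 0;
    for (std::size_t w = 0; w < eq1.size(); w++)
        dist += sdsl::bits::cnt(eq1[w] ^ eq2[w]);
    return dist;
}
// ---------------------------------------------------------------------------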
+// + + +#include +#include +#include +#include +#include +#include "clipp.h" + +struct Opts { + std::string filename; + uint64_t numNodes; // = std::stoull(argv[2]); + uint64_t bucketCnt; // = std::stoull(argv[3]); + uint64_t numSamples; + std::string eqClsListFile; +}; + +int main(int argc, char *argv[]) { + + /* Let us create above shown weighted + and undirected graph */ + using namespace clipp; + enum class mode { + build, ccInfo, help + }; + mode selected = mode::help; + Opts opt; + auto ccInfo_mode = ( + command("ccInfo").set(selected, mode::ccInfo), + required("-e", "--edge-filename") & + value("edge_filename", opt.filename) % "file containing list of eq. class edges.", + required("-n", "--eqCls-cnt") & + value("equivalenceClass_count", opt.numNodes) % "Total number of equivalence (color) classes.", + required("-b", "--bucket-cnt") & + value("bucket_count", opt.bucketCnt) % "Total number of valid distances." + ); + + auto build_mode = ( + command("build").set(selected, mode::build), + required("-e", "--edge-filename") & + value("edge_filename", opt.filename) % "File containing list of eq. class edges.", + required("-n", "--eqCls-cnt") & + value("equivalenceClass_count", opt.numNodes) % "Total number of equivalence (color) classes.", + required("-b", "--bucket-cnt") & + value("bucket_count", opt.bucketCnt) % "Total number of valid distances.", + required("-s", "--numSamples") & + value("numSamples", opt.numSamples) % "Total number of experiments (samples).", + required("-c", "--eqCls-lst") & + value("eqCls_list", opt.eqClsListFile) % "File containing list of equivalence (color) classes." + ); + + auto cli = ( + (build_mode | ccInfo_mode | command("help").set(selected, mode::help) + ) + ); + + decltype(parse(argc, argv, cli)) res; + try { + res = parse(argc, argv, cli); + } catch (std::exception &e) { + std::cout << "\n\nParsing command line failed with exception: " << e.what() << "\n"; + std::cout << "\n\n"; + std::cout << make_man_page(cli, "MSF"); + return 1; + } + + //explore_options_verbose(res); + + if (res) { + switch (selected) { + //case mode::ccInfo: query_main(qopt); break; + //case mode::build: validate_main(vopt); break; + case mode::help: + std::cerr << make_man_page(cli, "MSF"); + break; + } + } + + using namespace boost; + typedef adjacency_list > Graph; + typedef graph_traits::edge_descriptor Edge; + typedef graph_traits::vertex_descriptor Vertex; + typedef std::pair E; + + if (selected == mode::build) { + + Graph g(opt.numNodes+1); + property_map::type weightmap = get(edge_weight, g); + std::ifstream file(opt.filename); + uint64_t n1,n2, zero{opt.numNodes+2}, edgeCntr{0}; + uint32_t w; + for (auto i : irange((uint64_t)0, opt.numNodes)) { + add_edge(0, i, opt.numSamples, g); + edgeCntr++; + if (edgeCntr % 1000000 == 0) { + + } + } + std::cerr << edgeCntr << " zero-end edges\n"; + edgeCntr = 0; + while (file.good()) { + file >> n1 >> n2 >> w; + /*Edge e; + bool inserted;*/ + /*tie(e, inserted) = */add_edge(n1, n2, w, g); + //weightmap[e] = w; + edgeCntr++; + if (edgeCntr % 10000000 == 0) { + std::cerr << edgeCntr << "\n"; + } + } + std::cerr << edgeCntr << " edges\n"; + + std::vector spanning_tree; + + kruskal_minimum_spanning_tree(g, std::back_inserter(spanning_tree)); + + std::cerr << "Done building MST\nTotal # of edges: " << spanning_tree.size() << "\n"; + /*std::cout << "Print the edges in the MST:" << std::endl; + for (std::vector::iterator ei = spanning_tree.begin(); + ei != spanning_tree.end(); ++ei) { + std::cout << source(*ei, g) << " <--> " << target(*ei, 
g) + << " with weight of " << weight[*ei] + << std::endl; + }*/ + } + return EXIT_SUCCESS; +} \ No newline at end of file diff --git a/src/bitvector.cc b/src/bitvector.cc deleted file mode 100644 index 22c10d5..0000000 --- a/src/bitvector.cc +++ /dev/null @@ -1,43 +0,0 @@ -#include "bitvector.h" - -BitVector::BitVector(uint64_t size) : size(size) { - bits = sdsl::bit_vector(size); -} - -void BitVector::reset() { - bits = sdsl::bit_vector(size); -} - -bool BitVector::operator[](uint64_t idx) { - assert(idx < size); - return bits[idx]; -} - -void BitVector::set(uint64_t idx) { - assert(idx < size); - bits[idx] = 1; -} - -void BitVector::resize(const uint64_t len) { - bits.bit_resize(len); - size = len; -} - -BitVectorRRR::BitVectorRRR(std::string& filename) { - sdsl::load_from_file(rrr_bits, filename); - size = rrr_bits.size(); - DEBUG_CDBG("Read rrr bit vector of size " << size << " from file " << - filename); -} - -bool BitVectorRRR::operator[](uint64_t idx) { - assert(idx < size); - return rrr_bits[idx]; -} - -bool BitVectorRRR::serialize(std::string& filename) { - DEBUG_CDBG("Serializing rrr bit vector of size " << size << " to file " << - filename); - return sdsl::store_to_file(rrr_bits, filename); -} - diff --git a/src/canonKmer.cc b/src/canonKmer.cc new file mode 100644 index 0000000..160ceb8 --- /dev/null +++ b/src/canonKmer.cc @@ -0,0 +1,181 @@ +// +// Created by Fatemeh Almodaresi on 8/22/18. +// + +#include "canonKmer.h" +namespace duplicated_dna { + +/////////////// bases ///////////////// + base operator-(base b) { + return (base) ((~((uint64_t) b)) & 0x3ULL); + } + + const base bases[4] = {C, A, T, G}; + const std::map base_from_char = {{'A', A}, + {'C', C}, + {'G', G}, + {'T', T}, + {'N', A}}; + const std::map base_to_char = {{A, 'A'}, + {C, 'C'}, + {G, 'G'}, + {T, 'T'}}; + +///////////// kmers ///////////////////// + kmer::kmer(void) : len(0), val(0) {} + + kmer::kmer(base b) : len(1), val((uint64_t) b) {} + + kmer::kmer(int l, uint64_t v) : len(l), val(v & BITMASK(2 * l)) { + assert(l <= 32); + } + + static uint64_t string_to_kmer_val(std::string s) { + uint64_t val = 0; + for (auto c : s) + val = (val << 2) | ((uint64_t)(base_from_char.at(c))); + return val; + } + + kmer::kmer(std::string s) : len(s.size()), val(string_to_kmer_val(s)) { + assert(s.size() <= 32); + } + +// Convert to string + kmer::operator std::string() const { + std::string s; + for (auto i = 1; i < len + 1; i++) + s = s + base_to_char.at((base) ((val >> (2 * (len - i))) & BITMASK(2))); + return s; + } + + bool operator<(kmer a, kmer b) { + return a.len != b.len ? a.len < b.len : a.val < b.val; + } + + bool operator==(kmer a, kmer b) { + return a.len == b.len && a.val == b.val; + } + + bool operator!=(kmer a, kmer b) { + return !operator==(a, b); + } + +// Return the reverse complement of k + kmer operator-(kmer k) { + uint64_t val = k.val; + val = + (val >> 32) | + (val << 32); + val = + ((val >> 16) & 0x0000ffff0000ffff) | + ((val << 16) & 0xffff0000ffff0000); + val = + ((val >> 8) & 0x00ff00ff00ff00ff) | + ((val << 8) & 0xff00ff00ff00ff00); + val = + ((val >> 4) & 0x0f0f0f0f0f0f0f0f) | + ((val << 4) & 0xf0f0f0f0f0f0f0f0); + val = + ((val >> 2) & 0x3333333333333333) | + ((val << 2) & 0xcccccccccccccccc); + val = ~val; + val >>= 64 - 2 * k.len; + return kmer(k.len, val); + } + +// backwards from standard definition to match kmer.h definition + kmer canonicalize(kmer k) { + return -k < k ? 
k : -k; + } + +// Return the kmer of length |a| that results from shifting b into a +// from the right + kmer operator<<(kmer a, kmer b) { + uint64_t val = ((a.val << (2 * b.len)) | b.val) & BITMASK(2 * a.len); + return kmer(a.len, val); + } + +// Return the kmer of length |b| that results from shifting b into a +// from the left + kmer operator>>(kmer a, kmer b) { + uint64_t val + = ((b.val >> (2 * a.len)) | (a.val << (2 * (b.len - a.len)))) + & BITMASK(2 * b.len); + return kmer(b.len, val); + } + +// Append two kmers + kmer operator+(kmer a, kmer b) { + int len = a.len + b.len; + assert(len <= 32); + uint64_t val = (a.val << (2 * b.len)) | b.val; + return kmer(len, val); + } + + kmer prefix(kmer k, int len) { return kmer(len, k.val >> (2 * (k.len - len))); } + + kmer suffix(kmer k, int len) { return kmer(len, k.val & BITMASK(2 * len)); } + + bool period_divides(kmer k, uint64_t periodicity) { + static const uint64_t multipliers[33] = + { + 0, + 0x5555555555555555, // 1 + 0x1111111111111111, // 2 + 0x1041041041041041, // 3 + 0x0101010101010101, // 4 + 0x1004010040100401, // 5 + 0x1001001001001001, // 6 + 0x0100040010004001, // 7 + 0x0001000100010001, // 8 + 0x0040001000040001, // 9 + 0x1000010000100001, // 10 + 0x0000100000400001, // 11 + 0x0001000001000001, // 12 + 0x0010000004000001, // 13 + 0x0100000010000001, // 14 + 0x1000000040000001, // 15 + 0x0000000100000001, // 16 + 0x0000000400000001, // 17 + 0x0000001000000001, // 18 + 0x0000004000000001, // 19 + 0x0000010000000001, // 20 + 0x0000040000000001, // 21 + 0x0000100000000001, // 22 + 0x0000400000000001, // 23 + 0x0001000000000001, // 24 + 0x0004000000000001, // 25 + 0x0010000000000001, // 26 + 0x0040000000000001, // 27 + 0x0100000000000001, // 28 + 0x0400000000000001, // 29 + 0x1000000000000001, // 30 + 0x4000000000000001, // 31 + 0x0000000000000001, // 32 + }; + uint64_t piece = k.val & BITMASK(2 * periodicity); + piece = piece * multipliers[periodicity]; + piece = piece & BITMASK(2 * k.len); + return piece == k.val; + } + + uint64_t period(kmer k) { + for (int i = 1; i <= k.len; i++) { + if (period_divides(k, i)) + return i; + } + abort(); + } + + canonical_kmer::canonical_kmer(void) : kmer() {} + + canonical_kmer::canonical_kmer(base b) : kmer(canonicalize(kmer(b))) {} + + canonical_kmer::canonical_kmer(int l, uint64_t v) + : kmer(canonicalize(kmer(l, v))) {} + + canonical_kmer::canonical_kmer(std::string s) : kmer(canonicalize(kmer(s))) {} + + canonical_kmer::canonical_kmer(kmer k) : kmer(canonicalize(k)) {} +} \ No newline at end of file diff --git a/src/colorEncoder.cc b/src/colorEncoder.cc new file mode 100644 index 0000000..80b5315 --- /dev/null +++ b/src/colorEncoder.cc @@ -0,0 +1,358 @@ +// +// Created by Fatemeh Almodaresi on 8/17/18. +// + +#include "colorEncoder.h" +#include +#include +#include + +bool ColorEncoder::addColorClass(uint64_t kmer, uint64_t eqId, const sdsl::bit_vector &bv) { + kmerCntr++; + if (kmerCntr % 10000 == 0) { + (*weightDistFile) << deltaM.getDeltaCnt() << "\n"; + } + // create list of edges to be processed + // 1. zero to node + // 2. list of neighbors + // calc. the distance between the node and any of the neighbors + // that exist and the edge hasn't been seen + duplicated_dna::canonical_kmer cur(k, HashUtil::hash_64i(kmer, BITMASK(cqf.keybits()))); + std::unordered_set, pair_hash> newEdges; + + std::vector setBits; + //stats.tot_hits++; + if (!lru_cache.contains(eqId)) { + setBits = buildColor(bv); + lru_cache.emplace(eqId, setBits); + } //else stats.cache_hits++; + // case 1. 
edge from zero to the node + if (!hasEdge(zero, eqId)) { + updateMST(zero, eqId, setBits); + addEdge(zero, eqId, setBits.size()); + } + + // case 2. edges between the node and its neighbors + for (auto nei_eqId : neighbors(cur)) { + uint64_t cur_eqId{eqId}; + if (nei_eqId != cur_eqId) { + if (nei_eqId < cur_eqId) { + std::swap(cur_eqId, nei_eqId); + } + if (!hasEdge(cur_eqId, nei_eqId)) { + newEdges.insert(std::make_pair(cur_eqId, nei_eqId)); + } + } + } + for (auto &newEdge : newEdges) { + auto deltas = hammingDist(newEdge.first, newEdge.second); + updateMST(newEdge.first, newEdge.second, deltas); + addEdge(newEdge.first, newEdge.second, deltas.size()); + } + if (newEdges.size()) + return true; + return false; +} + +bool ColorEncoder::serialize() { + (*weightDistFile) << deltaM.getDeltaCnt() << "\n"; + weightDistFile->close(); + std::string statsf = prefix + "/stats.txt"; + std::ofstream stats_out(statsf); + stats_out << "\n\nCACHE STATS:"; + stats_out << "\n\ttotal hits: " << lru_cache.stats().total_hits(); // Hits for any key + stats_out << "\n\ttotal misses: " << lru_cache.stats().total_misses(); // Misses for any key + stats_out << "\n\ttotal hit rate: " << lru_cache.stats().hit_rate() << "\n"; // Hit rate in [0, 1] + stats_out << "\nEDGE STATS:"; + stats_out << "\n\t# of times searching for an edge: " << stats.tot_edge_access_request; + stats_out << "\n\t# of times edge not found: " << stats.add_edge; + stats_out << "\n\t# of times calling operator [] on edge map: " << stats.tot_edge_access; + stats_out << "\n\t# of times calling operator [] on edge map in updateMST: " << stats.edge_access_for_updateMST; + stats_out << "\n\t# of times accessing parentbv in updateMST: " << stats.parentbv_access_for_updateMST; + stats_out << "\n"; + stats_out.close(); + std::string parentbv_file = prefix + "/parents.bv"; + // resize parentbv if it is larger than required size + if (colorClsCnt < parentbv.size()) { + uint64_t newSize = colorClsCnt; + sdsl::int_vector<> parentTmp(newSize, 0, ceil(log2((double)newSize))); + for (uint64_t i = 0; i < newSize; i++) { + parentTmp[i] = parentbv[i]; + } + parentbv = parentTmp; + } + bool parentSuccessfullyStored = sdsl::store_to_file(parentbv, parentbv_file); + std::cerr << " parentbv final size: " << parentbv.size() << " , bits: " << parentbv.bit_size() << "\n"; + parentbv.resize(0); + + return deltaM.serialize(prefix) and parentSuccessfullyStored; +} + +// deltas should *NOT* be passed by reference +bool ColorEncoder::updateMST(uint64_t n1, uint64_t n2, std::vector deltas) { // n2 > n1 + if (n1 > n2) { + std::swap(n1, n2); + } + if (parentbv.size() < colorClsCnt) { + uint64_t newSize = parentbv.size()+parentbv.size()/2 > colorClsCnt? 
+ parentbv.size()+parentbv.size()/2 : colorClsCnt + 1; + sdsl::int_vector<> parentTmp(newSize, 0, ceil(log2((double)newSize))); + for (uint64_t i = 0; i < parentbv.size(); i++) { + parentTmp[i] = parentbv[i]; + } + parentbv = parentTmp; + } + // The only time that we will see the edge zero -> n2 is when n2 is observed for the first time + if (n1 == zero) { + parentbv[n2] = n1; + deltaM.insertDeltas(n2, deltas); + if (colorClsCnt < n2) + colorClsCnt = n2+1; // n2 is the index + return true; + } + // find the max weight edge from each of the ends to their lca, called w1, w2 + uint64_t w1, w2, p1, p2; + uint64_t w = deltas.size(); + std::pair lr = maxWeightsTillLCA(n1, n2); + w1 = lr.first.weight; + w2 = lr.second.weight; + p1 = lr.first.parent; + p2 = lr.second.parent; + // if w >= w1 and w >= w2, happily do nothing + if (w >= w1 and w >= w2) + return false; + if (w1 > w2) { + std::swap(w1, w2); + std::swap(p1, p2); + } + // Now we know that we're gonna add the edge n1 -> n2 + // and remove edge with weight w2 along the path from n2 to the LCA + // update parentbv and deltas for n2 + // change the parent/child relationship + // starting from node n2 toward the c2, child node of the edge with weight w2 + auto parent = n1; + auto child = n2; + while (child != p2) { + uint64_t tmp = parentbv[child]; + parentbv[child] = parent; + auto prevDeltas = deltaM.getDeltas(child); + deltaM.insertDeltas(child, deltas); + deltas = prevDeltas; + parent = child; + child = tmp; + } + return true; +} + +// returns list of set bits +std::vector ColorEncoder::buildColor(uint64_t eqid) { + std::vector eq; + if (eqid == zero) { // if dummy node "zero", return an empty list (no set bits) + return eq; + } + uint64_t numWrds = numSamples/64+1; + eq.reserve(numWrds); + + std::vector flips(numSamples, 0); + std::vector xorflips(numSamples, 0); + uint64_t i{eqid}; + std::vector deltaIndices; + deltaIndices.reserve(numWrds); + bool foundCache = false; + uint32_t iparent = parentbv[i]; + while (i != zero) { + stats.tot_hits++; + if (lru_cache.contains(i)) { + const auto &vs = lru_cache[i]; + stats.cache_hits++; + for (auto v : vs) { + xorflips[v] = 1; + } + foundCache = true; + break; + } + deltaIndices.push_back(i); + i = iparent; + iparent = parentbv[i]; + } + uint64_t pctr{0}; + for (auto index : deltaIndices) { + auto deltas = deltaM.getDeltas(index); + for (auto d : deltas) { + flips[d] ^= 0x01; + } + } + + // return the indices of set bits + uint64_t one = 1; + for (auto i = 0; i < numSamples; i++) { + if (flips[i] ^ xorflips[i]) { + eq.push_back(i); + } + } + return eq; +} + +std::vector ColorEncoder::buildColor(const sdsl::bit_vector &bv) { + std::vector setBits; + setBits.reserve(numSamples); + uint64_t i = 0; + while (i < bv.bit_size()) { + uint64_t bitcnt = numSamples - i >= 64?64:(numSamples - i); + auto wrd = bv.get_int(i, bitcnt); + for (uint64_t c=0; c < bitcnt; c++) { + if ( (wrd >> c) & 0x01) { + setBits.push_back(i+c); + } + } + i+=64; + } + return setBits; +} + +std::vector ColorEncoder::hammingDist(uint64_t i, uint64_t j) { + std::vector res; + auto n1 = buildColor(i); + auto n2 = buildColor(j); + // merge + // with slight difference of not inserting values that appear in both vectors + uint64_t i1{0}, i2{0}; + while (i1 < n1.size() or i2 < n2.size()) { + if (i1 == n1.size()) { + copy(n2.begin()+i2, n2.end(), back_inserter(res)); + i2 = n2.size(); + } + else if (i2 == n2.size()) { + copy(n1.begin()+i1, n1.end(), back_inserter(res)); + i1 = n1.size(); + } + else { + if (n1[i1] < n2[i2]) { + 
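// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the patch itself): the hand-rolled merge
// here computes the symmetric difference of two sorted lists of set-bit
// indices, i.e. the positions where exactly one of the two color classes has
// a 1. The standard-library equivalent, with the index type assumed to be
// uint64_t:
#include <algorithm>
#include <cstdint>
#include <iterator>
#include <vector>

static std::vector<uint64_t> symmetricDiff(const std::vector<uint64_t>& n1,
                                           const std::vector<uint64_t>& n2) {
    std::vector<uint64_t> res;
    // both inputs must be sorted ascending, as buildColor() produces them
    std::set_symmetric_difference(n1.begin(), n1.end(), n2.begin(), n2.end(),
                                  std::back_inserter(res));
    return res;  // res.size() is the Hamming distance between the two classes
}
// ---------------------------------------------------------------------------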
res.push_back(n1[i1]); + i1++; + } else if (n2[i2] < n1[i1]) { + res.push_back(n2[i2]); + i2++; + } else { //n1[i1] == n2[i2] both have a set bit at the same index + i1++; + i2++; + } + } + } + return res; +} + +//TODO we definitely need to discuss this. This function in its current way is super inefficient +// time/space tradeoff +std::pair ColorEncoder::maxWeightsTillLCA(uint64_t n1, uint64_t n2) { + std::vector nodes1; + std::vector nodes2; + uint64_t n = n1; + while (n != zero) { + stats.parentbv_access_for_updateMST++; + nodes1.push_back(n); + n = parentbv[n]; + } + nodes1.push_back(zero); + n = n2; + while (n != zero) { + stats.parentbv_access_for_updateMST++; + nodes2.push_back(n); + n = parentbv[n]; + } + nodes2.push_back(zero); + auto &n1ref = nodes1; + auto &n2ref = nodes2; + if (n1ref.size() < n2ref.size()) { + std::swap(n1ref, n2ref); + } + + // find lca + uint64_t lca = 0; + for (uint64_t j = 0, i = n1ref.size()-n2ref.size(); j < n2ref.size(); i++, j++) { + if (n1ref[i] == n2ref[j]) { + lca = n1ref[i]; + break; + } + } + + // walk from n1 to lca and find the edge with maximum weight + Edge e1; + uint64_t i = 0; + while (n1ref[i] != lca) { + stats.edge_access_for_updateMST++; + auto curW = getEdge(n1ref[i], n1ref[i+1]); + if (e1.weight < curW) { + e1 = Edge(n1ref[i+1], n1ref[i], curW); + } + i++; + } + + // walk from n2 to lca and find the edge with maximum weight + Edge e2; + i = 0; + while (n2ref[i] != lca) { + stats.edge_access_for_updateMST++; + auto curW = getEdge(n2ref[i], n2ref[i+1]); + if (e2.weight < curW) { + e2 = Edge(n2ref[i+1], n2ref[i], curW); + } + i++; + } + return std::make_pair(e1, e2); +} + +std::unordered_set ColorEncoder::neighbors(duplicated_dna::canonical_kmer n) { + std::unordered_set result; + for (const auto b : duplicated_dna::bases) { + uint64_t eqid{0}, idx; + if (exists(b >> n, eqid)) { + result.insert(eqid); + } + if (exists(n << b, eqid)) { + result.insert(eqid); + } + } + return result; +} + +bool ColorEncoder::exists(duplicated_dna::canonical_kmer e, uint64_t &eqid) { + uint64_t tmp = e.val; + KeyObject key(HashUtil::hash_64(tmp, BITMASK(cqf.keybits())), 0, 0); + eqid = cqf.query(key); + return eqid != 0; + // commenting this line, eqIds start from 1 (rather than 0) +/* + if (eq_idx) { + eqid = eq_idx - 1; //note be careful about this -1 here. 
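// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the patch itself): the LCA search above
// collects both root paths and aligns them by their length difference. A
// compact restatement of the same idea, assuming a parent array in which the
// root is its own parent; the code above instead stops at the dummy zero node.
#include <cstdint>
#include <vector>

static uint64_t lcaByRootPaths(const std::vector<uint64_t>& parent,
                               uint64_t a, uint64_t b) {
    auto depth = [&](uint64_t n) {
        uint64_t d = 0;
        while (parent[n] != n) { n = parent[n]; d++; }
        return d;
    };
    uint64_t da = depth(a), db = depth(b);
    while (da > db) { a = parent[a]; da--; }          // climb the deeper node first
    while (db > da) { b = parent[b]; db--; }
    while (a != b) { a = parent[a]; b = parent[b]; }  // then climb in lock-step
    return a;                                         // first common ancestor
}
// ---------------------------------------------------------------------------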
It'll change many things + return true; + } + return false; +*/ +} + +void ColorEncoder::addEdge(uint64_t i, uint64_t j, uint32_t w) { + stats.tot_edge_access++; + stats.add_edge++; + if (i == j) return; + if (i > j) { + std::swap(i,j); + } + edges[std::make_pair(i, j)] = w; +} + +bool ColorEncoder::hasEdge(uint64_t i, uint64_t j) { + stats.tot_edge_access_request++; + if (i > j) { + std::swap(i, j); + } + return i == j or edges.find(std::make_pair(i, j)) != edges.end(); +} + +uint32_t ColorEncoder::getEdge(uint64_t i, uint64_t j) { + stats.tot_edge_access++; + if (i > j) { + std::swap(i, j); + } + return edges[std::make_pair(i, j)]; +} diff --git a/src/coloreddbg.cc b/src/coloreddbg.cc index 1701b78..de74007 100644 --- a/src/coloreddbg.cc +++ b/src/coloreddbg.cc @@ -1,15 +1,6 @@ /* * ============================================================================ * - * Filename: main.cc - * - * Description: - * - * Version: 1.0 - * Created: 2016-11-10 03:31:54 PM - * Revision: none - * Compiler: gcc - * * Author: Prashant Pandey (), ppandey@cs.stonybrook.edu * Organization: Stony Brook University * @@ -41,26 +32,18 @@ #include #include #include -#include "MantisFS.h" + #include "sparsepp/spp.h" #include "tsl/sparse_map.h" + +#include "MantisFS.h" #include "ProgOpts.h" #include "coloreddbg.h" +#include "squeakrconfig.h" #include "json.hpp" #include "mantis_utils.hpp" #include "mantisconfig.hpp" -// This function read one byte from each page in the iterator buffer. -//uint64_t tmp_sum; -//void handler_function(union sigval sv) { - //CQF::Iterator& it(*((CQF::Iterator*)sv.sival_ptr)); - //unsigned char *start = (unsigned char*)(it.iter.qf->metadata) + it.last_prefetch_offset; - //unsigned char *counter = (unsigned char*)(it.iter.qf->metadata) + it.last_prefetch_offset; - //for (;counter < start + it.buffer_size; counter += 4096) { - //tmp_sum += *counter; - //} -//} - /* * === FUNCTION ============================================================= * Name: main @@ -70,14 +53,6 @@ int build_main ( BuildOpts& opt ) { - /* calling asyc read init */ - struct aioinit aioinit; - memset(&aioinit, 0, sizeof(struct aioinit)); - aioinit.aio_num = 2500; - aioinit.aio_threads = 100; - aioinit.aio_idle_time = 60; - aio_init(&aioinit); - spdlog::logger* console = opt.console.get(); std::ifstream infile(opt.inlist); uint64_t num_samples{0}; @@ -91,16 +66,6 @@ build_main ( BuildOpts& opt ) console->error("Input file {} does not exist or could not be opened.", opt.inlist); std::exit(1); } - //struct timeval start1, end1; - //struct timezone tzp; - - // C++-izing - // This is C++ ... not C. Why do we have raw pointers - // and calloc them. We use vectors instead. - //SampleObject*> *inobjects; - //CQF *cqfs; - std::vector*>> inobjects; - std::vector> cqfs; /** try and create the output directory * and write a file to it. 
Complain to the user @@ -137,32 +102,64 @@ build_main ( BuildOpts& opt ) jfile.close(); } + std::vector*>> inobjects; + std::vector> cqfs; + // reserve QF structs for input CQFs inobjects.reserve(num_samples); cqfs.reserve(num_samples); // mmap all the input cqfs - std::string cqf_file; + std::string squeakr_file; uint32_t nqf = 0; - uint32_t cutoff; + uint32_t kmer_size{0}; console->info("Reading input Squeakr files."); - while (infile >> cqf_file >> cutoff) { - if (!mantis::fs::FileExists(cqf_file.c_str())) { - console->error("Squeakr file {} does not exist.", cqf_file); + while (infile >> squeakr_file) { + if (!mantis::fs::FileExists(squeakr_file.c_str())) { + console->error("Squeakr file {} does not exist.", squeakr_file); + exit(1); + } + squeakr::squeakrconfig config; + int ret = squeakr::read_config(squeakr_file, &config); + if (ret == squeakr::SQUEAKR_INVALID_VERSION) { + console->error("Squeakr index version is invalid. Expected: {} Available: {}", + squeakr::INDEX_VERSION, config.version); + exit(1); + } + if (ret == squeakr::SQUEAKR_INVALID_ENDIAN) { + console->error("Can't read Squeakr file. It was written on a different endian machine."); exit(1); } - cqfs.emplace_back(cqf_file, true); - std::string sample_id = first_part(first_part(last_part(cqf_file, '/'), + if (cqfs.size() == 0) + kmer_size = config.kmer_size; + else { + if (kmer_size != config.kmer_size) { + console->error("Squeakr file {} has a different k-mer size. Expected: {} Available: {}", + squeakr_file, kmer_size, config.kmer_size); + exit(1); + } + } + if (config.cutoff == 1) { + console->warn("Squeakr file {} is not filtered.", squeakr_file); + } + + cqfs.emplace_back(squeakr_file, CQF_MMAP); + std::string sample_id = first_part(first_part(last_part(squeakr_file, '/'), '.'), '_'); console->info("Reading CQF {} Seed {}",nqf, cqfs[nqf].seed()); - console->info("Sample id {} cut off {}", sample_id, cutoff); + console->info("Sample id {}", sample_id); cqfs.back().dump_metadata(); - inobjects.emplace_back(&cqfs[nqf], cutoff, sample_id, nqf); + inobjects.emplace_back(&cqfs[nqf], sample_id, nqf); + if (!cqfs.front().check_similarity(&cqfs.back())) { + console->error("Passed Squeakr files are not similar.", squeakr_file); + exit(1); + } nqf++; } ColoredDbg*>, KeyObject> cdbg(opt.qbits, inobjects[0].obj->keybits(), + cqfs[0].hash_mode(), inobjects[0].obj->seed(), prefix, nqf); cdbg.set_console(console); @@ -205,11 +202,12 @@ build_main ( BuildOpts& opt ) console->info("Constructing the colored dBG."); + console->info("\n\n\n\n\n\nDONE WITH SAMPLING PHASE\n\n\n\n\n"); // Reconstruct the colored dbg using the new set of equivalence classes. 
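// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the patch itself): the per-file checks
// added above (config version, endianness, consistent k-mer size, cutoff
// warning) could be collected into one helper. squeakrconfig, read_config and
// the SQUEAKR_* codes are the ones this patch introduces; the helper name and
// the spdlog include path are assumptions.
#include <cstdint>
#include <string>
#include "squeakrconfig.h"
#include "spdlog/spdlog.h"

static bool validateSqueakrInput(const std::string& squeakr_file,
                                 uint32_t& kmer_size,  // pass 0 for the first file
                                 spdlog::logger* console) {
    squeakr::squeakrconfig config;
    int ret = squeakr::read_config(squeakr_file, &config);
    if (ret == squeakr::SQUEAKR_INVALID_VERSION || ret == squeakr::SQUEAKR_INVALID_ENDIAN) {
        console->error("Cannot use Squeakr file {}.", squeakr_file);
        return false;
    }
    if (kmer_size == 0) {
        kmer_size = config.kmer_size;  // the first file fixes k for the whole run
    } else if (kmer_size != config.kmer_size) {
        console->error("Squeakr file {} has k = {}, expected {}.",
                       squeakr_file, config.kmer_size, kmer_size);
        return false;
    }
    if (config.cutoff == 1)
        console->warn("Squeakr file {} is not filtered.", squeakr_file);
    return true;
}
// ---------------------------------------------------------------------------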
cdbg.construct(inobjects.data(), std::numeric_limits::max()); console->info("Final colored dBG has {} k-mers and {} equivalence classes", - cdbg.get_cqf()->size(), cdbg.get_num_eqclasses()); + cdbg.get_cqf()->dist_elts(), cdbg.get_num_eqclasses()); //cdbg.get_cqf()->dump_metadata(); //DEBUG_CDBG(cdbg.get_cqf()->set_size()); diff --git a/src/cqf/gqf.c b/src/gqf/gqf.c similarity index 64% rename from src/cqf/gqf.c rename to src/gqf/gqf.c index 3715ed7..43d7827 100644 --- a/src/cqf/gqf.c +++ b/src/gqf/gqf.c @@ -1,3 +1,12 @@ +/* + * ============================================================================ + * + * Authors: Prashant Pandey + * Rob Johnson + * + * ============================================================================ + */ + #include #if 0 # include @@ -14,51 +23,31 @@ #include #include -#include "cqf/gqf.h" +#include "gqf/hashutil.h" +#include "gqf/gqf.h" +#include "gqf/gqf_int.h" /****************************************************************** * Code for managing the metadata bits and slots w/o interpreting * * the content of the slots. ******************************************************************/ -/* Must be >= 6. 6 seems fastest. */ -#define BLOCK_OFFSET_BITS (6) - -#define SLOTS_PER_BLOCK (1ULL << BLOCK_OFFSET_BITS) -#define METADATA_WORDS_PER_BLOCK ((SLOTS_PER_BLOCK + 63) / 64) - +#define MAX_VALUE(nbits) ((1ULL << (nbits)) - 1) +#define BITMASK(nbits) \ + ((nbits) == 64 ? 0xffffffffffffffff : MAX_VALUE(nbits)) #define NUM_SLOTS_TO_LOCK (1ULL<<16) #define CLUSTER_SIZE (1ULL<<14) +#define METADATA_WORD(qf,field,slot_index) \ + (get_block((qf), (slot_index) / \ + QF_SLOTS_PER_BLOCK)->field[((slot_index) % QF_SLOTS_PER_BLOCK) / 64]) -#define METADATA_WORD(qf,field,slot_index) (get_block((qf), (slot_index) / \ - SLOTS_PER_BLOCK)->field[((slot_index) % SLOTS_PER_BLOCK) / 64]) +#define GET_NO_LOCK(flag) (flag & QF_NO_LOCK) +#define GET_TRY_ONCE_LOCK(flag) (flag & QF_TRY_ONCE_LOCK) +#define GET_WAIT_FOR_LOCK(flag) (flag & QF_WAIT_FOR_LOCK) +#define GET_KEY_HASH(flag) (flag & QF_KEY_IS_HASH) -#define MAX_VALUE(nbits) ((1ULL << (nbits)) - 1) +#define DISTANCE_FROM_HOME_SLOT_CUTOFF 1000 #define BILLION 1000000000L -#define PAGE_SIZE (1ULL << 12) -#define PAGE_ALIGN(virt) (virt & ~(PAGE_SIZE - 1)) - -typedef struct __attribute__ ((__packed__)) qfblock { - /* Code works with uint16_t, uint32_t, etc, but uint8_t seems just as fast as - * anything else */ - uint8_t offset; - uint64_t occupieds[METADATA_WORDS_PER_BLOCK]; - uint64_t runends[METADATA_WORDS_PER_BLOCK]; - -#if BITS_PER_SLOT == 8 - uint8_t slots[SLOTS_PER_BLOCK]; -#elif BITS_PER_SLOT == 16 - uint16_t slots[SLOTS_PER_BLOCK]; -#elif BITS_PER_SLOT == 32 - uint32_t slots[SLOTS_PER_BLOCK]; -#elif BITS_PER_SLOT == 64 - uint64_t slots[SLOTS_PER_BLOCK]; -#elif BITS_PER_SLOT != 0 - uint8_t slots[SLOTS_PER_BLOCK * BITS_PER_SLOT / 8]; -#else - uint8_t slots[]; -#endif -} qfblock; #ifdef DEBUG #define PRINT_DEBUG 1 @@ -69,8 +58,8 @@ typedef struct __attribute__ ((__packed__)) qfblock { #define DEBUG_CQF(fmt, ...) \ do { if (PRINT_DEBUG) fprintf(stderr, fmt, __VA_ARGS__); } while (0) -#define PRINT_CQF(fmt, ...) 
\ - do { fprintf(stdout, fmt, __VA_ARGS__); } while (0) +#define DEBUG_DUMP(qf) \ + do { if (PRINT_DEBUG) qf_dump_metadata(qf); } while (0) static __inline__ unsigned long long rdtsc(void) { @@ -80,57 +69,57 @@ static __inline__ unsigned long long rdtsc(void) } #ifdef LOG_WAIT_TIME -static inline bool qf_spin_lock(QF *cf, volatile int *lock, uint64_t idx, - enum lock flag) +static inline bool qf_spin_lock(QF *qf, volatile int *lock, uint64_t idx, + uint8_t flag) { struct timespec start, end; bool ret; clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); - if (flag != LOCK_AND_SPIN) { + if (GET_WAIT_FOR_LOCK(flag) != QF_WAIT_FOR_LOCK) { ret = !__sync_lock_test_and_set(lock, 1); clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); - cf->mem->wait_times[idx].locks_acquired_single_attempt++; - cf->mem->wait_times[idx].total_time_single += BILLION * (end.tv_sec - + qf->runtimedata->wait_times[idx].locks_acquired_single_attempt++; + qf->runtimedata->wait_times[idx].total_time_single += BILLION * (end.tv_sec - start.tv_sec) + end.tv_nsec - start.tv_nsec; } else { if (!__sync_lock_test_and_set(lock, 1)) { clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); - cf->mem->wait_times[idx].locks_acquired_single_attempt++; - cf->mem->wait_times[idx].total_time_single += BILLION * (end.tv_sec - + qf->runtimedata->wait_times[idx].locks_acquired_single_attempt++; + qf->runtimedata->wait_times[idx].total_time_single += BILLION * (end.tv_sec - start.tv_sec) + end.tv_nsec - start.tv_nsec; } else { while (__sync_lock_test_and_set(lock, 1)) while (*lock); clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); - cf->mem->wait_times[idx].total_time_spinning += BILLION * (end.tv_sec - + qf->runtimedata->wait_times[idx].total_time_spinning += BILLION * (end.tv_sec - start.tv_sec) + end.tv_nsec - start.tv_nsec; } ret = true; } - cf->mem->wait_times[idx].locks_taken++; + qf->runtimedata->wait_times[idx].locks_taken++; return ret; /*start = rdtsc();*/ /*if (!__sync_lock_test_and_set(lock, 1)) {*/ /*clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);*/ - /*cf->mem->wait_times[idx].locks_acquired_single_attempt++;*/ - /*cf->mem->wait_times[idx].total_time_single += BILLION * (end.tv_sec - + /*qf->runtimedata->wait_times[idx].locks_acquired_single_attempt++;*/ + /*qf->runtimedata->wait_times[idx].total_time_single += BILLION * (end.tv_sec - * start.tv_sec) + end.tv_nsec - start.tv_nsec;*/ /*} else {*/ /*while (__sync_lock_test_and_set(lock, 1))*/ /*while (*lock);*/ /*clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);*/ - /*cf->mem->wait_times[idx].total_time_spinning += BILLION * (end.tv_sec - + /*qf->runtimedata->wait_times[idx].total_time_spinning += BILLION * (end.tv_sec - * start.tv_sec) + end.tv_nsec - start.tv_nsec;*/ /*}*/ /*end = rdtsc();*/ - /*cf->mem->wait_times[idx].locks_taken++;*/ + /*qf->runtimedata->wait_times[idx].locks_taken++;*/ /*return;*/ } #else @@ -138,9 +127,9 @@ static inline bool qf_spin_lock(QF *cf, volatile int *lock, uint64_t idx, * Try to acquire a lock once and return even if the lock is busy. * If spin flag is set, then spin until the lock is available. 
*/ -static inline bool qf_spin_lock(volatile int *lock, enum lock flag) +static inline bool qf_spin_lock(volatile int *lock, uint8_t flag) { - if (flag != LOCK_AND_SPIN) { + if (GET_WAIT_FOR_LOCK(flag) != QF_WAIT_FOR_LOCK) { return !__sync_lock_test_and_set(lock, 1); } else { while (__sync_lock_test_and_set(lock, 1)) @@ -158,30 +147,32 @@ static inline void qf_spin_unlock(volatile int *lock) return; } -static bool qf_lock(QF *cf, uint64_t hash_bucket_index, enum lock flag, bool - small) +static bool qf_lock(QF *qf, uint64_t hash_bucket_index, bool small, uint8_t + runtime_lock) { uint64_t hash_bucket_lock_offset = hash_bucket_index % NUM_SLOTS_TO_LOCK; if (small) { #ifdef LOG_WAIT_TIME - if (!qf_spin_lock(cf, &cf->mem->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK], - hash_bucket_index/NUM_SLOTS_TO_LOCK, flag)) + if (!qf_spin_lock(qf, &qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK], + hash_bucket_index/NUM_SLOTS_TO_LOCK, + runtime_lock)) return false; if (NUM_SLOTS_TO_LOCK - hash_bucket_lock_offset <= CLUSTER_SIZE) { - if (!qf_spin_lock(cf, &cf->mem->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK+1], - hash_bucket_index/NUM_SLOTS_TO_LOCK+1, flag)) { - qf_spin_unlock(&cf->mem->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK]); + if (!qf_spin_lock(qf, &qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK+1], + hash_bucket_index/NUM_SLOTS_TO_LOCK+1, + runtime_lock)) { + qf_spin_unlock(&qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK]); return false; } } #else - if (!qf_spin_lock(&cf->mem->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK], - flag)) + if (!qf_spin_lock(&qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK], + runtime_lock)) return false; if (NUM_SLOTS_TO_LOCK - hash_bucket_lock_offset <= CLUSTER_SIZE) { - if (!qf_spin_lock(&cf->mem->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK+1], - flag)) { - qf_spin_unlock(&cf->mem->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK]); + if (!qf_spin_lock(&qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK+1], + runtime_lock)) { + qf_spin_unlock(&qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK]); return false; } } @@ -190,48 +181,48 @@ static bool qf_lock(QF *cf, uint64_t hash_bucket_index, enum lock flag, bool #ifdef LOG_WAIT_TIME if (hash_bucket_index >= NUM_SLOTS_TO_LOCK && hash_bucket_lock_offset <= CLUSTER_SIZE) { - if (!qf_spin_lock(cf, - &cf->mem->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK-1], - flag)) + if (!qf_spin_lock(qf, + &qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK-1], + runtime_lock)) return false; } - if (!qf_spin_lock(cf, - &cf->mem->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK], - flag)) { + if (!qf_spin_lock(qf, + &qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK], + runtime_lock)) { if (hash_bucket_index >= NUM_SLOTS_TO_LOCK && hash_bucket_lock_offset <= CLUSTER_SIZE) - qf_spin_unlock(&cf->mem->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK-1]); + qf_spin_unlock(&qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK-1]); return false; } - if (!qf_spin_lock(cf, &cf->mem->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK+1], - flag)) { - qf_spin_unlock(&cf->mem->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK]); + if (!qf_spin_lock(qf, &qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK+1], + runtime_lock)) { + qf_spin_unlock(&qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK]); if (hash_bucket_index >= NUM_SLOTS_TO_LOCK && hash_bucket_lock_offset <= CLUSTER_SIZE) - qf_spin_unlock(&cf->mem->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK-1]); + 
qf_spin_unlock(&qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK-1]); return false; } #else if (hash_bucket_index >= NUM_SLOTS_TO_LOCK && hash_bucket_lock_offset <= CLUSTER_SIZE) { if - (!qf_spin_lock(&cf->mem->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK-1], - flag)) + (!qf_spin_lock(&qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK-1], + runtime_lock)) return false; } - if (!qf_spin_lock(&cf->mem->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK], - flag)) { + if (!qf_spin_lock(&qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK], + runtime_lock)) { if (hash_bucket_index >= NUM_SLOTS_TO_LOCK && hash_bucket_lock_offset <= CLUSTER_SIZE) - qf_spin_unlock(&cf->mem->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK-1]); + qf_spin_unlock(&qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK-1]); return false; } - if (!qf_spin_lock(&cf->mem->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK+1], - flag)) { - qf_spin_unlock(&cf->mem->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK]); + if (!qf_spin_lock(&qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK+1], + runtime_lock)) { + qf_spin_unlock(&qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK]); if (hash_bucket_index >= NUM_SLOTS_TO_LOCK && hash_bucket_lock_offset <= CLUSTER_SIZE) - qf_spin_unlock(&cf->mem->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK-1]); + qf_spin_unlock(&qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK-1]); return false; } #endif @@ -239,32 +230,33 @@ static bool qf_lock(QF *cf, uint64_t hash_bucket_index, enum lock flag, bool return true; } -static void qf_unlock(QF *cf, uint64_t hash_bucket_index, bool small) +static void qf_unlock(QF *qf, uint64_t hash_bucket_index, bool small) { uint64_t hash_bucket_lock_offset = hash_bucket_index % NUM_SLOTS_TO_LOCK; if (small) { if (NUM_SLOTS_TO_LOCK - hash_bucket_lock_offset <= CLUSTER_SIZE) { - qf_spin_unlock(&cf->mem->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK+1]); + qf_spin_unlock(&qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK+1]); } - qf_spin_unlock(&cf->mem->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK]); + qf_spin_unlock(&qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK]); } else { - qf_spin_unlock(&cf->mem->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK+1]); - qf_spin_unlock(&cf->mem->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK]); + qf_spin_unlock(&qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK+1]); + qf_spin_unlock(&qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK]); if (hash_bucket_index >= NUM_SLOTS_TO_LOCK && hash_bucket_lock_offset <= CLUSTER_SIZE) - qf_spin_unlock(&cf->mem->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK-1]); + qf_spin_unlock(&qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK-1]); } } -static void modify_metadata(QF *cf, uint64_t *metadata, int cnt) +static void modify_metadata(QF *qf, uint64_t *metadata, int cnt) { #ifdef LOG_WAIT_TIME - qf_spin_lock(cf, &cf->mem->metadata_lock,cf->num_locks, LOCK_AND_SPIN); + qf_spin_lock(qf, &qf->runtimedata->metadata_lock, + qf->runtimedata->num_locks, QF_WAIT_FOR_LOCK); #else - qf_spin_lock(&cf->mem->metadata_lock, LOCK_AND_SPIN); + qf_spin_lock(&qf->runtimedata->metadata_lock, QF_WAIT_FOR_LOCK); #endif *metadata = *metadata + cnt; - qf_spin_unlock(&cf->mem->metadata_lock); + qf_spin_unlock(&qf->runtimedata->metadata_lock); return; } @@ -451,7 +443,7 @@ static inline uint64_t bitselectv(const uint64_t val, int ignore, int rank) return bitselect(val & ~BITMASK(ignore % 64), rank); } -#if BITS_PER_SLOT > 0 +#if QF_BITS_PER_SLOT > 0 static inline qfblock * 
get_block(const QF *qf, uint64_t block_index) { return &qf->blocks[block_index]; @@ -460,38 +452,38 @@ static inline qfblock * get_block(const QF *qf, uint64_t block_index) static inline qfblock * get_block(const QF *qf, uint64_t block_index) { return (qfblock *)(((char *)qf->blocks) + block_index * (sizeof(qfblock) + - SLOTS_PER_BLOCK * qf->metadata->bits_per_slot / 8)); + QF_SLOTS_PER_BLOCK * qf->metadata->bits_per_slot / 8)); } #endif static inline int is_runend(const QF *qf, uint64_t index) { - return (METADATA_WORD(qf, runends, index) >> ((index % SLOTS_PER_BLOCK) % + return (METADATA_WORD(qf, runends, index) >> ((index % QF_SLOTS_PER_BLOCK) % 64)) & 1ULL; } static inline int is_occupied(const QF *qf, uint64_t index) { - return (METADATA_WORD(qf, occupieds, index) >> ((index % SLOTS_PER_BLOCK) % + return (METADATA_WORD(qf, occupieds, index) >> ((index % QF_SLOTS_PER_BLOCK) % 64)) & 1ULL; } -#if BITS_PER_SLOT == 8 || BITS_PER_SLOT == 16 || BITS_PER_SLOT == 32 || BITS_PER_SLOT == 64 +#if QF_BITS_PER_SLOT == 8 || QF_BITS_PER_SLOT == 16 || QF_BITS_PER_SLOT == 32 || QF_BITS_PER_SLOT == 64 static inline uint64_t get_slot(const QF *qf, uint64_t index) { assert(index < qf->metadata->xnslots); - return get_block(qf, index / SLOTS_PER_BLOCK)->slots[index % SLOTS_PER_BLOCK]; + return get_block(qf, index / QF_SLOTS_PER_BLOCK)->slots[index % QF_SLOTS_PER_BLOCK]; } static inline void set_slot(const QF *qf, uint64_t index, uint64_t value) { assert(index < qf->metadata->xnslots); - get_block(qf, index / SLOTS_PER_BLOCK)->slots[index % SLOTS_PER_BLOCK] = + get_block(qf, index / QF_SLOTS_PER_BLOCK)->slots[index % QF_SLOTS_PER_BLOCK] = value & BITMASK(qf->metadata->bits_per_slot); } -#elif BITS_PER_SLOT > 0 +#elif QF_BITS_PER_SLOT > 0 /* Little-endian code .... Big-endian is TODO */ @@ -501,11 +493,11 @@ static inline uint64_t get_slot(const QF *qf, uint64_t index) * to generate buggy code. :/ */ assert(index < qf->metadata->xnslots); uint64_t *p = (uint64_t *)&get_block(qf, index / - SLOTS_PER_BLOCK)->slots[(index % - SLOTS_PER_BLOCK) - * BITS_PER_SLOT / 8]; - return (uint64_t)(((*p) >> (((index % SLOTS_PER_BLOCK) * BITS_PER_SLOT) % - 8)) & BITMASK(BITS_PER_SLOT)); + QF_SLOTS_PER_BLOCK)->slots[(index % + QF_SLOTS_PER_BLOCK) + * QF_BITS_PER_SLOT / 8]; + return (uint64_t)(((*p) >> (((index % QF_SLOTS_PER_BLOCK) * QF_BITS_PER_SLOT) % + 8)) & BITMASK(QF_BITS_PER_SLOT)); } static inline void set_slot(const QF *qf, uint64_t index, uint64_t value) @@ -514,13 +506,13 @@ static inline void set_slot(const QF *qf, uint64_t index, uint64_t value) * to generate buggy code. :/ */ assert(index < qf->metadata->xnslots); uint64_t *p = (uint64_t *)&get_block(qf, index / - SLOTS_PER_BLOCK)->slots[(index % - SLOTS_PER_BLOCK) - * BITS_PER_SLOT / 8]; + QF_SLOTS_PER_BLOCK)->slots[(index % + QF_SLOTS_PER_BLOCK) + * QF_BITS_PER_SLOT / 8]; uint64_t t = *p; - uint64_t mask = BITMASK(BITS_PER_SLOT); + uint64_t mask = BITMASK(QF_BITS_PER_SLOT); uint64_t v = value; - int shift = ((index % SLOTS_PER_BLOCK) * BITS_PER_SLOT) % 8; + int shift = ((index % QF_SLOTS_PER_BLOCK) * QF_BITS_PER_SLOT) % 8; mask <<= shift; v <<= shift; t &= ~mask; @@ -538,10 +530,10 @@ static inline uint64_t get_slot(const QF *qf, uint64_t index) /* Should use __uint128_t to support up to 64-bit remainders, but gcc seems * to generate buggy code. 
:/ */ uint64_t *p = (uint64_t *)&get_block(qf, index / - SLOTS_PER_BLOCK)->slots[(index % - SLOTS_PER_BLOCK) + QF_SLOTS_PER_BLOCK)->slots[(index % + QF_SLOTS_PER_BLOCK) * qf->metadata->bits_per_slot / 8]; - return (uint64_t)(((*p) >> (((index % SLOTS_PER_BLOCK) * + return (uint64_t)(((*p) >> (((index % QF_SLOTS_PER_BLOCK) * qf->metadata->bits_per_slot) % 8)) & BITMASK(qf->metadata->bits_per_slot)); } @@ -552,13 +544,13 @@ static inline void set_slot(const QF *qf, uint64_t index, uint64_t value) /* Should use __uint128_t to support up to 64-bit remainders, but gcc seems * to generate buggy code. :/ */ uint64_t *p = (uint64_t *)&get_block(qf, index / - SLOTS_PER_BLOCK)->slots[(index % - SLOTS_PER_BLOCK) + QF_SLOTS_PER_BLOCK)->slots[(index % + QF_SLOTS_PER_BLOCK) * qf->metadata->bits_per_slot / 8]; uint64_t t = *p; uint64_t mask = BITMASK(qf->metadata->bits_per_slot); uint64_t v = value; - int shift = ((index % SLOTS_PER_BLOCK) * qf->metadata->bits_per_slot) % 8; + int shift = ((index % QF_SLOTS_PER_BLOCK) * qf->metadata->bits_per_slot) % 8; mask <<= shift; v <<= shift; t &= ~mask; @@ -579,14 +571,14 @@ static inline uint64_t block_offset(const QF *qf, uint64_t blockidx) get_block(qf, blockidx)->offset < BITMASK(8*sizeof(qf->blocks[0].offset))) return get_block(qf, blockidx)->offset; - return run_end(qf, SLOTS_PER_BLOCK * blockidx - 1) - SLOTS_PER_BLOCK * + return run_end(qf, QF_SLOTS_PER_BLOCK * blockidx - 1) - QF_SLOTS_PER_BLOCK * blockidx + 1; } static inline uint64_t run_end(const QF *qf, uint64_t hash_bucket_index) { - uint64_t bucket_block_index = hash_bucket_index / SLOTS_PER_BLOCK; - uint64_t bucket_intrablock_offset = hash_bucket_index % SLOTS_PER_BLOCK; + uint64_t bucket_block_index = hash_bucket_index / QF_SLOTS_PER_BLOCK; + uint64_t bucket_intrablock_offset = hash_bucket_index % QF_SLOTS_PER_BLOCK; uint64_t bucket_blocks_offset = block_offset(qf, bucket_block_index); uint64_t bucket_intrablock_rank = bitrank(get_block(qf, @@ -597,17 +589,17 @@ static inline uint64_t run_end(const QF *qf, uint64_t hash_bucket_index) if (bucket_blocks_offset <= bucket_intrablock_offset) return hash_bucket_index; else - return SLOTS_PER_BLOCK * bucket_block_index + bucket_blocks_offset - 1; + return QF_SLOTS_PER_BLOCK * bucket_block_index + bucket_blocks_offset - 1; } uint64_t runend_block_index = bucket_block_index + bucket_blocks_offset / - SLOTS_PER_BLOCK; - uint64_t runend_ignore_bits = bucket_blocks_offset % SLOTS_PER_BLOCK; + QF_SLOTS_PER_BLOCK; + uint64_t runend_ignore_bits = bucket_blocks_offset % QF_SLOTS_PER_BLOCK; uint64_t runend_rank = bucket_intrablock_rank - 1; uint64_t runend_block_offset = bitselectv(get_block(qf, runend_block_index)->runends[0], runend_ignore_bits, runend_rank); - if (runend_block_offset == SLOTS_PER_BLOCK) { + if (runend_block_offset == QF_SLOTS_PER_BLOCK) { if (bucket_blocks_offset == 0 && bucket_intrablock_rank == 0) { /* The block begins in empty space, and this bucket is in that region of * empty space */ @@ -622,11 +614,11 @@ static inline uint64_t run_end(const QF *qf, uint64_t hash_bucket_index) runend_block_offset = bitselectv(get_block(qf, runend_block_index)->runends[0], runend_ignore_bits, runend_rank); - } while (runend_block_offset == SLOTS_PER_BLOCK); + } while (runend_block_offset == QF_SLOTS_PER_BLOCK); } } - uint64_t runend_index = SLOTS_PER_BLOCK * runend_block_index + + uint64_t runend_index = QF_SLOTS_PER_BLOCK * runend_block_index + runend_block_offset; if (runend_index < hash_bucket_index) return hash_bucket_index; @@ -636,11 +628,11 @@ static 
inline uint64_t run_end(const QF *qf, uint64_t hash_bucket_index) static inline int offset_lower_bound(const QF *qf, uint64_t slot_index) { - const qfblock * b = get_block(qf, slot_index / SLOTS_PER_BLOCK); - const uint64_t slot_offset = slot_index % SLOTS_PER_BLOCK; + const qfblock * b = get_block(qf, slot_index / QF_SLOTS_PER_BLOCK); + const uint64_t slot_offset = slot_index % QF_SLOTS_PER_BLOCK; const uint64_t boffset = b->offset; const uint64_t occupieds = b->occupieds[0] & BITMASK(slot_offset+1); - assert(SLOTS_PER_BLOCK == 64); + assert(QF_SLOTS_PER_BLOCK == 64); if (boffset <= slot_offset) { const uint64_t runends = (b->runends[0] & BITMASK(slot_offset)) >> boffset; return popcnt(occupieds) - popcnt(runends); @@ -689,15 +681,15 @@ static inline uint64_t shift_into_b(const uint64_t a, const uint64_t b, return a_component | b_shifted | (b & b_mask); } -#if BITS_PER_SLOT == 8 || BITS_PER_SLOT == 16 || BITS_PER_SLOT == 32 || BITS_PER_SLOT == 64 +#if QF_BITS_PER_SLOT == 8 || QF_BITS_PER_SLOT == 16 || QF_BITS_PER_SLOT == 32 || QF_BITS_PER_SLOT == 64 static inline void shift_remainders(QF *qf, uint64_t start_index, uint64_t empty_index) { - uint64_t start_block = start_index / SLOTS_PER_BLOCK; - uint64_t start_offset = start_index % SLOTS_PER_BLOCK; - uint64_t empty_block = empty_index / SLOTS_PER_BLOCK; - uint64_t empty_offset = empty_index % SLOTS_PER_BLOCK; + uint64_t start_block = start_index / QF_SLOTS_PER_BLOCK; + uint64_t start_offset = start_index % QF_SLOTS_PER_BLOCK; + uint64_t empty_block = empty_index / QF_SLOTS_PER_BLOCK; + uint64_t empty_offset = empty_index % QF_SLOTS_PER_BLOCK; assert (start_index <= empty_index && empty_index < qf->metadata->xnslots); @@ -706,9 +698,9 @@ static inline void shift_remainders(QF *qf, uint64_t start_index, uint64_t &get_block(qf, empty_block)->slots[0], empty_offset * sizeof(qf->blocks[0].slots[0])); get_block(qf, empty_block)->slots[0] = get_block(qf, - empty_block-1)->slots[SLOTS_PER_BLOCK-1]; + empty_block-1)->slots[QF_SLOTS_PER_BLOCK-1]; empty_block--; - empty_offset = SLOTS_PER_BLOCK-1; + empty_offset = QF_SLOTS_PER_BLOCK-1; } memmove(&get_block(qf, empty_block)->slots[start_offset+1], @@ -750,26 +742,26 @@ static inline void qf_dump_block(const QF *qf, uint64_t i) printf("%-192d", get_block(qf, i)->offset); printf("\n"); - for (j = 0; j < SLOTS_PER_BLOCK; j++) + for (j = 0; j < QF_SLOTS_PER_BLOCK; j++) printf("%02lx ", j); printf("\n"); - for (j = 0; j < SLOTS_PER_BLOCK; j++) + for (j = 0; j < QF_SLOTS_PER_BLOCK; j++) printf(" %d ", (get_block(qf, i)->occupieds[j/64] & (1ULL << (j%64))) ? 1 : 0); printf("\n"); - for (j = 0; j < SLOTS_PER_BLOCK; j++) + for (j = 0; j < QF_SLOTS_PER_BLOCK; j++) printf(" %d ", (get_block(qf, i)->runends[j/64] & (1ULL << (j%64))) ? 
1 : 0); printf("\n"); -#if BITS_PER_SLOT == 8 || BITS_PER_SLOT == 16 || BITS_PER_SLOT == 32 - for (j = 0; j < SLOTS_PER_BLOCK; j++) +#if QF_BITS_PER_SLOT == 8 || QF_BITS_PER_SLOT == 16 || QF_BITS_PER_SLOT == 32 + for (j = 0; j < QF_SLOTS_PER_BLOCK; j++) printf("%02x ", get_block(qf, i)->slots[j]); -#elif BITS_PER_SLOT == 64 - for (j = 0; j < SLOTS_PER_BLOCK; j++) +#elif QF_BITS_PER_SLOT == 64 + for (j = 0; j < QF_SLOTS_PER_BLOCK; j++) printf("%02lx ", get_block(qf, i)->slots[j]); #else - for (j = 0; j < SLOTS_PER_BLOCK * qf->metadata->bits_per_slot / 8; j++) + for (j = 0; j < QF_SLOTS_PER_BLOCK * qf->metadata->bits_per_slot / 8; j++) printf("%02x ", get_block(qf, i)->slots[j]); #endif @@ -779,7 +771,6 @@ static inline void qf_dump_block(const QF *qf, uint64_t i) } void qf_dump_metadata(const QF *qf) { - printf("Seed: %u\n", qf->metadata->seed); printf("Slots: %lu Occupied: %lu Elements: %lu Distinct: %lu\n", qf->metadata->nslots, qf->metadata->noccupied_slots, @@ -856,7 +847,7 @@ static inline void shift_runends(QF *qf, int64_t first, uint64_t last, } -static inline void insert_replace_slots_and_shift_remainders_and_runends_and_offsets(QF *qf, +static inline bool insert_replace_slots_and_shift_remainders_and_runends_and_offsets(QF *qf, int operation, uint64_t bucket_index, uint64_t overwrite_index, @@ -866,27 +857,29 @@ static inline void insert_replace_slots_and_shift_remainders_and_runends_and_off { uint64_t empties[67]; uint64_t i; + int64_t j; int64_t ninserts = total_remainders - noverwrites; uint64_t insert_index = overwrite_index + noverwrites; if (ninserts > 0) { /* First, shift things to create n empty spaces where we need them. */ find_next_n_empty_slots(qf, insert_index, ninserts, empties); - - for (i = 0; i < ninserts - 1; i++) - shift_slots(qf, empties[i+1] + 1, empties[i] - 1, i + 1); + if (empties[0] >= qf->metadata->xnslots) { + return false; + } + for (j = 0; j < ninserts - 1; j++) + shift_slots(qf, empties[j+1] + 1, empties[j] - 1, j + 1); shift_slots(qf, insert_index, empties[ninserts - 1] - 1, ninserts); - for (i = 0; i < ninserts - 1; i++) - shift_runends(qf, empties[i+1] + 1, empties[i] - 1, i + 1); + for (j = 0; j < ninserts - 1; j++) + shift_runends(qf, empties[j+1] + 1, empties[j] - 1, j + 1); shift_runends(qf, insert_index, empties[ninserts - 1] - 1, ninserts); - for (i = noverwrites; i < total_remainders - 1; i++) METADATA_WORD(qf, runends, overwrite_index + i) &= ~(1ULL << (((overwrite_index + i) % - SLOTS_PER_BLOCK) + QF_SLOTS_PER_BLOCK) % 64)); switch (operation) { @@ -894,20 +887,20 @@ static inline void insert_replace_slots_and_shift_remainders_and_runends_and_off assert (noverwrites == 0); METADATA_WORD(qf, runends, overwrite_index + total_remainders - 1) |= 1ULL << (((overwrite_index + total_remainders - 1) % - SLOTS_PER_BLOCK) % 64); + QF_SLOTS_PER_BLOCK) % 64); break; case 1: /* append to bucket */ METADATA_WORD(qf, runends, overwrite_index + noverwrites - 1) &= - ~(1ULL << (((overwrite_index + noverwrites - 1) % SLOTS_PER_BLOCK) % + ~(1ULL << (((overwrite_index + noverwrites - 1) % QF_SLOTS_PER_BLOCK) % 64)); METADATA_WORD(qf, runends, overwrite_index + total_remainders - 1) |= 1ULL << (((overwrite_index + total_remainders - 1) % - SLOTS_PER_BLOCK) % 64); + QF_SLOTS_PER_BLOCK) % 64); break; case 2: /* insert into bucket */ METADATA_WORD(qf, runends, overwrite_index + total_remainders - 1) &= ~(1ULL << (((overwrite_index + total_remainders - 1) % - SLOTS_PER_BLOCK) % 64)); + QF_SLOTS_PER_BLOCK) % 64)); break; default: fprintf(stderr, "Invalid operation 
%d\n", operation); @@ -915,21 +908,21 @@ static inline void insert_replace_slots_and_shift_remainders_and_runends_and_off } uint64_t npreceding_empties = 0; - for (i = bucket_index / SLOTS_PER_BLOCK + 1; i <= empties[0]/SLOTS_PER_BLOCK; i++) { - while (npreceding_empties < ninserts && - empties[ninserts - 1 - npreceding_empties] / SLOTS_PER_BLOCK < i) + for (i = bucket_index / QF_SLOTS_PER_BLOCK + 1; i <= empties[0]/QF_SLOTS_PER_BLOCK; i++) { + while ((int64_t)npreceding_empties < ninserts && + empties[ninserts - 1 - npreceding_empties] / QF_SLOTS_PER_BLOCK < i) npreceding_empties++; - + if (get_block(qf, i)->offset + ninserts - npreceding_empties < BITMASK(8*sizeof(qf->blocks[0].offset))) get_block(qf, i)->offset += ninserts - npreceding_empties; else get_block(qf, i)->offset = (uint8_t) BITMASK(8*sizeof(qf->blocks[0].offset)); } } - + for (i = 0; i < total_remainders; i++) set_slot(qf, overwrite_index + i, remainders[i]); - + modify_metadata(qf, &qf->metadata->noccupied_slots, ninserts); static uint64_t counter = 0; @@ -942,9 +935,11 @@ static inline void insert_replace_slots_and_shift_remainders_and_runends_and_off bucket_index, overwrite_index, overwrite_index - bucket_index, bucket_index / (float)qf->metadata->nslots); } + + return true; } -static inline void remove_replace_slots_and_shift_remainders_and_runends_and_offsets(QF *qf, +static inline int remove_replace_slots_and_shift_remainders_and_runends_and_offsets(QF *qf, int operation, uint64_t bucket_index, uint64_t overwrite_index, @@ -976,6 +971,7 @@ static inline void remove_replace_slots_and_shift_remainders_and_runends_and_off uint64_t current_bucket = bucket_index; uint64_t current_slot = overwrite_index + total_remainders; uint64_t current_distance = old_length - total_remainders; + int ret_current_distance = current_distance; while (current_distance > 0) { if (is_runend(qf, current_slot + current_distance - 1)) { @@ -1016,7 +1012,7 @@ static inline void remove_replace_slots_and_shift_remainders_and_runends_and_off // Then find the runend slot corresponding to the last run in the // original_bucket block. // Update the offset of the block to which it belongs. 
- uint64_t original_block = original_bucket / SLOTS_PER_BLOCK; + uint64_t original_block = original_bucket / QF_SLOTS_PER_BLOCK; while (1 && old_length > total_remainders) { // we only update offsets if we shift/delete anything int32_t last_occupieds_bit = bitscanreverse(get_block(qf, original_block)->occupieds[0]); // there is nothing in the block @@ -1025,25 +1021,25 @@ static inline void remove_replace_slots_and_shift_remainders_and_runends_and_off break; get_block(qf, original_block + 1)->offset = 0; } else { - uint64_t last_occupieds_hash_index = SLOTS_PER_BLOCK * original_block + last_occupieds_bit; + uint64_t last_occupieds_hash_index = QF_SLOTS_PER_BLOCK * original_block + last_occupieds_bit; uint64_t runend_index = run_end(qf, last_occupieds_hash_index); // runend spans across the block // update the offset of the next block - if (runend_index / SLOTS_PER_BLOCK == original_block) { // if the run ends in the same block + if (runend_index / QF_SLOTS_PER_BLOCK == original_block) { // if the run ends in the same block if (get_block(qf, original_block + 1)->offset == 0) break; get_block(qf, original_block + 1)->offset = 0; - } else if (runend_index / SLOTS_PER_BLOCK == original_block + 1) { // if the last run spans across one block - if (get_block(qf, original_block + 1)->offset == (runend_index % SLOTS_PER_BLOCK) + 1) + } else if (runend_index / QF_SLOTS_PER_BLOCK == original_block + 1) { // if the last run spans across one block + if (get_block(qf, original_block + 1)->offset == (runend_index % QF_SLOTS_PER_BLOCK) + 1) break; - get_block(qf, original_block + 1)->offset = (runend_index % SLOTS_PER_BLOCK) + 1; + get_block(qf, original_block + 1)->offset = (runend_index % QF_SLOTS_PER_BLOCK) + 1; } else { // if the last run spans across multiple blocks uint64_t i; - for (i = original_block + 1; i < runend_index / SLOTS_PER_BLOCK - 1; i++) - get_block(qf, i)->offset = SLOTS_PER_BLOCK; - if (get_block(qf, runend_index / SLOTS_PER_BLOCK)->offset == (runend_index % SLOTS_PER_BLOCK) + 1) + for (i = original_block + 1; i < runend_index / QF_SLOTS_PER_BLOCK - 1; i++) + get_block(qf, i)->offset = QF_SLOTS_PER_BLOCK; + if (get_block(qf, runend_index / QF_SLOTS_PER_BLOCK)->offset == (runend_index % QF_SLOTS_PER_BLOCK) + 1) break; - get_block(qf, runend_index / SLOTS_PER_BLOCK)->offset = (runend_index % SLOTS_PER_BLOCK) + 1; + get_block(qf, runend_index / QF_SLOTS_PER_BLOCK)->offset = (runend_index % QF_SLOTS_PER_BLOCK) + 1; } } original_block++; @@ -1056,6 +1052,8 @@ static inline void remove_replace_slots_and_shift_remainders_and_runends_and_off modify_metadata(qf, &qf->metadata->ndistinct_elts, -1); /*qf->metadata->ndistinct_elts--;*/ } + + return ret_current_distance; } /***************************************************************************** @@ -1217,15 +1215,16 @@ static inline uint64_t next_slot(QF *qf, uint64_t current) return current; } -static inline bool insert1(QF *qf, __uint128_t hash, enum lock flag) +static inline int insert1(QF *qf, __uint128_t hash, uint8_t runtime_lock) { + int ret_distance = 0; uint64_t hash_remainder = hash & BITMASK(qf->metadata->bits_per_slot); uint64_t hash_bucket_index = hash >> qf->metadata->bits_per_slot; - uint64_t hash_bucket_block_offset = hash_bucket_index % SLOTS_PER_BLOCK; + uint64_t hash_bucket_block_offset = hash_bucket_index % QF_SLOTS_PER_BLOCK; - if (flag != NO_LOCK) { - if (!qf_lock(qf, hash_bucket_index, flag, /*small*/ true)) - return false; + if (GET_NO_LOCK(runtime_lock) != QF_NO_LOCK) { + if (!qf_lock(qf, hash_bucket_index, /*small*/ 
true, runtime_lock)) + return QF_COULDNT_LOCK; } if (is_empty(qf, hash_bucket_index) /* might_be_empty(qf, hash_bucket_index) && runend_index == hash_bucket_index */) { METADATA_WORD(qf, runends, hash_bucket_index) |= 1ULL << @@ -1233,7 +1232,8 @@ static inline bool insert1(QF *qf, __uint128_t hash, enum lock flag) set_slot(qf, hash_bucket_index, hash_remainder); METADATA_WORD(qf, occupieds, hash_bucket_index) |= 1ULL << (hash_bucket_block_offset % 64); - + + ret_distance = 0; modify_metadata(qf, &qf->metadata->ndistinct_elts, 1); modify_metadata(qf, &qf->metadata->noccupied_slots, 1); modify_metadata(qf, &qf->metadata->nelts, 1); @@ -1403,44 +1403,36 @@ static inline bool insert1(QF *qf, __uint128_t hash, enum lock flag) if (operation >= 0) { uint64_t empty_slot_index = find_first_empty_slot(qf, runend_index+1); - + if (empty_slot_index >= qf->metadata->xnslots) { + return QF_NO_SPACE; + } shift_remainders(qf, insert_index, empty_slot_index); set_slot(qf, insert_index, new_value); - - static uint64_t counter = 0; - static uint64_t last_cnt = 0; - counter++; - if (counter % 10000000 == 0 && - counter != last_cnt) { - last_cnt = counter; - fprintf(stdout, "Home slot: %ld Insertion slot: %ld Difference: %ld Fraction done: %lf\n", - hash_bucket_index, insert_index, insert_index - hash_bucket_index, - hash_bucket_index / (float)qf->metadata->nslots); - } + ret_distance = insert_index - hash_bucket_index; shift_runends(qf, insert_index, empty_slot_index-1, 1); switch (operation) { case 0: METADATA_WORD(qf, runends, insert_index) |= 1ULL << ((insert_index % - SLOTS_PER_BLOCK) + QF_SLOTS_PER_BLOCK) % 64); break; case 1: METADATA_WORD(qf, runends, insert_index-1) &= ~(1ULL << (((insert_index-1) % - SLOTS_PER_BLOCK) % + QF_SLOTS_PER_BLOCK) % 64)); METADATA_WORD(qf, runends, insert_index) |= 1ULL << ((insert_index % - SLOTS_PER_BLOCK) + QF_SLOTS_PER_BLOCK) % 64); break; case 2: METADATA_WORD(qf, runends, insert_index) &= ~(1ULL << ((insert_index % - SLOTS_PER_BLOCK) % + QF_SLOTS_PER_BLOCK) % 64)); break; default: @@ -1452,8 +1444,8 @@ static inline bool insert1(QF *qf, __uint128_t hash, enum lock flag) * and block of the empty slot * */ uint64_t i; - for (i = hash_bucket_index / SLOTS_PER_BLOCK + 1; i <= - empty_slot_index/SLOTS_PER_BLOCK; i++) { + for (i = hash_bucket_index / QF_SLOTS_PER_BLOCK + 1; i <= + empty_slot_index/QF_SLOTS_PER_BLOCK; i++) { if (get_block(qf, i)->offset < BITMASK(8*sizeof(qf->blocks[0].offset))) get_block(qf, i)->offset++; assert(get_block(qf, i)->offset != 0); @@ -1465,24 +1457,25 @@ static inline bool insert1(QF *qf, __uint128_t hash, enum lock flag) (hash_bucket_block_offset % 64); } - if (flag != NO_LOCK) { + if (GET_NO_LOCK(runtime_lock) != QF_NO_LOCK) { qf_unlock(qf, hash_bucket_index, /*small*/ true); } - return true; + return ret_distance; } -static inline bool insert(QF *qf, __uint128_t hash, uint64_t count, enum lock - flag) +static inline int insert(QF *qf, __uint128_t hash, uint64_t count, uint8_t + runtime_lock) { + int ret_distance = 0; uint64_t hash_remainder = hash & BITMASK(qf->metadata->bits_per_slot); uint64_t hash_bucket_index = hash >> qf->metadata->bits_per_slot; - uint64_t hash_bucket_block_offset = hash_bucket_index % SLOTS_PER_BLOCK; + uint64_t hash_bucket_block_offset = hash_bucket_index % QF_SLOTS_PER_BLOCK; /*uint64_t hash_bucket_lock_offset = hash_bucket_index % NUM_SLOTS_TO_LOCK;*/ - if (flag != NO_LOCK) { - if (!qf_lock(qf, hash_bucket_index, flag, /*small*/ false)) - return false; + if (GET_NO_LOCK(runtime_lock) != QF_NO_LOCK) { + if 
(!qf_lock(qf, hash_bucket_index, /*small*/ false, runtime_lock)) + return QF_COULDNT_LOCK; } uint64_t runend_index = run_end(qf, hash_bucket_index); @@ -1501,7 +1494,7 @@ static inline bool insert(QF *qf, __uint128_t hash, uint64_t count, enum lock modify_metadata(qf, &qf->metadata->nelts, 1); /* This trick will, I hope, keep the fast case fast. */ if (count > 1) { - insert(qf, hash, count - 1, NO_LOCK); + insert(qf, hash, count - 1, QF_NO_LOCK); } } else { /* Non-empty slot */ uint64_t new_values[67]; @@ -1509,16 +1502,20 @@ static inline bool insert(QF *qf, __uint128_t hash, uint64_t count, enum lock hash_bucket_index - 1) + 1; + bool ret; if (!is_occupied(qf, hash_bucket_index)) { /* Empty bucket, but its slot is occupied. */ uint64_t *p = encode_counter(qf, hash_remainder, count, &new_values[67]); - insert_replace_slots_and_shift_remainders_and_runends_and_offsets(qf, - 0, - hash_bucket_index, - runstart_index, - p, - &new_values[67] - p, - 0); + ret = insert_replace_slots_and_shift_remainders_and_runends_and_offsets(qf, + 0, + hash_bucket_index, + runstart_index, + p, + &new_values[67] - p, + 0); + if (!ret) + return QF_NO_SPACE; modify_metadata(qf, &qf->metadata->ndistinct_elts, 1); + ret_distance = runstart_index - hash_bucket_index; } else { /* Non-empty bucket */ uint64_t current_remainder, current_count, current_end; @@ -1536,36 +1533,45 @@ static inline bool insert(QF *qf, __uint128_t hash, uint64_t count, enum lock then append a counter for this remainder to the run. */ if (current_remainder < hash_remainder) { uint64_t *p = encode_counter(qf, hash_remainder, count, &new_values[67]); - insert_replace_slots_and_shift_remainders_and_runends_and_offsets(qf, - 1, /* Append to bucket */ - hash_bucket_index, - current_end + 1, - p, - &new_values[67] - p, - 0); + ret = insert_replace_slots_and_shift_remainders_and_runends_and_offsets(qf, + 1, /* Append to bucket */ + hash_bucket_index, + current_end + 1, + p, + &new_values[67] - p, + 0); + if (!ret) + return QF_NO_SPACE; modify_metadata(qf, &qf->metadata->ndistinct_elts, 1); + ret_distance = (current_end + 1) - hash_bucket_index; /* Found a counter for this remainder. Add in the new count. */ } else if (current_remainder == hash_remainder) { uint64_t *p = encode_counter(qf, hash_remainder, current_count + count, &new_values[67]); - insert_replace_slots_and_shift_remainders_and_runends_and_offsets(qf, + ret = insert_replace_slots_and_shift_remainders_and_runends_and_offsets(qf, is_runend(qf, current_end) ? 1 : 2, hash_bucket_index, runstart_index, p, &new_values[67] - p, current_end - runstart_index + 1); + if (!ret) + return QF_NO_SPACE; + ret_distance = runstart_index - hash_bucket_index; /* No counter for this remainder, but there are larger remainders, so we're not appending to the bucket. 
*/ } else { uint64_t *p = encode_counter(qf, hash_remainder, count, &new_values[67]); - insert_replace_slots_and_shift_remainders_and_runends_and_offsets(qf, - 2, /* Insert to bucket */ - hash_bucket_index, - runstart_index, - p, - &new_values[67] - p, - 0); + ret = insert_replace_slots_and_shift_remainders_and_runends_and_offsets(qf, + 2, /* Insert to bucket */ + hash_bucket_index, + runstart_index, + p, + &new_values[67] - p, + 0); + if (!ret) + return QF_NO_SPACE; modify_metadata(qf, &qf->metadata->ndistinct_elts, 1); + ret_distance = runstart_index - hash_bucket_index; } } METADATA_WORD(qf, occupieds, hash_bucket_index) |= 1ULL << (hash_bucket_block_offset % 64); @@ -1573,29 +1579,30 @@ static inline bool insert(QF *qf, __uint128_t hash, uint64_t count, enum lock modify_metadata(qf, &qf->metadata->nelts, count); } - if (flag != NO_LOCK) { + if (GET_NO_LOCK(runtime_lock) != QF_NO_LOCK) { qf_unlock(qf, hash_bucket_index, /*small*/ false); } - return true; + return ret_distance; } -inline static bool _remove(QF *qf, __uint128_t hash, uint64_t count, enum lock - flag) +inline static int _remove(QF *qf, __uint128_t hash, uint64_t count, uint8_t + runtime_lock) { + int ret_numfreedslots = 0; uint64_t hash_remainder = hash & BITMASK(qf->metadata->bits_per_slot); uint64_t hash_bucket_index = hash >> qf->metadata->bits_per_slot; uint64_t current_remainder, current_count, current_end; uint64_t new_values[67]; - if (flag != NO_LOCK) { - if (!qf_lock(qf, hash_bucket_index, flag, /*small*/ false)) - return false; + if (GET_NO_LOCK(runtime_lock) != QF_NO_LOCK) { + if (!qf_lock(qf, hash_bucket_index, /*small*/ false, runtime_lock)) + return -2; } /* Empty bucket */ if (!is_occupied(qf, hash_bucket_index)) - return false; + return -1; uint64_t runstart_index = hash_bucket_index == 0 ? 0 : run_end(qf, hash_bucket_index - 1) + 1; uint64_t original_runstart_index = runstart_index; @@ -1609,7 +1616,7 @@ inline static bool _remove(QF *qf, __uint128_t hash, uint64_t count, enum lock } /* remainder not found in the given run */ if (current_remainder != hash_remainder) - return false; + return -1; if (original_runstart_index == runstart_index && is_runend(qf, current_end)) only_item_in_the_run = 1; @@ -1618,7 +1625,7 @@ inline static bool _remove(QF *qf, __uint128_t hash, uint64_t count, enum lock uint64_t *p = encode_counter(qf, hash_remainder, count > current_count ? 0 : current_count - count, &new_values[67]); - remove_replace_slots_and_shift_remainders_and_runends_and_offsets(qf, + ret_numfreedslots = remove_replace_slots_and_shift_remainders_and_runends_and_offsets(qf, only_item_in_the_run, hash_bucket_index, runstart_index, @@ -1630,28 +1637,30 @@ inline static bool _remove(QF *qf, __uint128_t hash, uint64_t count, enum lock modify_metadata(qf, &qf->metadata->nelts, -count); /*qf->metadata->nelts -= count;*/ - if (flag != NO_LOCK) { + if (GET_NO_LOCK(runtime_lock) != QF_NO_LOCK) { qf_unlock(qf, hash_bucket_index, /*small*/ false); } - return true; + return ret_numfreedslots; } /*********************************************************************** * Code that uses the above to implement key-value-counter operations. 
* ***********************************************************************/ -void qf_init(QF *qf, uint64_t nslots, uint64_t key_bits, uint64_t value_bits, - bool mem, const char * path, uint32_t seed) +uint64_t qf_init(QF *qf, uint64_t nslots, uint64_t key_bits, uint64_t value_bits, + enum qf_hashmode hash, uint32_t seed, void* buffer, uint64_t + buffer_len) { uint64_t num_slots, xnslots, nblocks; uint64_t key_remainder_bits, bits_per_slot; uint64_t size; + uint64_t total_num_bytes; assert(popcnt(nslots) == 1); /* nslots must be a power of 2 */ num_slots = nslots; xnslots = nslots + 10*sqrt((double)nslots); - nblocks = (xnslots + SLOTS_PER_BLOCK - 1) / SLOTS_PER_BLOCK; + nblocks = (xnslots + QF_SLOTS_PER_BLOCK - 1) / QF_SLOTS_PER_BLOCK; key_remainder_bits = key_bits; while (nslots > 1) { assert(key_remainder_bits > 0); @@ -1660,52 +1669,26 @@ void qf_init(QF *qf, uint64_t nslots, uint64_t key_bits, uint64_t value_bits, } bits_per_slot = key_remainder_bits + value_bits; - assert (BITS_PER_SLOT == 0 || BITS_PER_SLOT == qf->metadata->bits_per_slot); + assert (QF_BITS_PER_SLOT == 0 || QF_BITS_PER_SLOT == qf->metadata->bits_per_slot); assert(bits_per_slot > 1); -#if BITS_PER_SLOT == 8 || BITS_PER_SLOT == 16 || BITS_PER_SLOT == 32 || BITS_PER_SLOT == 64 +#if QF_BITS_PER_SLOT == 8 || QF_BITS_PER_SLOT == 16 || QF_BITS_PER_SLOT == 32 || QF_BITS_PER_SLOT == 64 size = nblocks * sizeof(qfblock); #else - size = nblocks * (sizeof(qfblock) + SLOTS_PER_BLOCK * bits_per_slot / 8); + size = nblocks * (sizeof(qfblock) + QF_SLOTS_PER_BLOCK * bits_per_slot / 8); #endif - qf->mem = (qfmem *)calloc(sizeof(qfmem), 1); - - if (mem) { - qf->metadata = (qfmetadata *)calloc(sizeof(qfmetadata), 1); - if (qf->metadata == NULL) { - perror("Can't allocate qf metadata"); - exit(EXIT_FAILURE); - } - qf->blocks = (qfblock *)calloc(size, 1); - if (qf->blocks == NULL) { - perror("Can't allocate qf blocks"); - exit(EXIT_FAILURE); - } - } else { - int ret; - uint64_t mmap_size = size + sizeof(qfmetadata); + total_num_bytes = sizeof(qfmetadata) + size; + if (buffer == NULL || total_num_bytes > buffer_len) + return total_num_bytes; - qf->mem->fd = open(path, O_RDWR | O_CREAT | O_TRUNC, S_IRWXU); - if (qf->mem->fd < 0) { - perror("Couldn't open file:\n"); - exit(EXIT_FAILURE); - } - ret = fallocate(qf->mem->fd, 0, 0, mmap_size); - if (ret < 0) { - perror("Couldn't fallocate file:\n"); - exit(EXIT_FAILURE); - } - qf->metadata = (qfmetadata *)mmap(NULL, mmap_size, PROT_READ | - PROT_WRITE, MAP_SHARED, qf->mem->fd, 0); - ret = madvise(qf->metadata, mmap_size, MADV_RANDOM); - if (ret < 0) { - perror("Couldn't fallocate file:\n"); - exit(EXIT_FAILURE); - } - qf->blocks = (qfblock *)(qf->metadata + 1); - } + memset(buffer, 0, total_num_bytes); + qf->metadata = (qfmetadata *)(buffer); + qf->blocks = (qfblock *)(qf->metadata + 1); - qf->metadata->size = size; + qf->metadata->magic_endian_number = MAGIC_NUMBER; + qf->metadata->auto_resize = 0; + qf->metadata->hash_mode = hash; + qf->metadata->total_size_in_bytes = size; qf->metadata->seed = seed; qf->metadata->nslots = num_slots; qf->metadata->xnslots = xnslots; @@ -1716,120 +1699,126 @@ void qf_init(QF *qf, uint64_t nslots, uint64_t key_bits, uint64_t value_bits, qf->metadata->range = qf->metadata->nslots; qf->metadata->range <<= qf->metadata->key_remainder_bits; - qf->metadata->nblocks = (qf->metadata->xnslots + SLOTS_PER_BLOCK - 1) / - SLOTS_PER_BLOCK; + qf->metadata->nblocks = (qf->metadata->xnslots + QF_SLOTS_PER_BLOCK - 1) / + QF_SLOTS_PER_BLOCK; qf->metadata->nelts = 0; 
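/*
 * Editor's note -- illustrative sketch, not part of the patch. With the new
 * signature, qf_init() doubles as a size query: called with buffer == NULL
 * (or with buffer_len too small) it only returns the number of bytes the
 * filter needs; called again with a large-enough buffer it lays the CQF out
 * in that buffer and returns the same byte count. qf_malloc() below wraps
 * exactly this two-call pattern; a caller managing its own memory could do
 * the same, provided it also allocates qf->runtimedata before the second
 * call. The sizes used here (2^20 slots, 30 key bits, no value bits, seed 0)
 * are arbitrary example values.
 */
#if 0	/* usage sketch only, never compiled */
	QF qf;
	uint64_t needed = qf_init(&qf, 1ULL << 20, 30, 0, QF_HASH_DEFAULT, 0, NULL, 0);
	void *buffer = malloc(needed);
	qf.runtimedata = (qfruntime *)calloc(1, sizeof(qfruntime));
	if (buffer == NULL || qf.runtimedata == NULL)
		abort();
	if (qf_init(&qf, 1ULL << 20, 30, 0, QF_HASH_DEFAULT, 0, buffer, needed) != needed)
		abort();
#endif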
qf->metadata->ndistinct_elts = 0; qf->metadata->noccupied_slots = 0; - qf->metadata->num_locks = (qf->metadata->xnslots/NUM_SLOTS_TO_LOCK)+2; + + qf->runtimedata->num_locks = (qf->metadata->xnslots/NUM_SLOTS_TO_LOCK)+2; + qf->runtimedata->f_info.filepath = NULL; /* initialize all the locks to 0 */ - qf->mem->metadata_lock = 0; - qf->mem->locks = (volatile int *)calloc(qf->metadata->num_locks, + qf->runtimedata->metadata_lock = 0; + qf->runtimedata->locks = (volatile int *)calloc(qf->runtimedata->num_locks, sizeof(volatile int)); + if (qf->runtimedata->locks == NULL) { + perror("Couldn't allocate memory for runtime locks."); + exit(EXIT_FAILURE); + } #ifdef LOG_WAIT_TIME - qf->mem->wait_times = (wait_time_data* )calloc(qf->metadata->num_locks+1, - sizeof(wait_time_data)); + qf->runtimedata->wait_times = (wait_time_data* + )calloc(qf->runtimedata->num_locks+1, + sizeof(wait_time_data)); + if (qf->runtimedata->wait_times == NULL) { + perror("Couldn't allocate memory for runtime wait_times."); + exit(EXIT_FAILURE); + } #endif -} -/* The caller should call qf_init on the dest QF before calling this function. - */ -void qf_copy(QF *dest, QF *src) -{ - DEBUG_CQF("%s\n","Source CQF"); - DEBUG_DUMP(src); - memcpy(dest->mem, src->mem, sizeof(qfmem)); - memcpy(dest->metadata, src->metadata, sizeof(qfmetadata)); - memcpy(dest->blocks, src->blocks, src->metadata->size); - DEBUG_CQF("%s\n","Destination CQF after copy."); - DEBUG_DUMP(dest); + return total_num_bytes; } -/* free up the memory if the QF is in memory. - * else unmap the mapped memory from pagecache. - * - * It does not delete the file on disk for on-disk QF. - */ -void qf_destroy(QF *qf, bool mem) +uint64_t qf_use(QF* qf, void* buffer, uint64_t buffer_len) { - assert(qf->blocks != NULL); - if (mem) { - free(qf->mem); - free(qf->metadata); - free(qf->blocks); - } else { - munmap(qf->metadata, qf->metadata->size + sizeof(qfmetadata)); - close(qf->mem->fd); + qf->metadata = (qfmetadata *)(buffer); + if (qf->metadata->total_size_in_bytes + sizeof(qfmetadata) > buffer_len) { + return qf->metadata->total_size_in_bytes + sizeof(qfmetadata); } -} + qf->blocks = (qfblock *)(qf->metadata + 1); -void qf_close(QF *qf) -{ - assert(qf->blocks != NULL); - munmap(qf->metadata, qf->metadata->size + sizeof(qfmetadata)); - close(qf->mem->fd); + qf->runtimedata = (qfruntime *)calloc(sizeof(qfruntime), 1); + if (qf->runtimedata == NULL) { + perror("Couldn't allocate memory for runtime data."); + exit(EXIT_FAILURE); + } + /* initialize all the locks to 0 */ + qf->runtimedata->metadata_lock = 0; + qf->runtimedata->locks = (volatile int *)calloc(qf->runtimedata->num_locks, + sizeof(volatile int)); + if (qf->runtimedata->locks == NULL) { + perror("Couldn't allocate memory for runtime locks."); + exit(EXIT_FAILURE); + } +#ifdef LOG_WAIT_TIME + qf->runtimedata->wait_times = (wait_time_data* + )calloc(qf->runtimedata->num_locks+1, + sizeof(wait_time_data)); + if (qf->runtimedata->wait_times == NULL) { + perror("Couldn't allocate memory for runtime wait_times."); + exit(EXIT_FAILURE); + } +#endif + + return sizeof(qfmetadata) + qf->metadata->total_size_in_bytes; } -/* - * Will read the on-disk QF using mmap. - * Data won't be copied in memory. 
- * - */ -void qf_read(QF *qf, const char *path) +void *qf_destroy(QF *qf) { - struct stat sb; - int ret; + assert(qf->runtimedata != NULL); + free(qf->runtimedata); - qf->mem = (qfmem *)calloc(sizeof(qfmem), 1); - qf->mem->fd = open(path, O_RDWR, S_IRWXU); - if (qf->mem->fd < 0) { - fprintf(stderr, "Couldn't open file: %s\n", path); - exit(EXIT_FAILURE); - } + return (void*)qf->metadata; +} - ret = fstat (qf->mem->fd, &sb); - if ( ret < 0) { - perror ("fstat"); - exit(EXIT_FAILURE); - } +bool qf_malloc(QF *qf, uint64_t nslots, uint64_t key_bits, uint64_t + value_bits, enum qf_hashmode hash, uint32_t seed) +{ + uint64_t total_num_bytes = qf_init(qf, nslots, key_bits, value_bits, + hash, seed, NULL, 0); - if (!S_ISREG (sb.st_mode)) { - fprintf (stderr, "%s is not a file.\n", path); + void *buffer = malloc(total_num_bytes); + if (buffer == NULL) { + perror("Couldn't allocate memory for the CQF."); exit(EXIT_FAILURE); } - qf->metadata = (qfmetadata *)mmap(NULL, sb.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, - qf->mem->fd, 0); - - //ret = madvise(qf->metadata, sb.st_size, MADV_SEQUENTIAL); - if (ret < 0) { - perror("Couldn't madvice of memory:\n"); + qf->runtimedata = (qfruntime *)calloc(sizeof(qfruntime), 1); + if (qf->runtimedata == NULL) { + perror("Couldn't allocate memory for runtime data."); exit(EXIT_FAILURE); } - /*DEBUG_CQF("Mmaped %ld bytes at addr %lx\n", sb.st_size, (uint64_t)qf->metadata);*/ + uint64_t init_size = qf_init(qf, nslots, key_bits, value_bits, hash, seed, + buffer, total_num_bytes); - qf->blocks = (qfblock *)(qf->metadata + 1); + if (init_size == total_num_bytes) + return true; + else + return false; } -void qf_drop_pages(const QF *qf, uint64_t start_idx, uint64_t end_idx) { - int ret; - qfblock *start_addr = get_block(qf, start_idx / SLOTS_PER_BLOCK); - uint64_t start_page = PAGE_ALIGN((uint64_t)start_addr); - qfblock *end_addr = get_block(qf, end_idx / SLOTS_PER_BLOCK); - uint64_t end_page = PAGE_ALIGN((uint64_t)end_addr); - uint64_t len = (end_page - start_page); - /*DEBUG_CQF("Droping %ld bytes starting from from %lx\n", len, start_page);*/ - ret = madvise((void*)start_page, len, MADV_DONTNEED); - if (ret < 0) { - perror("Couldn't madvice of memory:\n"); - exit(EXIT_FAILURE); +bool qf_free(QF *qf) +{ + assert(qf->metadata != NULL); + void *buffer = qf_destroy(qf); + if (buffer != NULL) { + free(buffer); + return true; } + + return false; } -const unsigned char *qf_get_addr(const QF *qf, uint64_t idx) { - return (unsigned char*)PAGE_ALIGN((uint64_t)get_block(qf, idx / SLOTS_PER_BLOCK)); +void qf_copy(QF *dest, const QF *src) +{ + DEBUG_CQF("%s\n","Source CQF"); + DEBUG_DUMP(src); + memcpy(dest->runtimedata, src->runtimedata, sizeof(qfruntime)); + memcpy(dest->metadata, src->metadata, sizeof(qfmetadata)); + memcpy(dest->blocks, src->blocks, src->metadata->total_size_in_bytes); + DEBUG_CQF("%s\n","Destination CQF after copy."); + DEBUG_DUMP(dest); } void qf_reset(QF *qf) @@ -1839,84 +1828,221 @@ void qf_reset(QF *qf) qf->metadata->noccupied_slots = 0; #ifdef LOG_WAIT_TIME - memset(qf->wait_times, 0, (qf->metadata->num_locks+1)*sizeof(wait_time_data)); + memset(qf->wait_times, 0, + (qf->runtimedata->num_locks+1)*sizeof(wait_time_data)); #endif -#if BITS_PER_SLOT == 8 || BITS_PER_SLOT == 16 || BITS_PER_SLOT == 32 || BITS_PER_SLOT == 64 +#if QF_BITS_PER_SLOT == 8 || QF_BITS_PER_SLOT == 16 || QF_BITS_PER_SLOT == 32 || QF_BITS_PER_SLOT == 64 memset(qf->blocks, 0, qf->metadata->nblocks* sizeof(qfblock)); #else - memset(qf->blocks, 0, qf->metadata->nblocks*(sizeof(qfblock) 
+ SLOTS_PER_BLOCK * + memset(qf->blocks, 0, qf->metadata->nblocks*(sizeof(qfblock) + QF_SLOTS_PER_BLOCK * qf->metadata->bits_per_slot / 8)); #endif } -void qf_serialize(const QF *qf, const char *filename) +int64_t qf_resize_malloc(QF *qf, uint64_t nslots) { - FILE *fout; - fout = fopen(filename, "wb+"); - if (fout == NULL) { - perror("Error opening file for serializing\n"); - exit(EXIT_FAILURE); - } + QF new_qf; + if (!qf_malloc(&new_qf, nslots, qf->metadata->key_bits, + qf->metadata->value_bits, qf->metadata->hash_mode, + qf->metadata->seed)) + return false; + if (qf->metadata->auto_resize) + qf_set_auto_resize(&new_qf, true); - fwrite(qf->metadata, sizeof(qfmetadata), 1, fout); + // copy keys from qf into new_qf + QFi qfi; + qf_iterator_from_position(qf, &qfi, 0); + int64_t ret_numkeys = 0; + do { + uint64_t key, value, count; + qfi_get_hash(&qfi, &key, &value, &count); + qfi_next(&qfi); + int ret = qf_insert(&new_qf, key, value, count, QF_NO_LOCK | QF_KEY_IS_HASH); + if (ret < 0) { + fprintf(stderr, "Failed to insert key: %ld into the new CQF.\n", key); + return ret; + } + ret_numkeys++; + } while(!qfi_end(&qfi)); - /* we don't serialize the locks */ - fwrite(qf->blocks, qf->metadata->size, 1, fout); + qf_free(qf); + memcpy(qf, &new_qf, sizeof(QF)); - fclose(fout); + return ret_numkeys; } -void qf_deserialize(QF *qf, const char *filename) +uint64_t qf_resize(QF* qf, uint64_t nslots, void* buffer, uint64_t buffer_len) { - FILE *fin; - fin = fopen(filename, "rb"); - if (fin == NULL) { - perror("Error opening file for deserializing\n"); + QF new_qf; + new_qf.runtimedata = (qfruntime *)calloc(sizeof(qfruntime), 1); + if (new_qf.runtimedata == NULL) { + perror("Couldn't allocate memory for runtime data.\n"); exit(EXIT_FAILURE); } - qf->mem = (qfmem *)calloc(sizeof(qfmem), 1); - qf->metadata = (qfmetadata *)calloc(sizeof(qfmetadata), 1); + uint64_t init_size = qf_init(&new_qf, nslots, qf->metadata->key_bits, + qf->metadata->value_bits, + qf->metadata->hash_mode, qf->metadata->seed, + buffer, buffer_len); - fread(qf->metadata, sizeof(qfmetadata), 1, fin); + if (init_size > buffer_len) + return init_size; - /* initlialize the locks in the QF */ - qf->metadata->num_locks = (qf->metadata->xnslots/NUM_SLOTS_TO_LOCK)+2; - qf->mem->metadata_lock = 0; - /* initialize all the locks to 0 */ - qf->mem->locks = (volatile int *)calloc(qf->metadata->num_locks, sizeof(volatile int)); + if (qf->metadata->auto_resize) + qf_set_auto_resize(&new_qf, true); - qf->blocks = (qfblock *)calloc(qf->metadata->size, 1); - fread(qf->blocks, qf->metadata->size, 1, fin); + // copy keys from qf into new_qf + QFi qfi; + qf_iterator_from_position(qf, &qfi, 0); + do { + uint64_t key, value, count; + qfi_get_hash(&qfi, &key, &value, &count); + qfi_next(&qfi); + int ret = qf_insert(&new_qf, key, value, count, QF_NO_LOCK | QF_KEY_IS_HASH); + if (ret < 0) { + fprintf(stderr, "Failed to insert key: %ld into the new CQF.\n", key); + abort(); + } + } while(!qfi_end(&qfi)); + + qf_free(qf); + memcpy(qf, &new_qf, sizeof(QF)); - fclose(fin); + return init_size; } -bool qf_insert(QF *qf, uint64_t key, uint64_t value, uint64_t count, enum lock - flag) +void qf_set_auto_resize(QF* qf, bool enabled) { + if (enabled) + qf->metadata->auto_resize = 1; + else + qf->metadata->auto_resize = 0; +} + +int qf_insert(QF *qf, uint64_t key, uint64_t value, uint64_t count, uint8_t + flags) +{ + // We fill up the CQF up to 95% load factor. + // This is a very conservative check. 
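/*
 * Editor's note -- illustrative sketch, not part of the patch. qf_insert()
 * now returns an int status rather than a bool: negative codes such as
 * QF_NO_SPACE (filter full) or QF_COULDNT_LOCK (lock not acquired) indicate
 * failure, while a non-negative value is the distance between the key's home
 * slot and the slot actually used, which doubles as a fullness hint (see the
 * DISTANCE_FROM_HOME_SLOT_CUTOFF check below). A plausible calling pattern,
 * with `key` standing in for a caller-supplied key:
 */
#if 0	/* usage sketch only, never compiled */
	qf_set_auto_resize(&qf, true);               /* let inserts grow the CQF */
	int ret = qf_insert(&qf, key, /*value*/ 0, /*count*/ 1, QF_NO_LOCK);
	if (ret == QF_NO_SPACE)
		fprintf(stderr, "CQF is full and could not be resized\n");
	else if (ret == QF_COULDNT_LOCK)
		fprintf(stderr, "lock contention, retry\n"); /* only if locking was requested */
	else
		assert(ret >= 0);                        /* inserted; ret = shift distance */
#endif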
+ if (qf->metadata->noccupied_slots >= qf->metadata->nslots * 0.95) { + if (qf->metadata->auto_resize) { + fprintf(stdout, "Resizing the CQF.\n"); + qf_resize_malloc(qf, qf->metadata->nslots * 2); + } else + return QF_NO_SPACE; + } + if (count == 0) + return 0; + + if (GET_KEY_HASH(flags) != QF_KEY_IS_HASH) { + if (qf->metadata->hash_mode == QF_HASH_DEFAULT) + key = MurmurHash64A(((void *)&key), sizeof(key), + qf->metadata->seed) % qf->metadata->range; + else if (qf->metadata->hash_mode == QF_HASH_INVERTIBLE) + key = hash_64(key, BITMASK(qf->metadata->key_bits)); + } uint64_t hash = (key << qf->metadata->value_bits) | (value & BITMASK(qf->metadata->value_bits)); + int ret; if (count == 1) - return insert1(qf, hash, flag); + ret = insert1(qf, hash, flags); else - return insert(qf, hash, count, flag); + ret = insert(qf, hash, count, flags); + + // check for fullness based on the distance from the home slot to the slot + // in which the key is inserted + if (ret == QF_NO_SPACE || ret > DISTANCE_FROM_HOME_SLOT_CUTOFF) { + float load_factor = qf->metadata->noccupied_slots / + (float)qf->metadata->nslots; + fprintf(stdout, "Load factor: %lf\n", load_factor); + if (qf->metadata->auto_resize) { + fprintf(stdout, "Resizing the CQF.\n"); + if (qf_resize_malloc(qf, qf->metadata->nslots * 2) > 0) { + if (ret == QF_NO_SPACE) { + if (count == 1) + ret = insert1(qf, hash, flags); + else + ret = insert(qf, hash, count, flags); + } + fprintf(stderr, "Resize finished.\n"); + } else { + fprintf(stderr, "Resize failed\n"); + ret = QF_NO_SPACE; + } + } else { + fprintf(stderr, "The CQF is filling up.\n"); + ret = QF_NO_SPACE; + } + } + return ret; +} + +int qf_set_count(QF *qf, uint64_t key, uint64_t value, uint64_t count, uint8_t + flags) +{ + if (count == 0) + return 0; + + uint64_t cur_count = qf_count_key_value(qf, key, value, flags); + int64_t delta = count - cur_count; + + int ret; + if (delta == 0) + ret = 0; + else if (delta > 0) + ret = qf_insert(qf, key, value, delta, flags); + else + ret = qf_remove(qf, key, value, labs(delta), flags); + + return ret; } -/* count = 0 would remove the key completely. 
*/ -void qf_remove(QF *qf, uint64_t key, uint64_t value, uint64_t count, enum lock - flag) +int qf_remove(QF *qf, uint64_t key, uint64_t value, uint64_t count, uint8_t + flags) { if (count == 0) - count = qf_count_key_value(qf, key, value); + return true; + if (GET_KEY_HASH(flags) != QF_KEY_IS_HASH) { + if (qf->metadata->hash_mode == QF_HASH_DEFAULT) + key = MurmurHash64A(((void *)&key), sizeof(key), + qf->metadata->seed) % qf->metadata->range; + else if (qf->metadata->hash_mode == QF_HASH_INVERTIBLE) + key = hash_64(key, BITMASK(qf->metadata->key_bits)); + } uint64_t hash = (key << qf->metadata->value_bits) | (value & BITMASK(qf->metadata->value_bits)); - _remove(qf, hash, count, flag); + return _remove(qf, hash, count, flags); } -uint64_t qf_count_key_value(const QF *qf, uint64_t key, uint64_t value) +int qf_delete_key_value(QF *qf, uint64_t key, uint64_t value, uint8_t flags) { + uint64_t count = qf_count_key_value(qf, key, value, flags); + if (count == 0) + return true; + + if (GET_KEY_HASH(flags) != QF_KEY_IS_HASH) { + if (qf->metadata->hash_mode == QF_HASH_DEFAULT) + key = MurmurHash64A(((void *)&key), sizeof(key), + qf->metadata->seed) % qf->metadata->range; + else if (qf->metadata->hash_mode == QF_HASH_INVERTIBLE) + key = hash_64(key, BITMASK(qf->metadata->key_bits)); + } + uint64_t hash = (key << qf->metadata->value_bits) | (value & + BITMASK(qf->metadata->value_bits)); + return _remove(qf, hash, count, flags); +} + +uint64_t qf_count_key_value(const QF *qf, uint64_t key, uint64_t value, + uint8_t flags) +{ + if (GET_KEY_HASH(flags) != QF_KEY_IS_HASH) { + if (qf->metadata->hash_mode == QF_HASH_DEFAULT) + key = MurmurHash64A(((void *)&key), sizeof(key), + qf->metadata->seed) % qf->metadata->range; + else if (qf->metadata->hash_mode == QF_HASH_INVERTIBLE) + key = hash_64(key, BITMASK(qf->metadata->key_bits)); + } uint64_t hash = (key << qf->metadata->value_bits) | (value & BITMASK(qf->metadata->value_bits)); uint64_t hash_remainder = hash & BITMASK(qf->metadata->bits_per_slot); @@ -1945,19 +2071,142 @@ uint64_t qf_count_key_value(const QF *qf, uint64_t key, uint64_t value) return 0; } +uint64_t qf_query(const QF *qf, uint64_t key, uint64_t *value, uint8_t flags) +{ + if (GET_KEY_HASH(flags) != QF_KEY_IS_HASH) { + if (qf->metadata->hash_mode == QF_HASH_DEFAULT) + key = MurmurHash64A(((void *)&key), sizeof(key), + qf->metadata->seed) % qf->metadata->range; + else if (qf->metadata->hash_mode == QF_HASH_INVERTIBLE) + key = hash_64(key, BITMASK(qf->metadata->key_bits)); + } + uint64_t hash = key; + uint64_t hash_remainder = hash & BITMASK(qf->metadata->key_remainder_bits); + int64_t hash_bucket_index = hash >> qf->metadata->key_remainder_bits; + + if (!is_occupied(qf, hash_bucket_index)) + return 0; + + int64_t runstart_index = hash_bucket_index == 0 ? 
0 : run_end(qf, + hash_bucket_index-1) + + 1; + if (runstart_index < hash_bucket_index) + runstart_index = hash_bucket_index; + + /* printf("MC RUNSTART: %02lx RUNEND: %02lx\n", runstart_index, runend_index); */ + + uint64_t current_remainder, current_count, current_end; + do { + current_end = decode_counter(qf, runstart_index, ¤t_remainder, + ¤t_count); + *value = current_remainder & BITMASK(qf->metadata->value_bits); + current_remainder = current_remainder >> qf->metadata->value_bits; + if (current_remainder == hash_remainder) { + return current_count; + } + runstart_index = current_end + 1; + } while (!is_runend(qf, current_end)); + + return 0; +} + +int64_t qf_get_unique_index(const QF *qf, uint64_t key, uint64_t value, + uint8_t flags) +{ + if (GET_KEY_HASH(flags) != QF_KEY_IS_HASH) { + if (qf->metadata->hash_mode == QF_HASH_DEFAULT) + key = MurmurHash64A(((void *)&key), sizeof(key), + qf->metadata->seed) % qf->metadata->range; + else if (qf->metadata->hash_mode == QF_HASH_INVERTIBLE) + key = hash_64(key, BITMASK(qf->metadata->key_bits)); + } + uint64_t hash = (key << qf->metadata->value_bits) | (value & + BITMASK(qf->metadata->value_bits)); + uint64_t hash_remainder = hash & BITMASK(qf->metadata->bits_per_slot); + int64_t hash_bucket_index = hash >> qf->metadata->bits_per_slot; + + if (!is_occupied(qf, hash_bucket_index)) + return QF_DOESNT_EXIST; + + int64_t runstart_index = hash_bucket_index == 0 ? 0 : run_end(qf, + hash_bucket_index-1) + + 1; + if (runstart_index < hash_bucket_index) + runstart_index = hash_bucket_index; + + /* printf("MC RUNSTART: %02lx RUNEND: %02lx\n", runstart_index, runend_index); */ + + uint64_t current_remainder, current_count, current_end; + do { + current_end = decode_counter(qf, runstart_index, ¤t_remainder, + ¤t_count); + if (current_remainder == hash_remainder) + return runstart_index; + + runstart_index = current_end + 1; + } while (!is_runend(qf, current_end)); + + return QF_DOESNT_EXIST; +} + +enum qf_hashmode qf_get_hashmode(const QF *qf) { + return qf->metadata->hash_mode; +} +uint64_t qf_get_hash_seed(const QF *qf) { + return qf->metadata->seed; +} +__uint128_t qf_get_hash_range(const QF *qf) { + return qf->metadata->range; +} + +bool qf_is_auto_resize_enabled(const QF *qf) { + if (qf->metadata->auto_resize == 1) + return true; + return false; +} +uint64_t qf_get_total_size_in_bytes(const QF *qf) { + return qf->metadata->total_size_in_bytes; +} +uint64_t qf_get_nslots(const QF *qf) { + return qf->metadata->nslots; +} +uint64_t qf_get_num_occupied_slots(const QF *qf) { + return qf->metadata->noccupied_slots; +} + +uint64_t qf_get_num_key_bits(const QF *qf) { + return qf->metadata->key_bits; +} +uint64_t qf_get_num_value_bits(const QF *qf) { + return qf->metadata->value_bits; +} +uint64_t qf_get_num_key_remainder_bits(const QF *qf) { + return qf->metadata->key_remainder_bits; +} +uint64_t qf_get_bits_per_slot(const QF *qf) { + return qf->metadata->bits_per_slot; +} + +uint64_t qf_get_sum_of_counts(const QF *qf) { + return qf->metadata->nelts; +} +uint64_t qf_get_num_distinct_key_value_pairs(const QF *qf) { + return qf->metadata->ndistinct_elts; +} + /* initialize the iterator at the run corresponding * to the position index */ -bool qf_iterator(const QF *qf, QFi *qfi, uint64_t position) +int64_t qf_iterator_from_position(const QF *qf, QFi *qfi, uint64_t position) { if (position == 0xffffffffffffffff) { qfi->current = 0xffffffffffffffff; qfi->qf = qf; - return false; + return QFI_INVALID; } assert(position < qf->metadata->nslots); if 
(!is_occupied(qf, position)) { - uint64_t block_index = position / SLOTS_PER_BLOCK; + uint64_t block_index = position; uint64_t idx = bitselect(get_block(qf, block_index)->occupieds[0], 0); if (idx == 64) { while(idx == 64 && block_index < qf->metadata->nblocks) { @@ -1965,7 +2214,7 @@ bool qf_iterator(const QF *qf, QFi *qfi, uint64_t position) idx = bitselect(get_block(qf, block_index)->occupieds[0], 0); } } - position = block_index * SLOTS_PER_BLOCK + idx; + position = block_index * QF_SLOTS_PER_BLOCK + idx; } qfi->qf = qf; @@ -1978,26 +2227,41 @@ bool qf_iterator(const QF *qf, QFi *qfi, uint64_t position) #ifdef LOG_CLUSTER_LENGTH qfi->c_info = (cluster_data* )calloc(qf->metadata->nslots/32, sizeof(cluster_data)); + if (qfi->c_info == NULL) { + perror("Couldn't allocate memory for c_info."); + exit(EXIT_FAILURE); + } qfi->cur_start_index = position; qfi->cur_length = 1; #endif if (qfi->current >= qf->metadata->nslots) - return false; - return true; + return QFI_INVALID; + return qfi->current; } -bool qf_iterator_hash(const QF *qf, QFi *qfi, uint64_t hash) +int64_t qf_iterator_key_value(const QF *qf, QFi *qfi, uint64_t key, uint64_t + value, uint8_t flags) { - if (hash >= qf->metadata->range) { + if (key >= qf->metadata->range) { qfi->current = 0xffffffffffffffff; qfi->qf = qf; - return false; + return QFI_INVALID; } qfi->qf = qf; qfi->num_clusters = 0; + if (GET_KEY_HASH(flags) != QF_KEY_IS_HASH) { + if (qf->metadata->hash_mode == QF_HASH_DEFAULT) + key = MurmurHash64A(((void *)&key), sizeof(key), + qf->metadata->seed) % qf->metadata->range; + else if (qf->metadata->hash_mode == QF_HASH_INVERTIBLE) + key = hash_64(key, BITMASK(qf->metadata->key_bits)); + } + uint64_t hash = (key << qf->metadata->value_bits) | (value & + BITMASK(qf->metadata->value_bits)); + uint64_t hash_remainder = hash & BITMASK(qf->metadata->bits_per_slot); uint64_t hash_bucket_index = hash >> qf->metadata->bits_per_slot; bool flag = false; @@ -2005,8 +2269,8 @@ bool qf_iterator_hash(const QF *qf, QFi *qfi, uint64_t hash) // If a run starts at "position" move the iterator to point it to the // smallest key greater than or equal to "hash". if (is_occupied(qf, hash_bucket_index)) { - int64_t runstart_index = hash_bucket_index == 0 ? 0 : run_end(qf, - hash_bucket_index-1) + uint64_t runstart_index = hash_bucket_index == 0 ? 0 : run_end(qf, + hash_bucket_index-1) + 1; if (runstart_index < hash_bucket_index) runstart_index = hash_bucket_index; @@ -2032,7 +2296,7 @@ bool qf_iterator_hash(const QF *qf, QFi *qfi, uint64_t hash) if (!is_occupied(qf, hash_bucket_index) || !flag) { uint64_t position = hash_bucket_index; assert(position < qf->metadata->nslots); - uint64_t block_index = position / SLOTS_PER_BLOCK; + uint64_t block_index = position / QF_SLOTS_PER_BLOCK; uint64_t idx = bitselect(get_block(qf, block_index)->occupieds[0], 0); if (idx == 64) { while(idx == 64 && block_index < qf->metadata->nblocks) { @@ -2040,7 +2304,7 @@ bool qf_iterator_hash(const QF *qf, QFi *qfi, uint64_t hash) idx = bitselect(get_block(qf, block_index)->occupieds[0], 0); } } - position = block_index * SLOTS_PER_BLOCK + idx; + position = block_index * QF_SLOTS_PER_BLOCK + idx; qfi->run = position; qfi->current = position == 0 ? 
0 : run_end(qfi->qf, position-1) + 1; if (qfi->current < position) @@ -2048,13 +2312,15 @@ bool qf_iterator_hash(const QF *qf, QFi *qfi, uint64_t hash) } if (qfi->current >= qf->metadata->nslots) - return false; - return true; + return QFI_INVALID; + return qfi->current; } -int qfi_get(const QFi *qfi, uint64_t *key, uint64_t *value, uint64_t *count) +static int qfi_get(const QFi *qfi, uint64_t *key, uint64_t *value, uint64_t + *count) { - assert(qfi->current < qfi->qf->metadata->nslots); + if (qfi_end(qfi)) + return QFI_INVALID; uint64_t current_remainder, current_count; decode_counter(qfi->qf, qfi->current, ¤t_remainder, ¤t_count); @@ -2062,46 +2328,60 @@ int qfi_get(const QFi *qfi, uint64_t *key, uint64_t *value, uint64_t *count) *value = current_remainder & BITMASK(qfi->qf->metadata->value_bits); current_remainder = current_remainder >> qfi->qf->metadata->value_bits; *key = (qfi->run << qfi->qf->metadata->key_remainder_bits) | current_remainder; - *count = current_count; + *count = current_count; - /*qfi->current = end_index;*/ //get should not change the current index - //of the iterator return 0; } -int qfi_next(QFi *qfi) { - return qfi_nextx(qfi, NULL); +int qfi_get_key(const QFi *qfi, uint64_t *key, uint64_t *value, uint64_t + *count) +{ + *key = *value = *count = 0; + int ret = qfi_get(qfi, key, value, count); + if (ret == 0) { + if (qfi->qf->metadata->hash_mode == QF_HASH_DEFAULT) { + *key = 0; *value = 0; *count = 0; + return QF_INVALID; + } else if (qfi->qf->metadata->hash_mode == QF_HASH_INVERTIBLE) + *key = hash_64i(*key, BITMASK(qfi->qf->metadata->key_bits)); + } + + return ret; } -int qfi_nextx(QFi *qfi, uint64_t* read_offset) + +int qfi_get_hash(const QFi *qfi, uint64_t *key, uint64_t *value, uint64_t + *count) { - uint64_t block_index = qfi->run / SLOTS_PER_BLOCK; - qfblock* addr = get_block(qfi->qf, block_index); - if (read_offset) *read_offset = (char*)addr - (char*)(qfi->qf->metadata); + *key = *value = *count = 0; + return qfi_get(qfi, key, value, count); +} +int qfi_next(QFi *qfi) +{ if (qfi_end(qfi)) - return 1; + return QFI_INVALID; else { /* move to the end of the current counter*/ uint64_t current_remainder, current_count; qfi->current = decode_counter(qfi->qf, qfi->current, ¤t_remainder, ¤t_count); - + if (!is_runend(qfi->qf, qfi->current)) { qfi->current++; #ifdef LOG_CLUSTER_LENGTH qfi->cur_length++; #endif - if (qfi->current > qfi->qf->metadata->nslots) - return 1; + if (qfi_end(qfi)) + return QFI_INVALID; return 0; - } - else { + } else { #ifdef LOG_CLUSTER_LENGTH /* save to check if the new current is the new cluster. */ uint64_t old_current = qfi->current; #endif - uint64_t rank = bitrank(addr->occupieds[0], - qfi->run % SLOTS_PER_BLOCK); + uint64_t block_index = qfi->run / QF_SLOTS_PER_BLOCK; + uint64_t rank = bitrank(get_block(qfi->qf, block_index)->occupieds[0], + qfi->run % QF_SLOTS_PER_BLOCK); uint64_t next_run = bitselect(get_block(qfi->qf, block_index)->occupieds[0], rank); @@ -2116,9 +2396,9 @@ int qfi_nextx(QFi *qfi, uint64_t* read_offset) if (block_index == qfi->qf->metadata->nblocks) { /* set the index values to max. 
*/ qfi->run = qfi->current = qfi->qf->metadata->xnslots; - return 1; + return QFI_INVALID; } - qfi->run = block_index * SLOTS_PER_BLOCK + next_run; + qfi->run = block_index * QF_SLOTS_PER_BLOCK + next_run; qfi->current++; if (qfi->current < qfi->run) qfi->current = qfi->run; @@ -2140,12 +2420,11 @@ int qfi_nextx(QFi *qfi, uint64_t* read_offset) } } -inline int qfi_end(const QFi *qfi) +bool qfi_end(const QFi *qfi) { if (qfi->current >= qfi->qf->metadata->xnslots /*&& is_runend(qfi->qf, qfi->current)*/) - return 1; - else - return 0; + return true; + return false; } /* @@ -2159,55 +2438,66 @@ inline int qfi_end(const QFi *qfi) * insert(min, ic) * increment either ia or ib, whichever is minimum. */ -void qf_merge(QF *qfa, QF *qfb, QF *qfc, enum lock flag) +void qf_merge(const QF *qfa, const QF *qfb, QF *qfc) { QFi qfia, qfib; - qf_iterator(qfa, &qfia, 0); - qf_iterator(qfb, &qfib, 0); + qf_iterator_from_position(qfa, &qfia, 0); + qf_iterator_from_position(qfb, &qfib, 0); + + if (qfa->metadata->hash_mode != qfc->metadata->hash_mode && + qfa->metadata->seed != qfc->metadata->seed && + qfb->metadata->hash_mode != qfc->metadata->hash_mode && + qfb->metadata->seed != qfc->metadata->seed) { + fprintf(stderr, "Output QF and input QFs do not have the same hash mode or seed.\n"); + exit(1); + } uint64_t keya, valuea, counta, keyb, valueb, countb; - qfi_get(&qfia, &keya, &valuea, &counta); - qfi_get(&qfib, &keyb, &valueb, &countb); + qfi_get_hash(&qfia, &keya, &valuea, &counta); + qfi_get_hash(&qfib, &keyb, &valueb, &countb); do { if (keya < keyb) { - qf_insert(qfc, keya, valuea, counta, flag); + qf_insert(qfc, keya, valuea, counta, QF_NO_LOCK | QF_KEY_IS_HASH); qfi_next(&qfia); - qfi_get(&qfia, &keya, &valuea, &counta); + qfi_get_hash(&qfia, &keya, &valuea, &counta); } else { - qf_insert(qfc, keyb, valueb, countb, flag); + qf_insert(qfc, keyb, valueb, countb, QF_NO_LOCK | QF_KEY_IS_HASH); qfi_next(&qfib); - qfi_get(&qfib, &keyb, &valueb, &countb); + qfi_get_hash(&qfib, &keyb, &valueb, &countb); } } while(!qfi_end(&qfia) && !qfi_end(&qfib)); if (!qfi_end(&qfia)) { do { - qfi_get(&qfia, &keya, &valuea, &counta); - qf_insert(qfc, keya, valuea, counta, flag); + qfi_get_hash(&qfia, &keya, &valuea, &counta); + qf_insert(qfc, keya, valuea, counta, QF_NO_LOCK | QF_KEY_IS_HASH); } while(!qfi_next(&qfia)); } if (!qfi_end(&qfib)) { do { - qfi_get(&qfib, &keyb, &valueb, &countb); - qf_insert(qfc, keyb, valueb, countb, flag); + qfi_get_hash(&qfib, &keyb, &valueb, &countb); + qf_insert(qfc, keyb, valueb, countb, QF_NO_LOCK | QF_KEY_IS_HASH); } while(!qfi_next(&qfib)); } - - return; } /* * Merge an array of qfs into the resultant QF */ -void qf_multi_merge(QF *qf_arr[], int nqf, QF *qfr, enum lock flag) +void qf_multi_merge(const QF *qf_arr[], int nqf, QF *qfr) { int i; QFi qfi_arr[nqf]; int smallest_idx = 0; uint64_t smallest_key = UINT64_MAX; for (i=0; imetadata->hash_mode != qfr->metadata->hash_mode && + qf_arr[i]->metadata->seed != qfr->metadata->seed) { + fprintf(stderr, "Output QF and input QFs do not have the same hash mode or seed.\n"); + exit(1); + } + qf_iterator_from_position(qf_arr[i], &qfi_arr[i], 0); } DEBUG_CQF("Merging %d CQFs\n", nqf); @@ -2221,7 +2511,7 @@ void qf_multi_merge(QF *qf_arr[], int nqf, QF *qfr, enum lock flag) uint64_t values[nqf]; uint64_t counts[nqf]; for (i=0; imetadata->hash_mode != qfb->metadata->hash_mode && + qfa->metadata->seed != qfb->metadata->seed) { + fprintf(stderr, "Input QFs do not have the same hash mode or seed.\n"); + exit(1); + } // create the iterator on the larger 
QF.
-	if (qfa->metadata->size > qfb->metadata->size) {
+	if (qfa->metadata->total_size_in_bytes > qfb->metadata->total_size_in_bytes)
+	{
 		qf_mem = qfb;
 		qf_disk = qfa;
 	} else {
@@ -2277,12 +2575,12 @@ uint64_t qf_inner_product(QF *qfa, QF *qfb)
 		qf_disk = qfb;
 	}
 
-	qf_iterator(qf_disk, &qfi, 0);
+	qf_iterator_from_position(qf_disk, &qfi, 0);
 	do {
 		uint64_t key = 0, value = 0, count = 0;
 		uint64_t count_mem;
-		qfi_get(&qfi, &key, &value, &count);
-		if ((count_mem = qf_count_key_value(qf_mem, key, 0)) > 0) {
+		qfi_get_hash(&qfi, &key, &value, &count);
+		if ((count_mem = qf_count_key_value(qf_mem, key, 0, QF_KEY_IS_HASH)) > 0) {
 			acc += count*count_mem;
 		}
 	} while (!qfi_next(&qfi));
@@ -2291,13 +2589,22 @@ uint64_t qf_inner_product(QF *qfa, QF *qfb)
 }
 
 /* find cosine similarity between two QFs. */
-void qf_intersect(QF *qfa, QF *qfb, QF *qfr)
+void qf_intersect(const QF *qfa, const QF *qfb, QF *qfr)
 {
 	QFi qfi;
-	QF *qf_mem, *qf_disk;
+	const QF *qf_mem, *qf_disk;
+
+	if (qfa->metadata->hash_mode != qfr->metadata->hash_mode &&
+			qfa->metadata->seed != qfr->metadata->seed &&
+			qfb->metadata->hash_mode != qfr->metadata->hash_mode &&
+			qfb->metadata->seed != qfr->metadata->seed) {
+		fprintf(stderr, "Output QF and input QFs do not have the same hash mode or seed.\n");
+		exit(1);
+	}
 
 	// create the iterator on the larger QF.
-	if (qfa->metadata->size > qfb->metadata->size) {
+	if (qfa->metadata->total_size_in_bytes > qfb->metadata->total_size_in_bytes)
+	{
 		qf_mem = qfb;
 		qf_disk = qfa;
 	} else {
@@ -2305,17 +2612,17 @@ void qf_intersect(QF *qfa, QF *qfb, QF *qfr)
 		qf_disk = qfb;
 	}
 
-	qf_iterator(qf_disk, &qfi, 0);
+	qf_iterator_from_position(qf_disk, &qfi, 0);
 	do {
 		uint64_t key = 0, value = 0, count = 0;
-		qfi_get(&qfi, &key, &value, &count);
-		if (qf_count_key_value(qf_mem, key, 0) > 0)
-			qf_insert(qfr, key, value, count, NO_LOCK);
+		qfi_get_hash(&qfi, &key, &value, &count);
+		if (qf_count_key_value(qf_mem, key, 0, QF_KEY_IS_HASH) > 0)
+			qf_insert(qfr, key, value, count, QF_NO_LOCK | QF_KEY_IS_HASH);
 	} while (!qfi_next(&qfi));
 }
 
 /* magnitude of a QF.
*/ -uint64_t qf_magnitude(QF *qf) +uint64_t qf_magnitude(const QF *qf) { return sqrt(qf_inner_product(qf, qf)); } diff --git a/src/gqf/gqf_file.c b/src/gqf/gqf_file.c new file mode 100644 index 0000000..a9365b5 --- /dev/null +++ b/src/gqf/gqf_file.c @@ -0,0 +1,254 @@ +/* + * ============================================================================ + * + * Authors: Prashant Pandey + * Rob Johnson + * + * ============================================================================ + */ + +#include +#if 0 +# include +#else +# define assert(x) +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gqf/hashutil.h" +#include "gqf/gqf.h" +#include "gqf/gqf_int.h" +#include "gqf/gqf_file.h" + +#define NUM_SLOTS_TO_LOCK (1ULL<<16) + +bool qf_initfile(QF *qf, uint64_t nslots, uint64_t key_bits, uint64_t + value_bits, enum qf_hashmode hash, uint32_t seed, char* + filename, int prot) +{ + uint64_t total_num_bytes = qf_init(qf, nslots, key_bits, value_bits, hash, + seed, NULL, 0); + + int ret; + qf->runtimedata = (qfruntime *)calloc(sizeof(qfruntime), 1); + if (qf->runtimedata == NULL) { + perror("Couldn't allocate memory for runtime data."); + exit(EXIT_FAILURE); + } + qf->runtimedata->f_info.fd = open(filename, O_RDWR | O_CREAT | O_TRUNC, S_IRWXU); + if (qf->runtimedata->f_info.fd < 0) { + perror("Couldn't open file."); + exit(EXIT_FAILURE); + } + ret = posix_fallocate(qf->runtimedata->f_info.fd, 0, total_num_bytes); + if (ret < 0) { + perror("Couldn't fallocate file:\n"); + exit(EXIT_FAILURE); + } + qf->metadata = (qfmetadata *)mmap(NULL, total_num_bytes, prot, MAP_SHARED, + qf->runtimedata->f_info.fd, 0); + if (qf->metadata == MAP_FAILED) { + perror("Couldn't mmap metadata."); + exit(EXIT_FAILURE); + } + ret = madvise(qf->metadata, total_num_bytes, MADV_RANDOM); + if (ret < 0) { + perror("Couldn't fallocate file."); + exit(EXIT_FAILURE); + } + qf->blocks = (qfblock *)(qf->metadata + 1); + + uint64_t init_size = qf_init(qf, nslots, key_bits, value_bits, hash, seed, + qf->metadata, total_num_bytes); + qf->runtimedata->f_info.filepath = (char *)malloc(strlen(filename) + 1); + if (qf->runtimedata->f_info.filepath == NULL) { + perror("Couldn't allocate memory for runtime f_info filepath."); + exit(EXIT_FAILURE); + } + strcpy(qf->runtimedata->f_info.filepath, filename); + + if (init_size == total_num_bytes) + return true; + else + return false; +} + +uint64_t qf_usefile(QF* qf, const char* filename, int prot) +{ + struct stat sb; + int ret; + + qf->runtimedata = (qfruntime *)calloc(sizeof(qfruntime), 1); + if (qf->runtimedata == NULL) { + perror("Couldn't allocate memory for runtime data."); + exit(EXIT_FAILURE); + } + qf->runtimedata->f_info.fd = open(filename, O_RDONLY, S_IRUSR); + if (qf->runtimedata->f_info.fd < 0) { + perror("Couldn't open file."); + exit(EXIT_FAILURE); + } + + ret = fstat (qf->runtimedata->f_info.fd, &sb); + if ( ret < 0) { + perror ("fstat"); + exit(EXIT_FAILURE); + } + + if (!S_ISREG (sb.st_mode)) { + fprintf (stderr, "%s is not a file.\n", filename); + exit(EXIT_FAILURE); + } + + qf->runtimedata->f_info.filepath = (char *)malloc(strlen(filename) + 1); + if (qf->runtimedata->f_info.filepath == NULL) { + perror("Couldn't allocate memory for runtime f_info filepath."); + exit(EXIT_FAILURE); + } + strcpy(qf->runtimedata->f_info.filepath, filename); + /* initialize all the locks to 0 */ + qf->runtimedata->metadata_lock = 0; + qf->runtimedata->locks = (volatile int *)calloc(qf->runtimedata->num_locks, + sizeof(volatile int)); 
+ if (qf->runtimedata->locks == NULL) { + perror("Couldn't allocate memory for runtime locks."); + exit(EXIT_FAILURE); + } +#ifdef LOG_WAIT_TIME + qf->runtimedata->wait_times = (wait_time_data* )calloc(qf->runtimedata->num_locks+1, + sizeof(wait_time_data)); + if (qf->runtimedata->wait_times == NULL) { + perror("Couldn't allocate memory for runtime wait_times."); + exit(EXIT_FAILURE); + } +#endif + qf->metadata = (qfmetadata *)mmap(NULL, sb.st_size, prot, MAP_SHARED, + qf->runtimedata->f_info.fd, 0); + if (qf->metadata == MAP_FAILED) { + perror("Couldn't mmap metadata."); + exit(EXIT_FAILURE); + } + if (qf->metadata->magic_endian_number != MAGIC_NUMBER) { + fprintf(stderr, "Can't read the CQF. It was written on a different endian machine."); + exit(EXIT_FAILURE); + } + qf->blocks = (qfblock *)(qf->metadata + 1); + + return sizeof(qfmetadata) + qf->metadata->total_size_in_bytes; +} + +bool qf_closefile(QF* qf) +{ + assert(qf->metadata != NULL); + int fd = qf->runtimedata->f_info.fd; + uint64_t size = qf->metadata->total_size_in_bytes + sizeof(qfmetadata); + void *buffer = qf_destroy(qf); + if (buffer != NULL) { + munmap(buffer, size); + close(fd); + return true; + } + + return false; +} + +bool qf_deletefile(QF* qf) +{ + assert(qf->metadata != NULL); + char *path = (char *)malloc(strlen(qf->runtimedata->f_info.filepath) + 1); + if (qf->runtimedata->f_info.filepath == NULL) { + perror("Couldn't allocate memory for runtime f_info filepath."); + exit(EXIT_FAILURE); + } + strcpy(path, qf->runtimedata->f_info.filepath); + if (qf_closefile(qf)) { + remove(path); + return true; + } + + return false; +} + +uint64_t qf_serialize(const QF *qf, const char *filename) +{ + FILE *fout; + fout = fopen(filename, "wb+"); + if (fout == NULL) { + perror("Error opening file for serializing."); + exit(EXIT_FAILURE); + } + fwrite(qf->metadata, sizeof(qfmetadata), 1, fout); + fwrite(qf->blocks, qf->metadata->total_size_in_bytes, 1, fout); + fclose(fout); + + return sizeof(qfmetadata) + qf->metadata->total_size_in_bytes; +} + +uint64_t qf_deserialize(QF *qf, const char *filename) +{ + FILE *fin; + fin = fopen(filename, "rb"); + if (fin == NULL) { + perror("Error opening file for deserializing."); + exit(EXIT_FAILURE); + } + + qf->runtimedata = (qfruntime *)calloc(sizeof(qfruntime), 1); + if (qf->runtimedata == NULL) { + perror("Couldn't allocate memory for runtime data."); + exit(EXIT_FAILURE); + } + qf->metadata = (qfmetadata *)calloc(sizeof(qfmetadata), 1); + if (qf->metadata == NULL) { + perror("Couldn't allocate memory for metadata."); + exit(EXIT_FAILURE); + } + int ret = fread(qf->metadata, sizeof(qfmetadata), 1, fin); + if (ret < 1) { + perror("Couldn't read metadata from file."); + exit(EXIT_FAILURE); + } + if (qf->metadata->magic_endian_number != MAGIC_NUMBER) { + fprintf(stderr, "Can't read the CQF. 
It was written on a different endian machine."); + exit(EXIT_FAILURE); + } + + qf->runtimedata->f_info.filepath = (char *)malloc(strlen(filename) + 1); + if (qf->runtimedata->f_info.filepath == NULL) { + perror("Couldn't allocate memory for runtime f_info filepath."); + exit(EXIT_FAILURE); + } + strcpy(qf->runtimedata->f_info.filepath, filename); + /* initlialize the locks in the QF */ + qf->runtimedata->num_locks = (qf->metadata->xnslots/NUM_SLOTS_TO_LOCK)+2; + qf->runtimedata->metadata_lock = 0; + /* initialize all the locks to 0 */ + qf->runtimedata->locks = (volatile int *)calloc(qf->runtimedata->num_locks, + sizeof(volatile int)); + if (qf->runtimedata->locks == NULL) { + perror("Couldn't allocate memory for runtime locks."); + exit(EXIT_FAILURE); + } + qf->blocks = (qfblock *)calloc(qf->metadata->total_size_in_bytes, 1); + if (qf->blocks == NULL) { + perror("Couldn't allocate memory for blocks."); + exit(EXIT_FAILURE); + } + ret = fread(qf->blocks, qf->metadata->total_size_in_bytes, 1, fin); + if (ret < 1) { + perror("Couldn't read metadata from file."); + exit(EXIT_FAILURE); + } + fclose(fin); + + return sizeof(qfmetadata) + qf->metadata->total_size_in_bytes; +} + diff --git a/src/hashutil.cc b/src/gqf/hashutil.c similarity index 89% rename from src/hashutil.cc rename to src/gqf/hashutil.c index 257b992..4fc4be5 100644 --- a/src/hashutil.cc +++ b/src/gqf/hashutil.c @@ -1,6 +1,6 @@ /* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ // Pulled from lookup3.c by Bob Jenkins -#include "hashutil.h" +#include "gqf/hashutil.h" //----------------------------------------------------------------------------- // MurmurHash2, by Austin Appleby @@ -59,15 +59,10 @@ uint32_t MurmurHash(const void* buf, size_t len, uint32_t seed) return h; } -uint32_t MurmurHash(const std::string &s, uint32_t seed) -{ - return MurmurHash(s.data(), s.length(), seed); -} - // Thomas Wang's integer hash functions. See // for a snapshot. -uint64_t HashUtil::hash_64(uint64_t key, uint64_t mask) +uint64_t hash_64(uint64_t key, uint64_t mask) { key = (~key + (key << 21)) & mask; // key = (key << 21) - key - 1; key = key ^ key >> 24; @@ -81,7 +76,7 @@ uint64_t HashUtil::hash_64(uint64_t key, uint64_t mask) // The inversion of hash_64(). 
Modified from // -uint64_t HashUtil::hash_64i(uint64_t key, uint64_t mask) +uint64_t hash_64i(uint64_t key, uint64_t mask) { uint64_t tmp; @@ -118,7 +113,7 @@ uint64_t HashUtil::hash_64i(uint64_t key, uint64_t mask) return key; } -__uint128_t HashUtil::MurmurHash128A ( const void * key, int len, +__uint128_t MurmurHash128A ( const void * key, int len, unsigned int seed1, unsigned int seed2 ) { __uint128_t ret_hash; @@ -138,7 +133,7 @@ __uint128_t HashUtil::MurmurHash128A ( const void * key, int len, // 64-bit hash for 64-bit platforms -uint64_t HashUtil::MurmurHash64A ( const void * key, int len, unsigned int seed ) +uint64_t MurmurHash64A ( const void * key, int len, unsigned int seed ) { const uint64_t m = 0xc6a4a7935bd1e995; const int r = 47; @@ -164,13 +159,13 @@ uint64_t HashUtil::MurmurHash64A ( const void * key, int len, unsigned int seed switch(len & 7) { - case 7: h ^= uint64_t(data2[6]) << 48; - case 6: h ^= uint64_t(data2[5]) << 40; - case 5: h ^= uint64_t(data2[4]) << 32; - case 4: h ^= uint64_t(data2[3]) << 24; - case 3: h ^= uint64_t(data2[2]) << 16; - case 2: h ^= uint64_t(data2[1]) << 8; - case 1: h ^= uint64_t(data2[0]); + case 7: h ^= (uint64_t)data2[6] << 48; + case 6: h ^= (uint64_t)data2[5] << 40; + case 5: h ^= (uint64_t)data2[4] << 32; + case 4: h ^= (uint64_t)data2[3] << 24; + case 3: h ^= (uint64_t)data2[2] << 16; + case 2: h ^= (uint64_t)data2[1] << 8; + case 1: h ^= (uint64_t)data2[0]; h *= m; }; @@ -184,7 +179,7 @@ uint64_t HashUtil::MurmurHash64A ( const void * key, int len, unsigned int seed // 64-bit hash for 32-bit platforms -uint64_t HashUtil::MurmurHash64B ( const void * key, int len, unsigned int seed ) +uint64_t MurmurHash64B ( const void * key, int len, unsigned int seed ) { const unsigned int m = 0x5bd1e995; const int r = 24; @@ -235,7 +230,7 @@ uint64_t HashUtil::MurmurHash64B ( const void * key, int len, unsigned int seed return h; } -uint64_t HashUtil::AES_HASH(uint64_t x) +uint64_t AES_HASH(uint64_t x) { const uint64_t round_keys[32] = { // These were generated by hashing some randomly chosen files on my laptop diff --git a/src/kmer.cc b/src/kmer.cc index 07974a8..d65a7d1 100644 --- a/src/kmer.cc +++ b/src/kmer.cc @@ -66,11 +66,11 @@ string int_to_str(uint64_t kmer, uint64_t kmer_size) inline int Kmer::reverse_complement_base(int x) { return 3 - x; } /* Calculate the revsese complement of a kmer */ -uint64_t Kmer::reverse_complement(uint64_t kmer, uint64_t kmer_size) +__int128_t Kmer::reverse_complement(__int128_t kmer, uint64_t kmer_size) { - uint64_t rc = 0; + __int128_t rc = 0; uint8_t base = 0; - for (int i=0; i>= 2; @@ -85,62 +85,12 @@ uint64_t Kmer::reverse_complement(uint64_t kmer, uint64_t kmer_size) * Return true if the kmer is greater than or equal to its * reverse complement. * */ -inline bool Kmer::compare_kmers(uint64_t kmer, uint64_t kmer_rev) +bool Kmer::compare_kmers(__int128_t kmer, __int128_t kmer_rev) { return kmer >= kmer_rev; } -/* This code is taken from Jellyfish 2.0 - * git@github.com:gmarcais/Jellyfish.git - * */ - -// Checkered mask. cmask is every other bit on -// (0x55). cmask is two bits one, two bits off (0x33). Etc. -template -struct cmask { - static const U v = - (cmask::v << (2 * len)) | (((U)1 << len) - 1); -}; -template -struct cmask { - static const U v = 0; -}; - -// Fast reverse complement of one word through bit tweedling. 
-inline uint32_t Kmer::word_reverse_complement(uint32_t w) { - typedef uint64_t U; - w = ((w >> 2) & cmask::v) | ((w & cmask::v) << 2); - w = ((w >> 4) & cmask::v) | ((w & cmask::v) << 4); - w = ((w >> 8) & cmask::v) | ((w & cmask::v) << 8); - w = ( w >> 16 ) | ( w << 16); - return ((U)-1) - w; -} - -inline int64_t Kmer::word_reverse_complement(uint64_t w) { - typedef uint64_t U; - w = ((w >> 2) & cmask::v) | ((w & cmask::v) << 2); - w = ((w >> 4) & cmask::v) | ((w & cmask::v) << 4); - w = ((w >> 8) & cmask::v) | ((w & cmask::v) << 8); - w = ((w >> 16) & cmask::v) | ((w & cmask::v) << 16); - w = ( w >> 32 ) | ( w << 32); - return ((U)-1) - w; -} - -#ifdef HAVE_INT128 -inline static unsigned __int128 Kmer::word_reverse_complement(unsigned __int128 w) { - typedef unsigned __int128 U; - w = ((w >> 2) & cmask::v) | ((w & cmask::v) << 2); - w = ((w >> 4) & cmask::v) | ((w & cmask::v) << 4); - w = ((w >> 8) & cmask::v) | ((w & cmask::v) << 8); - w = ((w >> 16) & cmask::v) | ((w & cmask::v) << 16); - w = ((w >> 32) & cmask::v) | ((w & cmask::v) << 32); - w = ( w >> 64 ) | ( w << 64); - return ((U)-1) - w; -} -#endif - -mantis::QuerySets Kmer::parse_kmers(const char *filename, uint32_t seed, - uint64_t range, uint64_t kmer_size, +mantis::QuerySets Kmer::parse_kmers(const char *filename, uint64_t kmer_size, uint64_t& total_kmers) { mantis::QuerySets multi_kmers; total_kmers = 0; @@ -179,10 +129,7 @@ mantis::QuerySets Kmer::parse_kmers(const char *filename, uint32_t seed, else item = first_rev; - // hash the kmer using murmurhash/xxHash before adding to the list - //item = HashUtil::MurmurHash64A(((void*)&item), sizeof(item), seed); - item = HashUtil::hash_64(item, BITMASK(2*kmer_size)); - kmers_set.insert(item % range); + kmers_set.insert(item); uint64_t next = (first << 2) & BITMASK(2*kmer_size); uint64_t next_rev = first_rev >> 2; @@ -206,10 +153,7 @@ mantis::QuerySets Kmer::parse_kmers(const char *filename, uint32_t seed, else item = next_rev; - // hash the kmer using murmurhash/xxHash before adding to the list - //item = HashUtil::MurmurHash64A(((void*)&item), sizeof(item), seed); - item = HashUtil::hash_64(item, BITMASK(2*kmer_size)); - kmers_set.insert(item % range); + kmers_set.insert(item); next = (next << 2) & BITMASK(2*kmer_size); next_rev = next_rev >> 2; diff --git a/src/monochromatic_component_iterator.cc b/src/monochromatic_component_iterator.cc new file mode 100644 index 0000000..efd8324 --- /dev/null +++ b/src/monochromatic_component_iterator.cc @@ -0,0 +1,561 @@ +// +// Created by Fatemeh Almodaresi on 6/4/18. 
+// + +#include "monochromatic_component_iterator.h" + +#define EQS_PER_SLOT 20000000 + +uint64_t start_time; + +namespace dna { + + /////////////// bases ///////////////// + base operator-(base b) { + return (base) ((~((uint64_t) b)) & 0x3ULL); + } + + const base bases[4] = {C, A, T, G}; + const std::map base_from_char = {{'A', A}, + {'C', C}, + {'G', G}, + {'T', T}, + {'N', A}}; + const std::map base_to_char = {{A, 'A'}, + {C, 'C'}, + {G, 'G'}, + {T, 'T'}}; + + ///////////// kmers ///////////////////// + kmer::kmer(void) : len(0), val(0) {} + + kmer::kmer(base b) : len(1), val((uint64_t) b) {} + + kmer::kmer(int l, uint64_t v) : len(l), val(v & BITMASK(2 * l)) { + assert(l <= 32); + } + + static uint64_t string_to_kmer_val(std::string s) { + uint64_t val = 0; + for (auto c : s) + val = (val << 2) | ((uint64_t) (base_from_char.at(c))); + return val; + } + + kmer::kmer(std::string s) : len(s.size()), val(string_to_kmer_val(s)) { + assert(s.size() <= 32); + } + + // Convert to string + kmer::operator std::string() const { + std::string s; + for (auto i = 1; i < len + 1; i++) + s = s + base_to_char.at((base) ((val >> (2 * (len - i))) & BITMASK(2))); + return s; + } + + bool operator<(kmer a, kmer b) { + return a.len != b.len ? a.len < b.len : a.val < b.val; + } + + bool operator==(kmer a, kmer b) { + return a.len == b.len && a.val == b.val; + } + + bool operator!=(kmer a, kmer b) { + return !operator==(a, b); + } + + // Return the reverse complement of k + kmer operator-(kmer k) { + uint64_t val = k.val; + val = + (val >> 32) | + (val << 32); + val = + ((val >> 16) & 0x0000ffff0000ffff) | + ((val << 16) & 0xffff0000ffff0000); + val = + ((val >> 8) & 0x00ff00ff00ff00ff) | + ((val << 8) & 0xff00ff00ff00ff00); + val = + ((val >> 4) & 0x0f0f0f0f0f0f0f0f) | + ((val << 4) & 0xf0f0f0f0f0f0f0f0); + val = + ((val >> 2) & 0x3333333333333333) | + ((val << 2) & 0xcccccccccccccccc); + val = ~val; + val >>= 64 - 2 * k.len; + return kmer(k.len, val); + } + + // backwards from standard definition to match kmer.h definition + kmer canonicalize(kmer k) { + return -k < k ? 
k : -k; + } + + // Return the kmer of length |a| that results from shifting b into a + // from the right + kmer operator<<(kmer a, kmer b) { + uint64_t val = ((a.val << (2 * b.len)) | b.val) & BITMASK(2 * a.len); + return kmer(a.len, val); + } + + // Return the kmer of length |b| that results from shifting b into a + // from the left + kmer operator>>(kmer a, kmer b) { + uint64_t val + = ((b.val >> (2 * a.len)) | (a.val << (2 * (b.len - a.len)))) + & BITMASK(2 * b.len); + return kmer(b.len, val); + } + + // Append two kmers + kmer operator+(kmer a, kmer b) { + int len = a.len + b.len; + assert(len <= 32); + uint64_t val = (a.val << (2 * b.len)) | b.val; + return kmer(len, val); + } + + kmer prefix(kmer k, int len) { return kmer(len, k.val >> (2 * (k.len - len))); } + + kmer suffix(kmer k, int len) { return kmer(len, k.val & BITMASK(2 * len)); } + + bool period_divides(kmer k, uint64_t periodicity) { + static const uint64_t multipliers[33] = + { + 0, + 0x5555555555555555, // 1 + 0x1111111111111111, // 2 + 0x1041041041041041, // 3 + 0x0101010101010101, // 4 + 0x1004010040100401, // 5 + 0x1001001001001001, // 6 + 0x0100040010004001, // 7 + 0x0001000100010001, // 8 + 0x0040001000040001, // 9 + 0x1000010000100001, // 10 + 0x0000100000400001, // 11 + 0x0001000001000001, // 12 + 0x0010000004000001, // 13 + 0x0100000010000001, // 14 + 0x1000000040000001, // 15 + 0x0000000100000001, // 16 + 0x0000000400000001, // 17 + 0x0000001000000001, // 18 + 0x0000004000000001, // 19 + 0x0000010000000001, // 20 + 0x0000040000000001, // 21 + 0x0000100000000001, // 22 + 0x0000400000000001, // 23 + 0x0001000000000001, // 24 + 0x0004000000000001, // 25 + 0x0010000000000001, // 26 + 0x0040000000000001, // 27 + 0x0100000000000001, // 28 + 0x0400000000000001, // 29 + 0x1000000000000001, // 30 + 0x4000000000000001, // 31 + 0x0000000000000001, // 32 + }; + uint64_t piece = k.val & BITMASK(2 * periodicity); + piece = piece * multipliers[periodicity]; + piece = piece & BITMASK(2 * k.len); + return piece == k.val; + } + + uint64_t period(kmer k) { + for (int i = 1; i <= k.len; i++) { + if (period_divides(k, i)) + return i; + } + abort(); + } + + canonical_kmer::canonical_kmer(void) : kmer() {} + + canonical_kmer::canonical_kmer(base b) : kmer(canonicalize(kmer(b))) {} + + canonical_kmer::canonical_kmer(int l, uint64_t v) + : kmer(canonicalize(kmer(l, v))) {} + + canonical_kmer::canonical_kmer(std::string s) : kmer(canonicalize(kmer(s))) {} + + canonical_kmer::canonical_kmer(kmer k) : kmer(canonicalize(k)) {} + +} + + +//////////////////////////////// monochromatic_component_iterator ////////////////////////// + +monochromatic_component_iterator::monochromatic_component_iterator(const CQF *g, + std::vector> + +&bvin, +uint64_t num_samplesin +) +: + +cqf (g), it(g->begin(0)), bvs(bvin), num_samples(num_samplesin) { + // initialize cqf iterator + k = cqf->keybits() / 2; // 2-bit encoded + std::cerr << "k : " << k << "\n"; + sdsl::util::assign(visited, sdsl::bit_vector(cqf->capacity(), 0)); + std::cerr << "kmers: " << cqf->size() << "\n"; + std::cerr << "slots: " << cqf->capacity() << "\n"; + withMax0.resize(9); + sdsl::bit_vector d(num_samples, 0); + eqclass_map[d] = 0; +} + +monochromatic_component_iterator::work_item +monochromatic_component_iterator::front(std::queue &w) { + return w.front(); +} + +bool monochromatic_component_iterator::done() { return it.done(); } + +void monochromatic_component_iterator::operator++(void) { + + if (it.done()) return; // don't cross the bound (undefined behaviour) + ++it; + auto 
keyFromKmer = [this](KeyObject keyobj) { + return HashUtil::hash_64i(keyobj.key, BITMASK(this->cqf->keybits())); + }; + while (!it.done()) { + if (visitedKeys.find(keyFromKmer(*it)) != visitedKeys.end()) { + //if ((bool)(visited[it.iter.current])) { + ++it; + } else { + break; + } + } +} + +Mc_stats monochromatic_component_iterator::operator*(void) { + if (!work.empty()) { + std::cerr << "Throw Exception. The work queue should be empty at this point.\n"; + std::exit(1); + } + Mc_stats res; + if (it.done()) return res; + + KeyObject keyobj = *it; + node root(k, HashUtil::hash_64i(keyobj.key, BITMASK(cqf->keybits()))); + monochromatic_component_iterator::work_item neww = {root, it.iter.current, keyobj.count}; + work.push(neww); + + while (!work.empty()) { + work_item w = front(work); + work.pop(); + // pass over those that have been already visited + if (visitedKeys.find(w.curr.val) != visitedKeys.end()) { + //if (visited[w.idx]) { + continue; + } + //std::cerr << "for w " << std::string(w.curr) << " : "; + for (auto &neighbor : neighbors(w)) { + //std::cerr << "n " << std::string(neighbor.curr) << " "; + if (neighbor.curr != w.curr) { + /*if (neighbor.colorid != w.colorid) { + res.min_dist = std::min(res.min_dist, manhattanDist(neighbor.colorid, w.colorid)); + }*/ + if (visitedKeys.find(neighbor.curr.val) == visitedKeys.end()) { + //if (visited[neighbor.idx] == 0) { + if (neighbor.colorid == w.colorid) { + work.push(neighbor); + } + } + } + } + //std::cerr << "\n"; + visitedKeys.insert(w.curr.val); + res.nodeCnt++; + visited[w.idx] = 1; // set the corresponding bit + cntr++; + if (visitedKeys.size() % 1000000 == 0) + //if (cntr % 1000000 == 0) + std::cerr << "visited " << cntr << " kmers\n"; + //std::cerr << " idx " << w.idx << " visited " << visited[w.idx] << "\n"; + + } + return res; +} + +std::set +monochromatic_component_iterator::neighbors(monochromatic_component_iterator::work_item n) { + std::set result; + for (const auto b : dna::bases) { + uint64_t eqid, idx; + if (exists(b >> n.curr/*b + n.curr*/, idx, eqid)) + if (eqid != n.colorid) // ignore the neighbor if it's a self-loop + result.insert(work_item(b >> n.curr, idx, eqid)); + if (exists(n.curr << b/*n.curr + b*/, idx, eqid)) + if (eqid != n.colorid) + result.insert(work_item(n.curr << b, idx, eqid)); + } + return result; +} + +bool monochromatic_component_iterator::exists(edge e, uint64_t &idx, uint64_t &eqid) { + uint64_t tmp = e.val; + KeyObject key(HashUtil::hash_64(tmp, BITMASK(cqf->keybits())), 0, 0); + auto eq_idx = cqf->queryValAndIdx(key); + if (eq_idx.first) { + eqid = eq_idx.first - 1; + idx = eq_idx.second; + return true; + } + return false; +} + +void monochromatic_component_iterator::buildColor(std::vector &eq, uint64_t eqid) { + uint64_t i{0}, bitcnt{0}, wrdcnt{0}; + uint64_t idx = eqid / EQS_PER_SLOT; + uint64_t offset = eqid % EQS_PER_SLOT; + //std::cerr << eqid << " " << num_samples << " " << idx << " " << offset << "\n"; + while (i < num_samples) { + bitcnt = std::min(num_samples - i, (uint64_t) 64); + uint64_t wrd = (bvs[idx]).get_int(offset * num_samples + i, bitcnt); + eq[wrdcnt++] = wrd; + i += bitcnt; + } +} + +uint64_t monochromatic_component_iterator::manhattanDist(uint64_t eqid1, uint64_t eqid2) { + uint64_t dist{0}; + std::vector eq1(((num_samples - 1) / 64) + 1), eq2(((num_samples - 1) / 64) + 1); + buildColor(eq1, eqid1); + buildColor(eq2, eqid2); + + for (uint64_t i = 0; i < eq1.size(); i++) { + if (eq1[i] != eq2[i]) + dist += sdsl::bits::cnt(eq1[i] ^ eq2[i]); + } + return dist; + +} + +__uint128_t 
monochromatic_component_iterator::manhattanDistBvHash(uint64_t eqid1, + uint64_t eqid2, + uint64_t num_samples = 2586) { + sdsl::bit_vector dist(num_samples, 0); + std::vector eq1(((num_samples - 1) / 64) + 1), eq2(((num_samples - 1) / 64) + 1); + buildColor(eq1, eqid1); + buildColor(eq2, eqid2); + + for (uint64_t i = 0; i < eq1.size(); i++) { + uint64_t bitcnt = std::min(this->num_samples - (i * 64), (uint64_t) 64); + dist.set_int((i * 64), (eq1[i] ^ eq2[i]), bitcnt); + //std::cerr << i << " " << eq1[i] << " " << eq2[i] << " " << (eq1[i] ^ eq2[i]) << "\n"; + } + __uint128_t dist_hash = HashUtil::MurmurHash128A((void *) dist.data(), + dist.capacity() / 8, 2038074743, + 2038074751); + return dist_hash; +} + +void monochromatic_component_iterator::manhattanDistBvHash(uint64_t eqid1, + uint64_t eqid2, + sdsl::bit_vector &dist, + uint64_t num_samples = 2586) { + std::vector eq1(((num_samples - 1) / 64) + 1), eq2(((num_samples - 1) / 64) + 1); + buildColor(eq1, eqid1); + buildColor(eq2, eqid2); + + for (uint64_t i = 0; i < eq1.size(); i++) { + uint64_t bitcnt = std::min(this->num_samples - (i * 64), (uint64_t) 64); + dist.set_int((i * 64), (eq1[i] ^ eq2[i]), bitcnt); + //std::cerr << i << " " << eq1[i] << " " << eq2[i] << " " << (eq1[i] ^ eq2[i]) << "\n"; + } +} + + +void monochromatic_component_iterator::neighborDist(uint64_t cntrr) { + KeyObject keyobj = *it; +/* + if (keyobj.count == 0) + std::cerr << "we have 0\n"; + if (keyobj.count >= 19216547) + std::cerr << cntrr << ", keyobj cnt: " << keyobj.count << "\n"; +*/ + node curn(k, HashUtil::hash_64i(keyobj.key, BITMASK(cqf->keybits()))); + work_item cur = {curn, it.iter.current, keyobj.count - 1}; + uint64_t mind{UINTMAX_MAX}, meand{0}, maxd{0}, neighborCnt{0}; + for (auto &nei : neighbors(cur)) { + neighborCnt++; + if (nei.colorid != cur.colorid) { + auto d = manhattanDist(nei.colorid, cur.colorid); + mind = std::min(mind, d); + maxd = std::max(maxd, d); + meand += d; + } else { + mind = 0; + } + } + // if the node is isolated (or only has a self-loop) it should have maximum possible distance + if (neighborCnt == 0) { + isolatedCnt++; + return; + } + + if (!maxd) { + withMax0[neighborCnt]++; + return; + } + // when we get here, neighborCnt is > 0, and mind is not UINT_MAX + std::cout << neighborCnt << "\t" << mind << "\t" + << (meand / neighborCnt) << "\t" << maxd << "\n"; +} + +void monochromatic_component_iterator::buildEqGraph(uint64_t cntrr) { + KeyObject keyobj = *it; + node curn(k, HashUtil::hash_64i(keyobj.key, BITMASK(cqf->keybits()))); + work_item cur = {curn, it.iter.current, keyobj.count - 1}; + uint64_t neighborCnt{0}; + for (auto &nei : neighbors(cur)) { + neighborCnt++; + if (nei.colorid < cur.colorid) { + uint16_t d = (uint16_t) manhattanDist(nei.colorid, cur.colorid); + //if (d <= distThreshold) { + Edge e(static_cast(nei.colorid), static_cast(cur.colorid)); + if (edges.find(e) == edges.end()) { + edges[e] = d; + } else if (edges[e] > d) { + edges[e] = d; + } + //} + } + } + +} + +void monochromatic_component_iterator::uniqNeighborDist(uint64_t num_samples) { + KeyObject keyobj = *it; + node curn(k, HashUtil::hash_64i(keyobj.key, BITMASK(cqf->keybits()))); + work_item cur = {curn, it.iter.current, keyobj.count - 1}; + uint64_t mind{UINTMAX_MAX}, meand{0}, maxd{0}, neighborCnt{0}; + //std::cerr << " current : " << cur.colorid << "\n"; + for (auto &nei : neighbors(cur)) { + neighborCnt++; + sdsl::bit_vector d(num_samples, 0); + if (nei.colorid != cur.colorid) { + //xstd::cerr << nei.colorid << "\n"; + //auto d = 
manhattanDistBvHash(nei.colorid, cur.colorid, num_samples); + manhattanDistBvHash(nei.colorid, cur.colorid, d, num_samples); + if (eqclass_map.find(d) == eqclass_map.end()) { + eqclass_map[d] = 1; + } else { + eqclass_map[d]++; + } + } /*else { + eqclass_map[d]++; + }*/ + } +} + +/* + * === FUNCTION ============================================================ + * Name: main + * Description: + * =========================================================================== + */ + +int main(int argc, char *argv[]) { + + std::string command = argv[1]; + std::string cqf_file = argv[2]; + std::string eqlistfile = argv[3]; + uint64_t num_samples = std::stoull(argv[4]); + //uint64_t num_samples = 2586; + if (argc > 4) + num_samples = std::stoull(argv[4]); + std::cerr << "num samples: " << num_samples << "\n"; + CQF cqf(cqf_file, true); + std::cerr << "cqf loaded: " << cqf.size() << "\n"; + std::string eqfile; + std::ifstream eqlist(eqlistfile); + std::vector> + bvs; + bvs.reserve(20); + if (eqlist.is_open()) { + uint64_t accumTotalEqCls = 0; + while (getline(eqlist, eqfile)) { + sdsl::rrr_vector<63> bv; + bvs.push_back(bv); + sdsl::load_from_file(bvs.back(), eqfile); + } + } + //BitVectorRRR bv(eqfile); + std::cerr << "num eq clss: " << ((bvs.size() - 1) * EQS_PER_SLOT * num_samples + bvs.back().size()) / num_samples + << "\n"; + monochromatic_component_iterator mci(&cqf, bvs, num_samples); + if (command == "monocomp") { + while (!mci.done()) { + std::cout << (*mci).nodeCnt << "\n"; + ++mci; + } + } else if (command == "neighborDist") { + size_t cntrr = 0; + while (!mci.done()) { + mci.neighborDist(cntrr++); + ++mci; + if (cntrr % 10000000 == 0) { + std::cerr << cntrr << " done\n"; + } + } + std::cerr << "\n\nIn order of 0 to 8:\n"; + uint64_t total0s{0}; + for (auto val : mci.withMax0) { + std::cerr << val << ","; + total0s += val; + } + std::cerr << "\ntotal 0s: " << total0s - mci.withMax0[0] + << "\ntota isolated kmers: " << mci.isolatedCnt + << "\n"; + } else if (command == "buildEqGraph") { + size_t cntrr = 0; + while (!mci.done()) { + mci.buildEqGraph(cntrr++); + ++mci; + if (cntrr % 10000000 == 0) { + std::cerr << cntrr << " kmers & " << mci.edges.size() << " edges\n"; + } + } + + for (auto &kv : mci.edges) { + std::cout << kv.first.n1 << "\t" << kv.first.n2 << "\t" << kv.second << "\n"; + } + } else if (command == "uniquDistanceDistribution") { + uint64_t cntr{0}; + while (!mci.done()) { + mci.uniqNeighborDist(num_samples); + if (++cntr % 1000000 == 0) { + std::cerr << cntr << " kmers & " << mci.eqclass_map.size() << " unique distances.\n"; + } + ++mci; + } + std::cerr << "writing the list of distinct distances:\n"; + sdsl::bit_vector outbv(mci.eqclass_map.size() * num_samples, 0); + std::cerr << "total cnt: " << mci.eqclass_map.size() << " " + << (mci.eqclass_map.size() * num_samples) << "\n"; + cntr = 0; + for (auto &eq_keyval : mci.eqclass_map) { + std::cout << eq_keyval.second << "\n"; + auto &bv = eq_keyval.first; + uint64_t b = 0; + while (b < num_samples) { + uint64_t bitcnt = std::min(num_samples - b, (uint64_t) 64); + uint64_t wrd = bv.get_int(b, bitcnt); + outbv.set_int(num_samples * cntr + b, wrd, bitcnt); + b += bitcnt; + } + cntr++; + //std::cerr << cntr << "\n"; + } + uint64_t till = cqf_file.find_last_of('/'); + sdsl::rrr_vector<> cbv(outbv); + sdsl::store_to_file(cbv, cqf_file.substr(0, till + 1) + "dist_bv.rrr"); + } +} diff --git a/src/query.cc b/src/query.cc index f90ff5b..43be0ee 100644 --- a/src/query.cc +++ b/src/query.cc @@ -1,15 +1,6 @@ /* * 
============================================================================ * - * Filename: query.cc - * - * Description: - * - * Version: 1.0 - * Created: 2017-10-27 12:56:50 AM - * Revision: none - * Compiler: gcc - * * Author: Prashant Pandey (), ppandey@cs.stonybrook.edu * Organization: Stony Brook University * @@ -68,14 +59,17 @@ void output_results(mantis::QuerySets& multi_kmers, opfile << cnt++ << '\t' << kmers.size() << '\n'; mantis::QueryResult result = cdbg.find_samples(kmers); for (auto it = result.begin(); it != result.end(); ++it) { - opfile << cdbg.get_sample(it->first) << '\t' << it->second << '\n'; + if (*it > 0) { + auto i = std::distance(result.begin(), it); + opfile << i << '\t' << *it << '\n'; + } + //opfile << cdbg.get_sample(it->first) << '\t' << it->second << '\n'; } //++qctr; } } } - void output_results_json(mantis::QuerySets& multi_kmers, ColoredDbg*>, KeyObject>& cdbg, std::ofstream& opfile) { @@ -90,10 +84,17 @@ void output_results_json(mantis::QuerySets& multi_kmers, //std::sort(kmers.begin(), kmers.end()); opfile << "{ \"qnum\": " << cnt++ << ", \"num_kmers\": " << kmers.size() << ", \"res\": {\n"; mantis::QueryResult result = cdbg.find_samples(kmers); + bool first{true}; for (auto it = result.begin(); it != result.end(); ++it) { - opfile << " \"" <first) << "\": " << it->second ; - if (std::next(it) != result.end()) { - opfile << ",\n"; + if (*it > 0) { + if (!first) {opfile << ",\n"; first=false;} + auto i = std::distance(result.begin(), it); + opfile << " \"" <keybits() / 2; console->info("Read colored dbg with {} k-mers and {} color classes", - cdbg.get_cqf()->size(), cdbg.get_num_bitvectors()); + cdbg.get_cqf()->dist_elts(), cdbg.get_num_bitvectors()); //cdbg.get_cqf()->dump_metadata(); //CQF cqf(query_file, false); @@ -169,8 +170,6 @@ int query_main (QueryOpts& opt) uint32_t seed = 2038074743; uint64_t total_kmers = 0; mantis::QuerySets multi_kmers = Kmer::parse_kmers(query_file.c_str(), - seed, - cdbg.range(), kmer_size, total_kmers); console->info("Total k-mers to query: {}", total_kmers); diff --git a/src/squeakrconfig.cc b/src/squeakrconfig.cc new file mode 100644 index 0000000..3be2693 --- /dev/null +++ b/src/squeakrconfig.cc @@ -0,0 +1,32 @@ +/* + * ===================================================================================== + * + * Author: Prashant Pandey (), ppandey@cs.stonybrook.edu + * Organization: Stony Brook University + * + * ===================================================================================== + */ + +#include // std::ifstream + +#include "squeakrconfig.h" + +namespace squeakr { + + int read_config(std::string file, squeakrconfig *config) { + // seek to the end of the file and read the k-mer size + std::ifstream squeakr_file(file, std::ofstream::in); + squeakr_file.seekg(0, squeakr_file.end); + uint64_t file_size = squeakr_file.tellg(); + squeakr_file.seekg(file_size - sizeof(squeakrconfig)); + squeakr_file.read((char*)config, sizeof(squeakrconfig)); + squeakr_file.close(); + if (config->endianness != ENDIANNESS) { + return SQUEAKR_INVALID_ENDIAN; + } + if (config->version != INDEX_VERSION) { + return SQUEAKR_INVALID_VERSION; + } + return 0; + } +} diff --git a/src/validatemantis.cc b/src/validatemantis.cc index 6aa2ddd..45f31e8 100644 --- a/src/validatemantis.cc +++ b/src/validatemantis.cc @@ -1,15 +1,6 @@ /* * ===================================================================================== * - * Filename: validate_mantis.cc - * - * Description: - * - * Version: 1.0 - * Created: 2017-10-31 12:13:49 PM - * 
Revision: none - * Compiler: gcc - * * Author: Prashant Pandey (), ppandey@cs.stonybrook.edu * Organization: Stony Brook University * @@ -61,7 +52,7 @@ validate_main ( ValidateOpts& opt ) { spdlog::logger* console = opt.console.get(); - // Read experiment CQFs and cutoffs + // Read experiment CQFs std::ifstream infile(opt.inlist); uint64_t num_samples{0}; if (infile.is_open()) { @@ -82,30 +73,21 @@ validate_main ( ValidateOpts& opt ) cqfs = (CQF*)calloc(num_samples, sizeof(CQF)); - // Read cutoffs files - //std::unordered_map cutoffs; - //std::string sample_id; - //while (cutofffile >> sample_id >> cutoff) { - //std::pair pair(last_part(sample_id, '/'), cutoff); - //cutoffs.insert(pair); - //} - // mmap all the input cqfs std::string cqf_file; uint32_t nqf = 0; - uint32_t cutoff; - while (infile >> cqf_file >> cutoff) { + while (infile >> cqf_file) { if (!mantis::fs::FileExists(cqf_file.c_str())) { console->error("Squeakr file {} does not exist.", cqf_file); exit(1); } - cqfs[nqf] = CQF(cqf_file, false); + cqfs[nqf] = CQF(cqf_file, CQF_FREAD); std::string sample_id = first_part(first_part(last_part(cqf_file, '/'), '.'), '_'); console->info("Reading CQF {} Seed {}", nqf, cqfs[nqf].seed()); - console->info("Sample id {} cut-off {}", sample_id, cutoff); + console->info("Sample id {} cut-off {}", sample_id); cqfs[nqf].dump_metadata(); - inobjects[nqf] = SampleObject*>(&cqfs[nqf], cutoff, + inobjects[nqf] = SampleObject*>(&cqfs[nqf], sample_id, nqf); nqf++; } @@ -132,39 +114,33 @@ validate_main ( ValidateOpts& opt ) uint64_t kmer_size = cdbg.get_cqf()->keybits() / 2; console->info("Read colored dbg with {} k-mers and {} color classes", - cdbg.get_cqf()->size(), cdbg.get_num_bitvectors()); + cdbg.get_cqf()->dist_elts(), cdbg.get_num_bitvectors()); std::string query_file = opt.query_file; console->info("Reading query kmers from disk."); uint32_t seed = 2038074743; uint64_t total_kmers = 0; mantis::QuerySets multi_kmers = Kmer::parse_kmers(query_file.c_str(), - seed, - cdbg.range(), kmer_size, total_kmers); console->info("Total k-mers to query: {}", total_kmers); - // Query kmers in each experiment CQF ignoring kmers below the cutoff. + // Query kmers in each experiment CQF // Maintain the fraction of kmers present in each experiment CQF. 
std::vector> ground_truth; - std::vector> cdbg_output; + std::vector cdbg_output; bool fail{false}; for (auto kmers : multi_kmers) { std::unordered_map fraction_present; for (uint64_t i = 0; i < nqf; i++) { - uint32_t cutoff = inobjects[i].cutoff; for (auto kmer : kmers) { KeyObject k(kmer, 0, 0); - uint64_t count = cqfs[i].query(k); - if (count < cutoff) - continue; - else - fraction_present[inobjects[i].id] += 1; + uint64_t count = cqfs[i].query(k, 0); + fraction_present[inobjects[i].id] += 1; } } // Query kmers in the cdbg - std::unordered_map result = cdbg.find_samples(kmers); + auto result = cdbg.find_samples(kmers); // Validate the cdbg output for (uint64_t i = 0; i < nqf; i++) diff --git a/src/walkCqf.cc b/src/walkCqf.cc new file mode 100644 index 0000000..95a087a --- /dev/null +++ b/src/walkCqf.cc @@ -0,0 +1,216 @@ +/* + * =========================================================================== + * + * Filename: kmer_query.cc + * + * Description: + * + * Version: 1.0 + * Created: 04/27/2016 08:48:26 AM + * Revision: none + * Compiler: gcc + * + * Author: Prashant Pandey (), ppandey@cs.stonybrook.edu + * Organization: Stony Brook University + * + * =========================================================================== + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cqf.h" +#include "hashutil.h" +#include "kmer.h" + + +#define BITMASK(nbits) ((nbits) == 64 ? 0xffffffffffffffff : (1ULL << (nbits)) - 1ULL) + +struct Iterator { + QFi qfi; + uint64_t kmer; + uint64_t val; + uint64_t cnt; + Iterator(const QF& cqf) { + if (qf_iterator(&cqf, &qfi, 0)) get_key(); + } + void next() { + qfi_next(&qfi); + get_key(); + } + bool end() const { + return qfi_end(&qfi); + } + bool operator>(const Iterator& rhs) const { + return key() > rhs.key(); + } + const uint64_t key() const { return kmer; } + const uint64_t value() const { return val; } + const uint64_t count() const { return cnt; } + +private: + void get_key() { + qfi_get(&qfi, &kmer, &val, &cnt); + } +}; + +/* Print elapsed time using the start and end timeval */ +void print_time_elapsed(string desc, struct timeval* start, struct timeval* end) +{ + struct timeval elapsed; + if (start->tv_usec > end->tv_usec) { + end->tv_usec += 1000000; + end->tv_sec--; + } + elapsed.tv_usec = end->tv_usec - start->tv_usec; + elapsed.tv_sec = end->tv_sec - start->tv_sec; + float time_elapsed = (elapsed.tv_sec * 1000000 + elapsed.tv_usec)/1000000.f; + std::cout << desc << "Total Time Elapsed: " << to_string(time_elapsed) << " seconds" << std::endl; +} + +void run_filter(std::string ds_file, + std::string out_file, + uint64_t cutoff, + uint64_t approximate_num_of_kmers_greater_than_cutoff) { + struct timeval start, end; + struct timezone tzp; + + //Initialize the QF + gettimeofday(&start, &tzp); + cout << "Reading the input CQF off disk" << std::endl; + CQF cqf(ds_file, false); + cout << "Done loading cqf in time: "; + gettimeofday(&end, &tzp); + print_time_elapsed("", &start, &end); + Iterator it(cqf.cqf);//= cqf.begin(0); + + uint64_t quotientBits = std::ceil(std::log2(approximate_num_of_kmers_greater_than_cutoff))+1; + std::cout << "quotientBits : " << quotientBits << "\n"; + std::cout << "keybits : " << cqf.keybits() << "\n"; + CQF newCqf(quotientBits, cqf.keybits(), cqf.seed()); + gettimeofday(&start, &tzp); + uint64_t cntr = 1; + uint64_t insertedCntr = 0; + while 
(!it.end()) { + //KeyObject k = *it; + KeyObject k(it.key(), it.value(), it.count()); + if (k.count >= cutoff) { + k.count = 1;//cutoff; + newCqf.insert(k); + insertedCntr++; + } + //++it; + it.next(); + if (cntr % 10000000 == 0) { + std::cout << cntr << " kmers passed, " + << insertedCntr << " kmers inserted" ; + //<< newCqf.noccupied_slots() << " slots occupied, " + //<< " load factor: " + //<< static_cast(newCqf.noccupied_slots())/static_cast(1ULL << quotientBits) << "\n"; + } + cntr++; + } + newCqf.serialize(out_file); + gettimeofday(&end, &tzp); + print_time_elapsed("", &start, &end); + +} + +void run_list_kmers(std::string ds_file, + std::string out_file) { +/* + struct timeval start, end; + struct timezone tzp; + + //Initialize the QF + gettimeofday(&start, &tzp); + cout << "Reading the input CQF off disk" << std::endl; + CQF cqf(ds_file, false); + cout << "Done loading cqf in time: "; + gettimeofday(&end, &tzp); + print_time_elapsed("", &start, &end); + //typename CQF::Iterator it = cqf.begin(0); + + gettimeofday(&start, &tzp); + std::ofstream fout(out_file, ios::out); + while (!it.end()) { + //KeyObject k = *it; + KeyObject k(it.key(), it.value(), it.count()); + // kmers.push_back(HashUtil::hash_64i(k.key, BITMASK(cqf.keybits()))); + uint64_t kint = HashUtil::hash_64i(k.key, BITMASK(cqf.keybits())); + //if (k.count >= cutoff) { + std::string kstr = Kmer::int_to_str(kint); + fout << kstr << "\t" << k.count << "\n"; + //} + //++it; + it.next(); + } + fout.close(); + gettimeofday(&end, &tzp); + print_time_elapsed("", &start, &end); +*/ +} + +/* + * === FUNCTION ============================================================ + * Name: main + * Description: + * =========================================================================== + */ + +int main ( int argc, char *argv[] ) +{ + std::cout << argc << " ....... \n"; + std::string command = argv[1]; + if (command != "filter" and command != "list_kmers") { + std::cerr << "ERROR: command can only be filter or list_kmers.\n"; + exit(1); + } + + std::string ds_file = argv[2]; + cout << ds_file << "\n"; + std::string out_file = argv[3]; + cout << out_file << "\n"; + uint64_t cutoff = 0; + uint64_t approximate_num_of_kmers_greater_than_cutoff = 0; + if (command == "filter") { + if (argc < 6) { + std::cerr << "ERROR: missing last argument for filter command\n"; + exit(1); + } + cutoff = stoi(argv[4]); + cout << cutoff << "\n"; + cout << argv[5] << "\n"; + approximate_num_of_kmers_greater_than_cutoff = stoull(argv[5]); + } + + if (command == "filter") { + run_filter(ds_file, out_file, cutoff, approximate_num_of_kmers_greater_than_cutoff); + } + else if (command == "listKmers") { + run_list_kmers(ds_file, out_file); + } + + return EXIT_SUCCESS; +} /* ---------- end of function main ---------- */ + diff --git a/src/walkMSF.cc b/src/walkMSF.cc new file mode 100644 index 0000000..46a1ff7 --- /dev/null +++ b/src/walkMSF.cc @@ -0,0 +1,494 @@ +// +// Created by Fatemeh Almodaresi on 7/20/18. 
+// + +#include +#include +#include +#include "MSF.h" +#include "cqf.h" +#include "common_types.h" +#include "CLI/CLI.hpp" +#include "CLI/Timer.hpp" +#include "kmer.h" +#include "lrucache.hpp" +#include "hashutil.h" +#include "lru/lru.hpp" +#include "tsl/hopscotch_map.h" +#include "nonstd/optional.hpp" +#include + +struct QueryStats { + uint32_t cnt = 0, cacheCntr = 0, noCacheCntr{0}; + uint64_t totSel{0}; + std::chrono::duration selectTime{0}; + std::chrono::duration flipTime{0}; + uint64_t totEqcls{0}; + uint64_t rootedNonZero{0}; + uint64_t nextCacheUpdate{10000}; + uint64_t globalQueryNum{0}; + std::vector buffer; + uint64_t numSamples{0}; + tsl::hopscotch_map numOcc; + bool trySample{false}; +}; + + +namespace mantis{ + namespace util { + class int_hasher { + public: + size_t operator()(uint64_t i) const { + return HashUtil::MurmurHash64A(reinterpret_cast(&i), sizeof(i), 8675309); + } + }; + } +} + +using LRUCacheMap = LRU::Cache>;// cache::lru_cache , mantis::util::int_hasher>; + +class RankScores { +public: + RankScores(uint64_t nranks) {rs_.resize(nranks);} + + std::unordered_map& operator[](uint32_t r) { + if (r > maxRank_) { + maxRank_ = std::min(r, static_cast(rs_.size()-1)); + } + return (r < rs_.size()) ? rs_[r] : rs_.back(); + } + + void clear() { + for (auto& m : rs_){ m.clear(); } + maxRank_ = 0; + } + + uint32_t maxRank() const { return maxRank_; } + +private: + std::vector> rs_; + uint32_t maxRank_{0}; +}; + + +class MSFQuery { + +private: + uint64_t numSamples; + uint64_t numWrds; + uint32_t zero; + sdsl::bit_vector bbv; + +public: + sdsl::int_vector<> parentbv; + sdsl::int_vector<> deltabv; + sdsl::bit_vector::select_1_type sbbv; + + MSFQuery(uint64_t numSamplesIn) : numSamples(numSamplesIn) { + numWrds = (uint64_t) std::ceil((double) numSamples / 64.0); + } + + void loadIdx(std::string indexDir) { + sdsl::load_from_file(parentbv, indexDir + "/parents.bv"); + sdsl::load_from_file(deltabv, indexDir + "/deltas.bv"); + sdsl::load_from_file(bbv, indexDir + "/boundary.bv"); + sbbv = sdsl::bit_vector::select_1_type(&bbv); + zero = 0; //parentbv.size() - 1; // maximum color id which + std::cerr << "Loaded the new color class index\n"; + std::cerr << "--> parent size: " << parentbv.size() << "\n" + << "--> delta size: " << deltabv.size() << "\n" + << "--> boundary size: " << bbv.size() << "\n"; + } + + std::vector buildColor(uint64_t eqid, QueryStats &queryStats, + LRUCacheMap *lru_cache, + RankScores* rs, + nonstd::optional& toDecode, // output param. Also decode these + bool all = true) { + eqid++; + (void)rs; + std::vector flips(numSamples); + std::vector xorflips(numSamples, 0); + uint64_t i{eqid}, from{0}, to{0}; + int64_t height{0}; + auto& froms = queryStats.buffer; + froms.clear(); + queryStats.totEqcls++; + bool foundCache = false; + uint32_t iparent = parentbv[i]; + while (iparent != i) { + if (lru_cache and lru_cache->contains(i)) { + const auto &vs = (*lru_cache)[i]; + for (auto v : vs) { + xorflips[v] = 1; + } + queryStats.cacheCntr++; + foundCache = true; + break; + } + from = (i > 0) ? (sbbv(i) + 1) : 0; + froms.push_back(from); + + if (queryStats.trySample) { + auto& occ = queryStats.numOcc[iparent]; + ++occ; + if ((!toDecode) and + (occ > 10) and + (height > 10) and + (lru_cache and + !lru_cache->contains(iparent))) { + toDecode = iparent; + } + } + i = iparent; + iparent = parentbv[i]; + ++queryStats.totSel; + ++height; + } + if (!foundCache and i != zero) { + from = (i > 0) ? 
(sbbv(i) + 1) : 0; + froms.push_back(from); + ++queryStats.totSel; + queryStats.rootedNonZero++; + ++height; + } + uint64_t pctr{0}; + for (auto f : froms) { + bool found = false; + uint64_t wrd{0}; + uint64_t offset{0}; + auto start = f; + do { + wrd = bbv.get_int(start, 64); + for (uint64_t j = 0; j < 64; j++) { + flips[deltabv[start + j]] ^= 0x01; + if ((wrd >> j) & 0x01) { + found = true; + break; + } + } + start += 64; + } while (!found); + } + + if (!all) { // return the indices of set bits + std::vector eq; + eq.reserve(numWrds); + uint64_t one = 1; + for (i = 0; i < numSamples; i++) { + if (flips[i] ^ xorflips[i]) { + eq.push_back(i); + } + } + return eq; + } + std::vector eq(numWrds); + uint64_t one = 1; + for (i = 0; i < numSamples; i++) { + if (flips[i] ^ xorflips[i]) { + uint64_t idx = i / 64; + eq[idx] = eq[idx] | (one << (i % 64)); + } + } + return eq; + } + +}; + +mantis::QueryResult findSamples(const mantis::QuerySet &kmers, + CQF &dbg, MSFQuery &msfQuery, + LRUCacheMap& lru_cache, + RankScores* rs, + QueryStats &queryStats) { + std::unordered_map query_eqclass_map; + for (auto k : kmers) { + KeyObject key(k, 0, 0); + uint64_t eqclass = dbg.query(key); + if (eqclass) + query_eqclass_map[eqclass] += 1; + } + + mantis::QueryResult sample_map(queryStats.numSamples,0); + size_t numPerLevel = 10; + nonstd::optional toDecode{nonstd::nullopt}; + nonstd::optional dummy{nonstd::nullopt}; + + for (auto it = query_eqclass_map.begin(); it != query_eqclass_map.end(); ++it) { + auto eqclass_id = it->first - 1; + auto count = it->second; + + std::vector setbits; + if (lru_cache.contains(eqclass_id)) { + setbits = lru_cache[eqclass_id];//.get(eqclass_id); + queryStats.cacheCntr++; + } else { + queryStats.noCacheCntr++; + toDecode.reset(); + dummy.reset(); + queryStats.trySample = (queryStats.noCacheCntr % 10 == 0); + setbits = msfQuery.buildColor(eqclass_id, queryStats, &lru_cache, rs, toDecode, false); + lru_cache.emplace(eqclass_id, setbits); + if ((queryStats.trySample) and toDecode) { + auto s = msfQuery.buildColor(*toDecode, queryStats, nullptr, nullptr, dummy, false); + lru_cache.emplace(*toDecode, s); + } + } + for (auto sb : setbits) { + sample_map[sb] += count; + } + + ++queryStats.globalQueryNum; + /* + if (queryStats.globalQueryNum > queryStats.nextCacheUpdate) { + for (int64_t i = rs.maxRank(); i > 50; i-=50) { + auto& m = rs[i]; + if (m.size() > numPerLevel) { + std::vector> pairs; + pairs.reserve(m.size()); + std::copy(m.begin(), m.end(), std::back_inserter(pairs)); + std::nth_element(pairs.begin(), pairs.begin()+numPerLevel, pairs.end(), + [](const std::pair &a, const std::pair &b) { + return a.second > b.second; + }); + for (auto pit = pairs.begin(); pit != pairs.begin()+numPerLevel; ++pit) { + if(!lru_cache.exists(pit->first)) { + auto v = msfQuery.buildColor(pit->first, queryStats, nullptr, nullptr, false); + lru_cache.put(pit->first, v); + } + } + } else if(m.size() > 0){ + for (auto pit = m.begin(); pit != m.end(); ++pit) { + if(!lru_cache.exists(pit->first)) { + auto v = msfQuery.buildColor(pit->first, queryStats, nullptr, nullptr, false); + lru_cache.put(pit->first, v); + } + } + } + + } + queryStats.nextCacheUpdate += 10000; + rs.clear(); + }*/ + } + + return sample_map; +} + +std::pair recursiveSteps(uint32_t idx, sdsl::int_vector<> &parentbv, + std::vector> &steps, + uint32_t zero) { + if (idx == zero) { + steps[idx].second = zero; + return steps[idx]; // 0 + } + if (steps[idx].first != 0) + return steps[idx]; + if (parentbv[idx] == idx) { + steps[idx].first = 1; 
// to retrieve the representative + steps[idx].second = idx; + return steps[idx]; + } + auto ret = recursiveSteps(parentbv[idx], parentbv, steps, zero); + steps[idx].first = ret.first + 1; + steps[idx].second = ret.second; + return steps[idx]; +} + +struct Opts { + std::string indexDir; + uint64_t numSamples; + std::string eqlistfile; + std::string cqffile; + std::string outputfile; + std::string queryfile; +}; + +int main(int argc, char *argv[]) { + ios_base::sync_with_stdio(false); + using namespace clipp; + enum class mode { + validate, steps, decodeAllEqs, query, help + }; + mode selected = mode::help; + Opts opt; + + auto validate_mode = ( + command("validate").set(selected, mode::validate), + required("-i", "--indexDir") & + value("index_dir", opt.indexDir) % "Directory containing index files.", + required("-eq", "--eqCls-lst-file") & + value("eqCls_list_filename", opt.eqlistfile) % + "File containing list of equivalence (color) classes.", + required("-s", "--numSamples") & + value("numSamples", opt.numSamples) % "Total number of experiments (samples)." + ); + + auto steps_mode = ( + command("steps").set(selected, mode::steps), + required("-i", "--indexDir") & + value("index_dir", opt.indexDir) % "Directory containing index files.", + required("-s", "--numSamples") & + value("numSamples", opt.numSamples) % "Total number of experiments (samples)." + ); + + auto decodeAllEqs_mode = ( + command("decodeAllEqs").set(selected, mode::decodeAllEqs), + required("-i", "--indexDir") & + value("index_dir", opt.indexDir) % "Directory containing index files.", + required("-s", "--numSamples") & + value("numSamples", opt.numSamples) % "Total number of experiments (samples)." + ); + + auto query_mode = ( + command("query").set(selected, mode::query), + required("-i", "--indexDir") & + value("index_dir", opt.indexDir) % "Directory containing index files.", + required("-g", "--cqf") & + value("kmer-graph", opt.cqffile) % "cqf file containing the kmer mapping to color class ids.", + required("-q", "--queryFile") & + value("query-file", opt.queryfile) % "query file containing list of sequences.", + required("-o", "--outputFile") & + value("query-output-file", opt.outputfile) % "file to write query results.", + required("-s", "--numSamples") & + value("numSamples", opt.numSamples) % "Total number of experiments (samples)." 
+ ); + + auto cli = ( + (validate_mode | steps_mode | decodeAllEqs_mode | query_mode | command("help").set(selected, mode::help) + ) + ); + + decltype(parse(argc, argv, cli)) res; + try { + res = parse(argc, argv, cli); + } catch (std::exception &e) { + std::cout << "\n\nParsing command line failed with exception: " << e.what() << "\n"; + std::cout << "\n\n"; + std::cout << make_man_page(cli, "MSF"); + return 1; + } + if (!res) { + std::cerr << "Cannot parse the input arguments\n"; + std::exit(1); + } + if (selected == mode::help) { + std::cerr << make_man_page(cli, "MSF"); + std::exit(1); + } + + uint64_t numWrds = (uint64_t) std::ceil((double) opt.numSamples / 64.0); + MSFQuery msfQuery(opt.numSamples); + msfQuery.loadIdx(opt.indexDir); + uint64_t eqCount = msfQuery.parentbv.size() - 1; + std::cerr << "total # of equivalence classes is : " << eqCount << "\n"; + + LRUCacheMap cache_lru(100000); + QueryStats queryStats; + queryStats.numSamples = opt.numSamples; + + if (selected == mode::validate) { + eqvec bvs; + loadEqs(opt.eqlistfile, bvs); + uint64_t cntr{0}; + for (uint64_t idx = 0; idx < eqCount; idx++) { + nonstd::optional dummy{nonstd::nullopt}; + std::vector newEq = msfQuery.buildColor(idx, queryStats, &cache_lru, nullptr, dummy); + std::vector oldEq(numWrds); + buildColor(bvs, oldEq, idx, opt.numSamples); + if (newEq != oldEq) { + std::cerr << "AAAAA! LOOOSER!!\n"; + std::cerr << cntr << ": index=" << idx << "\n"; + std::cerr << "n "; + for (auto k = 0; k < newEq.size(); k++) { + std::cerr << std::bitset<64>(newEq[k]); + } + std::cerr << "\no "; + for (auto k = 0; k < oldEq.size(); k++) { + std::cerr << std::bitset<64>(oldEq[k]); + } + std::cerr << "\n"; + std::exit(1); + } + cntr++; + if (cntr % 10000000 == 0) { + std::cerr << cntr << " eqs were the same\n"; + } + } + std::cerr << "WOOOOW! 
Validation passed\n"; + } else if (selected == mode::decodeAllEqs) { + std::random_device r; + + // Choose a random mean between 1 and 6 + std::default_random_engine e1(r()); + std::uniform_int_distribution uniform_dist(0, eqCount - 1); + for (uint64_t idx = 0; idx < 182169; idx++) { + nonstd::optional dummy{nonstd::nullopt}; + std::vector newEq = msfQuery.buildColor(uniform_dist(e1), + queryStats, + &cache_lru, + nullptr, + dummy, + false); + /*if (idx % 10000000 == 0) { + std::cerr << idx << " eqs decoded\n"; + }*/ + } + std::cerr << "cache was used " << queryStats.cacheCntr << " times " << queryStats.noCacheCntr << "\n"; + //std::cerr << "select time was " << queryStats.selectTime.count() << "s, flip time was " + // << queryStats.flipTime.count() << '\n'; + //std::cerr << "total selects = " << queryStats.totSel << ", time per select = " + // << queryStats.selectTime.count() / queryStats.totSel << '\n'; + std::cerr << "total # of queries = " << queryStats.totEqcls + << ", total # of queries rooted at a non-zero node = " << queryStats.rootedNonZero << "\n"; + + } else if (selected == mode::steps) { + uint32_t zero = msfQuery.parentbv.size() - 1; + std::vector> steps(eqCount, {0, 0}); + for (uint64_t idx = 0; idx < eqCount; idx++) { + auto row = recursiveSteps(idx, msfQuery.parentbv, steps, zero); + std::cout << msfQuery.parentbv[idx] << "\t" + << idx << "\t" + << row.first << "\t" << row.second << "\n"; + } + } else if (selected == mode::query) { + CQF dbg(opt.cqffile, false); + std::cerr << "Done loading cqf.\n"; + uint32_t seed = 2038074743; + uint64_t total_kmers = 0; + // loading kmers + mantis::QuerySets multi_kmers = Kmer::parse_kmers(opt.queryfile.c_str(), + seed, + dbg.range(), + 20, + total_kmers); + std::cerr << "Done loading query file : # of seqs: " << multi_kmers.size() << "\n"; + RankScores rs(1); + std::ofstream opfile(opt.outputfile); + { + CLI::AutoTimer timer{"Query time ", CLI::Timer::Big}; + for (auto &kmers : multi_kmers) { + opfile << "seq" << queryStats.cnt++ << '\t' << kmers.size() << '\n'; + mantis::QueryResult result = findSamples(kmers, dbg, msfQuery, cache_lru, + nullptr, queryStats); + for (auto it = result.begin(); it != result.end(); ++it) { + if (*it > 0) { + auto i = std::distance(result.begin(), it); + opfile << i << '\t' << *it << '\n'; + } + //opfile << it->first/*cdbg.get_sample(it->first)*/ << '\t' << it->second << '\n'; + } + } + opfile.close(); + } + std::cerr << "cache was used " << queryStats.cacheCntr << " times " << queryStats.noCacheCntr << "\n"; + std::cerr << "select time was " << queryStats.selectTime.count() << "s, flip time was " + << queryStats.flipTime.count() << '\n'; + std::cerr << "total selects = " << queryStats.totSel << ", time per select = " + << queryStats.selectTime.count() / queryStats.totSel << '\n'; + std::cerr << "total # of queries = " << queryStats.totEqcls + << ", total # of queries rooted at a non-zero node = " << queryStats.rootedNonZero << "\n"; + /*for (auto &kv : queryStats.numOcc) { + std::cout << kv.first << '\t' << kv.second << '\n'; + }*/ + } +}
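
A few self-contained C++ sketches follow; they are reviewer illustrations, not part of the patch and not the Mantis/CQF API. The rewritten qf_merge and qf_multi_merge keep the classic sorted-merge control flow: repeatedly take whichever iterator currently points at the smaller hash, insert it into the output filter, and advance only that iterator, then drain whatever remains. A minimal sketch of that control flow, with plain std::vector streams and a std::map standing in for the output CQF:

    #include <cstdint>
    #include <iostream>
    #include <map>
    #include <utility>
    #include <vector>

    // Two "iterators" over (hash, count) pairs, already sorted by hash, mimic
    // the two input CQFs; a std::map stands in for the output CQF so duplicate
    // hashes simply accumulate their counts.
    int main() {
        std::vector<std::pair<uint64_t, uint64_t>> a{{1, 2}, {4, 1}, {9, 3}};
        std::vector<std::pair<uint64_t, uint64_t>> b{{3, 5}, {4, 2}, {10, 1}};
        std::map<uint64_t, uint64_t> c;

        auto ia = a.begin(), ib = b.begin();
        while (ia != a.end() && ib != b.end()) {
            if (ia->first < ib->first) { c[ia->first] += ia->second; ++ia; }  // advance the smaller side
            else                       { c[ib->first] += ib->second; ++ib; }
        }
        for (; ia != a.end(); ++ia) c[ia->first] += ia->second;  // drain whichever stream is left
        for (; ib != b.end(); ++ib) c[ib->first] += ib->second;

        for (const auto &kv : c) std::cout << kv.first << " -> " << kv.second << "\n";
    }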
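
The kmer code in src/monochromatic_component_iterator.cc computes reverse complements of 2-bit-packed k-mers with the Jellyfish-style bit-twiddling used in operator-(kmer). A stand-alone version of the same trick, assuming an encoding in which the complement of a base value x is 3 - x (so A and T, C and G are bitwise complements):

    #include <cassert>
    #include <cstdint>
    #include <iostream>

    // Reverse-complement a k-mer packed 2 bits per base (k <= 32).
    uint64_t reverse_complement_2bit(uint64_t v, int k) {
        assert(k >= 1 && k <= 32);
        v = (v >> 32) | (v << 32);                                                      // swap 32-bit halves
        v = ((v >> 16) & 0x0000ffff0000ffffULL) | ((v << 16) & 0xffff0000ffff0000ULL);  // swap 16-bit blocks
        v = ((v >>  8) & 0x00ff00ff00ff00ffULL) | ((v <<  8) & 0xff00ff00ff00ff00ULL);  // swap bytes
        v = ((v >>  4) & 0x0f0f0f0f0f0f0f0fULL) | ((v <<  4) & 0xf0f0f0f0f0f0f0f0ULL);  // swap nibbles
        v = ((v >>  2) & 0x3333333333333333ULL) | ((v <<  2) & 0xccccccccccccccccULL);  // swap base pairs
        v = ~v;                       // complement every base
        return v >> (64 - 2 * k);     // drop the unused high-order bases
    }

    int main() {
        // "AC" with A=00, C=01, G=10, T=11 is 0b0001; its reverse complement "GT" is 0b1011 = 11.
        std::cout << reverse_complement_2bit(0b0001, 2) << "\n";   // prints 11
    }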
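
monochromatic_component_iterator::operator* is essentially a flood fill: starting from an unvisited k-mer it expands only to neighbors whose color (equivalence-class) id matches the seed, counting the nodes it reaches, while a global visited set ensures each component is reported once. A toy flood fill over an explicit adjacency list that shows the same bookkeeping; the real code walks CQF neighbors instead of an adjacency list:

    #include <cstdint>
    #include <iostream>
    #include <queue>
    #include <vector>

    int main() {
        // toy graph: adjacency list plus one color id per node
        std::vector<std::vector<int>> adj = {{1, 2}, {0, 2}, {0, 1, 3}, {2}};
        std::vector<int> color = {7, 7, 7, 9};

        std::vector<bool> visited(adj.size(), false);
        for (int seed = 0; seed < static_cast<int>(adj.size()); ++seed) {
            if (visited[seed]) continue;
            uint64_t nodeCnt = 0;
            std::queue<int> work;
            work.push(seed);
            while (!work.empty()) {
                int u = work.front(); work.pop();
                if (visited[u]) continue;
                visited[u] = true;
                ++nodeCnt;
                for (int v : adj[u])
                    if (!visited[v] && color[v] == color[u])  // stay inside the same color
                        work.push(v);
            }
            std::cout << "component seeded at " << seed << " has " << nodeCnt << " nodes\n";
        }
    }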
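
src/squeakrconfig.cc reads the Squeakr index configuration by seeking to the end of the .squeakr file and reading a fixed-size record from the tail, then checking its endianness and version fields. A sketch of that read-a-trailer pattern; the trailer struct and the file name below are hypothetical stand-ins, the real field layout lives in include/squeakrconfig.h:

    #include <cstdint>
    #include <fstream>
    #include <iostream>
    #include <string>

    // Hypothetical fixed-size trailer appended to the data file.
    struct trailer {
        uint64_t kmer_size;
        uint64_t version;
        uint64_t endianness;
    };

    bool read_trailer(const std::string &path, trailer &t) {
        std::ifstream in(path, std::ios::binary);
        if (!in) return false;
        in.seekg(0, std::ios::end);                     // jump to end of file
        std::streamoff file_size = in.tellg();
        if (file_size < static_cast<std::streamoff>(sizeof(trailer))) return false;
        in.seekg(file_size - static_cast<std::streamoff>(sizeof(trailer)));
        in.read(reinterpret_cast<char *>(&t), sizeof(trailer));
        return static_cast<bool>(in);
    }

    int main() {
        trailer t{};
        if (read_trailer("sample.squeakr", t))          // hypothetical file name
            std::cout << "k = " << t.kmer_size << "\n";
    }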
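
MSFQuery::buildColor in src/walkMSF.cc reconstructs a color class by walking parent pointers toward the root of the spanning forest and XOR-ing in the per-edge delta it passes at each step; the boundary bit vector and its select structure only serve to locate where each delta run starts. A toy version of that decoding with plain vectors in place of the sdsl containers and without the LRU cache:

    #include <cstdint>
    #include <iostream>
    #include <vector>

    int main() {
        // parent[] gives the spanning-tree parent of each color class (the root
        // is its own parent and stands for the all-zero color); deltas[i] lists
        // which sample bits differ between class i and parent[i].
        std::vector<uint32_t> parent = {0, 0, 1, 1};
        std::vector<std::vector<uint32_t>> deltas = {
            {},        // root: empty color
            {0, 3},    // class 1 = root with samples 0 and 3 turned on
            {3},       // class 2 = class 1 with sample 3 turned off
            {5}        // class 3 = class 1 with sample 5 turned on
        };
        const std::size_t num_samples = 8;

        auto decode = [&](uint32_t id) {
            std::vector<int> bits(num_samples, 0);
            for (uint32_t i = id; ; i = parent[i]) {
                for (uint32_t s : deltas[i]) bits[s] ^= 1;   // apply this edge's delta
                if (parent[i] == i) break;                   // reached the root
            }
            return bits;
        };

        for (int b : decode(3)) std::cout << b;   // prints 10010100
        std::cout << "\n";
    }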