diff --git a/GNUmakefile.in b/GNUmakefile.in
index c33b5d9b..6900886f 100644
--- a/GNUmakefile.in
+++ b/GNUmakefile.in
@@ -13,6 +13,7 @@ AR = ar
 CC = @CC@
 CXX = @CXX@
 CPPFLAGS := -std=c++17
+MTFLAGS := -L${LD_LIBRARY_PATH} -lwh -lpthread
 DEPSDIR := .deps
 DEPCFLAGS = -MD -MF $(DEPSDIR)/$*.d -MP
 LIBS = @LIBS@ $(MASSTREEDIR)/libjson.a $(LIBMALLOC) -lpthread -lm -lnuma
@@ -366,6 +367,12 @@ unit-hashtable: $(OBJ)/unit-hashtable.o $(STO_DEPS)
 unit-dboindex: $(OBJ)/unit-dboindex.o $(INDEX_DEPS)
 	$(CXX) $(CXXFLAGS) $(OPTFLAGS) -o $@ $< $(INDEX_DEPS) $(LDFLAGS) $(LIBS)
 
+unit-dboindexmasstrie: $(OBJ)/unit-dboindexmasstrie.o $(INDEX_DEPS)
+	$(CXX) $(CXXFLAGS) $(OPTFLAGS) -o $@ $< $(INDEX_DEPS) $(LDFLAGS) $(LIBS)
+
+unit-test_MTrie: $(OBJ)/unit-test_MTrie.o $(INDEX_DEPS)
+	$(CXX) $(CXXFLAGS) $(OPTFLAGS) -o $@ $< $(INDEX_DEPS) $(LDFLAGS) $(LIBS) $(MTFLAGS)
+
 unit-mvcc-access-all: $(OBJ)/unit-mvcc-access-all.o $(INDEX_DEPS) $(XXHASH_OBJ)
 	$(CXX) $(CXXFLAGS) $(OPTFLAGS) -o $@ $< $(INDEX_DEPS) $(XXHASH_OBJ) $(LDFLAGS) $(LIBS)
diff --git a/MassTrie-beta/MassTrie.hh b/MassTrie-beta/MassTrie.hh
new file mode 100644
index 00000000..53cfd776
--- /dev/null
+++ b/MassTrie-beta/MassTrie.hh
@@ -0,0 +1,318 @@
+#pragma once
+
+// NOTE(review): the original include targets were stripped during extraction;
+// restored to the headers the code actually uses (printf/malloc/memcpy/
+// INT_MAX/uintptr_t/iostream) -- confirm against the upstream file.
+#include <iostream>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <climits>
+#include <cstdint>
+
+#include "wormhole/lib.h"
+#include "wormhole/kv.h"
+#include "wormhole/wh.h"
+
+#define NUM_THREADS 64 // currently unused here; presumably for the test harness
+#define MAX_SIZE 64    // capacity (bytes) of the key/value scratch buffers
+
+using namespace std;
+
+//~~~~~~~~~CLASS MASSTRIE~~~~~~~~~~~~~~
+// Thin RAII wrapper around a wormhole ordered index mapping byte-string keys
+// to byte-string values.
+// NOTE(review): NOT thread-safe -- a single wormref, a single iterator and
+// the shared scratch buffers / `r` flag are reused by every call; confirm
+// callers are single-threaded. Copying an instance would double-free the
+// handles -- do not copy MassTrie objects.
+class MassTrie
+{
+
+public:
+    // constructor: build the wormhole, take a reference, create an iterator,
+    // and allocate the two MAX_SIZE scratch buffers reused by
+    // get/find_closest/delete_all.
+    MassTrie()
+    {
+        wh = wh_create();
+        ref = wh_ref(this->wh);
+        iter = wh_iter_create(this->ref);
+        this->kbuf_out = malloc(MAX_SIZE);
+        this->vbuf_out = malloc(MAX_SIZE);
+        r = false;
+    }
+
+    // destructor: tear down in reverse order of construction, then release
+    // the scratch buffers.
+    ~MassTrie()
+    {
+        wh_iter_destroy(this->iter);
+        wh_unref(this->ref);
+        wh_clean(this->wh);
+        wh_destroy(this->wh);
+        free(kbuf_out);
+        free(vbuf_out);
+    }
+
+    //~~~~~~~~~MASSTRIE FUNCTIONS~~~~~~~~~~~~~~
+
+    // put function - insert (or overwrite) key -> value; returns wh_put's result
+    bool put(const void *key, int klen, const void *value, int vlen)
+    {
+        return (wh_put(this->ref, key, klen, value, vlen));
+    }
+
+    // get function - look `key` up; on success the value is copied into the
+    // shared vbuf_out scratch buffer and a pointer to it is returned, else
+    // nullptr. The result is owned by this object and is clobbered by the
+    // next get/find_closest/delete_all call. (`ref` is taken as a parameter
+    // to preserve the original interface; callers pass this->ref.)
+    void *get(struct wormref *const ref, const void *key, int klen)
+    {
+        u32 vlen_out = 0;
+        // BUGFIX: sizeof(vbuf_out) is the size of a void* (8 bytes), which
+        // silently truncated every value; the buffer really holds MAX_SIZE.
+        r = wh_get(ref, key, klen, vbuf_out, MAX_SIZE, &vlen_out);
+        return r ? vbuf_out : nullptr;
+    }
+
+    // delete function - remove `key`; returns wh_del's result
+    bool del(const void *key, int klen)
+    {
+        return (wh_del(this->ref, key, klen));
+    }
+
+    // probe function - returns true if key exists, false otherwise
+    bool probe(const void *key, int klen)
+    {
+        r = (wh_probe(this->ref, key, klen));
+        return r;
+    }
+
+    // finds the key currently in the MassTrie whose stored pointer value is
+    // numerically closest to the pointer value encoded at `key`.
+    // Returns a malloc'd MAX_SIZE copy of that key (caller frees), or NULL
+    // when the index is empty.
+    // NOTE(review): assumes every key encodes a uintptr_t in its first
+    // sizeof(uintptr_t) bytes -- TODO confirm against callers.
+    void *find_closest(const void *key)
+    {
+        u32 klen_out = 0;
+        u32 vlen_out = 0;
+        uintptr_t target = 0;
+        memcpy(&target, key, sizeof(target));
+        uintptr_t best = UINTPTR_MAX; // smallest distance seen so far
+        void *res = NULL;
+
+        wh_iter_seek(this->iter, NULL, 0); // seek to the head
+
+        while (wh_iter_valid(this->iter))
+        {
+            r = wh_iter_peek(this->iter, kbuf_out, MAX_SIZE, &klen_out, vbuf_out, MAX_SIZE, &vlen_out);
+
+            if (r)
+            {
+                // BUGFIX: the old code measured |(long)kbuf_out - (long)key|,
+                // i.e. the distance between the two buffer ADDRESSES, which is
+                // identical for every entry (and `int min` could overflow).
+                // Compare the pointer value stored IN the key bytes instead,
+                // with overflow-safe unsigned distance math.
+                uintptr_t curr = 0;
+                memcpy(&curr, kbuf_out, sizeof(curr));
+                const uintptr_t dist = (curr > target) ? (curr - target) : (target - curr);
+
+                if (dist < best)
+                {
+                    // allocate the result lazily, exactly once
+                    if (!res)
+                        res = malloc(MAX_SIZE);
+
+                    // error handling
+                    if (res == NULL)
+                    {
+                        printf("Error! memory not allocated.");
+                        exit(1);
+                    }
+
+                    best = dist;
+                    memcpy(res, kbuf_out, MAX_SIZE); // keep the closest key so far
+                }
+            }
+            else
+            {
+                printf("ERROR!\n");
+            }
+
+            wh_iter_skip1(this->iter);
+
+            // BUGFIX: sizeof(kbuf_out/vbuf_out) is sizeof(void*) == 8; wipe
+            // the whole MAX_SIZE scratch area between iterations.
+            memset(kbuf_out, 0, MAX_SIZE);
+            memset(vbuf_out, 0, MAX_SIZE);
+        }
+
+        return res;
+    }
+
+    // deletes all entries from the MassTrie
+    void delete_all()
+    {
+        u32 klen_out = 0;
+        u32 vlen_out = 0;
+
+        wh_iter_seek(this->iter, NULL, 0); // seek to the head
+
+        while (wh_iter_valid(this->iter))
+        {
+            r = wh_iter_peek(this->iter, kbuf_out, MAX_SIZE, &klen_out, vbuf_out, MAX_SIZE, &vlen_out);
+
+            if (r)
+            {
+                // BUGFIX: was del(kbuf_out, sizeof(kbuf_out)) -- i.e. 8 bytes
+                // of a void* -- so any key whose length differed was never
+                // removed; use the real length reported by the iterator.
+                this->del(kbuf_out, (int)klen_out);
+            }
+            else
+            {
+                printf("ERROR!\n");
+            }
+
+            // Deleting the key the iterator is parked on can invalidate it;
+            // re-seek to the (new) head rather than skip1, which could skip
+            // entries after a delete. Everything before the head is gone.
+            wh_iter_seek(this->iter, NULL, 0);
+
+            memset(kbuf_out, 0, MAX_SIZE);
+            memset(vbuf_out, 0, MAX_SIZE);
+        }
+    }
+
+    // data members (owning raw handles -- see the no-copy note on the class)
+    struct wormhole *wh;        // the underlying wormhole index
+    struct wormref *ref;        // per-instance reference used by every call
+    struct wormhole_iter *iter; // shared iterator (find_closest/delete_all)
+    void *kbuf_out;             // MAX_SIZE key scratch buffer
+    void *vbuf_out;             // MAX_SIZE value scratch buffer
+    bool r;                     // last wormhole call's success flag (scratch)
+
+}; // class MassTrie
+
+/**
+// override the << operation (disabled in the original; kept for reference)
+ostream &operator<<(ostream &os, MassTrie *m)
+{
+    u32 klen_out = 0;
+    char kbuf_out[MAX_SIZE] = {};
+    u32 vlen_out = 0;
+    char vbuf_out[MAX_SIZE] = {};
+    bool r;
+
+    wh_iter_seek(m->iter, NULL, 0); // seek to the head
+    printf("wh_iter_seek \"\"\n");
+    while (wh_iter_valid(m->iter)) {
+        r = wh_iter_peek(m->iter, kbuf_out, MAX_SIZE, &klen_out, vbuf_out, MAX_SIZE, &vlen_out);
+        if (r) {
+            os << "wh_iter_peek: key = " << kbuf_out << " , klen = " << klen_out
+               << " , value = " << vbuf_out << " , vlen = " << vlen_out << endl;
+        }
+        wh_iter_skip1(m->iter);
+        memset(kbuf_out, 0, sizeof(kbuf_out));
+        memset(vbuf_out, 0, sizeof(vbuf_out));
+    }
+    return os;
+}
+**/
diff --git a/MassTrie-beta/wormhole/LICENSE b/MassTrie-beta/wormhole/LICENSE
new file mode 100644
index 00000000..f288702d
--- /dev/null
+++ b/MassTrie-beta/wormhole/LICENSE
@@ -0,0 +1,674 @@
+                    GNU GENERAL PUBLIC LICENSE
+                       Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+ + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. 
For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. 
The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. 
+ + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. 
The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. 
+ + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. 
+ + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. 
This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. 
For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. 
Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. 
+ + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. 
Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. 
+ + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + Copyright (C) + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +. 
diff --git a/MassTrie-beta/wormhole/Makefile b/MassTrie-beta/wormhole/Makefile new file mode 100644 index 00000000..f00e6b59 --- /dev/null +++ b/MassTrie-beta/wormhole/Makefile @@ -0,0 +1,45 @@ +# Makefile +# rules (always with .out) +# SRC-X.out += abc # extra source: abc.c +# MOD-X.out += abc # extra module: abc.c abc.h +# ASM-X.out += abc # extra assembly: abc.S +# DEP-X.out += abc # extra dependency: abc +# FLG-X.out += -finline # extra flags +# LIB-X.out += abc # extra -labc options + +# X.out : xyz.h xyz.c # for extra dependences that are to be compiled/linked. + +# X => X.out +TARGETS += easydemo concbench stresstest +# X => X.c only +SOURCES += +# X => X.S only +ASSMBLY += +# X => X.c X.h +MODULES += lib kv wh +# X => X.h +HEADERS += ctypes + +FLG += +LIB += m + +UNAME_S := $(shell uname -s) +ifeq ($(UNAME_S),FreeBSD) +LIB += execinfo +endif + +# when $ make FORKER_PAPI=y +ifeq ($(strip $(FORKER_PAPI)),y) +LIB += papi +FLG += -DFORKER_PAPI +endif + +bin : libwh.so +libwh.so : Makefile Makefile.common lib.c lib.h kv.c kv.h wh.c wh.h wh.strip + $(eval ALLFLG := $(CSTD) $(EXTRA) $(FLG) -shared -fPIC) + $(eval ALLLIB := $(addprefix -l,$(LIB) $(LIB-$@))) + $(CCC) $(ALLFLG) -o $@ lib.c kv.c wh.c $(ALLLIB) + strip --strip-all --discard-all @wh.strip $@ + + +include Makefile.common diff --git a/MassTrie-beta/wormhole/Makefile.common b/MassTrie-beta/wormhole/Makefile.common new file mode 100644 index 00000000..ecd761e7 --- /dev/null +++ b/MassTrie-beta/wormhole/Makefile.common @@ -0,0 +1,216 @@ +#usage: include Makefile.common at the end of your Makefile + +# no builtin rules/vars (CC, CXX, etc. 
are still defined but will be empty) +MAKEFLAGS += -r -R + +HDR = $(addsuffix .h,$(MODULES) $(HEADERS)) +SRC = $(addsuffix .c,$(MODULES) $(SOURCES)) +ASM = $(addsuffix .S,$(ASSMBLY)) +OBJ = $(addsuffix .o,$(MODULES) $(SOURCES) $(ASSEMBLY)) +DEP = Makefile.common Makefile $(HDR) $(EXTERNDEP) $(EXTERNSRC) +BIN = $(addsuffix .out,$(TARGETS)) +DIS = $(addsuffix .dis,$(TARGETS)) + +# clang: +# EXTRA="-Rpass=loop-vectorize" # IDs loops that were successfully V-ed +# EXTRA="-Rpass-missed=loop-vectorize" # IDs loops that failed V +# EXTRA="-Rpass-analysis=loop-vectorize" # IDs the statements that caused V to fail +# EXTRA="-Rpass=\ *" # remarks for all passes +# other passes: https://llvm.org/docs/Passes.html + +O ?= rg + +# predefined OPT: make O={rg,r,0g,3g,p,0s,3s,cov,mc,hc,wn,stk} +ifeq ($O,rg) # make O=rg +OPT ?= -DNDEBUG -g3 -O3 -flto -fno-stack-protector +else ifeq ($O,r) # make O=r (for release) +OPT ?= -DNDEBUG -O3 -flto -fno-stack-protector +else ifeq ($O,ns) # make O=ns (no signal handlers) +OPT ?= -DNDEBUG -O3 -flto -fno-stack-protector -DNOSIGNAL +else ifeq ($O,0g) # make O=0g +OPT ?= -g3 -O0 -fno-inline +else ifeq ($O,2g) # make O=2g +OPT ?= -g3 -O2 +else ifeq ($O,3g) # make O=3g +OPT ?= -g3 -O3 -flto -fno-inline +else ifeq ($O,p) # make O=p (profiling: rg+noinline) +OPT ?= -DNDEBUG -g3 -O3 -flto -fno-stack-protector -fno-inline +else ifeq ($O,0s) # make O=0s (address sanitizer) +OPT ?= -g3 -O0 -fno-inline -fsanitize=address -fno-omit-frame-pointer -fno-optimize-sibling-calls -DHEAPCHECKING +else ifeq ($O,3s) # make O=3s (address sanitizer) +OPT ?= -g3 -O3 -fno-inline -fsanitize=address -fno-omit-frame-pointer -fno-optimize-sibling-calls -DHEAPCHECKING +else ifeq ($O,t) # make O=0t (thread sanitizer) +OPT ?= -g3 -O1 -fno-inline -fsanitize=thread -fno-stack-protector +else ifeq ($O,cov) # make O=cov (for gcov) +OPT ?= -g3 -DNDEBUG -O0 --coverage +CCC = gcc +else ifeq ($O,mc) # make O=mc (for valgrind memcheck) +OPT ?= -g3 -O1 -fno-inline -DHEAPCHECKING +ARCH 
?= broadwell +else ifeq ($O,hc) # make O=hc (for gperftools heapcheck) +OPT ?= -g3 -O1 -fno-inline +LIB += tcmalloc +else ifeq ($O,wn) # more warning +OPT ?= -g3 -O3 -Wvla -Wformat=2 -Wconversion -Wstrict-prototypes -Wmissing-prototypes +else ifeq ($O,stk) # check stack usage with gcc +OPT ?= -g3 -O3 -DNDEBUG -fstack-usage +CCC = gcc +endif + +# malloc: g:glibc, t:tcmalloc, j:jemalloc +M ?= g + +ifeq ($M,t) + LIB += tcmalloc + FLG += -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free +else ifeq ($M,j) + LIB += jemalloc +endif + +UNAME_S := $(shell uname -s) +ifeq ($(UNAME_S),Linux) + CHECK_S := -D__linux__ + LIB += rt +else ifeq ($(UNAME_S),FreeBSD) + CHECK_S := -D__FreeBSD__ + FLG += -I/usr/local/include -L/usr/local/lib + LIB += rt + LIB += execinfo + TPUT := /usr/local/bin/tput +else ifeq ($(UNAME_S),Darwin) + CHECK_S := -D__APPLE__ -D__MACH__ + # do nothing +else + $(error "Supported Platforms: Linux, FreeBSD, Darwin") +endif +TPUT ?= tput + +CCC ?= clang +CSTD = -std=gnu18 +XCC ?= clang++ +XSTD = -std=gnu++17 + +UNAME_M := $(shell uname -m) +ifeq ($(UNAME_M),aarch64) # "native" does not work for clang@aarch64 + CHECK_M := -D__aarch64__ + ARCH ?= armv8-a+crc +else ifeq ($(UNAME_M),arm64) # "native" does not work for clang@aarch64 + CHECK_M := -D__aarch64__ + ARCH ?= armv8-a+crc +else ifeq ($(UNAME_M),x86_64) + CHECK_M := -D__x86_64__ + ARCH ?= native +else ifeq ($(UNAME_M),amd64) # freebsd + CHECK_M := -D__x86_64__ + ARCH ?= native +else + $(error "Supported Platforms: aarch64, x86_64") +endif + +TUNE ?= native + +NBI += memcpy memmove memcmp + +# minimal requirement on x86_64: -march=nehalem +# minimal requirement on aarch64: -march=armv8-a+crc +FLG += -march=$(ARCH) -mtune=$(TUNE) +FLG += -pthread -Wall -Wextra -Wshadow #-Weverything +FLG += $(addprefix -fno-builtin-,$(NBI)) +FLG += $(OPT) + +ifneq ($(shell $(CCC) --version 2>/dev/null | grep clang),) +FLG += -ferror-limit=3 +CCCTYPE := clang +else ifneq ($(shell $(CCC) --version 
2>/dev/null | grep gcc),) +FLG += -fmax-errors=3 +FLG += -Wno-unknown-pragmas +CCCTYPE := gcc +else + $(error "Supported Compilers: clang, gcc") +endif + +ifeq ($(CCCTYPE),clang) + CCINST = /usr/lib/clang/$(shell $(CCC) --version 2>/dev/null | awk '/^clang/ { print $$3 }') + CCINC = $(CCINST)/include +else ifeq ($(CCCTYPE),gcc) + CCINST = /usr/lib/gcc/$(shell $(CCC) -dumpmachine)/$(shell $(CCC) -dumpversion) + CCINC = $(CCINST)/include $(CCINST)/include-fixed +endif +CCINC = /usr/include /usr/local/include + +ifneq ($(shell find $(CCINC) -name backtrace-supported.h 2>/dev/null),) + LIB += backtrace + FLG += -DBACKTRACE +endif + +ifneq ($(shell find $(CCINC) -name liburing.h 2>/dev/null),) + LIB += uring + FLG += -DLIBURING +endif + + +uniq = $(if $1,$(firstword $1) $(call uniq,$(filter-out $(firstword $1),$1))) +magentatxt := $(shell $(TPUT) setaf 5) +greentxt := $(shell $(TPUT) setaf 2) +bluetxt := $(shell $(TPUT) setaf 4) +normaltxt := $(shell $(TPUT) sgr0) + +.PHONY : bin dis def clean cleanx check tags + +bin : $(BIN) +dis : $(DIS) bin +.DEFAULT_GOAL = bin +.SECONDEXPANSION: + +ifeq ($(J),o) +# DANGER. Don't use unless it works! 
+# build from .o files but target-specific flags are missing in %.o : %.x +%.out : %.o $(OBJ) $$(addsuffix .o,$$(SRC-$$@) $$(MOD-$$@) $$(ASM-$$@)) + $(eval ALLFLG := $(CSTD) $(EXTRA) $(FLG) $(FLG-$*) $(FLG-$*.o) $(FLG-$@) -rdynamic) + $(eval ALLLIB := $(addprefix -l,$(LIB) $(LIB-$@))) + $(CCC) $(ALLFLG) -o $@ $^ $(ALLLIB) +# +else # default: all-in-one command +%.out : %.c $(SRC) $(ASM) $(DEP) $$(DEP-$$@) $$(addsuffix .c,$$(SRC-$$@) $$(MOD-$$@)) $$(addsuffix .h,$$(HDR-$$@) $$(MOD-$$@)) $$(addsuffix .S,$$(ASM-$$@)) + $(eval ALLSRC := $(SRC) $(addsuffix .c,$(SRC-$@) $(MOD-$@)) $(ASM) $(addsuffix .S,$(ASM-$@))) + $(eval UNIQSRC := $(call uniq,$(ALLSRC))) + $(eval ALLFLG := $(CSTD) $(EXTRA) $(FLG) $(FLG-$@) -rdynamic) + $(eval ALLLIB := $(addprefix -l,$(LIB) $(LIB-$@))) + @printf '$(bluetxt)$@$(magentatxt) <= $(greentxt)$< $(UNIQSRC)$(normaltxt)\n' + $(CCC) $(ALLFLG) -o $@ $< $(UNIQSRC) $(ALLLIB) +# +endif + + +%.dis : %.out + objdump -SlwtC $< 1>$@ 2>/dev/null + +%.o : %.cc $(DEP) $$(DEP-$$@) $$(addsuffix .h,$$(HDR-$$@) $$(MOD-$$@)) + $(XCC) $(XSTD) $(EXTRA) $(FLG) $(FLG-$*) $(FLG-$@) -o $@ -c $< + +%.o : %.c $(DEP) $$(DEP-$$@) $$(addsuffix .h,$$(HDR-$$@) $$(MOD-$$@)) + $(CCC) $(CSTD) $(EXTRA) $(FLG) $(FLG-$*) $(FLG-$@) -o $@ -c $< + +%.o : %.S $(DEP) $$(DEP-$$@) $$(addsuffix .h,$$(HDR-$$@) $$(MOD-$$@)) + $(CCC) $(CSTD) $(EXTRA) $(FLG) $(FLG-$*) $(FLG-$@) -o $@ -c $< + +%.s : %.c $(DEP) $$(DEP-$$@) $$(addsuffix .h,$$(HDR-$$@) $$(MOD-$$@)) + $(CCC) $(CSTD) $(EXTRA) $(FLG) $(FLG-$*) $(FLG-$*.o) -S -o $@ -c $< + +def : + $(CCC) $(FLG) -dM -E - = "h"; the iter will be placed on "hello" + r = wh_iter_valid(iter); // r == true; You should always check if iter is valid after a seek() and skip() + r = wh_iter_peek(iter, buf, 6, &len_out, NULL, 0, NULL); // only need the key: will get "hello" and 5 + r = wh_iter_peek(iter, NULL, 0, NULL, buf, 6, &len_out); // only need the value: will get "world!" 
and 6 + // (you can also get both key and value using one call with two buffers) + wh_iter_skip1(iter); // skip the current key; equivalent to wh_iter_skip(iter, 1); + r = wh_iter_valid(iter); // r == false; already passed the end of the dataset + wh_iter_park(iter); // an iter may hold locks; It's a good manner to "park" the iter before sleep. + sleep(10); // not interacting with the wormhole instance. + wh_iter_seek(iter, NULL, 0); // need to do another seek to reactivate the iter + r = wh_iter_valid(iter); // r == true; on the zero-sized key now + wh_iter_destroy(iter); // now we're done with the iter + wh_del(ref, "hello", 5); // delete a key + wh_del(ref, NULL, NULL); // delete the zero-sized key + wh_unref(ref); // the current thread is no longer interested in accessing the index + wh_destroy(wh); // fully destroy the index; all references should have been released before calling this +} +``` + +## Integer keys + +Wormhole supports binary keys, which means you don't need to print integers into text when using Wormhole to index integer keys. +Here are some quick examples for using Wormhole as an integer-key index. A little-endian CPU is assumed. + +```C +{ + // 32-bit unsigned integer keys + u32 key = __builtin_bswap32(1000); // reverse byte order of key 1000 + wh_put(ref, &key, 4, NULL, 0); + key = __builtin_bswap32(2000); // reverse byte order of key 2000 +    wh_put(ref, &key, 4, NULL, 0); + struct wormhole_iter * iter = wh_iter_create(ref); + key = __builtin_bswap32(999); + wh_iter_seek(iter, &key, 4); // seek 999 + u32 key_out, len_out; + r = wh_iter_peek(iter, &key_out, 4, &len_out, NULL, 0, NULL); // see 1000 in key_out in reversed byte order + wh_iter_skip1(iter); + r = wh_iter_peek(iter, &key_out, 4, &len_out, NULL, 0, NULL); // see 2000 in key_out in reversed byte order +} +``` + +# Advanced APIs + +If the simple and thread-safe `wh_*` interface already meets your performance requirements, You don't need to read the following sections. 
+Using the `wormhole_*` and `whunsafe_*` APIs can maximize the efficiency of your code with a roughly 5%-10% speedup. +However, inefficient use of these APIs, such as repeatedly calling malloc() to prepare the key buffer, can easily hurt the performance. + +## `struct kv` and `struct kref` + +There are a handful of helper functions (`kv_*` and `kref_*` functions) at the beginning of wh.h. +It's worth noting that the *key's hash* (`hash` of `struct kv` and `hash32` of `struct kref`) +must be up-to-date before passed to wormhole. +The `kv_refill*` helper functions internally update the hash after filling the kv contents. +In a more general case, `kv_update_hash` directly updates a `struct kv`'s hash. +Similarly, `kref_refill_hash32()` calculates the 32-bit hash for `struct kref`. +Performing the hash calculation at the client side can achieve the best efficiency on the server (the index operations). + +## The Wormhole API + +`concbench.c` and `stresstest.c` are examples of how to use a Wormhole index. +There are three sets of Wormhole API: `whsafe`, `wormhole`, and `whunsafe`. +* `whsafe`: The *worry-free* thread-safe API. If you use Wormhole in a concurrent environment and want minimal complexity in your code, you should use `whsafe`. +* `wormhole`: The standard thread-safe API. It offers better efficiency than `whsafe` but requires some extra effort for blocking prevention. +* `whunsafe`: the thread-unsafe API. It offers the best speed and efficiency but does not perform internal concurrency control. +External synchronization should be employed when accessing `whunsafe` in a concurrent environment. + +The functions of each API can be found near the end of `wh.c` (search `kvmap_api_whsafe`, `kvmap_api_wormhole`, and `kvmap_api_whunsafe`). +Note that each API contains a mix of `whsafe_*`, `wormhole_*`, and `whunsafe_*` functions. + +### The `whsafe` API +The `whsafe` API functions are listed in the `kvmap_api_whsafe` structure in `wh.c`. 
The API consists of a mix of `wormhole_*` and `whsafe_*` functions. + +The index operations (GET, SET, UPDATE, DEL, PROBE, INPLACE, MERGE, and SCAN (`wormhole_iter_*` functions)) are all *thread safe*. +A thread needs to hold a reference of the index (_wormref_) to perform safe index operations. + +An example of using point-query operations using the `whsafe` API. + +```C +{ + wh = wormhole_create(NULL); // use NULL here unless you want to change the allocator. + ref = whsafe_ref(wh); + for (...) { + whsafe_put(ref, ...); + whsafe_get(ref, ...); + whsafe_del(ref, ...); + ... // other safe operations + } + ... // other safe operations + wormhole_unref(ref); + wormhole_destroy(wh); +} +``` + +An example of range-query operations: + +```C +{ + ref = whsafe_ref(wh); + // ... assume we already have a valid ref + iter = wormhole_iter_create(ref); + for (...) { + whsafe_iter_seek(iter, key); + wormhole_iter_peek(iter, buf); + wormhole_iter_skip(iter, 1); + wormhole_iter_peek(iter, buf); + wormhole_iter_skip(iter, 3); + wormhole_iter_inp(iter, uf, priv); + // other iter operations + } + // An active iterator is likely holding a lock. + whsafe_iter_park(iter); // Release resources to avoid blocking other threads + // it's now safe to do something such as sleep() or waitpid() + // ... start using the iterator again + whsafe_iter_seek(iter, key2); + // ... other iter operations + whsafe_iter_destroy(iter); + // ... do something + // must destroy iterators before unref() + wormhole_unref(ref); +} +``` + +### The `wormhole` API +Similar to `whsafe`, `wormhole` is also thread safe. It's often faster than `whsafe` but requires extra caution when using it. + +An example of using point-query operations using the `wormhole` API. + +```C +{ + wh = wormhole_create(NULL); // use NULL here unless you want to change the allocator. + ref = wormhole_ref(wh); + for (...) { + wormhole_put(ref, ...); + wormhole_get(ref, ...); + wormhole_del(ref, ...); + ... // other safe operations + } + ... 
// other safe operations + wormhole_unref(ref); + wormhole_destroy(wh); +} +``` + +An example of range-query operations: + +```C +{ + ref = wormhole_ref(wh); + // ... assume we already have a valid ref + iter = wormhole_iter_create(ref); + for (...) { + wormhole_iter_seek(iter, key); + wormhole_iter_peek(iter, buf); + wormhole_iter_skip(iter, 1); + wormhole_iter_peek(iter, buf); + wormhole_iter_skip(iter, 3); + wormhole_iter_inp(iter, uf, priv); + // other iter operations + } + // An active iterator is likely holding a lock. + wormhole_iter_park(iter); // Release resources to avoid blocking other threads + while (condition not met) { // See below for explanation + wormhole_refresh_qstate(ref); + } + // ... start using the iterator again + wormhole_iter_seek(iter, key2); + // ... other iter operations + wormhole_iter_destroy(iter); + // ... do something + // must destroy iterators before unref() + wormhole_unref(ref); +} +``` + +### Avoid blocking writers when using the `wormhole` API +Wormhole internally uses QSBR RCU to synchronize readers/writers so every holder of a reference (`ref`) +needs to actively perform index operations. +An ref-holder, if not actively performing index operations, may block a writer thread that is performing split/merge operations. +(because of not periodically announcing its quiescent state). +If a ref-holder is about to become inactive from Wormhole's perspective (doing something else or just sleeping), +it is recommended that the holder temporarily releases the `ref` before entering the inactive status (such as calling `sleep(10)`), +and reactivate the `ref` before performing the next index operation. 
+ +```C +{ + // assume we already have an active ref + wormhole_park(ref); // this will avoid blocking any other threads + sleep(10); + wormhole_resume(ref); // this will reactivate the ref + // continue to perform index operations +} +``` + +A common scenario of dead-locking is acquiring locks with an active wormhole reference, +The following example could cause deadlock between two threads. + +```C +// Thread A has an active ref and try to lock() +{ + struct wormref * ref = wormhole_ref(wh); + lock(just_a_lock); // << block here forever +} + +// Thread B already acquired the lock and wants to insert a key to wh +{ + lock(just_a_lock); + wormhole_put(ref, kv); << block here forever +} +``` + +To avoid this scenario, thread A should either call `wormhole_park(ref)` before acquiring the lock, or keep updating the qstate of the ref: +```C +// Solution A.1: use wormhole_park() +{ + struct wormref * ref = wormhole_ref(wh); + wormhole_park(ref); + lock(just_a_lock); + wormhole_resume(ref); // can use ref afterward +} + +// Solution A.2: use try_lock and wormhole_refresh_qstate() +{ + struct wormref * ref = wormhole_ref(wh); + while (!try_lock(just_a_lock)) { + wormhole_refresh_qstate(ref); + } + // continue to use ref +} +``` + +The above issues with QSBR are specific to the `wormhole` API. `whsafe` does not have these issues. + +### The `whunsafe` API +A set of *thread-unsafe* functions are also provided. See the functions with _prefix_ `whunsafe`. +The thread-unsafe functions don't use the reference (_wormref_). +Simply feed them with the pointer to the wormhole index: + +```C +{ + wh = whunsafe_create(NULL); + for (...) { + whunsafe_put(wh, ...); + whunsafe_get(wh, ...); + whunsafe_del(wh, ...); + ... // other unsafe operations + } + ... // other unsafe operations + wormhole_destroy(wh); +} +``` + +### In-place update with user-defined function +`wormhole_inp` executes a user-defined function on an existing key-value item. 
+If the key does not exist, a NULL pointer will be passed to the user-defined function. +A simple example would be incrementing a counter stored in a key-value pair. + +```C +{ + // user-defined in-place update function + void myadd1(struct kv * kv, void * priv) { + if (kv != NULL) { + assert(kv->vlen >= sizeof(u64)); + u64 * pvalue = kv_vptr(kv); + (*pvalue)++; + } + } + + // create the counter + u64 zero = 0; + struct kv * tmp = kv_create("counter", 7, &zero, 8); // malloc-ed + wormhole_put(ref, tmp); + + // perform +1 on the stored value + struct kref kref = kv_ref(tmp); // create a kref of tmp + wormhole_inp(ref, &kref, myadd1, NULL); +} +``` + +Note that the user-defined function should ONLY change the value's content, and nothing else. +Otherwise, the index can be corrupted. +A similar mechanism is also provided for iterators (`wormhole_iter_inp`). + +The inplace function can also be used to retrieve key-value data. For example: + +```C +{ + void inplace_getu64(struct kv * kv, void * priv) { + if (kv != NULL) { + assert(kv->vlen >= sizeof(u64)); + u64 * pvalue = kv_vptr(kv); + *(u64 *)priv = *pvalue; + } else { + *(u64 *)priv = 0; + } + } + ... + struct kref kref = ... + u64 val; + wormhole_inp(ref, &kref, inplace_getu64, &val); +} +``` + +### `merge`: atomic Read-Modify-Write +The `wormhole_merge` and `whsafe_merge` functions perform atomic Read-Modify-Write (RMW) operations. +In a RMW operation, if the search key is found, the KV pair will be passed to a user-defined callback function `uf` (short for user function). +Otherwise, a NULL pointer is passed to `uf`. +`uf` could update the KV in-place if it does not require any memory reallocation. +In such a case, `uf` should return the KV's pointer back and the merge function will do nothing else. +If `uf` want to replace the KV with something new, it should return a pointer that is different than the original KV pointer. +The `uf` should not make memory allocation by itself. 
+Instead, the `merge` function will copy the returned KV and replace the existing KV with the newly created one. +`uf` should not return NULL unless the key was not found. + +### Iterator +The `wormhole_iter_{seek,peek,skip,next,inp}` functions provide range-search functionalities. +If the search key does not exist, the `seek` operation will put the cursor on the item that is greater than the search-key. +`next` will return the item under the current cursor and move the cursor forward. +`peek` is similar but does not move the cursor. For example, with keys `{1,3,5}`, `seek(2); r = next()` will see `r == 3`. + +Currently Wormhole does not provide `seek_for_less_equal()` and `prev()` for backward scanning. This feature will be added in the future. + +# Memory management + +By default, Wormhole manages all the key-value data internally and only copies to or from a user-supplied +buffer (a `struct kv` object). +This draws a clear boundary in the memory space between the index structure and its users. +After a call to any of the index operations, the caller can immediately free +the buffer holding the key-reference or the key-value data. +This also allows users to use stack-allocated variables to interact with Wormhole. + +The memory manager of the internal key-value objects can be customized when creating a new Wormhole (see `wormhole_create`). +The customization will _only_ affect the internal `struct kv` objects. +Actually, the memory manager can be configured to directly use the caller's `struct kv` object and store it in Wormhole. +This `struct kvmap_mm` structure shows an example: + +```C +{ + const struct kvmap_mm kvmap_mm_ualloc { + .in = kvmap_mm_in_noop, // in wormhole_put(), store caller's kv in wh + .out = kvmap_mm_out_dup, // but still make a copy in wormhole_get() + .free = kvmap_mm_free_free, // call free() for delete/update + }; + ... + struct wormhole * wh = wormhole_create(&kvmap_mm_ualloc); + struct wormref * ref = wormhole_ref(wh); + ... 
+ struct kv * newkv = malloc(size); + ... + wormhole_put(ref, newkv); + // Don't free newkv! it's now managed by wh +} +``` + +Each of the in/out/free functions can be freely customized. +A few `kvmap_mm_*` functions are already provided for common scenarios. +`kvmap_mm_ndf` is identical to the `kvmap_mm_ualloc` structure in the above example. + +## Hugepages +Wormhole uses hugepages when available. To reserve some hugepages in Linux (10000 * 2MB): + + # echo 10000 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages + +# Tuning + +A few macros in `wh.c` can be tuned. + +* `WH_SLABLEAF_SIZE` controls the slab size for leaf node allocation. +The default is `((1lu << 21))` (2MB slabs). If 1GB hugepages are available, `WH_SLABLEAF_SIZE` can be set to `((1lu << 30))` to utilize 1GB hugepages. +Using 1GB hugepages can improve search performance on a large dataset. + +* `WH_KPN` controls "Keys Per (leaf-)Node". The default value is 128. +Compared to the default, `WH_KPN=256` can offer 5-10%+ higher point query and update speed. +However, range queries prefer a smaller node size such as 64. + + +* `QSBR_STATES_NR` and `QSBR_SHARDS_NR` control the capacity (number of active references) of the QSBR RCU. +The product of the two values is the capacity. For efficiency, `QSBR_STATES_NR` can be set to 23, 39, and 55, and `QSBR_SHARDS_NR` must be 2^n, n<=6. +The defaults are 23 and 32, respectively. The QSBR registry can run out of space if there are a few hundred of threads, which is not a problem in practice. + +# Limitations + +## Key Patterns +A **split** operation will fail when **129** (`WH_KPN + 1`) keys share a common prefix of 65535+ bytes. +In Wormhole, the maximum _anchor-key_ length is 65535 (2^16) bytes, which is shorter than the maximum key-length (2^32). + +## Memory Allocation +Insertions/updates can fail and return false when a memory allocation fails. +On memory-allocation failure, the hash-table expansion function will block and wait for available memory. 
 + +# Performance +Some benchmarking results with some real-world datasets: See [this](https://github.com/wuxb45/wormhole/issues/5) page for more information. + +![Concurrent GET](https://user-images.githubusercontent.com/564235/112712778-704d7200-8e9f-11eb-9f4d-795de46772d1.png) diff --git a/MassTrie-beta/wormhole/README.txt b/MassTrie-beta/wormhole/README.txt new file mode 100644 index 00000000..e70108ef --- /dev/null +++ b/MassTrie-beta/wormhole/README.txt @@ -0,0 +1,31 @@ +To set up the project: + +If you're not already in the folder 'wormhole', perform: + +1. cd wormhole + +Once you're there, set the variable LD_LIBRARY_PATH to the +current working directory using: + +2. setenv LD_LIBRARY_PATH `pwd` + +You can check (optionally) that this operation was executed properly using: + +3. echo $LD_LIBRARY_PATH + + +Then, do: + +4. cd sto + +5. ./bootstrap.sh + +6. ./configure + +To run the test file do: + +7. make unit-test_MTrie + +Then run it using: + +8. ./unit-test_MTrie diff --git a/MassTrie-beta/wormhole/concbench.c b/MassTrie-beta/wormhole/concbench.c new file mode 100644 index 00000000..f18abde9 --- /dev/null +++ b/MassTrie-beta/wormhole/concbench.c @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2018-2019 Wu, Xingbo + * + * All rights reserved. No warranty, explicit or implicit, provided. + */ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include "lib.h" +#include "kv.h" +#include "wh.h" + +atomic_uint_least64_t __seqno = 0; +u64 __nth = 0; +struct kv ** __samples = NULL; +u64 __nkeys = 0; +atomic_uint_least64_t __tot = 0; +u64 __endtime = 0; + + static void * +kv_load_worker(struct wormhole * const wh) +{ + srandom_u64(time_nsec() * time_nsec()); + struct wormref * const ref = wormhole_ref(wh); + const u64 seq = atomic_fetch_add(&__seqno, 1); + const u64 n0 = __nkeys / __nth * seq; + const u64 nz = (seq == (__nth - 1)) ? 
__nkeys : (__nkeys / __nth * (seq + 1)); + printf("load worker %lu %lu\n", n0, nz); + for (u64 i = n0; i < nz; i++) + wormhole_put(ref, __samples[i]); + wormhole_unref(ref); + return NULL; +} + + static void * +kv_probe_worker(struct wormhole * const wh) +{ + struct wormref * const ref = wormhole_ref(wh); + struct kv * next = __samples[random_u64() % __nkeys]; + u64 rnext = random_u64() % __nkeys; + u64 count = 0; + u64 succ = 0; +#define BATCH ((10000)) + do { + for (u64 i = 0; i < BATCH; i++) { + // reading kv samples leads to unnecessary cache misses + // use prefetch to minimize overhead on workload generation + struct kv * const key = next; + next = __samples[rnext]; + __builtin_prefetch(next, 0, 0); + __builtin_prefetch(((u8 *)next) + 64, 0, 0); + rnext = random_u64() % __nkeys; + __builtin_prefetch(&(__samples[rnext])); + + // do probe + // customize your benchmark: do a mix of wh operations with switch-cases + const struct kref kref = kv_kref(key); + if (wormhole_probe(ref, &kref)) + succ++; + } + count += BATCH; + } while (time_nsec() < __endtime); + if (count != succ) + printf("count %lu success %lu\n", count, succ); + (void)atomic_fetch_add(&__tot, count); + wormhole_unref(ref); + return NULL; +} + + int +main(int argc, char ** argv) +{ + if (argc < 3) { + printf("usage: <#keys> <#threads>\n"); + printf(" Get words.txt: wget https://github.com/dwyl/english-words/raw/master/words.txt\n"); + printf(" Example: %s words.txt 1000000 4\n", argv[0]); + printf(" Better to use only one numa node with numactl -N 0\n"); + printf(" Better to run X thread on X cores\n"); + return 0; + } + + char ** const words = malloc(sizeof(char *) * 1000000); // or `wc -l words.txt` + u64 nr_words = 0; + char * buf = malloc(8192); + size_t bufsize = 8192; + FILE * const fwords = fopen(argv[1], "r"); + if (fwords == NULL) { + printf("open words file failed\n"); + return 0; + } + + // read all words to words + while (getline(&buf, &bufsize, fwords) > 0) { + buf[strlen(buf)-1] = 
'\0'; + words[nr_words] = strdup(buf); + nr_words++; + } + fclose(fwords); + + // generate keys + const u64 nkeys = strtoull(argv[2], NULL, 10); + struct kv ** const samples = malloc(sizeof(struct kv *) * nkeys); + char * ss[6]; + for (u64 i = 0; i < nkeys; i++) { + for (u64 j = 0; j < 6; j++) + ss[j] = words[random() % nr_words]; + sprintf(buf, "%s %s %s %s %s %s!", ss[0], ss[1], ss[2], ss[3], ss[4], ss[5]); + samples[i] = kv_create_str(buf, NULL, 0); + } + // free words & buf + for (u64 i = 0; i < nr_words; i++) + free(words[i]); + free(words); + free(buf); + + // load (4) + __samples = samples; + __nkeys = nkeys; + struct wormhole * const wh = wormhole_create(NULL); + __nth = 4; + const u64 dtl = thread_fork_join(4, (void *)kv_load_worker, false, (void *)wh); + printf("load x4 %.2lf mops\n", ((double)nkeys) * 1e3 / ((double)dtl)); + + const u64 nth = strtoull(argv[3], NULL, 10); + printf("probe with %lu threads. each round takes 3 seconds\n", nth); + for (u64 i = 0; i < 3; i++) { + __tot = 0; + __endtime = time_nsec() + 3e9; // 3 sec + const u64 dt = thread_fork_join(nth, (void *)kv_probe_worker, false, (void *)wh); + const double mops = ((double)__tot) * 1e3 / ((double)dt); + printf("probe x%lu %.2lf mops\n", nth, mops); + sleep(1); + } + + // final clean up for valgrind + for (u64 i = 0; i < nkeys; i++) + free(samples[i]); + free(samples); + wormhole_destroy(wh); + return 0; +} diff --git a/MassTrie-beta/wormhole/concbench.out b/MassTrie-beta/wormhole/concbench.out new file mode 100644 index 00000000..ee87ca31 Binary files /dev/null and b/MassTrie-beta/wormhole/concbench.out differ diff --git a/MassTrie-beta/wormhole/ctypes.h b/MassTrie-beta/wormhole/ctypes.h new file mode 100644 index 00000000..314ca5dc --- /dev/null +++ b/MassTrie-beta/wormhole/ctypes.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2016--2021 Wu, Xingbo + * + * All rights reserved. No warranty, explicit or implicit, provided. 
+ */ +#pragma once + +// C types only; C++ source code don't use this + +#include +#include + +/* C11 atomic types */ +typedef atomic_bool abool; + +typedef atomic_uchar au8; +typedef atomic_ushort au16; +typedef atomic_uint au32; +typedef atomic_ulong au64; +static_assert(sizeof(au8) == 1, "sizeof(au8)"); +static_assert(sizeof(au16) == 2, "sizeof(au16)"); +static_assert(sizeof(au32) == 4, "sizeof(au32)"); +static_assert(sizeof(au64) == 8, "sizeof(au64)"); + +typedef atomic_char as8; +typedef atomic_short as16; +typedef atomic_int as32; +typedef atomic_long as64; +static_assert(sizeof(as8) == 1, "sizeof(as8)"); +static_assert(sizeof(as16) == 2, "sizeof(as16)"); +static_assert(sizeof(as32) == 4, "sizeof(as32)"); +static_assert(sizeof(as64) == 8, "sizeof(as64)"); + +// shorten long names +#define MO_RELAXED memory_order_relaxed +#define MO_CONSUME memory_order_consume +#define MO_ACQUIRE memory_order_acquire +#define MO_RELEASE memory_order_release +#define MO_ACQ_REL memory_order_acq_rel +#define MO_SEQ_CST memory_order_seq_cst diff --git a/MassTrie-beta/wormhole/easydemo.c b/MassTrie-beta/wormhole/easydemo.c new file mode 100644 index 00000000..f095a6ac --- /dev/null +++ b/MassTrie-beta/wormhole/easydemo.c @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2021 Wu, Xingbo + * + * All rights reserved. No warranty, explicit or implicit, provided. 
+ */ +#define _GNU_SOURCE +#include + +#include "lib.h" +#include "kv.h" +#include "wh.h" + + int +main(int argc, char ** argv) +{ + (void)argc; + (void)argv; + struct wormhole * const wh = wh_create(); + struct wormref * const ref = wh_ref(wh); + + bool r; + + r = wh_put(ref, "wormhole", 8, "easy", 4); + printf("wh_put wormhole easy %c\n", r?'T':'F'); + + r = wh_put(ref, "time_travel", 11, "impossible", 10); + printf("wh_put time_travel impossible %c\n", r?'T':'F'); + + r = wh_del(ref, "time_travel", 11); + printf("wh_del time_travel %c\n", r?'T':'F'); + + r = wh_probe(ref, "time_travel", 11); + printf("wh_probe time_travel %c\n", r?'T':'F'); + + u32 klen_out = 0; + char kbuf_out[8] = {}; + u32 vlen_out = 0; + char vbuf_out[8] = {}; + r = wh_get(ref, "wormhole", 8, vbuf_out, 8, &vlen_out); + printf("wh_get wormhole %c %u %.*s\n", r?'T':'F', vlen_out, vlen_out, vbuf_out); + + // in a concurrent environment, the kvmap_api_wormhole need park&resume when a thread is about to go idle + // don't need park&resume if you're using the default kvmap_api_whsafe in whwh.c! 
+ wh_park(ref); + usleep(10); + wh_resume(ref); + + // prepare a few keys for range ops + wh_put(ref, "00", 2, "0_value", 7); + wh_put(ref, "11", 2, "1_value", 7); + wh_put(ref, "22", 2, "2_value", 7); + + struct wormhole_iter * const iter = wh_iter_create(ref); + + wh_iter_seek(iter, NULL, 0); // seek to the head + printf("wh_iter_seek \"\"\n"); + while (wh_iter_valid(iter)) { + r = wh_iter_peek(iter, kbuf_out, 8, &klen_out, vbuf_out, 8, &vlen_out); + if (r) { + printf("wh_iter_peek klen=%u key=%.*s vlen=%u value=%.*s\n", + klen_out, klen_out, kbuf_out, vlen_out, vlen_out, vbuf_out); + } else { + printf("ERROR!\n"); + } + wh_iter_skip1(iter); + } + + // call iter_park if you will go idle but want to use the iter later + // don't need to call iter_park if you're actively using iter + wh_iter_park(iter); + usleep(10); + + wh_iter_seek(iter, "0", 1); + printf("wh_iter_seek \"0\"\n"); + // this time we don't want to copy the value + r = wh_iter_peek(iter, kbuf_out, 8, &klen_out, NULL, 0, NULL); + if (r){ + printf("wh_iter_peek klen=%u key=%.*s\n", klen_out, klen_out, kbuf_out); + } else { + printf("ERROR: iter_peek failed\n"); + } + + wh_iter_destroy(iter); + // there must be no active iter when calling unref() + wh_unref(ref); + + // unsafe operations: should have released all references + wh_clean(wh); // just for demonstration + wh_destroy(wh); // destroy also calls clean interally + return 0; +} diff --git a/MassTrie-beta/wormhole/easydemo.out b/MassTrie-beta/wormhole/easydemo.out new file mode 100644 index 00000000..32521210 Binary files /dev/null and b/MassTrie-beta/wormhole/easydemo.out differ diff --git a/MassTrie-beta/wormhole/kv.c b/MassTrie-beta/wormhole/kv.c new file mode 100644 index 00000000..a1720e88 --- /dev/null +++ b/MassTrie-beta/wormhole/kv.c @@ -0,0 +1,1131 @@ +/* + * Copyright (c) 2016--2021 Wu, Xingbo + * + * All rights reserved. No warranty, explicit or implicit, provided. 
+ */ +#define _GNU_SOURCE + +// headers {{{ +#include // static_assert +#include +#include "lib.h" +#include "ctypes.h" +#include "kv.h" +// }}} headers + +// crc32c {{{ + inline u32 +kv_crc32c(const void * const ptr, u32 len) +{ + return crc32c_inc((const u8 *)ptr, len, KV_CRC32C_SEED); +} + + inline u64 +kv_crc32c_extend(const u32 lo) +{ + const u64 hi = (u64)(~lo); + return (hi << 32) | ((u64)lo); +} +// }}} crc32c + +// kv {{{ + +// size {{{ + inline size_t +kv_size(const struct kv * const kv) +{ + return sizeof(*kv) + kv->klen + kv->vlen; +} + + inline size_t +kv_size_align(const struct kv * const kv, const u64 align) +{ + debug_assert(align && ((align & (align - 1)) == 0)); + return (sizeof(*kv) + kv->klen + kv->vlen + (align - 1)) & (~(align - 1)); +} + + inline size_t +key_size(const struct kv *const key) +{ + return sizeof(*key) + key->klen; +} + + inline size_t +key_size_align(const struct kv *const key, const u64 align) +{ + debug_assert(align && ((align & (align - 1)) == 0)); + return (sizeof(*key) + key->klen + (align - 1)) & (~(align - 1)); +} +// }}} size + +// construct {{{ + inline void +kv_update_hash(struct kv * const kv) +{ + const u32 lo = kv_crc32c((const void *)kv->kv, kv->klen); + kv->hash = kv_crc32c_extend(lo); +} + + inline void +kv_refill_value(struct kv * const kv, const void * const value, const u32 vlen) +{ + debug_assert((vlen == 0) || value); + memcpy(&(kv->kv[kv->klen]), value, vlen); + kv->vlen = vlen; +} + + inline void +kv_refill(struct kv * const kv, const void * const key, const u32 klen, + const void * const value, const u32 vlen) +{ + debug_assert(kv); + kv->klen = klen; + memcpy(&(kv->kv[0]), key, klen); + kv_refill_value(kv, value, vlen); + kv_update_hash(kv); +} + + inline void +kv_refill_str(struct kv * const kv, const char * const key, + const void * const value, const u32 vlen) +{ + kv_refill(kv, key, (u32)strlen(key), value, vlen); +} + + inline void +kv_refill_str_str(struct kv * const kv, const char * const key, + 
const char * const value) +{ + kv_refill(kv, key, (u32)strlen(key), value, (u32)strlen(value)); +} + +// the u64 key is filled in big-endian byte order for correct ordering + inline void +kv_refill_u64(struct kv * const kv, const u64 key, const void * const value, const u32 vlen) +{ + kv->klen = sizeof(u64); + *(u64 *)(kv->kv) = __builtin_bswap64(key); // bswap on little endian + kv_refill_value(kv, value, vlen); + kv_update_hash(kv); +} + + inline void +kv_refill_hex32(struct kv * const kv, const u32 hex, const void * const value, const u32 vlen) +{ + kv->klen = 8; + strhex_32(kv->kv, hex); + kv_refill_value(kv, value, vlen); + kv_update_hash(kv); +} + + inline void +kv_refill_hex64(struct kv * const kv, const u64 hex, const void * const value, const u32 vlen) +{ + kv->klen = 16; + strhex_64(kv->kv, hex); + kv_refill_value(kv, value, vlen); + kv_update_hash(kv); +} + + inline void +kv_refill_hex64_klen(struct kv * const kv, const u64 hex, + const u32 klen, const void * const value, const u32 vlen) +{ + strhex_64(kv->kv, hex); + if (klen > 16) { + kv->klen = klen; + memset(kv->kv + 16, '!', klen - 16); + } else { + kv->klen = 16; + } + kv_refill_value(kv, value, vlen); + kv_update_hash(kv); +} + + inline void +kv_refill_kref(struct kv * const kv, const struct kref * const kref) +{ + kv->klen = kref->len; + kv->vlen = 0; + kv->hash = kv_crc32c_extend(kref->hash32); + memmove(kv->kv, kref->ptr, kref->len); +} + + inline void +kv_refill_kref_v(struct kv * const kv, const struct kref * const kref, + const void * const value, const u32 vlen) +{ + kv->klen = kref->len; + kv->vlen = vlen; + kv->hash = kv_crc32c_extend(kref->hash32); + memmove(kv->kv, kref->ptr, kref->len); + memcpy(kv->kv + kv->klen, value, vlen); +} + + inline struct kref +kv_kref(const struct kv * const key) +{ + return (struct kref){.ptr = key->kv, .len = key->klen, .hash32 = key->hashlo}; +} + + inline struct kv * +kv_create(const void * const key, const u32 klen, const void * const value, const u32 
vlen) +{ + struct kv * const kv = malloc(sizeof(*kv) + klen + vlen); + if (kv) + kv_refill(kv, key, klen, value, vlen); + return kv; +} + + inline struct kv * +kv_create_str(const char * const key, const void * const value, const u32 vlen) +{ + return kv_create(key, (u32)strlen(key), value, vlen); +} + + inline struct kv * +kv_create_str_str(const char * const key, const char * const value) +{ + return kv_create(key, (u32)strlen(key), value, (u32)strlen(value)); +} + + inline struct kv * +kv_create_kref(const struct kref * const kref, const void * const value, const u32 vlen) +{ + return kv_create(kref->ptr, kref->len, value, vlen); +} + +static struct kv __kv_null = {}; + +__attribute__((constructor)) + static void +kv_null_init(void) +{ + kv_update_hash(&__kv_null); +} + + inline const struct kv * +kv_null(void) +{ + return &__kv_null; +} +// }}} construct + +// dup {{{ + inline struct kv * +kv_dup(const struct kv * const kv) +{ + if (kv == NULL) + return NULL; + + const size_t sz = kv_size(kv); + struct kv * const new = malloc(sz); + if (new) + memcpy(new, kv, sz); + return new; +} + + inline struct kv * +kv_dup_key(const struct kv * const kv) +{ + if (kv == NULL) + return NULL; + + const size_t sz = key_size(kv); + struct kv * const new = malloc(sz); + if (new) { + memcpy(new, kv, sz); + new->vlen = 0; + } + return new; +} + + inline struct kv * +kv_dup2(const struct kv * const from, struct kv * const to) +{ + if (from == NULL) + return NULL; + const size_t sz = kv_size(from); + struct kv * const new = to ? to : malloc(sz); + if (new) + memcpy(new, from, sz); + return new; +} + + inline struct kv * +kv_dup2_key(const struct kv * const from, struct kv * const to) +{ + if (from == NULL) + return NULL; + const size_t sz = key_size(from); + struct kv * const new = to ? 
to : malloc(sz); + if (new) { + memcpy(new, from, sz); + new->vlen = 0; + } + return new; +} + + inline struct kv * +kv_dup2_key_prefix(const struct kv * const from, struct kv * const to, const u32 plen) +{ + if (from == NULL) + return NULL; + debug_assert(plen <= from->klen); + const size_t sz = key_size(from) - from->klen + plen; + struct kv * const new = to ? to : malloc(sz); + if (new) { + new->klen = plen; + memcpy(new->kv, from->kv, plen); + new->vlen = 0; + kv_update_hash(new); + } + return new; +} +// }}} dup + +// compare {{{ + static inline int +klen_compare(const u32 len1, const u32 len2) +{ + if (len1 < len2) + return -1; + else if (len1 > len2) + return 1; + else + return 0; +} + +// compare whether the two keys are identical +// optimistic: do not check hash + inline bool +kv_match(const struct kv * const key1, const struct kv * const key2) +{ + //cpu_prefetch0(((u8 *)key2) + 64); + //return (key1->hash == key2->hash) + // && (key1->klen == key2->klen) + // && (!memcmp(key1->kv, key2->kv, key1->klen)); + return (key1->klen == key2->klen) && (!memcmp(key1->kv, key2->kv, key1->klen)); +} + +// compare whether the two keys are identical +// check hash first +// pessimistic: return false quickly if their hashes mismatch + inline bool +kv_match_hash(const struct kv * const key1, const struct kv * const key2) +{ + return (key1->hash == key2->hash) + && (key1->klen == key2->klen) + && (!memcmp(key1->kv, key2->kv, key1->klen)); +} + + inline bool +kv_match_full(const struct kv * const kv1, const struct kv * const kv2) +{ + return (kv1->kvlen == kv2->kvlen) + && (!memcmp(kv1, kv2, sizeof(*kv1) + kv1->klen + kv1->vlen)); +} + + bool +kv_match_kv128(const struct kv * const sk, const u8 * const kv128) +{ + debug_assert(sk); + debug_assert(kv128); + + u32 klen128 = 0; + u32 vlen128 = 0; + const u8 * const pdata = vi128_decode_u32(vi128_decode_u32(kv128, &klen128), &vlen128); + (void)vlen128; + return (sk->klen == klen128) && (!memcmp(sk->kv, pdata, klen128)); +} + 
+ inline int +kv_compare(const struct kv * const kv1, const struct kv * const kv2) +{ + const u32 len = kv1->klen < kv2->klen ? kv1->klen : kv2->klen; + const int cmp = memcmp(kv1->kv, kv2->kv, (size_t)len); + return cmp ? cmp : klen_compare(kv1->klen, kv2->klen); +} + +// for qsort and bsearch + static int +kv_compare_ptrs(const void * const p1, const void * const p2) +{ + const struct kv * const * const pp1 = (typeof(pp1))p1; + const struct kv * const * const pp2 = (typeof(pp2))p2; + return kv_compare(*pp1, *pp2); +} + + int +kv_k128_compare(const struct kv * const sk, const u8 * const k128) +{ + debug_assert(sk); + const u32 klen1 = sk->klen; + u32 klen2 = 0; + const u8 * const ptr2 = vi128_decode_u32(k128, &klen2); + debug_assert(ptr2); + const u32 len = (klen1 < klen2) ? klen1 : klen2; + const int cmp = memcmp(sk->kv, ptr2, len); + return cmp ? cmp : klen_compare(klen1, klen2); +} + + int +kv_kv128_compare(const struct kv * const sk, const u8 * const kv128) +{ + debug_assert(sk); + const u32 klen1 = sk->klen; + u32 klen2 = 0; + u32 vlen2 = 0; + const u8 * const ptr2 = vi128_decode_u32(vi128_decode_u32(kv128, &klen2), &vlen2); + const u32 len = (klen1 < klen2) ? klen1 : klen2; + const int cmp = memcmp(sk->kv, ptr2, len); + return cmp ? cmp : klen_compare(klen1, klen2); +} + + inline void +kv_qsort(struct kv ** const kvs, const size_t nr) +{ + qsort(kvs, nr, sizeof(kvs[0]), kv_compare_ptrs); +} + +// return the length of longest common prefix of the two keys + inline u32 +kv_key_lcp(const struct kv * const key1, const struct kv * const key2) +{ + const u32 max = (key1->klen < key2->klen) ? key1->klen : key2->klen; + return memlcp(key1->kv, key2->kv, max); +} + +// return the length of longest common prefix of the two keys with a known lcp0 + inline u32 +kv_key_lcp_skip(const struct kv * const key1, const struct kv * const key2, const u32 lcp0) +{ + const u32 max = (key1->klen < key2->klen) ? 
key1->klen : key2->klen; + debug_assert(max >= lcp0); + return lcp0 + memlcp(key1->kv+lcp0, key2->kv+lcp0, max-lcp0); +} +// }}} + +// psort {{{ + static inline void +kv_psort_exchange(struct kv ** const kvs, const u64 i, const u64 j) +{ + if (i != j) { + struct kv * const tmp = kvs[i]; + kvs[i] = kvs[j]; + kvs[j] = tmp; + } +} + + static u64 +kv_psort_partition(struct kv ** const kvs, const u64 lo, const u64 hi) +{ + if (lo >= hi) + return lo; + + const u64 p = (lo+hi) >> 1; + kv_psort_exchange(kvs, lo, p); + u64 i = lo; + u64 j = hi + 1; + do { + while (kv_compare(kvs[++i], kvs[lo]) < 0 && i < hi); + while (kv_compare(kvs[--j], kvs[lo]) > 0); + if (i >= j) + break; + kv_psort_exchange(kvs, i, j); + } while (true); + kv_psort_exchange(kvs, lo, j); + return j; +} + + static void +kv_psort_rec(struct kv ** const kvs, const u64 lo, const u64 hi, const u64 tlo, const u64 thi) +{ + if (lo >= hi) + return; + const u64 c = kv_psort_partition(kvs, lo, hi); + + if (c > tlo) // go left + kv_psort_rec(kvs, lo, c-1, tlo, thi); + + if (c < thi) // go right + kv_psort_rec(kvs, c+1, hi, tlo, thi); +} + + inline void +kv_psort(struct kv ** const kvs, const u64 nr, const u64 tlo, const u64 thi) +{ + debug_assert(tlo <= thi); + debug_assert(thi < nr); + kv_psort_rec(kvs, 0, nr-1, tlo, thi); +} +// }}} psort + +// ptr {{{ + inline void * +kv_vptr(struct kv * const kv) +{ + return (void *)(&(kv->kv[kv->klen])); +} + + inline void * +kv_kptr(struct kv * const kv) +{ + return (void *)(&(kv->kv[0])); +} + + inline const void * +kv_vptr_c(const struct kv * const kv) +{ + return (const void *)(&(kv->kv[kv->klen])); +} + + inline const void * +kv_kptr_c(const struct kv * const kv) +{ + return (const void *)(&(kv->kv[0])); +} +// }}} ptr + +// print {{{ +// cmd "KV" K and V can be 's': string, 'x': hex, 'd': dec, or else for not printing. 
+// n for newline after kv + void +kv_print(const struct kv * const kv, const char * const cmd, FILE * const out) +{ + debug_assert(cmd); + const u32 klen = kv->klen; + fprintf(out, "#%016lx k[%3u]", kv->hash, klen); + + switch(cmd[0]) { + case 's': fprintf(out, " %.*s", klen, kv->kv); break; + case 'x': str_print_hex(out, kv->kv, klen); break; + case 'd': str_print_dec(out, kv->kv, klen); break; + default: break; + } + + const u32 vlen = kv->vlen; + switch (cmd[1]) { + case 's': fprintf(out, " v[%4u] %.*s", vlen, vlen, kv->kv+klen); break; + case 'x': fprintf(out, " v[%4u]", vlen); str_print_hex(out, kv->kv+klen, vlen); break; + case 'd': fprintf(out, " v[%4u]", vlen); str_print_dec(out, kv->kv+klen, vlen); break; + default: break; + } + if (strchr(cmd, 'n')) + fprintf(out, "\n"); +} +// }}} print + +// mm {{{ + struct kv * +kvmap_mm_in_noop(struct kv * const kv, void * const priv) +{ + (void)priv; + return kv; +} + +// copy-out + struct kv * +kvmap_mm_out_noop(struct kv * const kv, struct kv * const out) +{ + (void)out; + return kv; +} + + void +kvmap_mm_free_noop(struct kv * const kv, void * const priv) +{ + (void)kv; + (void)priv; +} + +// copy-in + struct kv * +kvmap_mm_in_dup(struct kv * const kv, void * const priv) +{ + (void)priv; + return kv_dup(kv); +} + +// copy-out + struct kv * +kvmap_mm_out_dup(struct kv * const kv, struct kv * const out) +{ + return kv_dup2(kv, out); +} + + void +kvmap_mm_free_free(struct kv * const kv, void * const priv) +{ + (void)priv; + free(kv); +} + +const struct kvmap_mm kvmap_mm_dup = { + .in = kvmap_mm_in_dup, + .out = kvmap_mm_out_dup, + .free = kvmap_mm_free_free, + .priv = NULL, +}; + +const struct kvmap_mm kvmap_mm_ndf = { + .in = kvmap_mm_in_noop, + .out = kvmap_mm_out_dup, + .free = kvmap_mm_free_free, + .priv = NULL, +}; + +// }}} mm + +// kref {{{ + inline void +kref_ref_raw(struct kref * const kref, const u8 * const ptr, const u32 len) +{ + kref->ptr = ptr; + kref->len = len; + kref->hash32 = 0; +} + + inline void 
+kref_ref_hash32(struct kref * const kref, const u8 * const ptr, const u32 len) +{ + kref->ptr = ptr; + kref->len = len; + kref->hash32 = kv_crc32c(ptr, len); +} + + inline void +kref_update_hash32(struct kref * const kref) +{ + kref->hash32 = kv_crc32c(kref->ptr, kref->len); +} + + inline void +kref_ref_kv(struct kref * const kref, const struct kv * const kv) +{ + kref->ptr = kv->kv; + kref->len = kv->klen; + kref->hash32 = kv->hashlo; +} + + inline void +kref_ref_kv_hash32(struct kref * const kref, const struct kv * const kv) +{ + kref->ptr = kv->kv; + kref->len = kv->klen; + kref->hash32 = kv_crc32c(kv->kv, kv->klen); +} + + inline bool +kref_match(const struct kref * const k1, const struct kref * const k2) +{ + return (k1->len == k2->len) && (!memcmp(k1->ptr, k2->ptr, k1->len)); +} + +// match a kref and a key + inline bool +kref_kv_match(const struct kref * const kref, const struct kv * const k) +{ + return (kref->len == k->klen) && (!memcmp(kref->ptr, k->kv, kref->len)); +} + + inline int +kref_compare(const struct kref * const kref1, const struct kref * const kref2) +{ + const u32 len = kref1->len < kref2->len ? kref1->len : kref2->len; + const int cmp = memcmp(kref1->ptr, kref2->ptr, (size_t)len); + return cmp ? cmp : klen_compare(kref1->len, kref2->len); +} + +// compare a kref and a key + inline int +kref_kv_compare(const struct kref * const kref, const struct kv * const k) +{ + debug_assert(kref); + debug_assert(k); + const u32 len = kref->len < k->klen ? kref->len : k->klen; + const int cmp = memcmp(kref->ptr, k->kv, (size_t)len); + return cmp ? cmp : klen_compare(kref->len, k->klen); +} + + inline u32 +kref_lcp(const struct kref * const k1, const struct kref * const k2) +{ + const u32 max = (k1->len < k2->len) ? k1->len : k2->len; + return memlcp(k1->ptr, k2->ptr, max); +} + + inline u32 +kref_kv_lcp(const struct kref * const kref, const struct kv * const kv) +{ + const u32 max = (kref->len < kv->klen) ? 
kref->len : kv->klen; + return memlcp(kref->ptr, kv->kv, max); +} + +// klen, key, ... + inline int +kref_k128_compare(const struct kref * const sk, const u8 * const k128) +{ + debug_assert(sk); + const u32 klen1 = sk->len; + u32 klen2 = 0; + const u8 * const ptr2 = vi128_decode_u32(k128, &klen2); + debug_assert(ptr2); + const u32 len = (klen1 < klen2) ? klen1 : klen2; + const int cmp = memcmp(sk->ptr, ptr2, len); + return cmp ? cmp : klen_compare(klen1, klen2); +} + +// klen, vlen, key, ... + inline int +kref_kv128_compare(const struct kref * const sk, const u8 * const kv128) +{ + debug_assert(sk); + const u32 klen1 = sk->len; + u32 klen2 = 0; + u32 vlen2 = 0; + const u8 * const ptr2 = vi128_decode_u32(vi128_decode_u32(kv128, &klen2), &vlen2); + const u32 len = (klen1 < klen2) ? klen1 : klen2; + const int cmp = memcmp(sk->ptr, ptr2, len); + return cmp ? cmp : klen_compare(klen1, klen2); +} + +static struct kref __kref_null = {.hash32 = KV_CRC32C_SEED}; + + inline const struct kref * +kref_null(void) +{ + return &__kref_null; +} +// }}} kref + +// kvref {{{ + inline void +kvref_ref_kv(struct kvref * const ref, struct kv * const kv) +{ + ref->kptr = kv->kv; + ref->vptr = kv->kv + kv->klen; + ref->hdr = *kv; +} + + struct kv * +kvref_dup2_kv(struct kvref * const ref, struct kv * const to) +{ + if (ref == NULL) + return NULL; + const size_t sz = sizeof(*to) + ref->hdr.klen + ref->hdr.vlen; + struct kv * const new = to ? to : malloc(sz); + if (new == NULL) + return NULL; + + *new = ref->hdr; + memcpy(new->kv, ref->kptr, new->klen); + memcpy(new->kv + new->klen, ref->vptr, new->vlen); + return new; +} + + struct kv * +kvref_dup2_key(struct kvref * const ref, struct kv * const to) +{ + if (ref == NULL) + return NULL; + const size_t sz = sizeof(*to) + ref->hdr.klen; + struct kv * const new = to ? 
to : malloc(sz); + if (new == NULL) + return NULL; + + *new = ref->hdr; + memcpy(new->kv, ref->kptr, new->klen); + return new; +} + + int +kvref_kv_compare(const struct kvref * const ref, const struct kv * const kv) +{ + const u32 len = ref->hdr.klen < kv->klen ? ref->hdr.klen : kv->klen; + const int cmp = memcmp(ref->kptr, kv->kv, (size_t)len); + return cmp ? cmp : klen_compare(ref->hdr.klen, kv->klen); +} +// }}} kvref + +// kv128 {{{ +// estimate the encoded size + inline size_t +kv128_estimate_kv(const struct kv * const kv) +{ + return vi128_estimate_u32(kv->klen) + vi128_estimate_u32(kv->vlen) + kv->klen + kv->vlen; +} + +// create a kv128 from kv + u8 * +kv128_encode_kv(const struct kv * const kv, u8 * const out, size_t * const pesize) +{ + u8 * const ptr = out ? out : malloc(kv128_estimate_kv(kv)); + if (!ptr) + return NULL; + + u8 * const pdata = vi128_encode_u32(vi128_encode_u32(ptr, kv->klen), kv->vlen); + memcpy(pdata, kv->kv, kv->klen + kv->vlen); + + if (pesize) + *pesize = (size_t)(pdata - ptr) + kv->klen + kv->vlen; + return ptr; // return the head of the encoded kv128 +} + +// dup kv128 to a kv + struct kv * +kv128_decode_kv(const u8 * const ptr, struct kv * const out, size_t * const pesize) +{ + u32 klen, vlen; + const u8 * const pdata = vi128_decode_u32(vi128_decode_u32(ptr, &klen), &vlen); + struct kv * const ret = out ? 
out : malloc(sizeof(struct kv) + klen + vlen); + if (ret) + kv_refill(ret, pdata, klen, pdata + klen, vlen); + + if (pesize) + *pesize = (size_t)(pdata - ptr) + klen + vlen; + return ret; // return the kv +} + + inline size_t +kv128_size(const u8 * const ptr) +{ + u32 klen, vlen; + const u8 * const pdata = vi128_decode_u32(vi128_decode_u32(ptr, &klen), &vlen); + return ((size_t)(pdata - ptr)) + klen + vlen; +} +// }}} kv128 + +// }}} kv + +// kvmap {{{ + +// registry {{{ +// increase MAX if need more +#define KVMAP_API_MAX ((32)) +static struct kvmap_api_reg kvmap_api_regs[KVMAP_API_MAX]; +static u64 kvmap_api_regs_nr = 0; + + void +kvmap_api_register(const int nargs, const char * const name, const char * const args_msg, + void * (*create)(const char *, const struct kvmap_mm *, char **), const struct kvmap_api * const api) +{ + if (kvmap_api_regs_nr < KVMAP_API_MAX) { + kvmap_api_regs[kvmap_api_regs_nr].nargs = nargs; + kvmap_api_regs[kvmap_api_regs_nr].name = name; + kvmap_api_regs[kvmap_api_regs_nr].args_msg = args_msg; + kvmap_api_regs[kvmap_api_regs_nr].create = create; + kvmap_api_regs[kvmap_api_regs_nr].api = api; + kvmap_api_regs_nr++; + } else { + fprintf(stderr, "%s failed to register [%s]\n", __func__, name); + } +} + void +kvmap_api_helper_message(void) +{ + fprintf(stderr, "%s Usage: api ...\n", __func__); + for (u64 i = 0; i < kvmap_api_regs_nr; i++) { + fprintf(stderr, "%s example: api %s %s\n", __func__, + kvmap_api_regs[i].name, kvmap_api_regs[i].args_msg); + } +} + + int +kvmap_api_helper(int argc, char ** const argv, const struct kvmap_mm * const mm, + const struct kvmap_api ** const api_out, void ** const map_out) +{ + // "api" "name" "arg1", ... 
+ if (argc < 2 || strcmp(argv[0], "api") != 0) + return -1; + + for (u64 i = 0; i < kvmap_api_regs_nr; i++) { + const struct kvmap_api_reg * const reg = &kvmap_api_regs[i]; + if (0 != strcmp(argv[1], reg->name)) + continue; + + if ((argc - 2) < reg->nargs) + return -1; + + void * const map = reg->create(argv[1], mm, argv + 2); // skip "api" "name" + if (map) { + *api_out = reg->api; + *map_out = map; + return 2 + reg->nargs; + } else { + return -1; + } + } + + // no match + return -1; +} +// }}} registry + +// misc {{{ + void +kvmap_inp_steal_kv(struct kv * const kv, void * const priv) +{ + // steal the kv pointer out so we don't need a dangerous get_key_interanl() + if (priv) + *(struct kv **)priv = kv; +} + + inline void * +kvmap_ref(const struct kvmap_api * const api, void * const map) +{ + return api->ref ? api->ref(map) : map; +} + +// return the original map pointer; usually unused by caller + inline void * +kvmap_unref(const struct kvmap_api * const api, void * const ref) +{ + return api->unref ? 
api->unref(ref) : ref; +} +// }}} misc + +// kvmap_kv_op {{{ + inline struct kv * +kvmap_kv_get(const struct kvmap_api * const api, void * const ref, + const struct kv * const key, struct kv * const out) +{ + const struct kref kref = kv_kref(key); + return api->get(ref, &kref, out); +} + + inline bool +kvmap_kv_probe(const struct kvmap_api * const api, void * const ref, + const struct kv * const key) +{ + const struct kref kref = kv_kref(key); + return api->probe(ref, &kref); +} + + inline bool +kvmap_kv_put(const struct kvmap_api * const api, void * const ref, + struct kv * const kv) +{ + return api->put(ref, kv); +} + + inline bool +kvmap_kv_del(const struct kvmap_api * const api, void * const ref, + const struct kv * const key) +{ + const struct kref kref = kv_kref(key); + return api->del(ref, &kref); +} + + inline bool +kvmap_kv_inpr(const struct kvmap_api * const api, void * const ref, + const struct kv * const key, kv_inp_func uf, void * const priv) +{ + const struct kref kref = kv_kref(key); + return api->inpr(ref, &kref, uf, priv); +} + + inline bool +kvmap_kv_inpw(const struct kvmap_api * const api, void * const ref, + const struct kv * const key, kv_inp_func uf, void * const priv) +{ + const struct kref kref = kv_kref(key); + return api->inpw(ref, &kref, uf, priv); +} + + inline bool +kvmap_kv_merge(const struct kvmap_api * const api, void * const ref, + const struct kv * const key, kv_merge_func uf, void * const priv) +{ + const struct kref kref = kv_kref(key); + return api->merge(ref, &kref, uf, priv); +} + + inline u64 +kvmap_kv_delr(const struct kvmap_api * const api, void * const ref, + const struct kv * const start, const struct kv * const end) +{ + const struct kref kref0 = kv_kref(start); + if (end) { + const struct kref krefz = kv_kref(end); + return api->delr(ref, &kref0, &krefz); + } else { + return api->delr(ref, &kref0, NULL); + } +} + + inline void +kvmap_kv_iter_seek(const struct kvmap_api * const api, void * const iter, + const struct kv * 
const key) +{ + const struct kref kref = kv_kref(key); + api->iter_seek(iter, &kref); +} +// }}} kvmap_kv_op + +// kvmap_raw_op {{{ + inline struct kv * +kvmap_raw_get(const struct kvmap_api * const api, void * const ref, + const u32 len, const u8 * const ptr, struct kv * const out) +{ + const struct kref kref = {.ptr = ptr, .len = len, + .hash32 = api->hashkey ? kv_crc32c(ptr, len) : 0}; + return api->get(ref, &kref, out); +} + + inline bool +kvmap_raw_probe(const struct kvmap_api * const api, void * const ref, + const u32 len, const u8 * const ptr) +{ + const struct kref kref = {.ptr = ptr, .len = len, + .hash32 = api->hashkey ? kv_crc32c(ptr, len) : 0}; + return api->probe(ref, &kref); +} + + inline bool +kvmap_raw_del(const struct kvmap_api * const api, void * const ref, + const u32 len, const u8 * const ptr) +{ + const struct kref kref = {.ptr = ptr, .len = len, + .hash32 = api->hashkey ? kv_crc32c(ptr, len) : 0}; + return api->del(ref, &kref); +} + + inline bool +kvmap_raw_inpr(const struct kvmap_api * const api, void * const ref, + const u32 len, const u8 * const ptr, kv_inp_func uf, void * const priv) +{ + const struct kref kref = {.ptr = ptr, .len = len, + .hash32 = api->hashkey ? kv_crc32c(ptr, len) : 0}; + return api->inpr(ref, &kref, uf, priv); +} + + inline bool +kvmap_raw_inpw(const struct kvmap_api * const api, void * const ref, + const u32 len, const u8 * const ptr, kv_inp_func uf, void * const priv) +{ + const struct kref kref = {.ptr = ptr, .len = len, + .hash32 = api->hashkey ? kv_crc32c(ptr, len) : 0}; + return api->inpw(ref, &kref, uf, priv); +} + + inline void +kvmap_raw_iter_seek(const struct kvmap_api * const api, void * const iter, + const u32 len, const u8 * const ptr) +{ + const struct kref kref = {.ptr = ptr, .len = len, + .hash32 = api->hashkey ? 
kv_crc32c(ptr, len) : 0}; + api->iter_seek(iter, &kref); +} +// }}}} kvmap_raw_op + +// dump {{{ + u64 +kvmap_dump_keys(const struct kvmap_api * const api, void * const map, const int fd) +{ + void * const ref = kvmap_ref(api, map); + void * const iter = api->iter_create(ref); + api->iter_seek(iter, kref_null()); + u64 i = 0; + while (api->iter_valid(iter)) { + struct kvref kvref; + api->iter_kvref(iter, &kvref); + dprintf(fd, "%010lu [%3u] %.*s [%u]\n", i, kvref.hdr.klen, kvref.hdr.klen, kvref.kptr, kvref.hdr.vlen); + i++; + api->iter_skip1(iter); + } + api->iter_destroy(iter); + kvmap_unref(api, ref); + return i; +} +// }}} dump + +// kv64 {{{ +struct kv64 { // internal only + struct kv kv; + u64 key_be; // must be in big endian + u64 value; +}; + + inline bool +kvmap_kv64_get(const struct kvmap_api * const api, void * const ref, + const u64 key, u64 * const out) +{ + struct kv64 keybuf, kvout; + struct kref kref; + keybuf.key_be = __builtin_bswap64(key); + kref_ref_hash32(&kref, keybuf.kv.kv, sizeof(keybuf.key_be)); + struct kv * const ret = api->get(ref, &kref, &kvout.kv); + if (ret) { + *out = kvout.value; + return true; + } else { + return false; + } +} + + inline bool +kvmap_kv64_probe(const struct kvmap_api * const api, void * const ref, + const u64 key) +{ + struct kv64 keybuf; + struct kref kref; + keybuf.key_be = __builtin_bswap64(key); + kref_ref_hash32(&kref, keybuf.kv.kv, sizeof(keybuf.key_be)); + return api->probe(ref, &kref); +} + + inline bool +kvmap_kv64_put(const struct kvmap_api * const api, void * const ref, + const u64 key, const u64 value) +{ + struct kv64 kv; + kv.key_be = __builtin_bswap64(key); + kv.value = value; + kv.kv.klen = sizeof(key); + kv.kv.vlen = sizeof(value); + if (api->hashkey) + kv_update_hash(&kv.kv); + + return api->put(ref, &kv.kv); +} + + inline bool +kvmap_kv64_del(const struct kvmap_api * const api, void * const ref, + const u64 key) +{ + struct kv64 keybuf; + struct kref kref; + keybuf.key_be = __builtin_bswap64(key); 
+ kref_ref_hash32(&kref, keybuf.kv.kv, sizeof(keybuf.key_be)); + return api->del(ref, &kref); +} + + inline void +kvmap_kv64_iter_seek(const struct kvmap_api * const api, void * const iter, + const u64 key) +{ + struct kv64 keybuf; + struct kref kref; + keybuf.key_be = __builtin_bswap64(key); + kref_ref_hash32(&kref, keybuf.kv.kv, sizeof(keybuf.key_be)); + api->iter_seek(iter, &kref); +} + + inline bool +kvmap_kv64_iter_peek(const struct kvmap_api * const api, void * const iter, + u64 * const key_out, u64 * const value_out) +{ + struct kv64 kvout; + struct kv * const ret = api->iter_peek(iter, &kvout.kv); + if (key_out) + *key_out = __builtin_bswap64(kvout.key_be); // to LE + if (value_out) + *value_out = kvout.value; + return ret != NULL; +} +// }}} kv64 + +// }}} kvmap + +// vim:fdm=marker diff --git a/MassTrie-beta/wormhole/kv.h b/MassTrie-beta/wormhole/kv.h new file mode 100644 index 00000000..1e251e58 --- /dev/null +++ b/MassTrie-beta/wormhole/kv.h @@ -0,0 +1,554 @@ +/* + * Copyright (c) 2016--2021 Wu, Xingbo + * + * All rights reserved. No warranty, explicit or implicit, provided. 
+ */ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +// crc32c {{{ +#define KV_CRC32C_SEED ((0xDEADBEEFu)) + + extern u32 +kv_crc32c(const void * const ptr, u32 len); + + extern u64 +kv_crc32c_extend(const u32 crc32c); +// }}} crc32c + +// kv {{{ + +// struct {{{ +/* + * Some internal union names can be ignored: + * struct kv { + * u32 klen; + * u32 vlen; + * u64 hash; + * u8 kv[]; + * }; + */ +struct kv { + union { // the first u64 + u64 kvlen; + struct { + u32 klen; + union { u32 vlen; u32 refcnt; }; + }; + }; + union { + u64 hash; // hashvalue of the key + u64 priv; // can hide a value here if hash is not used + void * privptr; + struct { u32 hashlo; u32 hashhi; }; // little endian + struct { u32 privlo; u32 privhi; }; + }; + u8 kv[0]; // len(kv) == klen + vlen +} __attribute__((packed)); + +struct kref { + u32 len; + union { u32 hash32; u32 priv; }; + const u8 * ptr; +} __attribute__((packed)); + +struct kvref { + const u8 * kptr; // read-only + const u8 * vptr; // read-only + struct kv hdr; // hdr.kv[] is invalid +}; +// }}} struct + +// kv {{{ +typedef int (*kv_kv_cmp_func)(const struct kv *, const struct kv *); + + extern size_t +kv_size(const struct kv * const kv); + + extern size_t +kv_size_align(const struct kv * const kv, const u64 align); + + extern size_t +key_size(const struct kv * const key); + + extern size_t +key_size_align(const struct kv * const key, const u64 align); + + extern void +kv_update_hash(struct kv * const kv); + + extern void +kv_refill_value(struct kv * const kv, const void * const value, const u32 vlen); + + extern void +kv_refill(struct kv * const kv, const void * const key, const u32 klen, + const void * const value, const u32 vlen); + + extern void +kv_refill_str(struct kv * const kv, const char * const key, + const void * const value, const u32 vlen); + + extern void +kv_refill_str_str(struct kv * const kv, const char * const key, + const char * const value); + +// the u64 key is filled in big-endian byte order + 
extern void +kv_refill_u64(struct kv * const kv, const u64 key, const void * const value, const u32 vlen); + + extern void +kv_refill_hex32(struct kv * const kv, const u32 hex, const void * const value, const u32 vlen); + + extern void +kv_refill_hex64(struct kv * const kv, const u64 hex, const void * const value, const u32 vlen); + + extern void +kv_refill_hex64_klen(struct kv * const kv, const u64 hex, const u32 klen, + const void * const value, const u32 vlen); + + extern void +kv_refill_kref(struct kv * const kv, const struct kref * const kref); + + extern void +kv_refill_kref_v(struct kv * const kv, const struct kref * const kref, + const void * const value, const u32 vlen); + + extern struct kref +kv_kref(const struct kv * const key); + + extern struct kv * +kv_create(const void * const key, const u32 klen, const void * const value, const u32 vlen); + + extern struct kv * +kv_create_str(const char * const key, const void * const value, const u32 vlen); + + extern struct kv * +kv_create_str_str(const char * const key, const char * const value); + + extern struct kv * +kv_create_kref(const struct kref * const kref, const void * const value, const u32 vlen); + +// a static kv with klen == 0 + extern const struct kv * +kv_null(void); + + extern struct kv * +kv_dup(const struct kv * const kv); + + extern struct kv * +kv_dup_key(const struct kv * const kv); + + extern struct kv * +kv_dup2(const struct kv * const from, struct kv * const to); + + extern struct kv * +kv_dup2_key(const struct kv * const from, struct kv * const to); + + extern struct kv * +kv_dup2_key_prefix(const struct kv * const from, struct kv * const to, const u32 plen); + + extern bool +kv_match(const struct kv * const key1, const struct kv * const key2); + + extern bool +kv_match_hash(const struct kv * const key1, const struct kv * const key2); + + extern bool +kv_match_full(const struct kv * const kv1, const struct kv * const kv2); + + extern bool +kv_match_kv128(const struct kv * const sk, 
const u8 * const kv128); + + extern int +kv_compare(const struct kv * const kv1, const struct kv * const kv2); + + extern int +kv_k128_compare(const struct kv * const sk, const u8 * const k128); + + extern int +kv_kv128_compare(const struct kv * const sk, const u8 * const kv128); + + extern void +kv_qsort(struct kv ** const kvs, const size_t nr); + + extern u32 +kv_key_lcp(const struct kv * const key1, const struct kv * const key2); + + extern u32 +kv_key_lcp_skip(const struct kv * const key1, const struct kv * const key2, const u32 lcp0); + + extern void +kv_psort(struct kv ** const kvs, const u64 nr, const u64 tlo, const u64 thi); + + extern void * +kv_vptr(struct kv * const kv); + + extern void * +kv_kptr(struct kv * const kv); + + extern const void * +kv_vptr_c(const struct kv * const kv); + + extern const void * +kv_kptr_c(const struct kv * const kv); + + extern void +kv_print(const struct kv * const kv, const char * const cmd, FILE * const out); +// }}} kv + +// mm {{{ +typedef struct kv * (* kvmap_mm_in_func)(struct kv * kv, void * priv); +typedef struct kv * (* kvmap_mm_out_func)(struct kv * kv, struct kv * out); +typedef void (* kvmap_mm_free_func)(struct kv * kv, void * priv); + +// manage internal kv data of kvmap +struct kvmap_mm { + // to create a private copy of "kv" + // see put() functions + kvmap_mm_in_func in; + // to duplicate a private copy of "kv" to "out" + // see get() and iter_peek() functions + kvmap_mm_out_func out; + // to free a kv + // see del() and put() functions + kvmap_mm_free_func free; + void * priv; +}; + + extern struct kv * +kvmap_mm_in_noop(struct kv * const kv, void * const priv); + + extern struct kv * +kvmap_mm_out_noop(struct kv * const kv, struct kv * const out); + + extern void +kvmap_mm_free_noop(struct kv * const kv, void * const priv); + + extern struct kv * +kvmap_mm_in_dup(struct kv * const kv, void * const priv); + + extern struct kv * +kvmap_mm_out_dup(struct kv * const kv, struct kv * const out); + + extern void 
+kvmap_mm_free_free(struct kv * const kv, void * const priv); + +// the default mm +extern const struct kvmap_mm kvmap_mm_dup; // in:Dup, out:Dup, free:Free +extern const struct kvmap_mm kvmap_mm_ndf; // in:Noop, out:Dup, free:Free +// }}} mm + +// ref {{{ +typedef int (*kref_kv_cmp_func)(const struct kref *, const struct kv *); + +// ptr and len only + extern void +kref_ref_raw(struct kref * const kref, const u8 * const ptr, const u32 len); + +// this calculates hash32 + extern void +kref_ref_hash32(struct kref * const kref, const u8 * const ptr, const u32 len); + + extern void +kref_update_hash32(struct kref * const kref); + + extern void +kref_ref_kv(struct kref * const kref, const struct kv * const kv); + + extern void +kref_ref_kv_hash32(struct kref * const kref, const struct kv * const kv); + + extern bool +kref_match(const struct kref * const k1, const struct kref * const k2); + + extern bool +kref_kv_match(const struct kref * const kref, const struct kv * const k); + + extern int +kref_compare(const struct kref * const kref1, const struct kref * const kref2); + + extern int +kref_kv_compare(const struct kref * const kref, const struct kv * const k); + + extern u32 +kref_lcp(const struct kref * const k1, const struct kref * const k2); + + extern u32 +kref_kv_lcp(const struct kref * const kref, const struct kv * const kv); + + extern int +kref_k128_compare(const struct kref * const sk, const u8 * const k128); + + extern int +kref_kv128_compare(const struct kref * const sk, const u8 * const kv128); + + extern const struct kref * +kref_null(void); + + extern void +kvref_ref_kv(struct kvref * const ref, struct kv * const kv); + + extern struct kv * +kvref_dup2_kv(struct kvref * const ref, struct kv * const to); + + extern struct kv * +kvref_dup2_key(struct kvref * const ref, struct kv * const to); + + extern int +kvref_kv_compare(const struct kvref * const ref, const struct kv * const kv); +// }}} ref + +// kv128 {{{ + extern size_t +kv128_estimate_kv(const 
struct kv * const kv); + + extern u8 * +kv128_encode_kv(const struct kv * const kv, u8 * const out, size_t * const pesize); + + extern struct kv * +kv128_decode_kv(const u8 * const ptr, struct kv * const out, size_t * const pesize); + + extern size_t +kv128_size(const u8 * const ptr); +// }}} kv128 + +// }}} kv + +// kvmap {{{ + +// kvmap_api {{{ +typedef void (* kv_inp_func)(struct kv * const curr, void * const priv); + +// the merge function should: +// 1: return NULL if the origin kv is not changed at all +// 2: return kv0 if updates has been applied in-place +// 3: return a different kv if the original kv must be replaced +// In an in-memory kvmap, 2==1 and no further action is needed +// In a persistent kv store with a memtable, 2 will need an insertion if kv0 is not from the memtable +typedef struct kv * (* kv_merge_func)(struct kv * const kv0, void * const priv); + +struct kvmap_api { + // feature bits + bool hashkey; // true: caller needs to provide correct hash in kv/kref + bool ordered; // true: has iter_seek + bool threadsafe; // true: support thread_safe access + bool readonly; // true: no put() and del() + bool irefsafe; // true: iter's kref/kvref can be safely accessed after iter_seek/iter_skip/iter_park + bool unique; // provide unique keys, especially for iterators + bool refpark; // ref has park() and resume() + bool async; // XXX for testing KVell + + // put (aka put/upsert): return true on success; false on error + // mm.in() controls how things move into the kvmap; the default mm make a copy with malloc() + // mm.free() controls how old kv get disposed when replaced + bool (* put) (void * const ref, struct kv * const kv); + // get: search and return a kv if found, or NULL if not + // with the default mm: malloc() if out == NULL; otherwise, use out as buffer + // with custom kvmap_mm: mm.out() controls buffer; use with caution + // caller should use the returned ptr even if out is provided + struct kv * (* get) (void * const ref, const struct 
kref * const key, struct kv * const out); + // probe: return true on found, false on not found + bool (* probe) (void * const ref, const struct kref * const key); + // del: return true on something deleted, false on not found + // mm.free() controls how old kv get disposed when replaced + bool (* del) (void * const ref, const struct kref * const key); + // inp: inplace operation if key exists; otherwise return false; uf() is always executed even with NULL key + // inpr/inpw acquires r/w locks respectively. + // Note that in inpw() you can only change the value. + bool (* inpr) (void * const ref, const struct kref * const key, kv_inp_func uf, void * const priv); + bool (* inpw) (void * const ref, const struct kref * const key, kv_inp_func uf, void * const priv); + // merge: put+callback on old/new keys; another name: read-modify-write + // return true if successfull; return false on error + bool (* merge) (void * const ref, const struct kref * const key, kv_merge_func uf, void * const priv); + // delete-range: delete all keys from start (inclusive) to end (exclusive) + u64 (* delr) (void * const ref, const struct kref * const start, const struct kref * const end); + // make everything persist; for persistent maps only + void (* sync) (void * const ref); + + // general guidelines for thread-safe iters: + // - it is assumed that the key under the cursor is locked/freezed/immutable + // - once created one must call iter_seek to make it valid + // - the ownership of ref is given to the iter so ref should not be used until iter_destroy + // - creating and use more than one iter based on a ref can cause deadlocks + void * (* iter_create) (void * const ref); + // move the cursor to the first key >= search-key; + void (* iter_seek) (void * const iter, const struct kref * const key); + // check if the cursor points to a valid key + bool (* iter_valid) (void * const iter); + // return the current key; copy to out if (out != NULL) + // mm.out() controls copy-out + struct kv * 
(* iter_peek) (void * const iter, struct kv * const out); + // similar to peek but does not copy; return false if iter is invalid + bool (* iter_kref) (void * const iter, struct kref * const kref); + // similar to iter_kref but also provide the value + bool (* iter_kvref) (void * const iter, struct kvref * const kvref); + // iter_retain makes kref or kvref of the current iter remain valid until released + // the returned opaque pointer should be provided when releasing the hold + u64 (* iter_retain) (void * const iter); + void (* iter_release) (void * const iter, const u64 opaque); + // skip one element + void (* iter_skip1) (void * const iter); + // skip nr elements + void (* iter_skip) (void * const iter, const u32 nr); + // iter_next == iter_peek + iter_skip1 + struct kv * (* iter_next) (void * const iter, struct kv * const out); + // perform inplace opeation if the current key is valid; return false if no current key + // the uf() is always executed even with NULL key + bool (* iter_inp) (void * const iter, kv_inp_func uf, void * const priv); + // invalidate the iter to release any resources or locks + // afterward, must call seek() again before accessing data + void (* iter_park) (void * const iter); + // destroy iter + void (* iter_destroy) (void * const iter); + + // misc: + // create refs for maps if required; always use use kvmap_ref() and kvmap_unref() + // if there are ref/unref functions, ref-ptr should be used as map for all kv operations + void * (* ref) (void * map); + // return the original map + void * (* unref) (void * ref); + // pause access without unref; must call resume later before access index again + void (* park) (void * ref); + // resume access of ref; must be paired with a park() + void (* resume) (void * ref); + + // UNSAFE functions: + // empty the map + void (* clean) (void * map); + // erase everything + void (* destroy) (void * map); + // for debugging + void (* fprint) (void * map, FILE * const out); +}; + +// registry +struct 
kvmap_api_reg { + int nargs; // number of arguments after name + const char * name; + const char * args_msg; // see ...helper_message + // multiple apis may share one create function + // arguments: name (e.g., "rdb"), mm (usually NULL), the remaining args + void * (*create)(const char *, const struct kvmap_mm *, char **); + const struct kvmap_api * api; +}; + +// call this function to register a kvmap_api + extern void +kvmap_api_register(const int nargs, const char * const name, const char * const args_msg, + void * (*create)(const char *, const struct kvmap_mm *, char **), const struct kvmap_api * const api); + + extern void +kvmap_api_helper_message(void); + + extern int +kvmap_api_helper(int argc, char ** const argv, const struct kvmap_mm * const mm, + const struct kvmap_api ** const api_out, void ** const map_out); +// }}} kvmap_api + +// helpers {{{ + extern void +kvmap_inp_steal_kv(struct kv * const kv, void * const priv); + + extern void * +kvmap_ref(const struct kvmap_api * const api, void * const map); + + extern void * +kvmap_unref(const struct kvmap_api * const api, void * const ref); + + extern struct kv * +kvmap_kv_get(const struct kvmap_api * const api, void * const ref, + const struct kv * const key, struct kv * const out); + + extern bool +kvmap_kv_probe(const struct kvmap_api * const api, void * const ref, + const struct kv * const key); + + extern bool +kvmap_kv_put(const struct kvmap_api * const api, void * const ref, + struct kv * const kv); + + extern bool +kvmap_kv_del(const struct kvmap_api * const api, void * const ref, + const struct kv * const key); + + extern bool +kvmap_kv_inpr(const struct kvmap_api * const api, void * const ref, + const struct kv * const key, kv_inp_func uf, void * const priv); + + extern bool +kvmap_kv_inpw(const struct kvmap_api * const api, void * const ref, + const struct kv * const key, kv_inp_func uf, void * const priv); + + extern bool +kvmap_kv_merge(const struct kvmap_api * const api, void * const ref, + 
const struct kv * const key, kv_merge_func uf, void * const priv); + + extern u64 +kvmap_kv_delr(const struct kvmap_api * const api, void * const ref, + const struct kv * const start, const struct kv * const end); + + extern void +kvmap_kv_iter_seek(const struct kvmap_api * const api, void * const iter, + const struct kv * const key); + + extern struct kv * +kvmap_raw_get(const struct kvmap_api * const api, void * const ref, + const u32 len, const u8 * const ptr, struct kv * const out); + + extern bool +kvmap_raw_probe(const struct kvmap_api * const api, void * const ref, + const u32 len, const u8 * const ptr); + + extern bool +kvmap_raw_del(const struct kvmap_api * const api, void * const ref, + const u32 len, const u8 * const ptr); + + extern bool +kvmap_raw_inpr(const struct kvmap_api * const api, void * const ref, + const u32 len, const u8 * const ptr, kv_inp_func uf, void * const priv); + + extern bool +kvmap_raw_inpw(const struct kvmap_api * const api, void * const ref, + const u32 len, const u8 * const ptr, kv_inp_func uf, void * const priv); + + extern void +kvmap_raw_iter_seek(const struct kvmap_api * const api, void * const iter, + const u32 len, const u8 * const ptr); + + extern u64 +kvmap_dump_keys(const struct kvmap_api * const api, void * const map, const int fd); + + extern bool +kvmap_kv64_get(const struct kvmap_api * const api, void * const ref, + const u64 key, u64 * const out); + + extern bool +kvmap_kv64_probe(const struct kvmap_api * const api, void * const ref, + const u64 key); + + extern bool +kvmap_kv64_put(const struct kvmap_api * const api, void * const ref, + const u64 key, const u64 value); + + extern bool +kvmap_kv64_del(const struct kvmap_api * const api, void * const ref, + const u64 key); + + extern void +kvmap_kv64_iter_seek(const struct kvmap_api * const api, void * const iter, + const u64 key); + + extern bool +kvmap_kv64_iter_peek(const struct kvmap_api * const api, void * const iter, + u64 * const key_out, u64 * const 
value_out); +// }}} helpers + +// }}} kvmap + +#ifdef __cplusplus +} +#endif +// vim:fdm=marker diff --git a/MassTrie-beta/wormhole/lib.c b/MassTrie-beta/wormhole/lib.c new file mode 100644 index 00000000..06d45f6d --- /dev/null +++ b/MassTrie-beta/wormhole/lib.c @@ -0,0 +1,3026 @@ +/* + * Copyright (c) 2016--2021 Wu, Xingbo + * + * All rights reserved. No warranty, explicit or implicit, provided. + */ +#define _GNU_SOURCE + +// headers {{{ +#include "lib.h" +#include "ctypes.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include // va_start + +#if defined(__linux__) +#include +#include // malloc_usable_size +#elif defined(__APPLE__) && defined(__MACH__) +#include +#include +#elif defined(__FreeBSD__) +#include +#include +#endif // OS + +#if defined(__FreeBSD__) +#include +#endif +// }}} headers + +// math {{{ + inline u64 +mhash64(const u64 v) +{ + return v * 11400714819323198485lu; +} + + inline u32 +mhash32(const u32 v) +{ + return v * 2654435761u; +} + +// From Daniel Lemire's blog (2013, lemire.me) + u64 +gcd64(u64 a, u64 b) +{ + if (a == 0) + return b; + if (b == 0) + return a; + + const u32 shift = (u32)__builtin_ctzl(a | b); + a >>= __builtin_ctzl(a); + do { + b >>= __builtin_ctzl(b); + if (a > b) { + const u64 t = b; + b = a; + a = t; + } + b = b - a; + } while (b); + return a << shift; +} +// }}} math + +// random {{{ +// Lehmer's generator is 2x faster than xorshift +/** + * D. H. Lehmer, Mathematical methods in large-scale computing units. + * Proceedings of a Second Symposium on Large Scale Digital Calculating + * Machinery; + * Annals of the Computation Laboratory, Harvard Univ. 26 (1951), pp. 141-146. + * + * P L'Ecuyer, Tables of linear congruential generators of different sizes and + * good lattice structure. Mathematics of Computation of the American + * Mathematical + * Society 68.225 (1999): 249-260. 
+ */ +struct lehmer_u64 { + union { + u128 v128; + u64 v64[2]; + }; +}; + +static __thread struct lehmer_u64 rseed_u128 = {.v64 = {4294967291, 1549556881}}; + + static inline u64 +lehmer_u64_next(struct lehmer_u64 * const s) +{ + const u64 r = s->v64[1]; + s->v128 *= 0xda942042e4dd58b5lu; + return r; +} + + static inline void +lehmer_u64_seed(struct lehmer_u64 * const s, const u64 seed) +{ + s->v128 = (((u128)(~seed)) << 64) | (seed | 1); + (void)lehmer_u64_next(s); +} + + inline u64 +random_u64(void) +{ + return lehmer_u64_next(&rseed_u128); +} + + inline void +srandom_u64(const u64 seed) +{ + lehmer_u64_seed(&rseed_u128, seed); +} + + inline double +random_double(void) +{ + // random between [0.0 - 1.0] + const u64 r = random_u64(); + return ((double)r) * (1.0 / ((double)(~0lu))); +} +// }}} random + +// timing {{{ + inline u64 +time_nsec(void) +{ + struct timespec ts; + // MONO_RAW is 5x to 10x slower than MONO + clock_gettime(CLOCK_MONOTONIC, &ts); + return ((u64)ts.tv_sec) * 1000000000lu + ((u64)ts.tv_nsec); +} + + inline double +time_sec(void) +{ + const u64 nsec = time_nsec(); + return ((double)nsec) * 1.0e-9; +} + + inline u64 +time_diff_nsec(const u64 last) +{ + return time_nsec() - last; +} + + inline double +time_diff_sec(const double last) +{ + return time_sec() - last; +} + +// need char str[64] + void +time_stamp(char * str, const size_t size) +{ + time_t now; + struct tm nowtm; + time(&now); + localtime_r(&now, &nowtm); + strftime(str, size, "%F %T %z", &nowtm); +} + + void +time_stamp2(char * str, const size_t size) +{ + time_t now; + struct tm nowtm; + time(&now); + localtime_r(&now, &nowtm); + strftime(str, size, "%F-%H-%M-%S%z", &nowtm); +} +// }}} timing + +// cpucache {{{ + inline void +cpu_pause(void) +{ +#if defined(__x86_64__) + _mm_pause(); +#elif defined(__aarch64__) + // nop +#endif +} + + inline void +cpu_mfence(void) +{ + atomic_thread_fence(MO_SEQ_CST); +} + +// compiler fence + inline void +cpu_cfence(void) +{ + 
atomic_thread_fence(MO_ACQ_REL); +} + + inline void +cpu_prefetch0(const void * const ptr) +{ + __builtin_prefetch(ptr, 0, 0); +} + + inline void +cpu_prefetch1(const void * const ptr) +{ + __builtin_prefetch(ptr, 0, 1); +} + + inline void +cpu_prefetch2(const void * const ptr) +{ + __builtin_prefetch(ptr, 0, 2); +} + + inline void +cpu_prefetch3(const void * const ptr) +{ + __builtin_prefetch(ptr, 0, 3); +} + + inline void +cpu_prefetchw(const void * const ptr) +{ + __builtin_prefetch(ptr, 1, 0); +} +// }}} cpucache + +// crc32c {{{ + inline u32 +crc32c_u8(const u32 crc, const u8 v) +{ +#if defined(__x86_64__) + return _mm_crc32_u8(crc, v); +#elif defined(__aarch64__) + return __crc32cb(crc, v); +#endif +} + + inline u32 +crc32c_u16(const u32 crc, const u16 v) +{ +#if defined(__x86_64__) + return _mm_crc32_u16(crc, v); +#elif defined(__aarch64__) + return __crc32ch(crc, v); +#endif +} + + inline u32 +crc32c_u32(const u32 crc, const u32 v) +{ +#if defined(__x86_64__) + return _mm_crc32_u32(crc, v); +#elif defined(__aarch64__) + return __crc32cw(crc, v); +#endif +} + + inline u32 +crc32c_u64(const u32 crc, const u64 v) +{ +#if defined(__x86_64__) + return (u32)_mm_crc32_u64(crc, v); +#elif defined(__aarch64__) + return (u32)__crc32cd(crc, v); +#endif +} + + inline u32 +crc32c_inc_123(const u8 * buf, u32 nr, u32 crc) +{ + if (nr == 1) + return crc32c_u8(crc, buf[0]); + + crc = crc32c_u16(crc, *(u16 *)buf); + return (nr == 2) ? crc : crc32c_u8(crc, buf[2]); +} + + inline u32 +crc32c_inc_x4(const u8 * buf, u32 nr, u32 crc) +{ + //debug_assert((nr & 3) == 0); + const u32 nr8 = nr >> 3; +#pragma nounroll + for (u32 i = 0; i < nr8; i++) + crc = crc32c_u64(crc, ((u64*)buf)[i]); + + if (nr & 4u) + crc = crc32c_u32(crc, ((u32*)buf)[nr8<<1]); + return crc; +} + + u32 +crc32c_inc(const u8 * buf, u32 nr, u32 crc) +{ + crc = crc32c_inc_x4(buf, nr, crc); + const u32 nr123 = nr & 3u; + return nr123 ? 
crc32c_inc_123(buf + nr - nr123, nr123, crc) : crc; +} +// }}} crc32c + +// debug {{{ + void +debug_break(void) +{ + usleep(100); +} + +static u64 * debug_watch_u64 = NULL; + + static void +watch_u64_handler(const int sig) +{ + (void)sig; + const u64 v = debug_watch_u64 ? (*debug_watch_u64) : 0; + fprintf(stderr, "[USR1] %lu (0x%lx)\n", v, v); +} + + void +watch_u64_usr1(u64 * const ptr) +{ + debug_watch_u64 = ptr; + struct sigaction sa = {}; + sa.sa_handler = watch_u64_handler; + sigemptyset(&(sa.sa_mask)); + sa.sa_flags = SA_RESTART; + if (sigaction(SIGUSR1, &sa, NULL) == -1) { + fprintf(stderr, "Failed to set signal handler for SIGUSR1\n"); + } else { + fprintf(stderr, "to watch> kill -s SIGUSR1 %d\n", getpid()); + } +} + +static void * debug_bt_state = NULL; +#if defined(BACKTRACE) && defined(__linux__) +// TODO: get exec path on MacOS and FreeBSD + +#include +static char debug_filepath[1024] = {}; + + static void +debug_bt_error_cb(void * const data, const char * const msg, const int errnum) +{ + (void)data; + if (msg) + dprintf(2, "libbacktrace: %s %s\n", msg, strerror(errnum)); +} + + static int +debug_bt_print_cb(void * const data, const uintptr_t pc, + const char * const file, const int lineno, const char * const func) +{ + u32 * const plevel = (typeof(plevel))data; + if (file || func || lineno) { + dprintf(2, "[%u]0x%012lx " TERMCLR(35) "%s" TERMCLR(31) ":" TERMCLR(34) "%d" TERMCLR(0)" %s\n", + *plevel, pc, file ? file : "???", lineno, func ? 
func : "???"); + } else if (pc) { + dprintf(2, "[%u]0x%012lx ??\n", *plevel, pc); + } + (*plevel)++; + return 0; +} + +__attribute__((constructor)) + static void +debug_backtrace_init(void) +{ + const ssize_t len = readlink("/proc/self/exe", debug_filepath, 1023); + // disable backtrace + if (len < 0 || len >= 1023) + return; + + debug_filepath[len] = '\0'; + debug_bt_state = backtrace_create_state(debug_filepath, 1, debug_bt_error_cb, NULL); +} +#endif // BACKTRACE + + static void +debug_wait_gdb(void * const bt_state) +{ + if (bt_state) { +#if defined(BACKTRACE) + dprintf(2, "Backtrace :\n"); + u32 level = 0; + backtrace_full(debug_bt_state, 1, debug_bt_print_cb, debug_bt_error_cb, &level); +#endif // BACKTRACE + } else { // fallback to execinfo if no backtrace or initialization failed + void *array[64]; + const int size = backtrace(array, 64); + dprintf(2, "Backtrace (%d):\n", size - 1); + backtrace_symbols_fd(array + 1, size - 1, 2); + } + + abool v = true; + char timestamp[32]; + time_stamp(timestamp, 32); + char threadname[32] = {}; + thread_get_name(pthread_self(), threadname, 32); + strcat(threadname, "(!!)"); + thread_set_name(pthread_self(), threadname); + char hostname[32]; + gethostname(hostname, 32); + + const char * const pattern = "[Waiting GDB] %1$s %2$s @ %3$s\n" + " Attach me: " TERMCLR(31) "sudo -Hi gdb -p %4$d" TERMCLR(0) "\n"; + char buf[256]; + sprintf(buf, pattern, timestamp, threadname, hostname, getpid()); + write(2, buf, strlen(buf)); + + // to continue: gdb> set var v = 0 + // to kill from shell: $ kill %pid; kill -CONT %pid + + // uncomment this line to surrender the shell on error + // kill(getpid(), SIGSTOP); // stop burning cpu, once + + static au32 nr_waiting = 0; + const u32 seq = atomic_fetch_add_explicit(&nr_waiting, 1, MO_RELAXED); + if (seq == 0) { + sprintf(buf, "/run/user/%u/.debug_wait_gdb_pid", getuid()); + const int pidfd = open(buf, O_CREAT|O_TRUNC|O_WRONLY, 00644); + if (pidfd >= 0) { + dprintf(pidfd, "%u", getpid()); + 
close(pidfd); + } + } + +#pragma nounroll + while (atomic_load_explicit(&v, MO_CONSUME)) + sleep(1); +} + +#ifndef NDEBUG + void +debug_assert(const bool v) +{ + if (!v) + debug_wait_gdb(debug_bt_state); +} +#endif + +__attribute__((noreturn)) + void +debug_die(void) +{ + debug_wait_gdb(debug_bt_state); + exit(0); +} + +__attribute__((noreturn)) + void +debug_die_perror(void) +{ + perror(NULL); + debug_die(); +} + +#if !defined(NOSIGNAL) +// signal handler for wait_gdb on fatal errors + static void +wait_gdb_handler(const int sig, siginfo_t * const info, void * const context) +{ + (void)info; + (void)context; + char buf[64] = "[SIGNAL] "; + strcat(buf, strsignal(sig)); + write(2, buf, strlen(buf)); + debug_wait_gdb(NULL); +} + +// setup hooks for catching fatal errors +__attribute__((constructor)) + static void +debug_init(void) +{ + void * stack = pages_alloc_4kb(16); + //fprintf(stderr, "altstack %p\n", stack); + stack_t ss = {.ss_sp = stack, .ss_flags = 0, .ss_size = PGSZ*16}; + if (sigaltstack(&ss, NULL)) + fprintf(stderr, "sigaltstack failed\n"); + + struct sigaction sa = {.sa_sigaction = wait_gdb_handler, .sa_flags = SA_SIGINFO | SA_ONSTACK}; + sigemptyset(&(sa.sa_mask)); + const int fatals[] = {SIGSEGV, SIGFPE, SIGILL, SIGBUS, 0}; + for (int i = 0; fatals[i]; i++) { + if (sigaction(fatals[i], &sa, NULL) == -1) { + fprintf(stderr, "Failed to set signal handler for %s\n", strsignal(fatals[i])); + fflush(stderr); + } + } +} + +__attribute__((destructor)) + static void +debug_exit(void) +{ + // to get rid of valgrind warnings + stack_t ss = {.ss_flags = SS_DISABLE}; + stack_t oss = {}; + sigaltstack(&ss, &oss); + if (oss.ss_sp) + pages_unmap(oss.ss_sp, PGSZ * 16); +} +#endif // !defined(NOSIGNAL) + + void +debug_dump_maps(FILE * const out) +{ + FILE * const in = fopen("/proc/self/smaps", "r"); + char * line0 = yalloc(1024); + size_t size0 = 1024; + while (!feof(in)) { + const ssize_t r1 = getline(&line0, &size0, in); + if (r1 < 0) break; + fprintf(out, "%s", 
line0); + } + free(line0); + fflush(out); + fclose(in); +} + +static pid_t perf_pid = 0; + +#if defined(__linux__) +__attribute__((constructor)) + static void +debug_perf_init(void) +{ + const pid_t ppid = getppid(); + char tmp[256] = {}; + sprintf(tmp, "/proc/%d/cmdline", ppid); + FILE * const fc = fopen(tmp, "r"); + const size_t nr = fread(tmp, 1, sizeof(tmp) - 1, fc); + fclose(fc); + // look for "perf record" + if (nr < 12) + return; + tmp[nr] = '\0'; + for (u64 i = 0; i < nr; i++) + if (tmp[i] == 0) + tmp[i] = ' '; + + char * const perf = strstr(tmp, "perf record"); + if (perf) { + fprintf(stderr, "%s: perf detected\n", __func__); + perf_pid = ppid; + } +} +#endif // __linux__ + + bool +debug_perf_switch(void) +{ + if (perf_pid > 0) { + kill(perf_pid, SIGUSR2); + return true; + } else { + return false; + } +} +// }}} debug + +// mm {{{ +#ifdef ALLOCFAIL + bool +alloc_fail(void) +{ +#define ALLOCFAIL_RECP ((64lu)) +#define ALLOCFAIL_MAGIC ((ALLOCFAIL_RECP / 3lu)) + return ((random_u64() % ALLOCFAIL_RECP) == ALLOCFAIL_MAGIC); +} + +#ifdef MALLOCFAIL +extern void * __libc_malloc(size_t size); + void * +malloc(size_t size) +{ + if (alloc_fail()) + return NULL; + return __libc_malloc(size); +} + +extern void * __libc_calloc(size_t nmemb, size_t size); + void * +calloc(size_t nmemb, size_t size) +{ + if (alloc_fail()) + return NULL; + return __libc_calloc(nmemb, size); +} + +extern void *__libc_realloc(void *ptr, size_t size); + + void * +realloc(void *ptr, size_t size) +{ + if (alloc_fail()) + return NULL; + return __libc_realloc(ptr, size); +} +#endif // MALLOC_FAIL +#endif // ALLOC_FAIL + + void * +xalloc(const size_t align, const size_t size) +{ +#ifdef ALLOCFAIL + if (alloc_fail()) + return NULL; +#endif + void * p; + return (posix_memalign(&p, align, size) == 0) ? 
p : NULL; +} + +// alloc cache-line aligned address + void * +yalloc(const size_t size) +{ +#ifdef ALLOCFAIL + if (alloc_fail()) + return NULL; +#endif + void * p; + return (posix_memalign(&p, 64, size) == 0) ? p : NULL; +} + + void ** +malloc_2d(const size_t nr, const size_t size) +{ + const size_t size1 = nr * sizeof(void *); + const size_t size2 = nr * size; + void ** const mem = malloc(size1 + size2); + u8 * const mem2 = ((u8 *)mem) + size1; + for (size_t i = 0; i < nr; i++) + mem[i] = mem2 + (i * size); + return mem; +} + + inline void ** +calloc_2d(const size_t nr, const size_t size) +{ + void ** const ret = malloc_2d(nr, size); + memset(ret[0], 0, nr * size); + return ret; +} + + inline void +pages_unmap(void * const ptr, const size_t size) +{ +#ifndef HEAPCHECKING + munmap(ptr, size); +#else + (void)size; + free(ptr); +#endif +} + + void +pages_lock(void * const ptr, const size_t size) +{ + static bool use_mlock = true; + if (use_mlock) { + const int ret = mlock(ptr, size); + if (ret != 0) { + use_mlock = false; + fprintf(stderr, "%s: mlock disabled\n", __func__); + } + } +} + +#ifndef HEAPCHECKING + static void * +pages_do_alloc(const size_t size, const int flags) +{ + // vi /etc/security/limits.conf + // * - memlock unlimited + void * const p = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0); + if (p == MAP_FAILED) + return NULL; + + pages_lock(p, size); + return p; +} + +#if defined(__linux__) && defined(MAP_HUGETLB) + +#if defined(MAP_HUGE_SHIFT) +#define PAGES_FLAGS_1G ((MAP_HUGETLB | (30 << MAP_HUGE_SHIFT))) +#define PAGES_FLAGS_2M ((MAP_HUGETLB | (21 << MAP_HUGE_SHIFT))) +#else // MAP_HUGE_SHIFT +#define PAGES_FLAGS_1G ((MAP_HUGETLB)) +#define PAGES_FLAGS_2M ((MAP_HUGETLB)) +#endif // MAP_HUGE_SHIFT + +#else +#define PAGES_FLAGS_1G ((0)) +#define PAGES_FLAGS_2M ((0)) +#endif // __linux__ + +#endif // HEAPCHECKING + + inline void * +pages_alloc_1gb(const size_t nr_1gb) +{ + const u64 sz = nr_1gb << 30; +#ifndef HEAPCHECKING + return 
pages_do_alloc(sz, MAP_PRIVATE | MAP_ANONYMOUS | PAGES_FLAGS_1G); +#else + void * const p = xalloc(1lu << 21, sz); // Warning: valgrind fails with 30 + if (p) + memset(p, 0, sz); + return p; +#endif +} + + inline void * +pages_alloc_2mb(const size_t nr_2mb) +{ + const u64 sz = nr_2mb << 21; +#ifndef HEAPCHECKING + return pages_do_alloc(sz, MAP_PRIVATE | MAP_ANONYMOUS | PAGES_FLAGS_2M); +#else + void * const p = xalloc(1lu << 21, sz); + if (p) + memset(p, 0, sz); + return p; +#endif +} + + inline void * +pages_alloc_4kb(const size_t nr_4kb) +{ + const size_t sz = nr_4kb << 12; +#ifndef HEAPCHECKING + return pages_do_alloc(sz, MAP_PRIVATE | MAP_ANONYMOUS); +#else + void * const p = xalloc(1lu << 12, sz); + if (p) + memset(p, 0, sz); + return p; +#endif +} + + void * +pages_alloc_best(const size_t size, const bool try_1gb, u64 * const size_out) +{ +#ifdef ALLOCFAIL + if (alloc_fail()) + return NULL; +#endif + // 1gb huge page: at least 0.25GB + if (try_1gb) { + if (size >= (1lu << 28)) { + const size_t nr_1gb = bits_round_up(size, 30) >> 30; + void * const p1 = pages_alloc_1gb(nr_1gb); + if (p1) { + *size_out = nr_1gb << 30; + return p1; + } + } + } + + // 2mb huge page: at least 0.5MB + if (size >= (1lu << 19)) { + const size_t nr_2mb = bits_round_up(size, 21) >> 21; + void * const p2 = pages_alloc_2mb(nr_2mb); + if (p2) { + *size_out = nr_2mb << 21; + return p2; + } + } + + const size_t nr_4kb = bits_round_up(size, 12) >> 12; + void * const p3 = pages_alloc_4kb(nr_4kb); + if (p3) + *size_out = nr_4kb << 12; + return p3; +} +// }}} mm + +// process/thread {{{ +static u32 process_ncpu; +#if defined(__FreeBSD__) +typedef cpuset_t cpu_set_t; +#elif defined(__APPLE__) && defined(__MACH__) +typedef u64 cpu_set_t; +#define CPU_SETSIZE ((64)) +#define CPU_COUNT(__cpu_ptr__) (__builtin_popcountl(*__cpu_ptr__)) +#define CPU_ISSET(__cpu_idx__, __cpu_ptr__) (((*__cpu_ptr__) >> __cpu_idx__) & 1lu) +#define CPU_ZERO(__cpu_ptr__) ((*__cpu_ptr__) = 0) +#define CPU_SET(__cpu_idx__, 
__cpu_ptr__) ((*__cpu_ptr__) |= (1lu << __cpu_idx__)) +#define CPU_CLR(__cpu_idx__, __cpu_ptr__) ((*__cpu_ptr__) &= ~(1lu << __cpu_idx__)) +#define pthread_attr_setaffinity_np(...) ((void)0) +#endif + +__attribute__((constructor)) + static void +process_init(void) +{ + // Linux's default is 1024 cpus + process_ncpu = (u32)sysconf(_SC_NPROCESSORS_CONF); + if (process_ncpu > CPU_SETSIZE) { + fprintf(stderr, "%s: can use only %zu cores\n", + __func__, (size_t)CPU_SETSIZE); + process_ncpu = CPU_SETSIZE; + } + thread_set_name(pthread_self(), "main"); +} + + static inline int +thread_getaffinity_set(cpu_set_t * const cpuset) +{ +#if defined(__linux__) + return sched_getaffinity(0, sizeof(*cpuset), cpuset); +#elif defined(__FreeBSD__) + return cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, sizeof(*cpuset), cpuset); +#elif defined(__APPLE__) && defined(__MACH__) + *cpuset = (1lu << process_ncpu) - 1; + return (int)process_ncpu; // TODO +#endif // OS +} + + static inline int +thread_setaffinity_set(const cpu_set_t * const cpuset) +{ +#if defined(__linux__) + return sched_setaffinity(0, sizeof(*cpuset), cpuset); +#elif defined(__FreeBSD__) + return cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, sizeof(*cpuset), cpuset); +#elif defined(__APPLE__) && defined(__MACH__) + (void)cpuset; // TODO + return 0; +#endif // OS +} + + void +thread_get_name(const pthread_t pt, char * const name, const size_t len) +{ +#if defined(__linux__) + pthread_getname_np(pt, name, len); +#elif defined(__FreeBSD__) + pthread_get_name_np(pt, name, len); +#elif defined(__APPLE__) && defined(__MACH__) + (void)pt; + (void)len; + strcpy(name, "unknown"); // TODO +#endif // OS +} + + void +thread_set_name(const pthread_t pt, const char * const name) +{ +#if defined(__linux__) + pthread_setname_np(pt, name); +#elif defined(__FreeBSD__) + pthread_set_name_np(pt, name); +#elif defined(__APPLE__) && defined(__MACH__) + (void)pt; + (void)name; // TODO +#endif // OS +} + +// kB + long 
+process_get_rss(void) +{ + struct rusage rs; + getrusage(RUSAGE_SELF, &rs); + return rs.ru_maxrss; +} + + u32 +process_affinity_count(void) +{ + cpu_set_t set; + if (thread_getaffinity_set(&set) != 0) + return process_ncpu; + + const u32 nr = (u32)CPU_COUNT(&set); + return nr ? nr : process_ncpu; +} + + u32 +process_getaffinity_list(const u32 max, u32 * const cores) +{ + memset(cores, 0, max * sizeof(cores[0])); + cpu_set_t set; + if (thread_getaffinity_set(&set) != 0) + return 0; + + const u32 nr_affinity = (u32)CPU_COUNT(&set); + const u32 nr = nr_affinity < max ? nr_affinity : max; + u32 j = 0; + for (u32 i = 0; i < process_ncpu; i++) { + if (CPU_ISSET(i, &set)) + cores[j++] = i; + + if (j >= nr) + break; + } + return j; +} + + void +thread_setaffinity_list(const u32 nr, const u32 * const list) +{ + cpu_set_t set; + CPU_ZERO(&set); + for (u32 i = 0; i < nr; i++) + if (list[i] < process_ncpu) + CPU_SET(list[i], &set); + thread_setaffinity_set(&set); +} + + void +thread_pin(const u32 cpu) +{ + cpu_set_t set; + CPU_ZERO(&set); + CPU_SET(cpu % process_ncpu, &set); + thread_setaffinity_set(&set); +} + + u64 +process_cpu_time_usec(void) +{ + struct rusage rs; + getrusage(RUSAGE_SELF, &rs); + const u64 usr = (((u64)rs.ru_utime.tv_sec) * 1000000lu) + ((u64)rs.ru_utime.tv_usec); + const u64 sys = (((u64)rs.ru_stime.tv_sec) * 1000000lu) + ((u64)rs.ru_stime.tv_usec); + return usr + sys; +} + +struct fork_join_info { + u32 total; + u32 ncores; + u32 * cores; + void *(*func)(void *); + bool args; + union { + void * arg1; + void ** argn; + }; + union { + struct { au32 ferr, jerr; }; + au64 xerr; + }; +}; + +// DON'T CHANGE! 
+#define FORK_JOIN_RANK_BITS ((16)) // 16 +#define FORK_JOIN_MAX ((1u << FORK_JOIN_RANK_BITS)) + +/* + * fj(6): T0 + * / \ + * T0 T4 + * / \ / + * T0 T2 T4 + * / \ / \ / \ + * t0 t1 t2 t3 t4 t5 + */ + +// recursive tree fork-join + static void * +thread_do_fork_join_worker(void * const ptr) +{ + struct entry13 fjp = {.ptr = ptr}; + // GCC: Without explicitly casting from fjp.fji (a 45-bit u64 value), + // the high bits will get truncated, which is always CORRECT in gcc. + // Don't use gcc. + struct fork_join_info * const fji = u64_to_ptr(fjp.e3); + const u32 rank = (u32)fjp.e1; + + const u32 nchild = (u32)__builtin_ctz(rank ? rank : bits_p2_up_u32(fji->total)); + debug_assert(nchild <= FORK_JOIN_RANK_BITS); + pthread_t tids[FORK_JOIN_RANK_BITS]; + if (nchild) { + cpu_set_t set; + CPU_ZERO(&set); + pthread_attr_t attr; + pthread_attr_init(&attr); + //pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); // Joinable by default + // fork top-down + for (u32 i = nchild - 1; i < nchild; i--) { + const u32 cr = rank + (1u << i); // child's rank + if (cr >= fji->total) + continue; // should not break + const u32 core = fji->cores[(cr < fji->ncores) ? cr : (cr % fji->ncores)]; + CPU_SET(core, &set); + pthread_attr_setaffinity_np(&attr, sizeof(set), &set); + fjp.e1 = (u16)cr; + const int r = pthread_create(&tids[i], &attr, thread_do_fork_join_worker, fjp.ptr); + CPU_CLR(core, &set); + if (unlikely(r)) { // fork failed + memset(&tids[0], 0, sizeof(tids[0]) * (i+1)); + u32 nmiss = (1u << (i + 1)) - 1; + if ((rank + nmiss) >= fji->total) + nmiss = fji->total - 1 - rank; + (void)atomic_fetch_add_explicit(&fji->ferr, nmiss, MO_RELAXED); + break; + } + } + pthread_attr_destroy(&attr); + } + + char thname0[16]; + char thname1[16]; + thread_get_name(pthread_self(), thname0, 16); + snprintf(thname1, 16, "%.8s_%u", thname0, rank); + thread_set_name(pthread_self(), thname1); + + void * const ret = fji->func(fji->args ? 
fji->argn[rank] : fji->arg1); + + thread_set_name(pthread_self(), thname0); + // join bottom-up + for (u32 i = 0; i < nchild; i++) { + const u32 cr = rank + (1u << i); // child rank + if (cr >= fji->total) + break; // safe to break + if (tids[i]) { + const int r = pthread_join(tids[i], NULL); + if (unlikely(r)) { // error + //fprintf(stderr, "pthread_join %u..%u = %d: %s\n", rank, cr, r, strerror(r)); + (void)atomic_fetch_add_explicit(&fji->jerr, 1, MO_RELAXED); + } + } + } + return ret; +} + + u64 +thread_fork_join(u32 nr, void *(*func) (void *), const bool args, void * const argx) +{ + if (unlikely(nr > FORK_JOIN_MAX)) { + fprintf(stderr, "%s reduce nr to %u\n", __func__, FORK_JOIN_MAX); + nr = FORK_JOIN_MAX; + } + + u32 cores[CPU_SETSIZE]; + u32 ncores = process_getaffinity_list(process_ncpu, cores); + if (unlikely(ncores == 0)) { // force to use all cores + ncores = process_ncpu; + for (u32 i = 0; i < process_ncpu; i++) + cores[i] = i; + } + if (unlikely(nr == 0)) + nr = ncores; + + // the compiler does not know fji can change since we cast &fji into fjp + struct fork_join_info fji = {.total = nr, .cores = cores, .ncores = ncores, + .func = func, .args = args, .arg1 = argx}; + const struct entry13 fjp = entry13(0, (u64)(&fji)); + + // save current affinity + cpu_set_t set0; + thread_getaffinity_set(&set0); + + // master thread shares thread0's core + cpu_set_t set; + CPU_ZERO(&set); + CPU_SET(fji.cores[0], &set); + thread_setaffinity_set(&set); + + const u64 t0 = time_nsec(); + (void)thread_do_fork_join_worker(fjp.ptr); + const u64 dt = time_diff_nsec(t0); + + // restore original affinity + thread_setaffinity_set(&set0); + + // check and report errors (unlikely) + if (atomic_load_explicit(&fji.xerr, MO_CONSUME)) + fprintf(stderr, "%s errors: fork %u join %u\n", __func__, fji.ferr, fji.jerr); + return dt; +} + + int +thread_create_at(const u32 cpu, pthread_t * const thread, + void *(*start_routine) (void *), void * const arg) +{ + const u32 cpu_id = (cpu < 
process_ncpu) ? cpu : (cpu % process_ncpu); + pthread_attr_t attr; + pthread_attr_init(&attr); + //pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); + cpu_set_t set; + + CPU_ZERO(&set); + CPU_SET(cpu_id, &set); + pthread_attr_setaffinity_np(&attr, sizeof(set), &set); + const int r = pthread_create(thread, &attr, start_routine, arg); + pthread_attr_destroy(&attr); + return r; +} +// }}} process/thread + +// locking {{{ + +// spinlock {{{ +#if defined(__linux__) +#define SPINLOCK_PTHREAD +#endif // __linux__ + +#if defined(SPINLOCK_PTHREAD) +static_assert(sizeof(pthread_spinlock_t) <= sizeof(spinlock), "spinlock size"); +#else // SPINLOCK_PTHREAD +static_assert(sizeof(au32) <= sizeof(spinlock), "spinlock size"); +#endif // SPINLOCK_PTHREAD + + void +spinlock_init(spinlock * const lock) +{ +#if defined(SPINLOCK_PTHREAD) + pthread_spinlock_t * const p = (typeof(p))lock; + pthread_spin_init(p, PTHREAD_PROCESS_PRIVATE); +#else // SPINLOCK_PTHREAD + au32 * const p = (typeof(p))lock; + atomic_store_explicit(p, 0, MO_RELEASE); +#endif // SPINLOCK_PTHREAD +} + + inline void +spinlock_lock(spinlock * const lock) +{ +#if defined(CORR) +#pragma nounroll + while (!spinlock_trylock(lock)) + corr_yield(); +#else // CORR +#if defined(SPINLOCK_PTHREAD) + pthread_spinlock_t * const p = (typeof(p))lock; + pthread_spin_lock(p); // return value ignored +#else // SPINLOCK_PTHREAD + au32 * const p = (typeof(p))lock; +#pragma nounroll + do { + if (atomic_fetch_sub_explicit(p, 1, MO_ACQUIRE) == 0) + return; +#pragma nounroll + do { + cpu_pause(); + } while (atomic_load_explicit(p, MO_CONSUME)); + } while (true); +#endif // SPINLOCK_PTHREAD +#endif // CORR +} + + inline bool +spinlock_trylock(spinlock * const lock) +{ +#if defined(SPINLOCK_PTHREAD) + pthread_spinlock_t * const p = (typeof(p))lock; + return !pthread_spin_trylock(p); +#else // SPINLOCK_PTHREAD + au32 * const p = (typeof(p))lock; + return atomic_fetch_sub_explicit(p, 1, MO_ACQUIRE) == 0; +#endif // SPINLOCK_PTHREAD 
+} + + inline void +spinlock_unlock(spinlock * const lock) +{ +#if defined(SPINLOCK_PTHREAD) + pthread_spinlock_t * const p = (typeof(p))lock; + pthread_spin_unlock(p); // return value ignored +#else // SPINLOCK_PTHREAD + au32 * const p = (typeof(p))lock; + atomic_store_explicit(p, 0, MO_RELEASE); +#endif // SPINLOCK_PTHREAD +} +// }}} spinlock + +// pthread mutex {{{ +static_assert(sizeof(pthread_mutex_t) <= sizeof(mutex), "mutexlock size"); + inline void +mutex_init(mutex * const lock) +{ + pthread_mutex_t * const p = (typeof(p))lock; + pthread_mutex_init(p, NULL); +} + + inline void +mutex_lock(mutex * const lock) +{ +#if defined(CORR) +#pragma nounroll + while (!mutex_trylock(lock)) + corr_yield(); +#else + pthread_mutex_t * const p = (typeof(p))lock; + pthread_mutex_lock(p); // return value ignored +#endif +} + + inline bool +mutex_trylock(mutex * const lock) +{ + pthread_mutex_t * const p = (typeof(p))lock; + return !pthread_mutex_trylock(p); // return value ignored +} + + inline void +mutex_unlock(mutex * const lock) +{ + pthread_mutex_t * const p = (typeof(p))lock; + pthread_mutex_unlock(p); // return value ignored +} + + inline void +mutex_deinit(mutex * const lock) +{ + pthread_mutex_t * const p = (typeof(p))lock; + pthread_mutex_destroy(p); +} +// }}} pthread mutex + +// rwdep {{{ +// poor man's lockdep for rwlock +// per-thread lock list +// it calls debug_die() when local double-(un)locking is detected +// cyclic dependencies can be manually identified by looking at the two lists below in gdb +#ifdef RWDEP +#define RWDEP_NR ((16)) +__thread const rwlock * rwdep_readers[RWDEP_NR] = {}; +__thread const rwlock * rwdep_writers[RWDEP_NR] = {}; + + static void +rwdep_check(const rwlock * const lock) +{ + debug_assert(lock); + for (u64 i = 0; i < RWDEP_NR; i++) { + if (rwdep_readers[i] == lock) + debug_die(); + if (rwdep_writers[i] == lock) + debug_die(); + } +} +#endif // RWDEP + + static void +rwdep_lock_read(const rwlock * const lock) +{ +#ifdef RWDEP + 
rwdep_check(lock); + for (u64 i = 0; i < RWDEP_NR; i++) { + if (rwdep_readers[i] == NULL) { + rwdep_readers[i] = lock; + return; + } + } +#else + (void)lock; +#endif // RWDEP +} + + static void +rwdep_unlock_read(const rwlock * const lock) +{ +#ifdef RWDEP + for (u64 i = 0; i < RWDEP_NR; i++) { + if (rwdep_readers[i] == lock) { + rwdep_readers[i] = NULL; + return; + } + } + debug_die(); +#else + (void)lock; +#endif // RWDEP +} + + static void +rwdep_lock_write(const rwlock * const lock) +{ +#ifdef RWDEP + rwdep_check(lock); + for (u64 i = 0; i < RWDEP_NR; i++) { + if (rwdep_writers[i] == NULL) { + rwdep_writers[i] = lock; + return; + } + } +#else + (void)lock; +#endif // RWDEP +} + + static void +rwdep_unlock_write(const rwlock * const lock) +{ +#ifdef RWDEP + for (u64 i = 0; i < RWDEP_NR; i++) { + if (rwdep_writers[i] == lock) { + rwdep_writers[i] = NULL; + return; + } + } + debug_die(); +#else + (void)lock; +#endif // RWDEP +} +// }}} rwlockdep + +// rwlock {{{ +typedef au32 lock_t; +typedef u32 lock_v; +static_assert(sizeof(lock_t) == sizeof(lock_v), "lock size"); +static_assert(sizeof(lock_t) <= sizeof(rwlock), "lock size"); + +#define RWLOCK_WSHIFT ((sizeof(lock_t) * 8 - 1)) +#define RWLOCK_WBIT ((((lock_v)1) << RWLOCK_WSHIFT)) + + inline void +rwlock_init(rwlock * const lock) +{ + lock_t * const pvar = (typeof(pvar))lock; + atomic_store_explicit(pvar, 0, MO_RELEASE); +} + + inline bool +rwlock_trylock_read(rwlock * const lock) +{ + lock_t * const pvar = (typeof(pvar))lock; + if ((atomic_fetch_add_explicit(pvar, 1, MO_ACQUIRE) >> RWLOCK_WSHIFT) == 0) { + rwdep_lock_read(lock); + return true; + } else { + atomic_fetch_sub_explicit(pvar, 1, MO_RELAXED); + return false; + } +} + + inline bool +rwlock_trylock_read_lp(rwlock * const lock) +{ + lock_t * const pvar = (typeof(pvar))lock; + if (atomic_load_explicit(pvar, MO_CONSUME) >> RWLOCK_WSHIFT) { + cpu_pause(); + return false; + } + return rwlock_trylock_read(lock); +} + +// actually nr + 1 + inline bool 
+rwlock_trylock_read_nr(rwlock * const lock, u16 nr) +{ + lock_t * const pvar = (typeof(pvar))lock; + if ((atomic_fetch_add_explicit(pvar, 1, MO_ACQUIRE) >> RWLOCK_WSHIFT) == 0) { + rwdep_lock_read(lock); + return true; + } + +#pragma nounroll + do { // someone already locked; wait for a little while + cpu_pause(); + if ((atomic_load_explicit(pvar, MO_CONSUME) >> RWLOCK_WSHIFT) == 0) { + rwdep_lock_read(lock); + return true; + } + } while (nr--); + + atomic_fetch_sub_explicit(pvar, 1, MO_RELAXED); + return false; +} + + inline void +rwlock_lock_read(rwlock * const lock) +{ + lock_t * const pvar = (typeof(pvar))lock; +#pragma nounroll + do { + if (rwlock_trylock_read(lock)) + return; +#pragma nounroll + do { +#if defined(CORR) + corr_yield(); +#else + cpu_pause(); +#endif + } while (atomic_load_explicit(pvar, MO_CONSUME) >> RWLOCK_WSHIFT); + } while (true); +} + + inline void +rwlock_unlock_read(rwlock * const lock) +{ + rwdep_unlock_read(lock); + lock_t * const pvar = (typeof(pvar))lock; + atomic_fetch_sub_explicit(pvar, 1, MO_RELEASE); +} + + inline bool +rwlock_trylock_write(rwlock * const lock) +{ + lock_t * const pvar = (typeof(pvar))lock; + lock_v v0 = atomic_load_explicit(pvar, MO_CONSUME); + if ((v0 == 0) && atomic_compare_exchange_weak_explicit(pvar, &v0, RWLOCK_WBIT, MO_ACQUIRE, MO_RELAXED)) { + rwdep_lock_write(lock); + return true; + } else { + return false; + } +} + +// actually nr + 1 + inline bool +rwlock_trylock_write_nr(rwlock * const lock, u16 nr) +{ +#pragma nounroll + do { + if (rwlock_trylock_write(lock)) + return true; + cpu_pause(); + } while (nr--); + return false; +} + + inline void +rwlock_lock_write(rwlock * const lock) +{ + lock_t * const pvar = (typeof(pvar))lock; +#pragma nounroll + do { + if (rwlock_trylock_write(lock)) + return; +#pragma nounroll + do { +#if defined(CORR) + corr_yield(); +#else + cpu_pause(); +#endif + } while (atomic_load_explicit(pvar, MO_CONSUME)); + } while (true); +} + + inline bool 
+rwlock_trylock_write_hp(rwlock * const lock) +{ + lock_t * const pvar = (typeof(pvar))lock; + lock_v v0 = atomic_load_explicit(pvar, MO_CONSUME); + if (v0 >> RWLOCK_WSHIFT) + return false; + + if (atomic_compare_exchange_weak_explicit(pvar, &v0, v0|RWLOCK_WBIT, MO_ACQUIRE, MO_RELAXED)) { + rwdep_lock_write(lock); + // WBIT successfully marked; must wait for readers to leave + if (v0) { // saw active readers +#pragma nounroll + while (atomic_load_explicit(pvar, MO_CONSUME) != RWLOCK_WBIT) { +#if defined(CORR) + corr_yield(); +#else + cpu_pause(); +#endif + } + } + return true; + } else { + return false; + } +} + + inline bool +rwlock_trylock_write_hp_nr(rwlock * const lock, u16 nr) +{ +#pragma nounroll + do { + if (rwlock_trylock_write_hp(lock)) + return true; + cpu_pause(); + } while (nr--); + return false; +} + + inline void +rwlock_lock_write_hp(rwlock * const lock) +{ +#pragma nounroll + while (!rwlock_trylock_write_hp(lock)) { +#if defined(CORR) + corr_yield(); +#else + cpu_pause(); +#endif + } +} + + inline void +rwlock_unlock_write(rwlock * const lock) +{ + rwdep_unlock_write(lock); + lock_t * const pvar = (typeof(pvar))lock; + atomic_fetch_sub_explicit(pvar, RWLOCK_WBIT, MO_RELEASE); +} + + inline void +rwlock_write_to_read(rwlock * const lock) +{ + rwdep_unlock_write(lock); + rwdep_lock_read(lock); + lock_t * const pvar = (typeof(pvar))lock; + // +R -W + atomic_fetch_add_explicit(pvar, ((lock_v)1) - RWLOCK_WBIT, MO_ACQ_REL); +} + +#undef RWLOCK_WSHIFT +#undef RWLOCK_WBIT +// }}} rwlock + +// }}} locking + +// coroutine {{{ + +// asm {{{ +#if defined(__x86_64__) +// number pushes in co_switch_stack +#define CO_CONTEXT_SIZE ((6)) + +// for switch/exit: pass a return value to the target +asm ( + ".align 16;" +#if defined(__linux__) || defined(__FreeBSD__) + ".global co_switch_stack;" + ".type co_switch_stack, @function;" + "co_switch_stack:" +#elif defined(__APPLE__) && defined(__MACH__) + ".global _co_switch_stack;" + "_co_switch_stack:" +#else +#error 
Supported platforms: Linux/FreeBSD/Apple +#endif // OS + "push %rbp; push %rbx; push %r12;" + "push %r13; push %r14; push %r15;" + "mov %rsp, (%rdi);" + "mov %rsi, %rsp;" + "pop %r15; pop %r14; pop %r13;" + "pop %r12; pop %rbx; pop %rbp;" + "mov %rdx, %rax;" + "retq;" + ); + +#elif defined(__aarch64__) +// number pushes in co_switch_stack +#define CO_CONTEXT_SIZE ((20)) +asm ( + ".align 16;" +#if defined(__linux__) || defined(__FreeBSD__) + ".global co_switch_stack;" + ".type co_switch_stack, @function;" + "co_switch_stack:" +#elif defined(__APPLE__) && defined(__MACH__) + ".global _co_switch_stack;" + "_co_switch_stack:" +#else +#error supported platforms: Linux/FreeBSD/Apple +#endif // OS + "sub x8, sp, 160;" + "str x8, [x0];" + "stp x30, x19, [x8]; ldp x30, x19, [x1];" + "stp x20, x21, [x8, 16]; ldp x20, x21, [x1, 16];" + "stp x22, x23, [x8, 32]; ldp x22, x23, [x1, 32];" + "stp x24, x25, [x8, 48]; ldp x24, x25, [x1, 48];" + "stp x26, x27, [x8, 64]; ldp x26, x27, [x1, 64];" + "stp x28, x29, [x8, 80]; ldp x28, x29, [x1, 80];" + "stp d8, d9, [x8, 96]; ldp d8, d9, [x1, 96];" + "stp d10, d11, [x8, 112]; ldp d10, d11, [x1, 112];" + "stp d12, d13, [x8, 128]; ldp d12, d13, [x1, 128];" + "stp d14, d15, [x8, 144]; ldp d14, d15, [x1, 144];" + "add sp, x1, 160;" + "mov x0, x2;" + "br x30;" + ); + +extern void co_entry_aarch64(void); +asm ( + ".align 16;" +#if defined(__linux__) || defined(__FreeBSD__) + ".global co_entry_aarch64;" + ".type co_entry_aarch64, @function;" + "co_entry_aarch64:" +#elif defined(__APPLE__) && defined(__MACH__) + ".global _co_entry_aarch64;" + "_co_entry_aarch64:" +#else +#error supported platforms: Linux/FreeBSD/Apple +#endif // OS + "ldr x8, [sp, 0];" + "blr x8;" + "ldr x8, [sp, 8];" + "blr x8;" + "ldr x8, [sp, 16];" + "blr x8;" + ); +#else +#error supported CPUs: x86_64 or AArch64 +#endif // co_switch_stack x86_64 and aarch64 +// }}} asm + +// co {{{ +struct co { + u64 rsp; + void * priv; + u64 * host; // set host to NULL to exit + size_t stksz; 
+}; + +// not atomic: no concurrent access +// volatile: avoid caching of co_curr +static __thread struct co * volatile co_curr = NULL; // NULL in host + +// the stack sits under the struct co + static void +co_init(struct co * const co, void * func, void * priv, u64 * const host, + const u64 stksz, void * func_exit) +{ + debug_assert((stksz & 0x3f) == 0); // a multiple of 64 bytes + u64 * rsp = ((u64 *)co) - 4; + rsp[0] = (u64)func; + rsp[1] = (u64)func_exit; + rsp[2] = (u64)debug_die; + rsp[3] = 0; + + rsp -= CO_CONTEXT_SIZE; + +#if defined(__aarch64__) + rsp[0] = (u64)co_entry_aarch64; +#endif + + co->rsp = (u64)rsp; + co->priv = priv; + co->host = host; + co->stksz = stksz; +} + + static void +co_exit0(void) +{ + co_exit(0); +} + + struct co * +co_create(const u64 stacksize, void * func, void * priv, u64 * const host) +{ + const u64 stksz = bits_round_up(stacksize, 6); + const size_t alloc_size = stksz + sizeof(struct co); + u8 * const mem = yalloc(alloc_size); + if (mem == NULL) + return NULL; + +#ifdef CO_STACK_CHECK + memset(mem, 0x5c, stksz); +#endif // CO_STACK_CHECK + + struct co * const co = (typeof(co))(mem + stksz); + co_init(co, func, priv, host, stksz, co_exit0); + return co; +} + + inline void +co_reuse(struct co * const co, void * func, void * priv, u64 * const host) +{ + co_init(co, func, priv, host, co->stksz, co_exit0); +} + + inline struct co * +co_fork(void * func, void * priv) +{ + return co_curr ? co_create(co_curr->stksz, func, priv, co_curr->host) : NULL; +} + + inline void * +co_priv(void) +{ + return co_curr ? co_curr->priv : NULL; +} + +// the host calls this to enter a coroutine. 
+ inline u64 +co_enter(struct co * const to, const u64 retval) +{ + debug_assert(co_curr == NULL); // must entry from the host + debug_assert(to && to->host); + u64 * const save = to->host; + co_curr = to; + const u64 ret = co_switch_stack(save, to->rsp, retval); + co_curr = NULL; + return ret; +} + +// switch from a coroutine to another coroutine +// co_curr must be valid +// the target will resume and receive the retval + inline u64 +co_switch_to(struct co * const to, const u64 retval) +{ + debug_assert(co_curr); + debug_assert(co_curr != to); + debug_assert(to && to->host); + struct co * const save = co_curr; + co_curr = to; + return co_switch_stack(&(save->rsp), to->rsp, retval); +} + +// switch from a coroutine to the host routine +// co_yield is now a c++ keyword... + inline u64 +co_back(const u64 retval) +{ + debug_assert(co_curr); + struct co * const save = co_curr; + co_curr = NULL; + return co_switch_stack(&(save->rsp), *(save->host), retval); +} + +#ifdef CO_STACK_CHECK + static void +co_stack_check(const u8 * const mem, const u64 stksz) +{ + const u64 * const mem64 = (typeof(mem64))mem; + const u64 size64 = stksz / sizeof(u64); + for (u64 i = 0; i < size64; i++) { + if (mem64[i] != 0x5c5c5c5c5c5c5c5clu) { + fprintf(stderr, "%s co stack usage: %lu/%lu\n", __func__, stksz - (i * sizeof(u64)), stksz); + break; + } + } +} +#endif // CO_STACK_CHECK + +// return to host and set host to NULL +__attribute__((noreturn)) + void +co_exit(const u64 retval) +{ + debug_assert(co_curr); +#ifdef CO_STACK_CHECK + const u64 stksz = co_curr->stksz; + u8 * const mem = ((u8 *)co_curr) - stksz; + co_stack_check(mem, stksz); +#endif // CO_STACK_CHECK + const u64 hostrsp = *(co_curr->host); + co_curr->host = NULL; + struct co * const save = co_curr; + co_curr = NULL; + (void)co_switch_stack(&(save->rsp), hostrsp, retval); + // return to co_enter + debug_die(); +} + +// host is set to NULL on exit + inline bool +co_valid(struct co * const co) +{ + return co->host != NULL; +} + 
+// return NULL on host + inline struct co * +co_self(void) +{ + return co_curr; +} + + inline void +co_destroy(struct co * const co) +{ + u8 * const mem = ((u8 *)co) - co->stksz; + free(mem); +} +// }}} co + +// corr {{{ +struct corr { + struct co co; + struct corr * next; + struct corr * prev; +}; + +// initial and link guest to the run-queue + struct corr * +corr_create(const u64 stacksize, void * func, void * priv, u64 * const host) +{ + const u64 stksz = bits_round_up(stacksize, 6); + const size_t alloc_size = stksz + sizeof(struct corr); + u8 * const mem = yalloc(alloc_size); + if (mem == NULL) + return NULL; + +#ifdef CO_STACK_CHECK + memset(mem, 0x5c, stksz); +#endif // CO_STACK_CHECK + + struct corr * const co = (typeof(co))(mem + stksz); + co_init(&(co->co), func, priv, host, stksz, corr_exit); + co->next = co; + co->prev = co; + return co; +} + + struct corr * +corr_link(const u64 stacksize, void * func, void * priv, struct corr * const prev) +{ + const u64 stksz = bits_round_up(stacksize, 6); + const size_t alloc_size = stksz + sizeof(struct corr); + u8 * const mem = yalloc(alloc_size); + if (mem == NULL) + return NULL; + +#ifdef CO_STACK_CHECK + memset(mem, 0x5c, stksz); +#endif // CO_STACK_CHECK + + struct corr * const co = (typeof(co))(mem + stksz); + co_init(&(co->co), func, priv, prev->co.host, stksz, corr_exit); + co->next = prev->next; + co->prev = prev; + co->prev->next = co; + co->next->prev = co; + return co; +} + + inline void +corr_reuse(struct corr * const co, void * func, void * priv, u64 * const host) +{ + co_init(&(co->co), func, priv, host, co->co.stksz, corr_exit); + co->next = co; + co->prev = co; +} + + inline void +corr_relink(struct corr * const co, void * func, void * priv, struct corr * const prev) +{ + co_init(&(co->co), func, priv, prev->co.host, co->co.stksz, corr_exit); + co->next = prev->next; + co->prev = prev; + co->prev->next = co; + co->next->prev = co; +} + + inline void +corr_enter(struct corr * const co) +{ + 
(void)co_enter(&(co->co), 0); +} + + inline void +corr_yield(void) +{ + struct corr * const curr = (typeof(curr))co_curr; + if (curr && (curr->next != curr)) + (void)co_switch_to(&(curr->next->co), 0); +} + +__attribute__((noreturn)) + inline void +corr_exit(void) +{ + debug_assert(co_curr); +#ifdef CO_STACK_CHECK + const u64 stksz = co_curr->stksz; + const u8 * const mem = ((u8 *)(co_curr)) - stksz; + co_stack_check(mem, stksz); +#endif // CO_STACK_CHECK + + struct corr * const curr = (typeof(curr))co_curr; + if (curr->next != curr) { // have more corr + struct corr * const next = curr->next; + struct corr * const prev = curr->prev; + next->prev = prev; + prev->next = next; + curr->next = NULL; + curr->prev = NULL; + curr->co.host = NULL; // invalidate + (void)co_switch_to(&(next->co), 0); + } else { // the last corr + co_exit0(); + } + debug_die(); +} + + inline void +corr_destroy(struct corr * const co) +{ + co_destroy(&(co->co)); +} +// }}} corr + +// }}} co + +// bits {{{ + inline u32 +bits_reverse_u32(const u32 v) +{ + const u32 v2 = __builtin_bswap32(v); + const u32 v3 = ((v2 & 0xf0f0f0f0u) >> 4) | ((v2 & 0x0f0f0f0fu) << 4); + const u32 v4 = ((v3 & 0xccccccccu) >> 2) | ((v3 & 0x33333333u) << 2); + const u32 v5 = ((v4 & 0xaaaaaaaau) >> 1) | ((v4 & 0x55555555u) << 1); + return v5; +} + + inline u64 +bits_reverse_u64(const u64 v) +{ + const u64 v2 = __builtin_bswap64(v); + const u64 v3 = ((v2 & 0xf0f0f0f0f0f0f0f0lu) >> 4) | ((v2 & 0x0f0f0f0f0f0f0f0flu) << 4); + const u64 v4 = ((v3 & 0xcccccccccccccccclu) >> 2) | ((v3 & 0x3333333333333333lu) << 2); + const u64 v5 = ((v4 & 0xaaaaaaaaaaaaaaaalu) >> 1) | ((v4 & 0x5555555555555555lu) << 1); + return v5; +} + + inline u64 +bits_rotl_u64(const u64 v, const u8 n) +{ + const u8 sh = n & 0x3f; + return (v << sh) | (v >> (64 - sh)); +} + + inline u64 +bits_rotr_u64(const u64 v, const u8 n) +{ + const u8 sh = n & 0x3f; + return (v >> sh) | (v << (64 - sh)); +} + + inline u32 +bits_rotl_u32(const u32 v, const u8 n) +{ + 
const u8 sh = n & 0x1f; + return (v << sh) | (v >> (32 - sh)); +} + + inline u32 +bits_rotr_u32(const u32 v, const u8 n) +{ + const u8 sh = n & 0x1f; + return (v >> sh) | (v << (32 - sh)); +} + + inline u64 +bits_p2_up_u64(const u64 v) +{ + // clz(0) is undefined + return (v > 1) ? (1lu << (64 - __builtin_clzl(v - 1lu))) : v; +} + + inline u32 +bits_p2_up_u32(const u32 v) +{ + // clz(0) is undefined + return (v > 1) ? (1u << (32 - __builtin_clz(v - 1u))) : v; +} + + inline u64 +bits_p2_down_u64(const u64 v) +{ + return v ? (1lu << (63 - __builtin_clzl(v))) : v; +} + + inline u32 +bits_p2_down_u32(const u32 v) +{ + return v ? (1u << (31 - __builtin_clz(v))) : v; +} + + inline u64 +bits_round_up(const u64 v, const u8 power) +{ + return (v + (1lu << power) - 1lu) >> power << power; +} + + inline u64 +bits_round_up_a(const u64 v, const u64 a) +{ + return (v + a - 1) / a * a; +} + + inline u64 +bits_round_down(const u64 v, const u8 power) +{ + return v >> power << power; +} + + inline u64 +bits_round_down_a(const u64 v, const u64 a) +{ + return v / a * a; +} +// }}} bits + +// vi128 {{{ +#if defined(__GNUC__) && __GNUC__ >= 7 +#define FALLTHROUGH __attribute__ ((fallthrough)) +#else +#define FALLTHROUGH ((void)0) +#endif /* __GNUC__ >= 7 */ + + inline u32 +vi128_estimate_u32(const u32 v) +{ + static const u8 t[] = {5,5,5,5, + 4,4,4,4,4,4,4, 3,3,3,3,3,3,3, + 2,2,2,2,2,2,2, 1,1,1,1,1,1,1}; + return v ? 
t[__builtin_clz(v)] : 2; + // 0 -> [0x80 0x00] the first byte is non-zero + + // nz bit range -> enc length offset in t[] + // 0 -> 2 special case + // 1 to 7 -> 1 31 to 25 + // 8 to 14 -> 2 24 to 18 + // 15 to 21 -> 3 17 to 11 + // 22 to 28 -> 4 10 to 4 + // 29 to 32 -> 5 3 to 0 +} + + u8 * +vi128_encode_u32(u8 * dst, u32 v) +{ + switch (vi128_estimate_u32(v)) { + case 5: + *(dst++) = (u8)(v | 0x80); v >>= 7; FALLTHROUGH; + case 4: + *(dst++) = (u8)(v | 0x80); v >>= 7; FALLTHROUGH; + case 3: + *(dst++) = (u8)(v | 0x80); v >>= 7; FALLTHROUGH; + case 2: + *(dst++) = (u8)(v | 0x80); v >>= 7; FALLTHROUGH; + case 1: + *(dst++) = (u8)v; + break; + default: + debug_die(); + break; + } + return dst; +} + + const u8 * +vi128_decode_u32(const u8 * src, u32 * const out) +{ + debug_assert(*src); + u32 r = 0; + for (u32 shift = 0; shift < 32; shift += 7) { + const u8 byte = *(src++); + r |= (((u32)(byte & 0x7f)) << shift); + if ((byte & 0x80) == 0) { // No more bytes to consume + *out = r; + return src; + } + } + *out = 0; + return NULL; // invalid +} + + inline u32 +vi128_estimate_u64(const u64 v) +{ + static const u8 t[] = {10, + 9,9,9,9,9,9,9, 8,8,8,8,8,8,8, 7,7,7,7,7,7,7, + 6,6,6,6,6,6,6, 5,5,5,5,5,5,5, 4,4,4,4,4,4,4, + 3,3,3,3,3,3,3, 2,2,2,2,2,2,2, 1,1,1,1,1,1,1}; + return v ? 
t[__builtin_clzl(v)] : 2; +} + +// return ptr after the generated bytes + u8 * +vi128_encode_u64(u8 * dst, u64 v) +{ + switch (vi128_estimate_u64(v)) { + case 10: + *(dst++) = (u8)(v | 0x80); v >>= 7; FALLTHROUGH; + case 9: + *(dst++) = (u8)(v | 0x80); v >>= 7; FALLTHROUGH; + case 8: + *(dst++) = (u8)(v | 0x80); v >>= 7; FALLTHROUGH; + case 7: + *(dst++) = (u8)(v | 0x80); v >>= 7; FALLTHROUGH; + case 6: + *(dst++) = (u8)(v | 0x80); v >>= 7; FALLTHROUGH; + case 5: + *(dst++) = (u8)(v | 0x80); v >>= 7; FALLTHROUGH; + case 4: + *(dst++) = (u8)(v | 0x80); v >>= 7; FALLTHROUGH; + case 3: + *(dst++) = (u8)(v | 0x80); v >>= 7; FALLTHROUGH; + case 2: + *(dst++) = (u8)(v | 0x80); v >>= 7; FALLTHROUGH; + case 1: + *(dst++) = (u8)v; + break; + default: + debug_die(); + break; + } + return dst; +} + +// return ptr after the consumed bytes + const u8 * +vi128_decode_u64(const u8 * src, u64 * const out) +{ + u64 r = 0; + for (u32 shift = 0; shift < 64; shift += 7) { + const u8 byte = *(src++); + r |= (((u64)(byte & 0x7f)) << shift); + if ((byte & 0x80) == 0) { // No more bytes to consume + *out = r; + return src; + } + } + *out = 0; + return NULL; // invalid +} + +#undef FALLTHROUGH +// }}} vi128 + +// misc {{{ + inline struct entry13 +entry13(const u16 e1, const u64 e3) +{ + debug_assert((e3 >> 48) == 0); + return (struct entry13){.v64 = (e3 << 16) | e1}; +} + + inline void +entry13_update_e3(struct entry13 * const e, const u64 e3) +{ + debug_assert((e3 >> 48) == 0); + *e = entry13(e->e1, e3); +} + + inline void * +u64_to_ptr(const u64 v) +{ + return (void *)v; +} + + inline u64 +ptr_to_u64(const void * const ptr) +{ + return (u64)ptr; +} + +// portable malloc_usable_size + inline size_t +m_usable_size(void * const ptr) +{ +#if defined(__linux__) || defined(__FreeBSD__) + const size_t sz = malloc_usable_size(ptr); +#elif defined(__APPLE__) && defined(__MACH__) + const size_t sz = malloc_size(ptr); +#endif // OS + +#ifndef HEAPCHECKING + // valgrind and asan may return unaligned 
usable size + debug_assert((sz & 0x7lu) == 0); +#endif // HEAPCHECKING + + return sz; +} + + inline size_t +fdsize(const int fd) +{ + struct stat st; + st.st_size = 0; + if (fstat(fd, &st) != 0) + return 0; + + if (S_ISBLK(st.st_mode)) { +#if defined(__linux__) + ioctl(fd, BLKGETSIZE64, &st.st_size); +#elif defined(__APPLE__) && defined(__MACH__) + u64 blksz = 0; + u64 nblks = 0; + ioctl(fd, DKIOCGETBLOCKSIZE, &blksz); + ioctl(fd, DKIOCGETBLOCKCOUNT, &nblks); + st.st_size = (ssize_t)(blksz * nblks); +#elif defined(__FreeBSD__) + ioctl(fd, DIOCGMEDIASIZE, &st.st_size); +#endif // OS + } + + return (size_t)st.st_size; +} + + u32 +memlcp(const u8 * const p1, const u8 * const p2, const u32 max) +{ + const u32 max64 = max & (~7u); + u32 clen = 0; + while (clen < max64) { + const u64 v1 = *(const u64 *)(p1+clen); + const u64 v2 = *(const u64 *)(p2+clen); + const u64 x = v1 ^ v2; + if (x) + return clen + (u32)(__builtin_ctzl(x) >> 3); + + clen += sizeof(u64); + } + + if ((clen + sizeof(u32)) <= max) { + const u32 v1 = *(const u32 *)(p1+clen); + const u32 v2 = *(const u32 *)(p2+clen); + const u32 x = v1 ^ v2; + if (x) + return clen + (u32)(__builtin_ctz(x) >> 3); + + clen += sizeof(u32); + } + + while ((clen < max) && (p1[clen] == p2[clen])) + clen++; + return clen; +} + +static double logger_t0 = 0.0; + +__attribute__((constructor)) + static void +logger_init(void) +{ + logger_t0 = time_sec(); +} + +__attribute__ ((format (printf, 2, 3))) + void +logger_printf(const int fd, const char * const fmt, ...) 
+{ + char buf[4096]; + va_list ap; + va_start(ap, fmt); + vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + dprintf(fd, "%010.3lf %08x %s", time_diff_sec(logger_t0), crc32c_u64(0x12345678, (u64)pthread_self()), buf); +} +// }}} misc + +// astk {{{ +// atomic stack +struct acell { struct acell * next; }; + +// extract ptr from m value + static inline struct acell * +astk_ptr(const u64 m) +{ + return (struct acell *)(m >> 16); +} + +// calculate the new magic + static inline u64 +astk_m1(const u64 m0, struct acell * const ptr) +{ + return ((m0 + 1) & 0xfffflu) | (((u64)ptr) << 16); +} + +// calculate the new magic + static inline u64 +astk_m1_unsafe(struct acell * const ptr) +{ + return ((u64)ptr) << 16; +} + + static bool +astk_try_push(au64 * const pmagic, struct acell * const first, struct acell * const last) +{ + u64 m0 = atomic_load_explicit(pmagic, MO_CONSUME); + last->next = astk_ptr(m0); + const u64 m1 = astk_m1(m0, first); + return atomic_compare_exchange_weak_explicit(pmagic, &m0, m1, MO_RELEASE, MO_RELAXED); +} + + static void +astk_push_safe(au64 * const pmagic, struct acell * const first, struct acell * const last) +{ + while (!astk_try_push(pmagic, first, last)); +} + + static void +astk_push_unsafe(au64 * const pmagic, struct acell * const first, + struct acell * const last) +{ + const u64 m0 = atomic_load_explicit(pmagic, MO_CONSUME); + last->next = astk_ptr(m0); + const u64 m1 = astk_m1_unsafe(first); + atomic_store_explicit(pmagic, m1, MO_RELAXED); +} + +//// can fail for two reasons: (1) NULL: no available object; (2) ~0lu: contention +// static void * +//astk_try_pop(au64 * const pmagic) +//{ +// u64 m0 = atomic_load_explicit(pmagic, MO_CONSUME); +// struct acell * const ret = astk_ptr(m0); +// if (ret == NULL) +// return NULL; +// +// const u64 m1 = astk_m1(m0, ret->next); +// if (atomic_compare_exchange_weak_explicit(pmagic, &m0, m1, MO_ACQUIRE, MO_RELAXED)) +// return ret; +// else +// return (void *)(~0lu); +//} + + static void * 
+astk_pop_safe(au64 * const pmagic) +{ + do { + u64 m0 = atomic_load_explicit(pmagic, MO_CONSUME); + struct acell * const ret = astk_ptr(m0); + if (ret == NULL) + return NULL; + + const u64 m1 = astk_m1(m0, ret->next); + if (atomic_compare_exchange_weak_explicit(pmagic, &m0, m1, MO_ACQUIRE, MO_RELAXED)) + return ret; + } while (true); +} + + static void * +astk_pop_unsafe(au64 * const pmagic) +{ + const u64 m0 = atomic_load_explicit(pmagic, MO_CONSUME); + struct acell * const ret = astk_ptr(m0); + if (ret == NULL) + return NULL; + + const u64 m1 = astk_m1_unsafe(ret->next); + atomic_store_explicit(pmagic, m1, MO_RELAXED); + return (void *)ret; +} + + static void * +astk_peek_unsafe(au64 * const pmagic) +{ + const u64 m0 = atomic_load_explicit(pmagic, MO_CONSUME); + return astk_ptr(m0); +} +// }}} astk + +// slab {{{ +#define SLAB_OBJ0_OFFSET ((64)) +struct slab { + au64 magic; // hi 48: ptr, lo 16: seq + u64 padding1[7]; + + // 2nd line + struct acell * head_active; // list of blocks in use or in magic + struct acell * head_backup; // list of unused full blocks + u64 nr_ready; // UNSAFE only! 
number of objects under magic + u64 padding2[5]; + + // 3rd line const + u64 obj_size; // const: aligned size of each object + u64 blk_size; // const: size of each memory block + u64 objs_per_slab; // const: number of objects in a slab + u64 obj0_offset; // const: offset of the first object in a block + u64 padding3[4]; + + // 4th line + union { + mutex lock; + u64 padding4[8]; + }; +}; +static_assert(sizeof(struct slab) == 256, "sizeof(struct slab) != 256"); + + static void +slab_add(struct slab * const slab, struct acell * const blk, const bool is_safe) +{ + // insert into head_active + blk->next = slab->head_active; + slab->head_active = blk; + + u8 * const base = ((u8 *)blk) + slab->obj0_offset; + struct acell * iter = (typeof(iter))base; // [0] + for (u64 i = 1; i < slab->objs_per_slab; i++) { + struct acell * const next = (typeof(next))(base + (i * slab->obj_size)); + iter->next = next; + iter = next; + } + + // base points to the first block; iter points to the last block + if (is_safe) { // other threads can poll magic + astk_push_safe(&slab->magic, (struct acell *)base, iter); + } else { // unsafe + astk_push_unsafe(&slab->magic, (struct acell *)base, iter); + slab->nr_ready += slab->objs_per_slab; + } +} + +// critical section; call with lock + static bool +slab_expand(struct slab * const slab, const bool is_safe) +{ + struct acell * const old = slab->head_backup; + if (old) { // pop old from backup and add + slab->head_backup = old->next; + slab_add(slab, old, is_safe); + } else { // more core + size_t blk_size; + struct acell * const new = pages_alloc_best(slab->blk_size, true, &blk_size); + (void)blk_size; + if (new == NULL) + return false; + + slab_add(slab, new, is_safe); + } + return true; +} + +// return 0 on failure; otherwise, obj0_offset + static u64 +slab_check_sizes(const u64 obj_size, const u64 blk_size) +{ + // obj must be non-zero and 8-byte aligned + // blk must be at least of page size and power of 2 + if ((!obj_size) || (obj_size % 8lu) 
|| (blk_size < 4096lu) || (blk_size & (blk_size - 1))) + return 0; + + // each slab should have at least one object + const u64 obj0_offset = (obj_size & (obj_size - 1)) ? SLAB_OBJ0_OFFSET : obj_size; + if (obj0_offset >= blk_size || (blk_size - obj0_offset) < obj_size) + return 0; + + return obj0_offset; +} + + static void +slab_init_internal(struct slab * const slab, const u64 obj_size, const u64 blk_size, const u64 obj0_offset) +{ + memset(slab, 0, sizeof(*slab)); + slab->obj_size = obj_size; + slab->blk_size = blk_size; + slab->objs_per_slab = (blk_size - obj0_offset) / obj_size; + debug_assert(slab->objs_per_slab); // >= 1 + slab->obj0_offset = obj0_offset; + mutex_init(&(slab->lock)); +} + + struct slab * +slab_create(const u64 obj_size, const u64 blk_size) +{ + const u64 obj0_offset = slab_check_sizes(obj_size, blk_size); + if (!obj0_offset) + return NULL; + + struct slab * const slab = yalloc(sizeof(*slab)); + if (slab == NULL) + return NULL; + + slab_init_internal(slab, obj_size, blk_size, obj0_offset); + return slab; +} + +// unsafe + bool +slab_reserve_unsafe(struct slab * const slab, const u64 nr) +{ + while (slab->nr_ready < nr) + if (!slab_expand(slab, false)) + return false; + return true; +} + + void * +slab_alloc_unsafe(struct slab * const slab) +{ + void * ret = astk_pop_unsafe(&slab->magic); + if (ret == NULL) { + if (!slab_expand(slab, false)) + return NULL; + ret = astk_pop_unsafe(&slab->magic); + } + debug_assert(ret); + slab->nr_ready--; + return ret; +} + + void * +slab_alloc_safe(struct slab * const slab) +{ + void * ret = astk_pop_safe(&slab->magic); + if (ret) + return ret; + + mutex_lock(&slab->lock); + do { + ret = astk_pop_safe(&slab->magic); // may already have new objs + if (ret) + break; + if (!slab_expand(slab, true)) + break; + } while (true); + mutex_unlock(&slab->lock); + return ret; +} + + void +slab_free_unsafe(struct slab * const slab, void * const ptr) +{ + debug_assert(ptr); + astk_push_unsafe(&slab->magic, ptr, ptr); + 
slab->nr_ready++; +} + + void +slab_free_safe(struct slab * const slab, void * const ptr) +{ + astk_push_safe(&slab->magic, ptr, ptr); +} + +// UNSAFE + void +slab_free_all(struct slab * const slab) +{ + slab->magic = 0; + slab->nr_ready = 0; // backup does not count + + if (slab->head_active) { + struct acell * iter = slab->head_active; + while (iter->next) + iter = iter->next; + // now iter points to the last blk + iter->next = slab->head_backup; // active..backup + slab->head_backup = slab->head_active; // backup gets all + slab->head_active = NULL; // empty active + } +} + +// unsafe + u64 +slab_get_nalloc(struct slab * const slab) +{ + struct acell * iter = slab->head_active; + u64 n = 0; + while (iter) { + n++; + iter = iter->next; + } + n *= slab->objs_per_slab; + + iter = astk_peek_unsafe(&slab->magic); + while (iter) { + n--; + iter = iter->next; + } + return n; +} + + static void +slab_deinit(struct slab * const slab) +{ + debug_assert(slab); + struct acell * iter = slab->head_active; + while (iter) { + struct acell * const next = iter->next; + pages_unmap(iter, slab->blk_size); + iter = next; + } + iter = slab->head_backup; + while (iter) { + struct acell * const next = iter->next; + pages_unmap(iter, slab->blk_size); + iter = next; + } +} + + void +slab_destroy(struct slab * const slab) +{ + slab_deinit(slab); + free(slab); +} +// }}} slab + +// string {{{ +static union { u16 v16; u8 v8[2]; } strdec_table[100]; + +__attribute__((constructor)) + static void +strdec_init(void) +{ + for (u8 i = 0; i < 100; i++) { + const u8 hi = (typeof(hi))('0' + (i / 10)); + const u8 lo = (typeof(lo))('0' + (i % 10)); + strdec_table[i].v8[0] = hi; + strdec_table[i].v8[1] = lo; + } +} + +// output 10 bytes + void +strdec_32(void * const out, const u32 v) +{ + u32 vv = v; + u16 * const ptr = (typeof(ptr))out; + for (u64 i = 4; i <= 4; i--) { // x5 + ptr[i] = strdec_table[vv % 100].v16; + vv /= 100u; + } +} + +// output 20 bytes + void +strdec_64(void * const out, const u64 
v) +{ + u64 vv = v; + u16 * const ptr = (typeof(ptr))out; + for (u64 i = 9; i <= 9; i--) { // x10 + ptr[i] = strdec_table[vv % 100].v16; + vv /= 100; + } +} + +static const u8 strhex_table_16[16] = {'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'}; + +#if defined(__x86_64__) + static inline m128 +strhex_helper(const u64 v) +{ + static const u8 mask1[16] = {15,7,14,6,13,5,12,4,11,3,10,2,9,1,8,0}; + + const m128 tmp = _mm_set_epi64x((s64)(v>>4), (s64)v); // mm want s64 + const m128 hilo = _mm_and_si128(tmp, _mm_set1_epi8(0xf)); + const m128 bin = _mm_shuffle_epi8(hilo, _mm_load_si128((void *)mask1)); + const m128 str = _mm_shuffle_epi8(_mm_load_si128((const void *)strhex_table_16), bin); + return str; +} +#elif defined(__aarch64__) + static inline m128 +strhex_helper(const u64 v) +{ + static const u8 mask1[16] = {15,7,14,6,13,5,12,4,11,3,10,2,9,1,8,0}; + u64 v2[2] = {v, v>>4}; + const m128 tmp = vld1q_u8((u8 *)v2); + const m128 hilo = vandq_u8(tmp, vdupq_n_u8(0xf)); + const m128 bin = vqtbl1q_u8(hilo, vld1q_u8(mask1)); + const m128 str = vqtbl1q_u8(vld1q_u8(strhex_table_16), bin); + return str; +} +#else +static u16 strhex_table_256[256]; + +__attribute__((constructor)) + static void +strhex_init(void) +{ + for (u64 i = 0; i < 256; i++) + strhex_table_256[i] = (((u16)strhex_table_16[i & 0xf]) << 8) | (strhex_table_16[i>>4]); +} +#endif // __x86_64__ + +// output 8 bytes + void +strhex_32(void * const out, u32 v) +{ +#if defined(__x86_64__) + const m128 str = strhex_helper((u64)v); + _mm_storel_epi64(out, _mm_srli_si128(str, 8)); +#elif defined(__aarch64__) + const m128 str = strhex_helper((u64)v); + vst1q_lane_u64(out, vreinterpretq_u64_u8(str), 1); +#else + u16 * const ptr = (typeof(ptr))out; + for (u64 i = 0; i < 4; i++) { + ptr[3-i] = strhex_table_256[v & 0xff]; + v >>= 8; + } +#endif +} + +// output 16 bytes // buffer must be aligned by 16B + void +strhex_64(void * const out, u64 v) +{ +#if defined(__x86_64__) + const m128 str = strhex_helper(v); 
+ _mm_storeu_si128(out, str); +#elif defined(__aarch64__) + const m128 str = strhex_helper(v); + vst1q_u8(out, str); +#else + u16 * const ptr = (typeof(ptr))out; + for (u64 i = 0; i < 8; i++) { + ptr[7-i] = strhex_table_256[v & 0xff]; + v >>= 8; + } +#endif +} + +// string to u64 + inline u64 +a2u64(const void * const str) +{ + return strtoull(str, NULL, 10); +} + + inline u32 +a2u32(const void * const str) +{ + return (u32)strtoull(str, NULL, 10); +} + + inline s64 +a2s64(const void * const str) +{ + return strtoll(str, NULL, 10); +} + + inline s32 +a2s32(const void * const str) +{ + return (s32)strtoll(str, NULL, 10); +} + + void +str_print_hex(FILE * const out, const void * const data, const u32 len) +{ + const u8 * const ptr = data; + const u32 strsz = len * 3; + u8 * const buf = malloc(strsz); + for (u32 i = 0; i < len; i++) { + buf[i*3] = ' '; + buf[i*3+1] = strhex_table_16[ptr[i]>>4]; + buf[i*3+2] = strhex_table_16[ptr[i] & 0xf]; + } + fwrite(buf, strsz, 1, out); + free(buf); +} + + void +str_print_dec(FILE * const out, const void * const data, const u32 len) +{ + const u8 * const ptr = data; + const u32 strsz = len * 4; + u8 * const buf = malloc(strsz); + for (u32 i = 0; i < len; i++) { + const u8 v = ptr[i]; + buf[i*4] = ' '; + const u8 v1 = v / 100u; + const u8 v23 = v % 100u; + buf[i*4+1] = (u8)'0' + v1; + buf[i*4+2] = (u8)'0' + (v23 / 10u); + buf[i*4+3] = (u8)'0' + (v23 % 10u); + } + fwrite(buf, strsz, 1, out); + free(buf); +} + +// returns a NULL-terminated list of string tokens. +// After use you only need to free the returned pointer (char **). 
+ char ** +strtoks(const char * const str, const char * const delim) +{ + if (str == NULL) + return NULL; + size_t nptr_alloc = 32; + char ** tokens = malloc(sizeof(tokens[0]) * nptr_alloc); + if (tokens == NULL) + return NULL; + const size_t bufsize = strlen(str) + 1; + char * const buf = malloc(bufsize); + if (buf == NULL) + goto fail_buf; + + memcpy(buf, str, bufsize); + char * saveptr = NULL; + char * tok = strtok_r(buf, delim, &saveptr); + size_t ntoks = 0; + while (tok) { + if (ntoks >= nptr_alloc) { + nptr_alloc += 32; + char ** const r = realloc(tokens, sizeof(tokens[0]) * nptr_alloc); + if (r == NULL) + goto fail_realloc; + + tokens = r; + } + tokens[ntoks] = tok; + ntoks++; + tok = strtok_r(NULL, delim, &saveptr); + } + tokens[ntoks] = NULL; + const size_t nptr = ntoks + 1; // append a NULL + const size_t rsize = (sizeof(tokens[0]) * nptr) + bufsize; + char ** const r = realloc(tokens, rsize); + if (r == NULL) + goto fail_realloc; + + tokens = r; + char * const dest = (char *)(&(tokens[nptr])); + memcpy(dest, buf, bufsize); + for (u64 i = 0; i < ntoks; i++) + tokens[i] += (dest - buf); + + free(buf); + return tokens; + +fail_realloc: + free(buf); +fail_buf: + free(tokens); + return NULL; +} + + u32 +strtoks_count(const char * const * const toks) +{ + if (!toks) + return 0; + u32 n = 0; + while (toks[n++]); + return n; +} +// }}} string + +// qsbr {{{ +#define QSBR_STATES_NR ((23)) // shard capacity; valid values are 3*8-1 == 23; 5*8-1 == 39; 7*8-1 == 55 +#define QSBR_SHARD_BITS ((5)) // 2^n shards +#define QSBR_SHARD_NR (((1u) << QSBR_SHARD_BITS)) +#define QSBR_SHARD_MASK ((QSBR_SHARD_NR - 1)) + +struct qsbr_ref_real { +#ifdef QSBR_DEBUG + pthread_t ptid; // 8 + u32 status; // 4 + u32 nbt; // 4 (number of backtrace frames) +#define QSBR_DEBUG_BTNR ((14)) + void * backtrace[QSBR_DEBUG_BTNR]; +#endif + au64 qstate; // user updates it + au64 * pptr; // internal only + struct qsbr_ref_real * park; +}; + +static_assert(sizeof(struct qsbr_ref) == sizeof(struct 
qsbr_ref_real), "sizeof qsbr_ref"); + +// Quiescent-State-Based Reclamation RCU +struct qsbr { + struct qsbr_ref_real target; + u64 padding0[5]; + struct qshard { + au64 bitmap; + au64 ptrs[QSBR_STATES_NR]; + } shards[QSBR_SHARD_NR]; +}; + + struct qsbr * +qsbr_create(void) +{ + struct qsbr * const q = yalloc(sizeof(*q)); + memset(q, 0, sizeof(*q)); + return q; +} + + static inline struct qshard * +qsbr_shard(struct qsbr * const q, void * const ptr) +{ + const u32 sid = crc32c_u64(0, (u64)ptr) & QSBR_SHARD_MASK; + debug_assert(sid < QSBR_SHARD_NR); + return &(q->shards[sid]); +} + + static inline void +qsbr_write_qstate(struct qsbr_ref_real * const ref, const u64 v) +{ + atomic_store_explicit(&ref->qstate, v, MO_RELAXED); +} + + bool +qsbr_register(struct qsbr * const q, struct qsbr_ref * const qref) +{ + struct qsbr_ref_real * const ref = (typeof(ref))qref; + struct qshard * const shard = qsbr_shard(q, ref); + qsbr_write_qstate(ref, 0); + + do { + u64 bits = atomic_load_explicit(&shard->bitmap, MO_CONSUME); + const u32 pos = (u32)__builtin_ctzl(~bits); + if (unlikely(pos >= QSBR_STATES_NR)) + return false; + + const u64 bits1 = bits | (1lu << pos); + if (atomic_compare_exchange_weak_explicit(&shard->bitmap, &bits, bits1, MO_ACQUIRE, MO_RELAXED)) { + atomic_store_explicit(&shard->ptrs[pos], (u64)ref, MO_RELAXED); + //shard->ptrs[pos] = ref; + + ref->pptr = &(shard->ptrs[pos]); + ref->park = &q->target; +#ifdef QSBR_DEBUG + ref->ptid = (u64)pthread_self(); + ref->tid = 0; + ref->status = 1; + ref->nbt = backtrace(ref->backtrace, QSBR_DEBUG_BTNR); +#endif + return true; + } + } while (true); +} + + void +qsbr_unregister(struct qsbr * const q, struct qsbr_ref * const qref) +{ + struct qsbr_ref_real * const ref = (typeof(ref))qref; + struct qshard * const shard = qsbr_shard(q, ref); + const u32 pos = (u32)(ref->pptr - shard->ptrs); + debug_assert(pos < QSBR_STATES_NR); + debug_assert(shard->bitmap & (1lu << pos)); + + atomic_store_explicit(&shard->ptrs[pos], 
(u64)(&q->target), MO_RELAXED); + //shard->ptrs[pos] = &q->target; + (void)atomic_fetch_and_explicit(&shard->bitmap, ~(1lu << pos), MO_RELEASE); +#ifdef QSBR_DEBUG + ref->tid = 0; + ref->ptid = 0; + ref->status = 0xffff; // unregistered + ref->nbt = 0; +#endif + ref->pptr = NULL; + // wait for qsbr_wait to leave if it's working on the shard + while (atomic_load_explicit(&shard->bitmap, MO_CONSUME) >> 63) + cpu_pause(); +} + + inline void +qsbr_update(struct qsbr_ref * const qref, const u64 v) +{ + struct qsbr_ref_real * const ref = (typeof(ref))qref; + debug_assert((*ref->pptr) == (u64)ref); // must be unparked + // rcu update does not require release or acquire order + qsbr_write_qstate(ref, v); +} + + inline void +qsbr_park(struct qsbr_ref * const qref) +{ + cpu_cfence(); + struct qsbr_ref_real * const ref = (typeof(ref))qref; + atomic_store_explicit(ref->pptr, (u64)ref->park, MO_RELAXED); +#ifdef QSBR_DEBUG + ref->status = 0xfff; // parked +#endif +} + + inline void +qsbr_resume(struct qsbr_ref * const qref) +{ + struct qsbr_ref_real * const ref = (typeof(ref))qref; + atomic_store_explicit(ref->pptr, (u64)ref, MO_RELAXED); +#ifdef QSBR_DEBUG + ref->status = 0xf; // resumed +#endif + cpu_cfence(); +} + +// waiters needs external synchronization + void +qsbr_wait(struct qsbr * const q, const u64 target) +{ + cpu_cfence(); + qsbr_write_qstate(&q->target, target); + u64 cbits = 0; // check-bits; each bit corresponds to a shard + u64 bms[QSBR_SHARD_NR]; // copy of all bitmap + // take an unsafe snapshot of active users + for (u32 i = 0; i < QSBR_SHARD_NR; i++) { + bms[i] = atomic_load_explicit(&q->shards[i].bitmap, MO_CONSUME); + if (bms[i]) + cbits |= (1lu << i); // set to 1 if [i] has ptrs + } + + while (cbits) { + for (u64 ctmp = cbits; ctmp; ctmp &= (ctmp - 1)) { + // shard id + const u32 i = (u32)__builtin_ctzl(ctmp); + struct qshard * const shard = &(q->shards[i]); + const u64 bits1 = atomic_fetch_or_explicit(&(shard->bitmap), 1lu << 63, MO_ACQUIRE); + for (u64 
bits = bms[i]; bits; bits &= (bits - 1)) { + const u64 bit = bits & -bits; // extract lowest bit + if ((bits1 & bit) == 0) { + bms[i] &= ~bit; + } else { + au64 * pptr = &(shard->ptrs[__builtin_ctzl(bit)]); + struct qsbr_ref_real * const ptr = (typeof(ptr))atomic_load_explicit(pptr, MO_RELAXED); + if (atomic_load_explicit(&(ptr->qstate), MO_CONSUME) == target) + bms[i] &= ~bit; + } + } + (void)atomic_fetch_and_explicit(&(shard->bitmap), ~(1lu << 63), MO_RELEASE); + if (bms[i] == 0) + cbits &= ~(1lu << i); + } +#if defined(CORR) + corr_yield(); +#endif + } + debug_assert(cbits == 0); + cpu_cfence(); +} + + void +qsbr_destroy(struct qsbr * const q) +{ + if (q) + free(q); +} +#undef QSBR_STATES_NR +#undef QSBR_BITMAP_NR +// }}} qsbr + +// vim:fdm=marker diff --git a/MassTrie-beta/wormhole/lib.h b/MassTrie-beta/wormhole/lib.h new file mode 100644 index 00000000..40a2f40d --- /dev/null +++ b/MassTrie-beta/wormhole/lib.h @@ -0,0 +1,688 @@ +/* + * Copyright (c) 2016--2021 Wu, Xingbo + * + * All rights reserved. No warranty, explicit or implicit, provided. 
+ */
+#pragma once
+
+// includes {{{
+// C headers
+#include <assert.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <math.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// POSIX headers
+#include <fcntl.h>
+#include <pthread.h>
+#include <unistd.h>
+
+// Linux headers
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+// SIMD
+#if defined(__x86_64__)
+#include <x86intrin.h>
+#elif defined(__aarch64__)
+#include <arm_acle.h>
+#include <arm_neon.h>
+#endif
+// }}} includes
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// types {{{
+#ifndef typeof
+#define typeof __typeof__
+#endif
+#ifndef asm
+#define asm __asm__
+#endif
+typedef char s8;
+typedef short s16;
+typedef int s32;
+typedef long s64;
+typedef __int128_t s128;
+static_assert(sizeof(s8) == 1, "sizeof(s8)");
+static_assert(sizeof(s16) == 2, "sizeof(s16)");
+static_assert(sizeof(s32) == 4, "sizeof(s32)");
+static_assert(sizeof(s64) == 8, "sizeof(s64)");
+static_assert(sizeof(s128) == 16, "sizeof(s128)");
+
+typedef unsigned char u8;
+typedef unsigned short u16;
+typedef unsigned int u32;
+typedef unsigned long u64;
+typedef __uint128_t u128;
+static_assert(sizeof(u8) == 1, "sizeof(u8)");
+static_assert(sizeof(u16) == 2, "sizeof(u16)");
+static_assert(sizeof(u32) == 4, "sizeof(u32)");
+static_assert(sizeof(u64) == 8, "sizeof(u64)");
+static_assert(sizeof(u128) == 16, "sizeof(u128)");
+
+#if defined(__x86_64__)
+typedef __m128i m128;
+#if defined(__AVX2__)
+typedef __m256i m256;
+#endif // __AVX2__
+#if defined(__AVX512F__)
+typedef __m512i m512;
+#endif // __AVX512F__
+#elif defined(__aarch64__)
+typedef uint8x16_t m128;
+#else
+#error Need x86_64 or AArch64.
+#endif +// }}} types + +// defs {{{ +#define likely(____x____) __builtin_expect(____x____, 1) +#define unlikely(____x____) __builtin_expect(____x____, 0) + +// ansi colors +// 3X:fg; 4X:bg; 9X:light fg; 10X:light bg; +// X can be one of the following colors: +// 0:black; 1:red; 2:green; 3:yellow; +// 4:blue; 5:magenta; 6:cyan; 7:white; +#define TERMCLR(____code____) "\x1b[" #____code____ "m" +// }}} defs + +// const {{{ +#define PGBITS ((12)) +#define PGSZ ((1lu << PGBITS)) +// }}} const + +// math {{{ + extern u64 +mhash64(const u64 v); + + extern u32 +mhash32(const u32 v); + + extern u64 +gcd64(u64 a, u64 b); +// }}} math + +// random {{{ + extern u64 +random_u64(void); + + extern void +srandom_u64(const u64 seed); + + extern double +random_double(void); +// }}} random + +// timing {{{ + extern u64 +time_nsec(void); + + extern double +time_sec(void); + + extern u64 +time_diff_nsec(const u64 last); + + extern double +time_diff_sec(const double last); + + extern void +time_stamp(char * str, const size_t size); + + extern void +time_stamp2(char * str, const size_t size); +// }}} timing + +// cpucache {{{ + extern void +cpu_pause(void); + + extern void +cpu_mfence(void); + + extern void +cpu_cfence(void); + + extern void +cpu_prefetch0(const void * const ptr); + + extern void +cpu_prefetch1(const void * const ptr); + + extern void +cpu_prefetch2(const void * const ptr); + + extern void +cpu_prefetch3(const void * const ptr); + + extern void +cpu_prefetchw(const void * const ptr); +// }}} cpucache + +// crc32c {{{ + extern u32 +crc32c_u8(const u32 crc, const u8 v); + + extern u32 +crc32c_u16(const u32 crc, const u16 v); + + extern u32 +crc32c_u32(const u32 crc, const u32 v); + + extern u32 +crc32c_u64(const u32 crc, const u64 v); + +// 1 <= nr <= 3 + extern u32 +crc32c_inc_123(const u8 * buf, u32 nr, u32 crc); + +// nr % 4 == 0 + extern u32 +crc32c_inc_x4(const u8 * buf, u32 nr, u32 crc); + + extern u32 +crc32c_inc(const u8 * buf, u32 nr, u32 crc); +// }}} crc32c + 
+// debug {{{ + extern void +debug_break(void); + + extern void +debug_backtrace(void); + + extern void +watch_u64_usr1(u64 * const ptr); + +#ifndef NDEBUG + extern void +debug_assert(const bool v); +#else +#define debug_assert(expr) ((void)0) +#endif + +__attribute__((noreturn)) + extern void +debug_die(void); + +__attribute__((noreturn)) + extern void +debug_die_perror(void); + + extern void +debug_dump_maps(FILE * const out); + + extern bool +debug_perf_switch(void); +// }}} debug + +// mm {{{ +#ifdef ALLOCFAIL + extern bool +alloc_fail(void); +#endif + + extern void * +xalloc(const size_t align, const size_t size); + + extern void * +yalloc(const size_t size); + + extern void ** +malloc_2d(const size_t nr, const size_t size); + + extern void ** +calloc_2d(const size_t nr, const size_t size); + + extern void +pages_unmap(void * const ptr, const size_t size); + + extern void +pages_lock(void * const ptr, const size_t size); + +/* hugepages */ +// force posix allocators: -DVALGRIND_MEMCHECK + extern void * +pages_alloc_4kb(const size_t nr_4kb); + + extern void * +pages_alloc_2mb(const size_t nr_2mb); + + extern void * +pages_alloc_1gb(const size_t nr_1gb); + + extern void * +pages_alloc_best(const size_t size, const bool try_1gb, u64 * const size_out); +// }}} mm + +// process/thread {{{ + extern void +thread_get_name(const pthread_t pt, char * const name, const size_t len); + + extern void +thread_set_name(const pthread_t pt, const char * const name); + + extern long +process_get_rss(void); + + extern u32 +process_affinity_count(void); + + extern u32 +process_getaffinity_list(const u32 max, u32 * const cores); + + extern void +thread_setaffinity_list(const u32 nr, const u32 * const list); + + extern void +thread_pin(const u32 cpu); + + extern u64 +process_cpu_time_usec(void); + +// if args == true, argx is void ** +// if args == false, argx is void * + extern u64 +thread_fork_join(u32 nr, void *(*func) (void *), const bool args, void * const argx); + + extern int 
+thread_create_at(const u32 cpu, pthread_t * const thread, void *(*start_routine) (void *), void * const arg); +// }}} process/thread + +// locking {{{ +typedef union { + u32 opaque; +} spinlock; + + extern void +spinlock_init(spinlock * const lock); + + extern void +spinlock_lock(spinlock * const lock); + + extern bool +spinlock_trylock(spinlock * const lock); + + extern void +spinlock_unlock(spinlock * const lock); + +typedef union { + u32 opaque; +} rwlock; + + extern void +rwlock_init(rwlock * const lock); + + extern bool +rwlock_trylock_read(rwlock * const lock); + +// low-priority reader-lock; use with trylock_write_hp + extern bool +rwlock_trylock_read_lp(rwlock * const lock); + + extern bool +rwlock_trylock_read_nr(rwlock * const lock, u16 nr); + + extern void +rwlock_lock_read(rwlock * const lock); + + extern void +rwlock_unlock_read(rwlock * const lock); + + extern bool +rwlock_trylock_write(rwlock * const lock); + + extern bool +rwlock_trylock_write_nr(rwlock * const lock, u16 nr); + + extern void +rwlock_lock_write(rwlock * const lock); + +// writer has higher priority; new readers are blocked + extern bool +rwlock_trylock_write_hp(rwlock * const lock); + + extern bool +rwlock_trylock_write_hp_nr(rwlock * const lock, u16 nr); + + extern void +rwlock_lock_write_hp(rwlock * const lock); + + extern void +rwlock_unlock_write(rwlock * const lock); + + extern void +rwlock_write_to_read(rwlock * const lock); + +typedef union { + u64 opqaue[8]; +} mutex; + + extern void +mutex_init(mutex * const lock); + + extern void +mutex_lock(mutex * const lock); + + extern bool +mutex_trylock(mutex * const lock); + + extern void +mutex_unlock(mutex * const lock); + + extern void +mutex_deinit(mutex * const lock); +// }}} locking + +// coroutine {{{ +extern u64 co_switch_stack(u64 * const saversp, const u64 newrsp, const u64 retval); + +struct co; + + extern struct co * +co_create(const u64 stacksize, void * func, void * priv, u64 * const host); + + extern void 
+co_reuse(struct co * const co, void * func, void * priv, u64 * const host); + + extern struct co * +co_fork(void * func, void * priv); + + extern void * +co_priv(void); + + extern u64 +co_enter(struct co * const to, const u64 retval); + + extern u64 +co_switch_to(struct co * const to, const u64 retval); + + extern u64 +co_back(const u64 retval); + + extern void +co_exit(const u64 retval); + + extern bool +co_valid(struct co * const co); + + extern struct co * +co_self(void); + + extern void +co_destroy(struct co * const co); + +struct corr; + + extern struct corr * +corr_create(const u64 stacksize, void * func, void * priv, u64 * const host); + + extern struct corr * +corr_link(const u64 stacksize, void * func, void * priv, struct corr * const prev); + + extern void +corr_reuse(struct corr * const co, void * func, void * priv, u64 * const host); + + extern void +corr_relink(struct corr * const co, void * func, void * priv, struct corr * const prev); + + extern void +corr_enter(struct corr * const co); + + extern void +corr_yield(void); + + extern void +corr_exit(void); + + extern void +corr_destroy(struct corr * const co); +// }}} coroutine + +// bits {{{ + extern u32 +bits_reverse_u32(const u32 v); + + extern u64 +bits_reverse_u64(const u64 v); + + extern u64 +bits_rotl_u64(const u64 v, const u8 n); + + extern u64 +bits_rotr_u64(const u64 v, const u8 n); + + extern u32 +bits_rotl_u32(const u32 v, const u8 n); + + extern u32 +bits_rotr_u32(const u32 v, const u8 n); + + extern u64 +bits_p2_up_u64(const u64 v); + + extern u32 +bits_p2_up_u32(const u32 v); + + extern u64 +bits_p2_down_u64(const u64 v); + + extern u32 +bits_p2_down_u32(const u32 v); + + extern u64 +bits_round_up(const u64 v, const u8 power); + + extern u64 +bits_round_up_a(const u64 v, const u64 a); + + extern u64 +bits_round_down(const u64 v, const u8 power); + + extern u64 +bits_round_down_a(const u64 v, const u64 a); +// }}} bits + +// vi128 {{{ + extern u32 +vi128_estimate_u32(const u32 v); + + 
extern u8 * +vi128_encode_u32(u8 * dst, u32 v); + + extern const u8 * +vi128_decode_u32(const u8 * src, u32 * const out); + + extern u32 +vi128_estimate_u64(const u64 v); + + extern u8 * +vi128_encode_u64(u8 * dst, u64 v); + + extern const u8 * +vi128_decode_u64(const u8 * src, u64 * const out); +// }}} vi128 + +// misc {{{ +// TODO: only works on little endian? +struct entry13 { // what a beautiful name + union { + u16 e1; + struct { // easy for debugging + u64 e1_64:16; + u64 e3:48; + }; + u64 v64; + void * ptr; + }; +}; + +static_assert(sizeof(struct entry13) == 8, "sizeof(entry13) != 8"); + +// directly access read .e1 and .e3 +// directly write .e1 +// use entry13_update() to update the entire entry + + extern struct entry13 +entry13(const u16 e1, const u64 e3); + + extern void +entry13_update_e3(struct entry13 * const e, const u64 e3); + + extern void * +u64_to_ptr(const u64 v); + + extern u64 +ptr_to_u64(const void * const ptr); + + extern size_t +m_usable_size(void * const ptr); + + extern size_t +fdsize(const int fd); + + extern u32 +memlcp(const u8 * const p1, const u8 * const p2, const u32 max); + +__attribute__ ((format (printf, 2, 3))) + extern void +logger_printf(const int fd, const char * const fmt, ...); +// }}} misc + +// slab {{{ +struct slab; + + extern struct slab * +slab_create(const u64 obj_size, const u64 blk_size); + + extern bool +slab_reserve_unsafe(struct slab * const slab, const u64 nr); + + extern void * +slab_alloc_unsafe(struct slab * const slab); + + extern void * +slab_alloc_safe(struct slab * const slab); + + extern void +slab_free_unsafe(struct slab * const slab, void * const ptr); + + extern void +slab_free_safe(struct slab * const slab, void * const ptr); + + extern void +slab_free_all(struct slab * const slab); + + extern u64 +slab_get_nalloc(struct slab * const slab); + + extern void +slab_destroy(struct slab * const slab); +// }}} slab + +// string {{{ +// XXX strdec_ and strhex_ functions does not append the trailing '\0' to 
the output string +// size of out should be >= 10 + extern void +strdec_32(void * const out, const u32 v); + +// size of out should be >= 20 + extern void +strdec_64(void * const out, const u64 v); + +// size of out should be >= 8 + extern void +strhex_32(void * const out, const u32 v); + +// size of out should be >= 16 + extern void +strhex_64(void * const out, const u64 v); + + extern u64 +a2u64(const void * const str); + + extern u32 +a2u32(const void * const str); + + extern s64 +a2s64(const void * const str); + + extern s32 +a2s32(const void * const str); + + extern void +str_print_hex(FILE * const out, const void * const data, const u32 len); + + extern void +str_print_dec(FILE * const out, const void * const data, const u32 len); + +// user should free returned ptr (and nothing else) after use + extern char ** +strtoks(const char * const str, const char * const delim); + + extern u32 +strtoks_count(const char * const * const toks); +// }}} string + +// qsbr {{{ +// QSBR vs EBR (Quiescent-State vs Epoch Based Reclaimation) +// QSBR: readers just use qsbr_update -> qsbr_update -> ... repeatedly +// EBR: readers use qsbr_update -> qsbr_park -> qsbr_resume -> qsbr_update -> ... 
+// The advantage of EBR is qsbr_park can happen much earlier than the next qsbr_update +// The disadvantage is the extra cost, a pair of park/resume is used in every iteration +struct qsbr; +struct qsbr_ref { +#ifdef QSBR_DEBUG + u64 debug[16]; +#endif + u64 opaque[3]; +}; + + extern struct qsbr * +qsbr_create(void); + +// every READER accessing the shared data must first register itself with the qsbr + extern bool +qsbr_register(struct qsbr * const q, struct qsbr_ref * const qref); + + extern void +qsbr_unregister(struct qsbr * const q, struct qsbr_ref * const qref); + +// For READER: mark the beginning of critical section; like rcu_read_lock() + extern void +qsbr_update(struct qsbr_ref * const qref, const u64 v); + +// temporarily stop access the shared data to avoid blocking writers +// READER can use qsbr_park (like rcu_read_unlock()) in conjunction with qsbr_update +// qsbr_park is roughly equivalent to qsbr_unregister, but faster + extern void +qsbr_park(struct qsbr_ref * const qref); + +// undo the effect of qsbr_park; must use it between qsbr_park and qsbr_update +// qsbr_resume is roughly equivalent to qsbr_register, but faster + extern void +qsbr_resume(struct qsbr_ref * const qref); + +// WRITER: wait until all the readers have announced v=target with qsbr_update + extern void +qsbr_wait(struct qsbr * const q, const u64 target); + + extern void +qsbr_destroy(struct qsbr * const q); +// }}} qsbr + +#ifdef __cplusplus +} +#endif +// vim:fdm=marker diff --git a/MassTrie-beta/wormhole/libwh.so b/MassTrie-beta/wormhole/libwh.so new file mode 100644 index 00000000..2ecd7e7e Binary files /dev/null and b/MassTrie-beta/wormhole/libwh.so differ diff --git a/MassTrie-beta/wormhole/stresstest.c b/MassTrie-beta/wormhole/stresstest.c new file mode 100644 index 00000000..93fb6f05 --- /dev/null +++ b/MassTrie-beta/wormhole/stresstest.c @@ -0,0 +1,354 @@ +/* + * Copyright (c) 2016-2020 Wu, Xingbo + * + * All rights reserved. No warranty, explicit or implicit, provided. 
+ */ +#define _GNU_SOURCE + +#include "lib.h" +#include "kv.h" +#include "wh.h" +#include "ctypes.h" + +struct stress_info { + u64 nkeys; + u32 nloader; + u32 nunldr; + u32 nth; + u32 cpt; + bool has_iter; + + au64 seqno; + struct kv ** keys; + + const struct kvmap_api * api; + void * map; + au64 tot; + au64 wfail; + u64 endtime; +}; + + static void * +stress_load_worker(void * ptr) +{ + struct stress_info * const si = (typeof(si))ptr; + srandom_u64(time_nsec() * time_nsec() / time_nsec()); + void * const ref = kvmap_ref(si->api, si->map); + const u64 seq = atomic_fetch_add(&si->seqno, 1); + const u64 n0 = si->nkeys / si->nloader * seq; + const u64 nz = (seq == (si->nloader - 1)) ? si->nkeys : (si->nkeys / si->nloader * (seq + 1)); + //printf("load worker %lu %lu\n", n0, nz-1); + + char * buf = malloc(128); + debug_assert(buf); + u64 * buf64 = (typeof(buf64))buf; + for (u64 i = n0; i < nz; i++) { + const u32 klen = (u32)(random_u64() & 0x3flu) + 8; + const u32 klen8 = (klen + 7) >> 3; + /* + buf64[0] = bswap_64(i); // little endian + for (u64 j = 1; j < klen8; j++) + buf64[j] = random_u64(); + */ + const u64 rkey = random_u64(); + for (u32 j = 0; j < klen8; j++) + buf64[j] = (rkey >> j) & 0x0101010101010101lu; + + si->keys[i] = kv_create(buf, klen, buf, 8); + if (si->keys[i] == NULL) + exit(0); + kvmap_kv_put(si->api, ref, si->keys[i]); + } + free(buf); + kvmap_unref(si->api, ref); + return NULL; +} + + static void * +stress_unload_worker(void * ptr) +{ + struct stress_info * const si = (typeof(si))ptr; + const u64 seq = atomic_fetch_add(&si->seqno, 1); + const u64 n0 = si->nkeys / si->nunldr * seq; + const u64 nz = (seq == (si->nunldr - 1)) ? 
si->nkeys : (si->nkeys / si->nunldr * (seq + 1)); + + void * const ref = kvmap_ref(si->api, si->map); + for (u64 i = n0; i < nz; i++) { + kvmap_kv_del(si->api, ref, si->keys[i]); + free(si->keys[i]); + } + kvmap_unref(si->api, ref); + return NULL; +} + + static void +stress_inp_plus1(struct kv * const kv0, void * const priv) +{ + (void)priv; + if (kv0) { // can be NULL + u64 * ptr = kv_vptr(kv0); + ++(*ptr); + } +} + + static struct kv * +stress_merge_plus1(struct kv * const kv0, void * const priv) +{ + (void)priv; + if (kv0) { // can be NULL + u64 * ptr = kv_vptr(kv0); + ++(*ptr); + return kv0; + } else { + u64 * ptr = kv_vptr((struct kv *)priv); + *ptr = 0; + return priv; + } +} + + static void +stress_func(struct stress_info * const si) +{ + srandom_u64(time_nsec() * time_nsec() / time_nsec()); + const struct kvmap_api * const api = si->api; + void * ref = kvmap_ref(api, si->map); + struct kv * next = si->keys[random_u64() % si->nkeys]; + u64 rnext = random_u64() % si->nkeys; + struct kv * const tmp = malloc(128); + struct kref tmpkref; + struct kvref tmpkvref; + debug_assert(tmp); + void * iter = NULL; + if (api->iter_park) { + iter = api->iter_create(ref); + api->iter_park(iter); + } + u64 wfail1 = 0; + u64 nops = 0; +#define BATCHSIZE ((4096)) + do { + for (u64 i = 0; i < BATCHSIZE; i++) { + // reading kv keys leads to unnecessary cache misses + // use prefetch to minimize overhead on workload generation + struct kv * const key = next; + next = si->keys[rnext]; + cpu_prefetch0(next); + cpu_prefetch0(((u8 *)next) + 64); + rnext = random_u64() % si->nkeys; + cpu_prefetch0(&(si->keys[rnext])); + + // do probe + // customize your benchmark: do a mix of wh operations with switch-cases + const u64 r = random_u64() % 16; + switch (r) { + case 0: + kvmap_kv_probe(api, ref, key); + break; + case 1: + kvmap_kv_get(api, ref, key, tmp); + break; + case 2: + if (si->has_iter) { + if (api->iter_park == NULL) + iter = api->iter_create(ref); + debug_assert(iter); + 
kvmap_kv_iter_seek(api, iter, key); + api->iter_next(iter, tmp); + api->iter_peek(iter, tmp); + api->iter_skip(iter, 2); + // this is unsafe; only reader's lock is acquired + if (api->iter_inp) + api->iter_inp(iter, stress_inp_plus1, NULL); + // kref + if (api->iter_kref) + api->iter_kref(iter, &tmpkref); + // kvref + if (api->iter_kvref) + api->iter_kvref(iter, &tmpkvref); + // done + if (api->iter_park) + api->iter_park(iter); + else + api->iter_destroy(iter); + } + break; + case 3: + if (api->refpark) { + api->park(ref); + api->resume(ref); + } + break; + case 4: + if (api->iter_park) + api->iter_destroy(iter); + (void)kvmap_unref(api, ref); + ref = kvmap_ref(api, si->map); + if (api->iter_park) + iter = api->iter_create(ref); + break; + case 5: + if (api->merge) { + kv_dup2_key(key, tmp); + tmp->vlen = 8; + kvmap_kv_merge(api, ref, key, stress_merge_plus1, tmp); + } + break; + case 6: + if ((random_u64() & 0x7fffu) == 0x22 && api->delr) + (void)kvmap_kv_delr(api, ref, si->keys[rnext], (rnext + 10) < si->nkeys ? 
si->keys[rnext + 10] : NULL); + else + kvmap_kv_probe(api, ref, key); + break; + case 7: case 8: case 9: + (void)kvmap_kv_del(api, ref, key); + break; + case 10: case 11: + if (api->inpw) + kvmap_kv_inpw(api, ref, key, stress_inp_plus1, NULL); + break; + case 12: case 13: case 14: case 15: + if (!kvmap_kv_put(api, ref, key)) + wfail1++; + break; + default: + break; + } + } + nops += BATCHSIZE; + } while (time_nsec() < si->endtime); + si->wfail += wfail1; + if (api->iter_park) + api->iter_destroy(iter); + kvmap_unref(api, ref); + free(tmp); + si->tot += nops; +} + + static void +stress_co_worker(void) +{ + struct stress_info * const si = (typeof(si))co_priv(); + debug_assert(si); + stress_func(si); +} + + static void * +stress_thread_worker(void * ptr) +{ + struct stress_info * const si = (typeof(si))ptr; + if (si->cpt) { + u64 hostrsp = 0; + struct corr * crs[32]; + do { // to work smoothly with ALLOCFAIL + crs[0] = corr_create(16*PGSZ, stress_co_worker, si, &hostrsp); + } while (crs[0] == NULL); + for (u32 j = 1; j < si->cpt; j++) { + do { // to work smoothly with ALLOCFAIL + crs[j] = corr_link(16*PGSZ, stress_co_worker, si, crs[j-1]); + } while (crs[j] == NULL); + } + + corr_enter(crs[0]); + for (u32 j = 0; j < si->cpt; j++) + corr_destroy(crs[j]); + } else { + stress_func(si); + } + return NULL; +} + + int +main(int argc, char ** argv) +{ + struct stress_info si = {.nkeys = 10000, .nloader = 1, .nunldr = 1, .nth = 1, .cpt = 0}; + argc--; + argv++; + int n = -1; + if ((n = kvmap_api_helper(argc, argv, NULL, &si.api, &si.map)) < 0) { + fprintf(stderr, "usage: api ... 
[<#keys>=10000 [<#load-threads>=1 [<#unload-threads>=1 [<#threads>=1 [<#co-per-thread>=0 (disabled) [=1 [=1]]]]]]]\n"); + kvmap_api_helper_message(); + exit(0); + } + argc -= n; + argv += n; + + const bool has_point = si.api->get && si.api->probe && si.api->del && si.api->put; + if (!has_point) { + fprintf(stderr, "api not supported\n"); + exit(0); + } + if (!si.api->inpw) + fprintf(stderr, "api->inpw function not found: ignored\n"); + if (!si.api->merge) + fprintf(stderr, "api->merge function not found: ignored\n"); + if (!si.api->delr) + fprintf(stderr, "api->delr function not found: ignored\n"); + + si.has_iter = si.api->iter_create && si.api->iter_seek && si.api->iter_peek && + si.api->iter_skip && si.api->iter_next && si.api->iter_destroy; + if (!si.has_iter) + fprintf(stderr, "iter functions not complete: ignored\n"); + + // generate keys + if (argc >= 1) + si.nkeys = a2u64(argv[0]); + si.keys = malloc(sizeof(struct kv *) * si.nkeys); + debug_assert(si.keys); + if (argc >= 2) + si.nloader = a2u32(argv[1]); + if (argc >= 3) + si.nunldr = a2u32(argv[2]); + if (argc >= 4) + si.nth = a2u32(argv[3]); + if (argc >= 5) + si.cpt = a2u32(argv[4]); + if (si.cpt > 32) + si.cpt = 32; +#if !defined(CORR) + if (si.cpt > 1) + fprintf(stderr, TERMCLR(35) "CORR not enabled. Compile with -DCORR to enable it.\n" TERMCLR(0)); +#endif // CORR + const u64 nr = (argc >= 6) ? a2u64(argv[5]) : 1; // default 1 + const u64 ne = (argc >= 7) ? 
a2u64(argv[6]) : 1; // default 1 + printf("stresstest: nkeys %lu ldr %u uldr %u th %u cpt %u r %lu e %lu\n", + si.nkeys, si.nloader, si.nunldr, si.nth, si.cpt, nr, ne); + + for (u64 e = 0; e < ne; e++) { + si.seqno = 0; + const u64 dtl = thread_fork_join(si.nloader, (void *)stress_load_worker, false, &si); + printf("load th %u mops %.2lf\n", si.nloader, ((double)si.nkeys) * 1e3 / ((double)dtl)); + if (si.api->fprint) + si.api->fprint(si.map, stdout); + + debug_perf_switch(); + for (u64 r = 0; r < nr; r++) { + si.tot = 0; + si.wfail = 0; + si.endtime = time_nsec() + 2000000000lu; + const u64 dt = thread_fork_join(si.nth, (void *)stress_thread_worker, false, &si); + const double mops = ((double)si.tot) * 1e3 / ((double)dt); + char ts[64]; + time_stamp(ts, 64); + const long rss = process_get_rss(); + printf("%s e %lu r %lu th %u cpt %u tot %lu mops %.2lf rss %ldkB wfail %lu\n", + ts, e, r, si.nth, si.cpt, si.tot, mops, rss, si.wfail); + debug_perf_switch(); + } + si.seqno = 0; + if (si.nunldr == 0) { // use clean + const u64 t0 = time_nsec(); + si.api->clean(si.map); + const u64 dtu = time_diff_nsec(t0); + for (u64 i = 0; i < si.nkeys; i++) + free(si.keys[i]); + printf("clean mops %.2lf\n", ((double)si.nkeys) *1e3 / ((double)dtu)); + } else { + const u64 dtu = thread_fork_join(si.nunldr, (void *)stress_unload_worker, false, &si); + printf("unload th %u mops %.2lf\n", si.nunldr, ((double)si.nkeys) *1e3 / ((double)dtu)); + } + } + + free(si.keys); + si.api->destroy(si.map); + return 0; +} diff --git a/MassTrie-beta/wormhole/stresstest.out b/MassTrie-beta/wormhole/stresstest.out new file mode 100644 index 00000000..874d359c Binary files /dev/null and b/MassTrie-beta/wormhole/stresstest.out differ diff --git a/MassTrie-beta/wormhole/wh.c b/MassTrie-beta/wormhole/wh.c new file mode 100644 index 00000000..1d31e231 --- /dev/null +++ b/MassTrie-beta/wormhole/wh.c @@ -0,0 +1,3876 @@ +/* + * Copyright (c) 2016--2021 Wu, Xingbo + * + * All rights reserved. 
No warranty, explicit or implicit, provided. + */ +#define _GNU_SOURCE + +// headers {{{ +#include // static_assert +#include "lib.h" +#include "ctypes.h" +#include "kv.h" +#include "wh.h" +// }}} headers + +// def {{{ +#define WH_HMAPINIT_SIZE ((1u << 12)) // 10: 16KB/64KB 12: 64KB/256KB 14: 256KB/1MB +#define WH_SLABMETA_SIZE ((1lu << 21)) // 2MB + +#ifndef HEAPCHECKING +#define WH_SLABLEAF_SIZE ((1lu << 21)) // 2MB is ok +#else +#define WH_SLABLEAF_SIZE ((1lu << 21)) // 2MB for valgrind +#endif + +#define WH_KPN ((128u)) // keys per node; power of 2 +#define WH_HDIV (((1u << 16)) / WH_KPN) +#define WH_MID ((WH_KPN >> 1)) // ideal cut point for split, the closer the better +#define WH_BKT_NR ((8)) +#define WH_KPN2 ((WH_KPN + WH_KPN)) + +#define WH_KPN_MRG (((WH_KPN + WH_MID) >> 1 )) // 3/4 + +// FO is fixed at 256. Don't change it +#define WH_FO ((256u)) // index fan-out +// number of bits in a bitmap +#define WH_BMNR ((WH_FO >> 6)) // number of u64 +// }}} def + +// struct {{{ +struct wormmeta { + struct entry13 k13; // kref+klen + struct entry13 l13; // lmost+bitmin+bitmax + struct entry13 r13; // rmost+hash32_lo + struct entry13 p13; // lpath+hash32_hi + u64 bitmap[0]; // 4 if bitmin != bitmax +}; +static_assert(sizeof(struct wormmeta) == 32, "sizeof(wormmeta) != 32"); + +struct wormkv64 { u64 key; void * ptr; }; // u64 keys (whu64) + +struct wormleaf { + // first line + rwlock leaflock; + spinlock sortlock; // to protect the seemingly "read-only" iter_seek + au64 lv; // version (dont use the first u64) + struct wormleaf * prev; // prev leaf + struct wormleaf * next; // next leaf + struct kv * anchor; + + u32 nr_sorted; + u32 nr_keys; + u64 reserved[2]; + + struct entry13 hs[WH_KPN]; // sorted by hashes + u8 ss[WH_KPN]; // sorted by keys +}; + +struct wormslot { u16 t[WH_BKT_NR]; }; +static_assert(sizeof(struct wormslot) == 16, "sizeof(wormslot) != 16"); + +struct wormmbkt { struct wormmeta * e[WH_BKT_NR]; }; +static_assert(sizeof(struct wormmbkt) == 64, 
"sizeof(wormmbkt) != 64"); + +struct wormhmap { + au64 hv; + struct wormslot * wmap; + struct wormmbkt * pmap; + u32 mask; + u32 maxplen; + u64 msize; + + struct slab * slab1; + struct slab * slab2; + struct kv * pbuf; +}; +static_assert(sizeof(struct wormhmap) == 64, "sizeof(wormhmap) != 64"); + +struct wormhole { + // 1 line + union { + au64 hmap_ptr; // safe + struct wormhmap * hmap; // unsafe + }; + u64 padding0[6]; + struct wormleaf * leaf0; // usually not used + // 1 line + struct kvmap_mm mm; + struct qsbr * qsbr; + struct slab * slab_leaf; + struct kv * pbuf; + u32 leaftype; + u32 padding1; + // 2 lines + struct wormhmap hmap2[2]; + // fifth line + rwlock metalock; + u32 padding2[15]; +}; + +struct wormhole_iter { + struct wormref * ref; // safe-iter only + struct wormhole * map; + struct wormleaf * leaf; + u32 is; +}; + +struct wormref { + struct wormhole * map; + struct qsbr_ref qref; +}; +// }}} struct + +// helpers {{{ + +// meta {{{ + static inline struct kv * +wormmeta_keyref_load(const struct wormmeta * const meta) +{ + return u64_to_ptr(meta->k13.e3); +} + + static inline u16 +wormmeta_klen_load(const struct wormmeta * const meta) +{ + return meta->k13.e1; +} + + static inline struct wormleaf * +wormmeta_lmost_load(const struct wormmeta * const meta) +{ + return u64_to_ptr(meta->l13.e3 & (~0x3flu)); +} + + static inline u32 +wormmeta_bitmin_load(const struct wormmeta * const meta) +{ + return (u32)(meta->l13.v64 & 0x1fflu); +} + + static inline u32 +wormmeta_bitmax_load(const struct wormmeta * const meta) +{ + return (u32)((meta->l13.v64 >> 9) & 0x1fflu); +} + + static inline u32 +wormmeta_hash32_load(const struct wormmeta * const meta) +{ + return ((u32)meta->r13.e1) | (((u32)meta->p13.e1) << 16); +} + + static inline struct wormleaf * +wormmeta_rmost_load(const struct wormmeta * const meta) +{ + return u64_to_ptr(meta->r13.e3); +} + + static inline struct wormleaf * +wormmeta_lpath_load(const struct wormmeta * const meta) +{ + return 
u64_to_ptr(meta->p13.e3); +} + +// internal + static inline void +wormmeta_lpath_store(struct wormmeta * const meta, struct wormleaf * const leaf) +{ + entry13_update_e3(&meta->p13, ptr_to_u64(leaf)); +} + +// also updates leaf_klen_eq and + static inline void +wormmeta_lmost_store(struct wormmeta * const meta, struct wormleaf * const leaf) +{ + const u64 minmax = meta->l13.v64 & 0x3fffflu; + meta->l13.v64 = (((u64)leaf) << 16) | minmax; + + const bool leaf_klen_eq = leaf->anchor->klen == wormmeta_klen_load(meta); + wormmeta_lpath_store(meta, leaf_klen_eq ? leaf : leaf->prev); +} + + static inline void +wormmeta_bitmin_store(struct wormmeta * const meta, const u32 bitmin) +{ + meta->l13.v64 = (meta->l13.v64 & (~0x1fflu)) | bitmin; +} + + static inline void +wormmeta_bitmax_store(struct wormmeta * const meta, const u32 bitmax) +{ + meta->l13.v64 = (meta->l13.v64 & (~0x3fe00lu)) | (bitmax << 9); +} + + static inline void +wormmeta_rmost_store(struct wormmeta * const meta, struct wormleaf * const leaf) +{ + entry13_update_e3(&meta->r13, ptr_to_u64(leaf)); +} + +// for wormmeta_alloc + static void +wormmeta_init(struct wormmeta * const meta, struct wormleaf * const lrmost, + struct kv * const keyref, const u32 alen, const u32 bit) +{ + keyref->refcnt++; // shared + + const u32 plen = keyref->klen; + debug_assert(plen <= UINT16_MAX); + meta->k13 = entry13((u16)plen, ptr_to_u64(keyref)); + meta->l13.v64 = (ptr_to_u64(lrmost) << 16) | (bit << 9) | bit; + + const u32 hash32 = keyref->hashlo; + meta->r13 = entry13((u16)hash32, ptr_to_u64(lrmost)); + + const bool leaf_klen_eq = alen == plen; + meta->p13 = entry13((u16)(hash32 >> 16), ptr_to_u64(leaf_klen_eq ? 
lrmost : lrmost->prev)); +} +// }}} meta + +// meta-bitmap {{{ + static inline bool +wormmeta_bm_test(const struct wormmeta * const meta, const u32 id) +{ + debug_assert(id < WH_FO); + const u32 bitmin = wormmeta_bitmin_load(meta); + const u32 bitmax = wormmeta_bitmax_load(meta); + if (bitmin == bitmax) { // half node + return bitmin == id; + } else { // full node + return (bool)((meta->bitmap[id >> 6u] >> (id & 0x3fu)) & 1lu); + } +} + +// meta must be a full node + static void +wormmeta_bm_set(struct wormmeta * const meta, const u32 id) +{ + // need to replace meta + u64 * const ptr = &(meta->bitmap[id >> 6u]); + const u64 bit = 1lu << (id & 0x3fu); + if ((*ptr) & bit) + return; + + (*ptr) |= bit; + + // min + if (id < wormmeta_bitmin_load(meta)) + wormmeta_bitmin_store(meta, id); + + // max + const u32 oldmax = wormmeta_bitmax_load(meta); + if (oldmax == WH_FO || id > oldmax) + wormmeta_bitmax_store(meta, id); +} + +// find the lowest bit > id0 +// return WH_FO if not found + static inline u32 +wormmeta_bm_gt(const struct wormmeta * const meta, const u32 id0) +{ + u32 ix = id0 >> 6; + u64 bits = meta->bitmap[ix] & ~((1lu << (id0 & 0x3fu)) - 1lu); + if (bits) + return (ix << 6) + (u32)__builtin_ctzl(bits); + + while (++ix < WH_BMNR) { + bits = meta->bitmap[ix]; + if (bits) + return (ix << 6) + (u32)__builtin_ctzl(bits); + } + + return WH_FO; +} + +// find the highest bit that is lower than the id0 +// return WH_FO if not found + static inline u32 +wormmeta_bm_lt(const struct wormmeta * const meta, const u32 id0) +{ + u32 ix = id0 >> 6; + u64 bits = meta->bitmap[ix] & ((1lu << (id0 & 0x3fu)) - 1lu); + if (bits) + return (ix << 6) + 63u - (u32)__builtin_clzl(bits); + + while (ix--) { + bits = meta->bitmap[ix]; + if (bits) + return (ix << 6) + 63u - (u32)__builtin_clzl(bits); + } + + return WH_FO; +} + +// meta must be a full node + static inline void +wormmeta_bm_clear(struct wormmeta * const meta, const u32 id) +{ + debug_assert(wormmeta_bitmin_load(meta) < 
wormmeta_bitmax_load(meta)); + meta->bitmap[id >> 6u] &= (~(1lu << (id & 0x3fu))); + + // min + if (id == wormmeta_bitmin_load(meta)) + wormmeta_bitmin_store(meta, wormmeta_bm_gt(meta, id)); + + // max + if (id == wormmeta_bitmax_load(meta)) + wormmeta_bitmax_store(meta, wormmeta_bm_lt(meta, id)); +} +// }}} meta-bitmap + +// key/prefix {{{ + static inline u16 +wormhole_pkey(const u32 hash32) +{ + const u16 pkey0 = ((u16)hash32) ^ ((u16)(hash32 >> 16)); + return pkey0 ? pkey0 : 1; +} + + static inline u32 +wormhole_bswap(const u32 hashlo) +{ + return __builtin_bswap32(hashlo); +} + + static inline bool +wormhole_key_meta_match(const struct kv * const key, const struct wormmeta * const meta) +{ + return (key->klen == wormmeta_klen_load(meta)) + && (!memcmp(key->kv, wormmeta_keyref_load(meta)->kv, key->klen)); +} + +// called by get_kref_slot + static inline bool +wormhole_kref_meta_match(const struct kref * const kref, + const struct wormmeta * const meta) +{ + return (kref->len == wormmeta_klen_load(meta)) + && (!memcmp(kref->ptr, wormmeta_keyref_load(meta)->kv, kref->len)); +} + +// called from meta_down ... 
get_kref1_slot +// will access rmost, prefetching is effective here + static inline bool +wormhole_kref1_meta_match(const struct kref * const kref, + const struct wormmeta * const meta, const u8 cid) +{ + const u8 * const keybuf = wormmeta_keyref_load(meta)->kv; + const u32 plen = kref->len; + return ((plen + 1) == wormmeta_klen_load(meta)) + && (!memcmp(kref->ptr, keybuf, plen)) + && (keybuf[plen] == cid); +} + +// warning: be careful with buffer overflow + static inline void +wormhole_prefix(struct kv * const pfx, const u32 klen) +{ + pfx->klen = klen; + kv_update_hash(pfx); +} + +// for split + static inline void +wormhole_prefix_inc1(struct kv * const pfx) +{ + pfx->hashlo = crc32c_u8(pfx->hashlo, pfx->kv[pfx->klen]); + pfx->klen++; +} + +// meta_lcp only + static inline void +wormhole_kref_inc(struct kref * const kref, const u32 len0, + const u32 crc, const u32 inc) +{ + kref->hash32 = crc32c_inc(kref->ptr + len0, inc, crc); + kref->len = len0 + inc; +} + +// meta_lcp only + static inline void +wormhole_kref_inc_123(struct kref * const kref, const u32 len0, + const u32 crc, const u32 inc) +{ + kref->hash32 = crc32c_inc_123(kref->ptr + len0, inc, crc); + kref->len = len0 + inc; +} +// }}} key/prefix + +// alloc {{{ + static inline struct kv * +wormhole_alloc_akey(const size_t klen) +{ +#ifdef ALLOCFAIL + if (alloc_fail()) + return NULL; +#endif + return malloc(sizeof(struct kv) + klen); +} + + static inline void +wormhole_free_akey(struct kv * const akey) +{ + free(akey); +} + + static inline struct kv * +wormhole_alloc_mkey(const size_t klen) +{ +#ifdef ALLOCFAIL + if (alloc_fail()) + return NULL; +#endif + return malloc(sizeof(struct kv) + klen); +} + + static inline void +wormhole_free_mkey(struct kv * const mkey) +{ + free(mkey); +} + + static struct wormleaf * +wormleaf_alloc(struct wormhole * const map, struct wormleaf * const prev, + struct wormleaf * const next, struct kv * const anchor) +{ + struct wormleaf * const leaf = 
slab_alloc_safe(map->slab_leaf); + if (leaf == NULL) + return NULL; + + rwlock_init(&(leaf->leaflock)); + spinlock_init(&(leaf->sortlock)); + + // keep the old version; new version will be assigned by split functions + //leaf->lv = 0; + + leaf->prev = prev; + leaf->next = next; + leaf->anchor = anchor; + + leaf->nr_keys = 0; + leaf->nr_sorted = 0; + + // hs requires zero init. + memset(leaf->hs, 0, sizeof(leaf->hs[0]) * WH_KPN); + return leaf; +} + + static void +wormleaf_free(struct slab * const slab, struct wormleaf * const leaf) +{ + debug_assert(leaf->leaflock.opaque == 0); + wormhole_free_akey(leaf->anchor); + slab_free_safe(slab, leaf); +} + + static struct wormmeta * +wormmeta_alloc(struct wormhmap * const hmap, struct wormleaf * const lrmost, + struct kv * const keyref, const u32 alen, const u32 bit) +{ + debug_assert(alen <= UINT16_MAX); + debug_assert(lrmost && keyref); + + struct wormmeta * const meta = slab_alloc_unsafe(hmap->slab1); + if (meta == NULL) + return NULL; + + wormmeta_init(meta, lrmost, keyref, alen, bit); + return meta; +} + + static inline bool +wormhole_slab_reserve(struct wormhole * const map, const u32 nr) +{ +#ifdef ALLOCFAIL + if (alloc_fail()) + return false; +#endif + for (u32 i = 0; i < 2; i++) { + if (!(map->hmap2[i].slab1 && map->hmap2[i].slab2)) + continue; + if (!slab_reserve_unsafe(map->hmap2[i].slab1, nr)) + return false; + if (!slab_reserve_unsafe(map->hmap2[i].slab2, nr)) + return false; + } + return true; +} + + static void +wormmeta_keyref_release(struct wormmeta * const meta) +{ + struct kv * const keyref = wormmeta_keyref_load(meta); + debug_assert(keyref->refcnt); + keyref->refcnt--; + if (keyref->refcnt == 0) + wormhole_free_mkey(keyref); +} + + static void +wormmeta_free(struct wormhmap * const hmap, struct wormmeta * const meta) +{ + wormmeta_keyref_release(meta); + slab_free_unsafe(hmap->slab1, meta); +} +// }}} alloc + +// lock {{{ + static void +wormleaf_lock_write(struct wormleaf * const leaf, struct wormref * 
const ref)
{
  // fast path: uncontended trylock avoids the park/resume round trip
  if (!rwlock_trylock_write(&(leaf->leaflock))) {
    wormhole_park(ref);
    rwlock_lock_write(&(leaf->leaflock));
    wormhole_resume(ref);
  }
}

// Acquire a leaf's read lock; parks the reference if it must block.
  static void
wormleaf_lock_read(struct wormleaf * const leaf, struct wormref * const ref)
{
  if (!rwlock_trylock_read(&(leaf->leaflock))) {
    wormhole_park(ref);
    rwlock_lock_read(&(leaf->leaflock));
    wormhole_resume(ref);
  }
}

  static void
wormleaf_unlock_write(struct wormleaf * const leaf)
{
  rwlock_unlock_write(&(leaf->leaflock));
}

  static void
wormleaf_unlock_read(struct wormleaf * const leaf)
{
  rwlock_unlock_read(&(leaf->leaflock));
}

// Acquire the map-wide metadata write lock; parks the reference if blocking.
  static void
wormhmap_lock(struct wormhole * const map, struct wormref * const ref)
{
  if (!rwlock_trylock_write(&(map->metalock))) {
    wormhole_park(ref);
    rwlock_lock_write(&(map->metalock));
    wormhole_resume(ref);
  }
}

  static inline void
wormhmap_unlock(struct wormhole * const map)
{
  rwlock_unlock_write(&(map->metalock));
}
// }}} lock

// hmap-version {{{
// Return the sibling of hmap in the map's two-element hmap2 array.
  static inline struct wormhmap *
wormhmap_switch(struct wormhole * const map, struct wormhmap * const hmap)
{
  return (hmap == map->hmap2) ?
(hmap + 1) : (hmap - 1);
}

// Load the currently-published hmap pointer (acquire pairs with the
// release in wormhmap_store).
  static inline struct wormhmap *
wormhmap_load(struct wormhole * const map)
{
  return (struct wormhmap *)atomic_load_explicit(&(map->hmap_ptr), MO_ACQUIRE);
}

// Publish a new current hmap (release store).
  static inline void
wormhmap_store(struct wormhole * const map, struct wormhmap * const hmap)
{
  atomic_store_explicit(&(map->hmap_ptr), (u64)hmap, MO_RELEASE);
}

  static inline u64
wormhmap_version_load(const struct wormhmap * const hmap)
{
  // no concurrent access
  return atomic_load_explicit(&(hmap->hv), MO_ACQUIRE);
}

  static inline void
wormhmap_version_store(struct wormhmap * const hmap, const u64 v)
{
  atomic_store_explicit(&(hmap->hv), v, MO_RELEASE);
}

  static inline u64
wormleaf_version_load(struct wormleaf * const leaf)
{
  return atomic_load_explicit(&(leaf->lv), MO_CONSUME);
}

  static inline void
wormleaf_version_store(struct wormleaf * const leaf, const u64 v)
{
  atomic_store_explicit(&(leaf->lv), v, MO_RELEASE);
}
// }}} hmap-version

// co {{{
// Prefetch a pmap bucket; a no-op under CORR (coroutine) builds where the
// prefetch+yield happens at the access site instead.
  static inline void
wormhmap_prefetch_pmap(const struct wormhmap * const hmap, const u32 idx)
{
#if defined(CORR)
  (void)hmap;
  (void)idx;
#else
  cpu_prefetch0(&(hmap->pmap[idx]));
#endif
}

// Fetch the i-th meta pointer of bucket mid; under CORR, prefetch it and
// yield so another coroutine runs while the cache line loads.
  static inline struct wormmeta *
wormhmap_get_meta(const struct wormhmap * const hmap, const u32 mid, const u32 i)
{
  struct wormmeta * const meta = hmap->pmap[mid].e[i];
#if defined(CORR)
  cpu_prefetch0(meta);
  corr_yield();
#endif
  return meta;
}

// Prefetch the hs entries around the key's home slot (pkey/WH_HDIV) before
// the in-leaf search touches them.
  static inline void
wormleaf_prefetch(struct wormleaf * const leaf, const u32 hashlo)
{
  const u32 i = wormhole_pkey(hashlo) / WH_HDIV;
#if defined(CORR)
  cpu_prefetch0(leaf);
  cpu_prefetch0(&(leaf->hs[i-4]));
  cpu_prefetch0(&(leaf->hs[i+4]));
  corr_yield();
#else
  cpu_prefetch0(&(leaf->hs[i]));
#endif
}

// kref-vs-kv match; under CORR prefetch the kv's lines and yield before
// running the actual comparison.
  static inline bool
wormhole_kref_kv_match(const struct kref * const key, const struct kv * const curr)
{
#if defined(CORR)
  const u8 * const ptr = (typeof(ptr))curr;
  cpu_prefetch0(ptr);
  cpu_prefetch0(ptr
+ 64);
  if (key->len > 56) {
    cpu_prefetch0(ptr + 128);
    cpu_prefetch0(ptr + 192);
  }
  corr_yield();
#endif
  return kref_kv_match(key, curr);
}

// Report quiescence (version v) to QSBR; under CORR also yield.
  static inline void
wormhole_qsbr_update_pause(struct wormref * const ref, const u64 v)
{
  qsbr_update(&ref->qref, v);
#if defined(CORR)
  corr_yield();
#endif
}
// }}} co

// }}} helpers

// hmap {{{
// hmap is the MetaTrieHT of Wormhole
// Allocate pmap (meta pointers) and wmap (16-bit tag slots) as one mapping:
// pmap at offset 0, wmap at offset psize. Initial table has
// WH_HMAPINIT_SIZE buckets (mask is size-1, so size must be a power of 2).
  static bool
wormhmap_init(struct wormhmap * const hmap, struct kv * const pbuf)
{
  const u64 wsize = sizeof(hmap->wmap[0]) * WH_HMAPINIT_SIZE;
  const u64 psize = sizeof(hmap->pmap[0]) * WH_HMAPINIT_SIZE;
  u64 msize = wsize + psize;
  u8 * const mem = pages_alloc_best(msize, true, &msize);
  if (mem == NULL)
    return false;

  hmap->pmap = (typeof(hmap->pmap))mem;
  hmap->wmap = (typeof(hmap->wmap))(mem + psize);
  hmap->msize = msize;
  hmap->mask = WH_HMAPINIT_SIZE - 1;
  wormhmap_version_store(hmap, 0);
  hmap->maxplen = 0;
  hmap->pbuf = pbuf;
  return true;
}

// Unmap the hmap arrays; safe on a partially-initialized hmap (NULL pmap).
  static inline void
wormhmap_deinit(struct wormhmap * const hmap)
{
  if (hmap->pmap) {
    pages_unmap(hmap->pmap, hmap->msize);
    hmap->pmap = NULL;
    hmap->wmap = NULL;
  }
}

// All-zero 128-bit vector (used to find empty tag slots).
  static inline m128
wormhmap_zero(void)
{
#if defined(__x86_64__)
  return _mm_setzero_si128();
#elif defined(__aarch64__)
  return vdupq_n_u8(0);
#endif
}

// Broadcast the 16-bit partial key into all 8 lanes for SIMD tag compare.
  static inline m128
wormhmap_m128_pkey(const u16 pkey)
{
#if defined(__x86_64__)
  return _mm_set1_epi16((short)pkey);
#elif defined(__aarch64__)
  return vreinterpretq_u8_u16(vdupq_n_u16(pkey));
#endif
}

// Compare the slot's 8 u16 tags against skey; the returned mask has TWO
// bits set per matching lane (x86 movemask is per-byte; the NEON path
// builds the same 2-bits-per-lane layout via mbits), so callers step by
// bit pairs (i2>>1, mask ^= 3u<<i2).
  static inline u32
wormhmap_match_mask(const struct wormslot * const s, const m128 skey)
{
#if defined(__x86_64__)
  const m128 sv = _mm_load_si128((const void *)s);
  return (u32)_mm_movemask_epi8(_mm_cmpeq_epi16(skey, sv));
#elif defined(__aarch64__)
  const uint16x8_t sv = vld1q_u16((const u16 *)s); // load 16 bytes at s
  const uint16x8_t cmp = vceqq_u16(vreinterpretq_u16_u8(skey), sv); // cmpeq => 0xffff or 0x0000
  static const
uint16x8_t mbits = {0x3, 0xc, 0x30, 0xc0, 0x300, 0xc00, 0x3000, 0xc000};
  return (u32)vaddvq_u16(vandq_u16(cmp, mbits));
#endif
}

// True when any tag in the slot equals skey (cheaper than a full mask).
  static inline bool
wormhmap_match_any(const struct wormslot * const s, const m128 skey)
{
#if defined(__x86_64__)
  return wormhmap_match_mask(s, skey) != 0;
#elif defined(__aarch64__)
  const uint16x8_t sv = vld1q_u16((const u16 *)s); // load 16 bytes at s
  const uint16x8_t cmp = vceqq_u16(vreinterpretq_u16_u8(skey), sv); // cmpeq => 0xffff or 0x0000
  return vaddvq_u32(vreinterpretq_u32_u16(cmp)) != 0;
#endif
}

// meta_lcp only
// Tag-only membership probe over both candidate buckets. May report a
// false positive (tags can collide); never a false negative.
  static inline bool
wormhmap_peek(const struct wormhmap * const hmap, const u32 hash32)
{
  const m128 sk = wormhmap_m128_pkey(wormhole_pkey(hash32));
  const u32 midx = hash32 & hmap->mask;
  const u32 midy = wormhole_bswap(hash32) & hmap->mask;
  return wormhmap_match_any(&(hmap->wmap[midx]), sk)
    || wormhmap_match_any(&(hmap->wmap[midy]), sk);
}

// Scan one bucket for a meta whose full key matches; the 2-bits-per-entry
// mask layout drives the i2>>1 index and the (3u << i2) pair clear.
  static inline struct wormmeta *
wormhmap_get_slot(const struct wormhmap * const hmap, const u32 mid,
    const m128 skey, const struct kv * const key)
{
  u32 mask = wormhmap_match_mask(&(hmap->wmap[mid]), skey);
  while (mask) {
    const u32 i2 = (u32)__builtin_ctz(mask);
    struct wormmeta * const meta = wormhmap_get_meta(hmap, mid, i2>>1);
    if (likely(wormhole_key_meta_match(key, meta)))
      return meta;
    mask ^= (3u << i2);
  }
  return NULL;
}

// Cuckoo lookup by full kv key: primary bucket first, then the secondary
// (byte-swapped hash) bucket.
  static struct wormmeta *
wormhmap_get(const struct wormhmap * const hmap, const struct kv * const key)
{
  const u32 hash32 = key->hashlo;
  const u32 midx = hash32 & hmap->mask;
  wormhmap_prefetch_pmap(hmap, midx);
  const u32 midy = wormhole_bswap(hash32) & hmap->mask;
  wormhmap_prefetch_pmap(hmap, midy);
  const m128 skey = wormhmap_m128_pkey(wormhole_pkey(hash32));

  struct wormmeta * const r = wormhmap_get_slot(hmap, midx, skey, key);
  if (r)
    return r;
  return wormhmap_get_slot(hmap, midy, skey, key);
}

// for meta_lcp only
  static inline struct wormmeta *
// Bucket scan keyed by a kref (key prefix reference) instead of a full kv.
wormhmap_get_kref_slot(const struct wormhmap * const hmap, const u32 mid,
    const m128 skey, const struct kref * const kref)
{
  u32 mask = wormhmap_match_mask(&(hmap->wmap[mid]), skey);
  while (mask) {
    const u32 i2 = (u32)__builtin_ctz(mask);
    struct wormmeta * const meta = wormhmap_get_meta(hmap, mid, i2>>1);
    if (likely(wormhole_kref_meta_match(kref, meta)))
      return meta;

    mask ^= (3u << i2);
  }
  return NULL;
}

// for meta_lcp only
// Two-bucket cuckoo lookup by kref; same shape as wormhmap_get.
  static inline struct wormmeta *
wormhmap_get_kref(const struct wormhmap * const hmap, const struct kref * const kref)
{
  const u32 hash32 = kref->hash32;
  const u32 midx = hash32 & hmap->mask;
  wormhmap_prefetch_pmap(hmap, midx);
  const u32 midy = wormhole_bswap(hash32) & hmap->mask;
  wormhmap_prefetch_pmap(hmap, midy);
  const m128 skey = wormhmap_m128_pkey(wormhole_pkey(hash32));

  struct wormmeta * const r = wormhmap_get_kref_slot(hmap, midx, skey, kref);
  if (r)
    return r;
  return wormhmap_get_kref_slot(hmap, midy, skey, kref);
}

// for meta_down only
// Bucket scan matching the prefix extended by one byte (cid).
  static inline struct wormmeta *
wormhmap_get_kref1_slot(const struct wormhmap * const hmap, const u32 mid,
    const m128 skey, const struct kref * const kref, const u8 cid)
{
  u32 mask = wormhmap_match_mask(&(hmap->wmap[mid]), skey);
  while (mask) {
    const u32 i2 = (u32)__builtin_ctz(mask);
    struct wormmeta * const meta = wormhmap_get_meta(hmap, mid, i2>>1);
    //cpu_prefetch0(wormmeta_rmost_load(meta)); // will access
    if (likely(wormhole_kref1_meta_match(kref, meta, cid)))
      return meta;

    mask ^= (3u << i2);
  }
  return NULL;
}

// for meta_down only
// Lookup of kref + one extra byte; the hash is extended incrementally with
// crc32c_u8 rather than rehashing the whole prefix.
  static inline struct wormmeta *
wormhmap_get_kref1(const struct wormhmap * const hmap,
    const struct kref * const kref, const u8 cid)
{
  const u32 hash32 = crc32c_u8(kref->hash32, cid);
  const u32 midx = hash32 & hmap->mask;
  wormhmap_prefetch_pmap(hmap, midx);
  const u32 midy = wormhole_bswap(hash32) & hmap->mask;
  wormhmap_prefetch_pmap(hmap, midy);
  const m128 skey =
wormhmap_m128_pkey(wormhole_pkey(hash32));

  struct wormmeta * const r = wormhmap_get_kref1_slot(hmap, midx, skey, kref, cid);
  if (r)
    return r;
  return wormhmap_get_kref1_slot(hmap, midy, skey, kref, cid);
}

// Number of occupied entries in a bucket: position of the first zero tag
// (entries are kept contiguous), or 8 when the bucket is full.
  static inline u32
wormhmap_slot_count(const struct wormslot * const slot)
{
  const u32 mask = wormhmap_match_mask(slot, wormhmap_zero());
  return mask ? ((u32)__builtin_ctz(mask) >> 1) : 8;
}

// After an expansion: move entries parked at their secondary bucket back to
// their primary bucket when it has room, shortening future lookups.
  static inline void
wormhmap_squeeze(const struct wormhmap * const hmap)
{
  struct wormslot * const wmap = hmap->wmap;
  struct wormmbkt * const pmap = hmap->pmap;
  const u32 mask = hmap->mask;
  const u64 nrs64 = ((u64)(hmap->mask)) + 1; // must use u64; u32 can overflow
  for (u64 si64 = 0; si64 < nrs64; si64++) { // # of buckets
    const u32 si = (u32)si64;
    u32 ci = wormhmap_slot_count(&(wmap[si]));
    for (u32 ei = ci - 1; ei < WH_BKT_NR; ei--) { // counts down; u32 wrap past 0 ends the loop
      struct wormmeta * const meta = pmap[si].e[ei];
      const u32 sj = wormmeta_hash32_load(meta) & mask; // first hash
      if (sj == si)
        continue;

      // move
      const u32 ej = wormhmap_slot_count(&(wmap[sj]));
      if (ej < WH_BKT_NR) { // has space at home location
        wmap[sj].t[ej] = wmap[si].t[ei];
        pmap[sj].e[ej] = pmap[si].e[ei];
        const u32 ni = ci - 1;
        if (ei < ni) { // plug the hole with the current last entry
          wmap[si].t[ei] = wmap[si].t[ni];
          pmap[si].e[ei] = pmap[si].e[ni];
        }
        wmap[si].t[ni] = 0;
        pmap[si].e[ni] = NULL;
        ci--;
      }
    }
  }
}

// Double the table and rehash every entry into the new arrays; blocks
// (sleep-wait) if memory is unavailable.
  static void
wormhmap_expand(struct wormhmap * const hmap)
{
  // sync expand
  const u32 mask0 = hmap->mask;
  if (mask0 == UINT32_MAX)
    debug_die();
  const u32 nr0 = mask0 + 1;
  const u32 mask1 = mask0 + nr0;
  const u64 nr1 = ((u64)nr0) << 1; // must use u64; u32 can overflow
  const u64 wsize = nr1 * sizeof(hmap->wmap[0]);
  const u64 psize = nr1 * sizeof(hmap->pmap[0]);
  u64 msize = wsize + psize;
  u8 * mem = pages_alloc_best(msize, true, &msize);
  if (mem == NULL) {
    // We are at a very deep call stack from wormhole_put().
// Gracefully handling the failure requires lots of changes.
    // Currently we simply wait for available memory
    // TODO: gracefully return with insertion failure
    char ts[64];
    time_stamp(ts, 64);
    fprintf(stderr, "%s %s sleep-wait for memory allocation %lukB\n",
        __func__, ts, msize >> 10);
    do {
      sleep(1);
      mem = pages_alloc_best(msize, true, &msize);
    } while (mem == NULL);
    time_stamp(ts, 64);
    fprintf(stderr, "%s %s memory allocation done\n", __func__, ts);
  }

  struct wormhmap hmap1 = *hmap;
  hmap1.pmap = (typeof(hmap1.pmap))mem;
  hmap1.wmap = (typeof(hmap1.wmap))(mem + psize);
  hmap1.msize = msize;
  hmap1.mask = mask1;

  const struct wormslot * const wmap0 = hmap->wmap;
  const struct wormmbkt * const pmap0 = hmap->pmap;

  // rehash: an entry found at its primary bucket keeps the primary hash;
  // one parked at its secondary bucket is re-placed via the bswap hash
  for (u32 s = 0; s < nr0; s++) {
    const struct wormmbkt * const bkt = &pmap0[s];
    for (u32 i = 0; (i < WH_BKT_NR) && bkt->e[i]; i++) {
      const struct wormmeta * const meta = bkt->e[i];
      const u32 hash32 = wormmeta_hash32_load(meta);
      const u32 idx0 = hash32 & mask0;
      const u32 idx1 = ((idx0 == s) ? hash32 : wormhole_bswap(hash32)) & mask1;

      const u32 n = wormhmap_slot_count(&(hmap1.wmap[idx1]));
      debug_assert(n < 8);
      hmap1.wmap[idx1].t[n] = wmap0[s].t[i];
      hmap1.pmap[idx1].e[n] = bkt->e[i];
    }
  }
  pages_unmap(hmap->pmap, hmap->msize);
  hmap->pmap = hmap1.pmap;
  hmap->wmap = hmap1.wmap;
  hmap->msize = hmap1.msize;
  hmap->mask = hmap1.mask;
  wormhmap_squeeze(hmap);
}

// Cuckoo insertion into bucket mid0: append when there is room; otherwise
// try to evict a resident entry to its alternate bucket (recursion bounded
// by depth). Returns false when no placement was found.
  static bool
wormhmap_cuckoo(struct wormhmap * const hmap, const u32 mid0,
    struct wormmeta * const e0, const u16 s0, const u32 depth)
{
  const u32 ii = wormhmap_slot_count(&(hmap->wmap[mid0]));
  if (ii < WH_BKT_NR) {
    hmap->wmap[mid0].t[ii] = s0;
    hmap->pmap[mid0].e[ii] = e0;
    return true;
  } else if (depth == 0) {
    return false;
  }

  // depth > 0
  struct wormmbkt * const bkt = &(hmap->pmap[mid0]);
  u16 * const sv = &(hmap->wmap[mid0].t[0]);
  for (u32 i = 0; i < WH_BKT_NR; i++) {
    const struct wormmeta * const meta = bkt->e[i];
    debug_assert(meta);
    const u32 hash32 = wormmeta_hash32_load(meta);

    const u32 midx = hash32 & hmap->mask;
    const u32 midy = wormhole_bswap(hash32) & hmap->mask;
    const u32 midt = (midx != mid0) ? midx : midy;
    if (midt != mid0) { // possible
      // no penalty if moving someone back to its 1st hash location
      const u32 depth1 = (midt == midx) ?
depth : (depth - 1);
      if (wormhmap_cuckoo(hmap, midt, bkt->e[i], sv[i], depth1)) {
        bkt->e[i] = e0;
        sv[i] = s0;
        return true;
      }
    }
  }
  return false;
}

// Insert a meta node: cuckoo-place at either bucket with increasing effort;
// expand the table and retry when all placements fail.
  static void
wormhmap_set(struct wormhmap * const hmap, struct wormmeta * const meta)
{
  const u32 hash32 = wormmeta_hash32_load(meta);
  const u32 midx = hash32 & hmap->mask;
  wormhmap_prefetch_pmap(hmap, midx);
  const u32 midy = wormhole_bswap(hash32) & hmap->mask;
  wormhmap_prefetch_pmap(hmap, midy);
  const u16 pkey = wormhole_pkey(hash32);
  // insert with cuckoo
  if (likely(wormhmap_cuckoo(hmap, midx, meta, pkey, 1)))
    return;
  if (wormhmap_cuckoo(hmap, midy, meta, pkey, 1))
    return;
  if (wormhmap_cuckoo(hmap, midx, meta, pkey, 2))
    return;

  // expand
  wormhmap_expand(hmap);

  wormhmap_set(hmap, meta); // retry in the doubled table
}

// Remove meta from bucket mid if present; the last occupied entry fills the
// hole so entries remain contiguous (slot_count invariant).
  static bool
wormhmap_del_slot(struct wormhmap * const hmap, const u32 mid,
    const struct wormmeta * const meta, const m128 skey)
{
  u32 mask = wormhmap_match_mask(&(hmap->wmap[mid]), skey);
  while (mask) {
    const u32 i2 = (u32)__builtin_ctz(mask);
    const struct wormmeta * const meta1 = hmap->pmap[mid].e[i2>>1];
    if (likely(meta == meta1)) {
      const u32 i = i2 >> 1;
      const u32 j = wormhmap_slot_count(&(hmap->wmap[mid])) - 1;
      hmap->wmap[mid].t[i] = hmap->wmap[mid].t[j];
      hmap->pmap[mid].e[i] = hmap->pmap[mid].e[j];
      hmap->wmap[mid].t[j] = 0;
      hmap->pmap[mid].e[j] = NULL;
      return true;
    }
    mask -= (3u << i2);
  }
  return false;
}

// Delete meta from whichever of its two buckets currently holds it.
  static bool
wormhmap_del(struct wormhmap * const hmap, const struct wormmeta * const meta)
{
  const u32 hash32 = wormmeta_hash32_load(meta);
  const u32 midx = hash32 & hmap->mask;
  const u32 midy = wormhole_bswap(hash32) & hmap->mask;
  const m128 skey = wormhmap_m128_pkey(wormhole_pkey(hash32));
  return wormhmap_del_slot(hmap, midx, meta, skey)
    || wormhmap_del_slot(hmap, midy, meta, skey);
}

// Replace the pointer to `old` with `new` in bucket mid; tags are left
// untouched (old and new share the same hash).
  static bool
wormhmap_replace_slot(struct wormhmap * const hmap, const u32 mid,
    const struct wormmeta * const old, const
m128 skey, struct wormmeta * const new)
{
  u32 mask = wormhmap_match_mask(&(hmap->wmap[mid]), skey);
  while (mask) {
    const u32 i2 = (u32)__builtin_ctz(mask);
    struct wormmeta ** const pslot = &hmap->pmap[mid].e[i2>>1];
    if (likely(old == *pslot)) {
      *pslot = new;
      return true;
    }
    mask -= (3u << i2);
  }
  return false;
}

// Swap old for new in place; the hash (and thus bucket/tag) is taken from
// old, so new must hash identically.
  static bool
wormhmap_replace(struct wormhmap * const hmap, const struct wormmeta * const old, struct wormmeta * const new)
{
  const u32 hash32 = wormmeta_hash32_load(old);
  const u32 midx = hash32 & hmap->mask;
  const u32 midy = wormhole_bswap(hash32) & hmap->mask;
  const m128 skey = wormhmap_m128_pkey(wormhole_pkey(hash32));
  return wormhmap_replace_slot(hmap, midx, old, skey, new)
    || wormhmap_replace_slot(hmap, midy, old, skey, new);
}
// }}} hmap

// create {{{
// it's unsafe
// Build the initial leaf (anchored at the empty key) and register its meta
// node in every active hmap. Each failure path unwinds what was allocated.
  static bool
wormhole_create_leaf0(struct wormhole * const map)
{
  const bool sr = wormhole_slab_reserve(map, 1);
  if (unlikely(!sr))
    return false;

  // create leaf of empty key
  struct kv * const anchor = wormhole_alloc_akey(0);
  if (anchor == NULL)
    return false;
  kv_dup2(kv_null(), anchor);

  struct wormleaf * const leaf0 = wormleaf_alloc(map, NULL, NULL, anchor);
  if (leaf0 == NULL) {
    wormhole_free_akey(anchor);
    return false;
  }

  struct kv * const mkey = wormhole_alloc_mkey(0);
  if (mkey == NULL) {
    wormleaf_free(map->slab_leaf, leaf0);
    return false;
  }

  wormhole_prefix(mkey, 0);
  mkey->refcnt = 0;
  // create meta of empty key
  for (u32 i = 0; i < 2; i++) {
    if (map->hmap2[i].slab1) { // skip the inactive hmap in the 1-hmap variant
      struct wormmeta * const m0 = wormmeta_alloc(&map->hmap2[i], leaf0, mkey, 0, WH_FO);
      debug_assert(m0); // already reserved enough
      wormhmap_set(&(map->hmap2[i]), m0);
    }
  }

  map->leaf0 = leaf0;
  return true;
}

// Shared constructor; nh is the number of hmaps to initialize (2 for the
// thread-safe map, 1 for the single-threaded "unsafe" variant). All error
// paths funnel through `fail` for a full unwind.
  static struct wormhole *
wormhole_create_internal(const struct kvmap_mm * const mm, const u32 nh)
{
  struct wormhole * const map = yalloc(sizeof(*map));
  if (map == NULL)
    return NULL;
memset(map, 0, sizeof(*map));
  // mm: caller-provided memory-management callbacks, or the default dup mm
  map->mm = mm ? (*mm) : kvmap_mm_dup;

  // pbuf for meta-merge
  map->pbuf = yalloc(1lu << 16); // 64kB
  if (map->pbuf == NULL)
    goto fail;

  // hmap
  for (u32 i = 0; i < nh; i++) {
    struct wormhmap * const hmap = &map->hmap2[i];
    if (!wormhmap_init(hmap, map->pbuf))
      goto fail;

    hmap->slab1 = slab_create(sizeof(struct wormmeta), WH_SLABMETA_SIZE);
    if (hmap->slab1 == NULL)
      goto fail;

    // slab2 objects carry an extra WH_BMNR-word bitmap after the meta
    hmap->slab2 = slab_create(sizeof(struct wormmeta) + (sizeof(u64) * WH_BMNR), WH_SLABMETA_SIZE);
    if (hmap->slab2 == NULL)
      goto fail;
  }

  // leaf slab
  map->slab_leaf = slab_create(sizeof(struct wormleaf), WH_SLABLEAF_SIZE);
  if (map->slab_leaf == NULL)
    goto fail;

  // qsbr
  map->qsbr = qsbr_create();
  if (map->qsbr == NULL)
    goto fail;

  // leaf0
  if (!wormhole_create_leaf0(map))
    goto fail;

  rwlock_init(&(map->metalock));
  wormhmap_store(map, &map->hmap2[0]);
  return map;

fail:
  // unwind everything; the destroy/deinit calls tolerate partially-built state
  if (map->qsbr)
    qsbr_destroy(map->qsbr);

  if (map->slab_leaf)
    slab_destroy(map->slab_leaf);

  for (u32 i = 0; i < nh; i++) {
    struct wormhmap * const hmap = &map->hmap2[i];
    if (hmap->slab1)
      slab_destroy(hmap->slab1);
    if (hmap->slab2)
      slab_destroy(hmap->slab2);
    wormhmap_deinit(hmap);
  }

  if (map->pbuf)
    free(map->pbuf);

  free(map);
  return NULL;
}

// Public constructor: thread-safe wormhole (two hmap versions).
  struct wormhole *
wormhole_create(const struct kvmap_mm * const mm)
{
  return wormhole_create_internal(mm, 2);
}

// Public constructor: single-threaded (unsafe) variant (one hmap).
  struct wormhole *
whunsafe_create(const struct kvmap_mm * const mm)
{
  return wormhole_create_internal(mm, 1);
}
// }}} create

// jump {{{

// lcp {{{
// search in the hash table for the Longest Prefix Match of the search key
// The corresponding wormmeta node is returned and the LPM is recorded in kref
  static struct wormmeta *
wormhole_meta_lcp(const struct wormhmap * const hmap, struct kref * const kref, const u32 klen)
{
  // invariant: lo <= lcp < (lo + gd)
  // ending condition: gd == 1
  u32 gd =
(hmap->maxplen < klen ? hmap->maxplen : klen) + 1u;
  u32 lo = 0;
  u32 loh = KV_CRC32C_SEED;

  // Phase 1: search on the prefix length using the cheap (possibly
  // false-positive) wormhmap_peek; the prefix hash is extended
  // incrementally with crc32c as lo grows.
#define META_LCP_GAP_1 ((7u))
  while (META_LCP_GAP_1 < gd) {
    const u32 inc = gd >> 3 << 2; // x4
    const u32 hash32 = crc32c_inc_x4(kref->ptr + lo, inc, loh);
    if (wormhmap_peek(hmap, hash32)) {
      loh = hash32;
      lo += inc;
      gd -= inc;
    } else {
      gd = inc;
    }
  }

  while (1 < gd) {
    const u32 inc = gd >> 1;
    const u32 hash32 = crc32c_inc_123(kref->ptr + lo, inc, loh);
    if (wormhmap_peek(hmap, hash32)) {
      loh = hash32;
      lo += inc;
      gd -= inc;
    } else {
      gd = inc;
    }
  }
#undef META_LCP_GAP_1

  kref->hash32 = loh;
  kref->len = lo;
  struct wormmeta * ret = wormhmap_get_kref(hmap, kref);
  if (likely(ret != NULL)) // peek never false-positived: done
    return ret;

  // Phase 2: a peek was a false positive; redo with real lookups,
  // narrowing within [0, lo]
  gd = lo;
  lo = 0;
  loh = KV_CRC32C_SEED;

#define META_LCP_GAP_2 ((5u))
  while (META_LCP_GAP_2 < gd) {
    const u32 inc = (gd * 3) >> 2;
    wormhole_kref_inc(kref, lo, loh, inc);
    struct wormmeta * const tmp = wormhmap_get_kref(hmap, kref);
    if (tmp) {
      loh = kref->hash32;
      lo += inc;
      gd -= inc;
      ret = tmp;
      // if the next byte is a child of this node the LPM is longer
      if (wormmeta_bm_test(tmp, kref->ptr[lo])) {
        loh = crc32c_u8(loh, kref->ptr[lo]);
        lo++;
        gd--;
        ret = NULL;
      } else {
        gd = 1;
        break;
      }
    } else {
      gd = inc;
    }
  }

  while (1 < gd) {
    const u32 inc = (gd * 3) >> 2;
    wormhole_kref_inc_123(kref, lo, loh, inc);
    struct wormmeta * const tmp = wormhmap_get_kref(hmap, kref);
    if (tmp) {
      loh = kref->hash32;
      lo += inc;
      gd -= inc;
      ret = tmp;
      if (wormmeta_bm_test(tmp, kref->ptr[lo])) {
        loh = crc32c_u8(loh, kref->ptr[lo]);
        lo++;
        gd--;
        ret = NULL;
      } else {
        break;
      }
    } else {
      gd = inc;
    }
  }
#undef META_LCP_GAP_2

  if (kref->len != lo) {
    kref->hash32 = loh;
    kref->len = lo;
  }
  if (ret == NULL)
    ret = wormhmap_get_kref(hmap, kref);
  debug_assert(ret);
  return ret;
}
// }}} lcp

// down {{{
// Descend from the LPM meta node to the target leaf using the first
// unmatched key byte and the node's child bitmap.
  static struct wormleaf *
wormhole_meta_down(const struct wormhmap * const hmap, const struct kref * const lcp,
const struct wormmeta * const meta, const u32 klen)
{
  if (likely(lcp->len < klen)) { // partial match
    const u32 id0 = lcp->ptr[lcp->len]; // first byte beyond the LPM
    if (wormmeta_bitmin_load(meta) > id0) { // no left, don't care about right.
      return wormmeta_lpath_load(meta);
    } else if (wormmeta_bitmax_load(meta) < id0) { // has left sibling but no right sibling
      return wormmeta_rmost_load(meta);
    } else { // has both (expensive)
      return wormmeta_rmost_load(wormhmap_get_kref1(hmap, lcp, (u8)wormmeta_bm_lt(meta, id0)));
    }
  } else { // lcp->len == klen
    return wormmeta_lpath_load(meta);
  }
}
// }}} down

// jump-rw {{{
// Full jump: LPM search, then meta-down to the candidate leaf for key.
  static struct wormleaf *
wormhole_jump_leaf(const struct wormhmap * const hmap, const struct kref * const key)
{
  struct kref kref = {.ptr = key->ptr};
  debug_assert(kv_crc32c(key->ptr, key->len) == key->hash32);

  const struct wormmeta * const meta = wormhole_meta_lcp(hmap, &kref, key->len);
  return wormhole_meta_down(hmap, &kref, meta, key->len);
}

// Jump and read-lock the target leaf. If the leaf's version is newer than
// the hmap version observed before the jump, the metadata changed under us
// and the whole jump is retried.
  static struct wormleaf *
wormhole_jump_leaf_read(struct wormref * const ref, const struct kref * const key)
{
  struct wormhole * const map = ref->map;
#pragma nounroll
  do {
    const struct wormhmap * const hmap = wormhmap_load(map);
    const u64 v = wormhmap_version_load(hmap);
    qsbr_update(&ref->qref, v);
    struct wormleaf * const leaf = wormhole_jump_leaf(hmap, key);
    wormleaf_prefetch(leaf, key->hash32);
#pragma nounroll
    do {
      if (rwlock_trylock_read_nr(&(leaf->leaflock), 64)) {
        if (wormleaf_version_load(leaf) <= v)
          return leaf; // leaf unchanged since the jump: success
        wormleaf_unlock_read(leaf);
        break; // stale leaf: redo the jump
      }
      // v1 is loaded before lv; if lv <= v, can update v1 without redo jump
      const u64 v1 = wormhmap_version_load(wormhmap_load(map));
      if (wormleaf_version_load(leaf) > v)
        break;
      wormhole_qsbr_update_pause(ref, v1);
    } while (true);
  } while (true);
}

// Write-lock flavor of the jump; structure identical to the read version.
  static struct wormleaf *
wormhole_jump_leaf_write(struct wormref * const ref, const struct kref * const key)
{
  struct wormhole * const map = ref->map;
#pragma nounroll
  do {
    const struct wormhmap * const hmap = wormhmap_load(map);
    const u64 v = wormhmap_version_load(hmap);
    qsbr_update(&ref->qref, v);
    struct wormleaf * const leaf = wormhole_jump_leaf(hmap, key);
    wormleaf_prefetch(leaf, key->hash32);
#pragma nounroll
    do {
      if (rwlock_trylock_write_nr(&(leaf->leaflock), 64)) {
        if (wormleaf_version_load(leaf) <= v)
          return leaf; // leaf unchanged since the jump: success
        wormleaf_unlock_write(leaf);
        break; // stale leaf: redo the jump
      }
      // v1 is loaded before lv; if lv <= v, can update v1 without redo jump
      const u64 v1 = wormhmap_version_load(wormhmap_load(map));
      if (wormleaf_version_load(leaf) > v)
        break;
      wormhole_qsbr_update_pause(ref, v1);
    } while (true);
  } while (true);
}
// }}} jump-rw

// }}} jump

// leaf-read {{{
// kv pointer stored at hash-slot index ih (the e3 field holds the pointer).
  static inline struct kv *
wormleaf_kv_at_ih(const struct wormleaf * const leaf, const u32 ih)
{
  return u64_to_ptr(leaf->hs[ih].e3);
}

// kv pointer at sorted position is; ss maps sorted order -> hs index.
  static inline struct kv *
wormleaf_kv_at_is(const struct wormleaf * const leaf, const u32 is)
{
  return u64_to_ptr(leaf->hs[leaf->ss[is]].e3);
}

// Prefetch the whole ss array, one cache line per 64 entries.
  static inline void
wormleaf_prefetch_ss(const struct wormleaf * const leaf)
{
  for (u32 i = 0; i < WH_KPN; i+=64)
    cpu_prefetch0(&leaf->ss[i]);
}

// leaf must have been sorted
// return the key at [i] as if k1 has been inserted into leaf; i <= leaf->nr_sorted
  static const struct kv *
wormleaf_kv_at_is1(const struct wormleaf * const leaf, const u32 i, const u32 is1, const struct kv * const k1)
{
  debug_assert(leaf->nr_keys == leaf->nr_sorted);
  debug_assert(is1 <= leaf->nr_sorted);
  if (i < is1)
    return wormleaf_kv_at_is(leaf, i);
  else if (i > is1)
    return wormleaf_kv_at_is(leaf, i-1);
  else // i == is1
    return k1;
}



// fast point-lookup
// returns WH_KPN if not found
// hs entries cluster around the home slot pkey/WH_HDIV; probe it first,
// then scan outward while neighbor pkeys remain plausible.
  static u32
wormleaf_match_hs(const struct wormleaf * const leaf, const struct kref * const key)
{
  const u16 pkey = wormhole_pkey(key->hash32);
  const u32 i0 = pkey / WH_HDIV;
  const struct entry13 * const hs = leaf->hs;

  if
(hs[i0].e1 == pkey) {
    struct kv * const curr = u64_to_ptr(hs[i0].e3);
    if (likely(wormhole_kref_kv_match(key, curr)))
      return i0;
  }
  if (hs[i0].e1 == 0)
    return WH_KPN; // home slot empty: the key cannot be in this leaf

  // search left
  u32 i = i0 - 1;
  while (i < WH_KPN) { // u32 wrap-around below 0 terminates the loop
    if (hs[i].e1 == pkey) {
      struct kv * const curr = u64_to_ptr(hs[i].e3);
      if (likely(wormhole_kref_kv_match(key, curr)))
        return i;
    } else if (hs[i].e1 < pkey) { // entries left of home are <= pkey
      break;
    }
    i--;
  }

  // search right
  i = i0 + 1;
  while (i < WH_KPN) {
    if (hs[i].e1 == pkey) {
      struct kv * const curr = u64_to_ptr(hs[i].e3);
      if (likely(wormhole_kref_kv_match(key, curr)))
        return i;
    } else if ((hs[i].e1 > pkey) || (hs[i].e1 == 0)) {
      break;
    }
    i++;
  }


  // not found
  return WH_KPN;
}

// search for an existing entry in hs
// Same bidirectional probe as match_hs, but compares whole entry13 values
// (v64) instead of key contents.
  static u32
wormleaf_search_ih(const struct wormleaf * const leaf, const struct entry13 e)
{
  const u16 pkey = e.e1;
  const u32 i0 = pkey / WH_HDIV;
  const struct entry13 * const hs = leaf->hs;
  const struct entry13 e0 = hs[i0];

  if (e0.v64 == e.v64)
    return i0;

  if (e0.e1 == 0)
    return WH_KPN;

  // search left
  u32 i = i0 - 1;
  while (i < WH_KPN) {
    const struct entry13 ei = hs[i];
    if (ei.v64 == e.v64) {
      return i;
    } else if (ei.e1 < pkey) {
      break;
    }
    i--;
  }

  // search right
  i = i0 + 1;
  while (i < WH_KPN) {
    const struct entry13 ei = hs[i];
    if (ei.v64 == e.v64) {
      return i;
    } else if ((ei.e1 > pkey) || (ei.e1 == 0)) {
      break;
    }
    i++;
  }

  // not found
  return WH_KPN;
}

// search for an existing entry in ss
// SIMD scan of ss for the byte ih; the entry must exist (debug_die when the
// scan falls through without a hit).
  static u32
wormleaf_search_is(const struct wormleaf * const leaf, const u8 ih)
{
#if defined(__x86_64__)
  // TODO: avx512
#if defined(__AVX2__)
  const m256 i1 = _mm256_set1_epi8((char)ih);
  for (u32 i = 0; i < leaf->nr_keys; i += sizeof(m256)) {
    const m256 sv = _mm256_load_si256((m256 *)(leaf->ss+i));
    const u32 mask = (u32)_mm256_movemask_epi8(_mm256_cmpeq_epi8(sv, i1));
    if (mask)
      return i + (u32)__builtin_ctz(mask);
  }
#else // SSE4.2
  const m128 i1 = _mm_set1_epi8((char)ih);
  for (u32 i = 0; i < leaf->nr_keys; i += sizeof(m128)) {
    const m128 sv = _mm_load_si128((m128 *)(leaf->ss+i));
    const u32 mask = (u32)_mm_movemask_epi8(_mm_cmpeq_epi8(sv, i1));
    if (mask)
      return i + (u32)__builtin_ctz(mask);
  }
#endif // __AVX2__
#elif defined(__aarch64__)
  static const m128 vtbl = {0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15};
  static const uint16x8_t mbits = {0x0101, 0x0202, 0x0404, 0x0808, 0x1010, 0x2020, 0x4040, 0x8080};
  const m128 i1 = vdupq_n_u8(ih);
  for (u32 i = 0; i < leaf->nr_keys; i += sizeof(m128)) {
    const m128 cmp = vceqq_u8(vld1q_u8(leaf->ss+i), i1); // cmpeq => 0xff or 0x00
    const m128 cmp1 = vqtbl1q_u8(cmp, vtbl); // reorder
    const u32 mask = (u32)vaddvq_u16(vandq_u8(vreinterpretq_u16_u8(cmp1), mbits));
    if (mask)
      return i + (u32)__builtin_ctz(mask);
  }
#endif // __x86_64__
  debug_die();
}

// assumes there in no duplicated keys
// search the first key that is >= the given key
// return 0 .. nr_sorted
  static u32
wormleaf_search_ss(const struct wormleaf * const leaf, const struct kref * const key)
{
  u32 lo = 0;
  u32 hi = leaf->nr_sorted;
  // first loop: binary search, prefetching both possible next probes
  while ((lo + 2) < hi) {
    const u32 i = (lo + hi) >> 1;
    const struct kv * const curr = wormleaf_kv_at_is(leaf, i);
    cpu_prefetch0(curr);
    cpu_prefetch0(leaf->hs + leaf->ss[(lo + i) >> 1]);
    cpu_prefetch0(leaf->hs + leaf->ss[(i + 1 + hi) >> 1]);
    const int cmp = kref_kv_compare(key, curr);
    debug_assert(cmp != 0);
    if (cmp < 0)
      hi = i;
    else
      lo = i + 1;
  }

  // finish the last 1-2 steps without prefetch
  while (lo < hi) {
    const u32 i = (lo + hi) >> 1;
    const struct kv * const curr = wormleaf_kv_at_is(leaf, i);
    const int cmp = kref_kv_compare(key, curr);
    debug_assert(cmp != 0);
    if (cmp < 0)
      hi = i;
    else
      lo = i + 1;
  }
  return lo;
}

// Sorted position of key: exact hs hit -> its ss index; miss -> binary
// search for the first greater key.
  static u32
wormleaf_seek(const struct wormleaf * const leaf, const struct kref * const key)
{
  debug_assert(leaf->nr_sorted == leaf->nr_keys);
  wormleaf_prefetch_ss(leaf); // effective for both hit and miss
  const u32 ih = wormleaf_match_hs(leaf, key);
  if (ih < WH_KPN) { // hit
    return wormleaf_search_is(leaf, (u8)ih);
  } else { // miss, binary search for gt
    return wormleaf_search_ss(leaf, key);
  }
}

// same to search_sorted but the target is very likely beyond the end
  static u32
wormleaf_seek_end(const struct wormleaf * const leaf, const struct kref * const key)
{
  debug_assert(leaf->nr_keys == leaf->nr_sorted);
  if (leaf->nr_sorted) {
    const int cmp = kref_kv_compare(key, wormleaf_kv_at_is(leaf, leaf->nr_sorted-1));
    if (cmp > 0)
      return leaf->nr_sorted;
    else if (cmp == 0)
      return leaf->nr_sorted - 1;
    else
      return wormleaf_seek(leaf, key);
  } else {
    return 0;
  }
}
// }}} leaf-read

// leaf-write {{{
// In-place merge of the two sorted runs ss[0..n1) and ss[n1..n1+n2); the
// smaller run is buffered in et[] (min(n1,n2) < WH_KPN/2). Duplicate keys
// are impossible here (debug_die on equality).
  static void
wormleaf_sort_m2(struct wormleaf * const leaf, const u32 n1, const u32 n2)
{
  if (n1 == 0 || n2 == 0)
    return; // no need to sort

  u8 * const ss = leaf->ss;
  u8 et[WH_KPN/2]; // min(n1,n2) < KPN/2
  if (n1 <= n2) { // merge left
memcpy(et, &(ss[0]), sizeof(ss[0]) * n1);
    u8 * eo = ss;
    u8 * e1 = et; // size == n1
    u8 * e2 = &(ss[n1]); // size == n2
    const u8 * const z1 = e1 + n1;
    const u8 * const z2 = e2 + n2;
    while ((e1 < z1) && (e2 < z2)) {
      const int cmp = kv_compare(wormleaf_kv_at_ih(leaf, *e1), wormleaf_kv_at_ih(leaf, *e2));
      if (cmp < 0)
        *(eo++) = *(e1++);
      else if (cmp > 0)
        *(eo++) = *(e2++);
      else
        debug_die();

      if (eo == e2)
        break; // finish early
    }
    if (eo < e2) // copy the remaining buffered entries back
      memcpy(eo, e1, sizeof(*eo) * (size_t)(e2 - eo));
  } else {
    memcpy(et, &(ss[n1]), sizeof(ss[0]) * n2);
    u8 * eo = &(ss[n1 + n2 - 1]); // merge backwards
    u8 * e1 = &(ss[n1 - 1]); // size == n1
    u8 * e2 = &(et[n2 - 1]); // size == n2
    const u8 * const z1 = e1 - n1;
    const u8 * const z2 = e2 - n2;
    while ((e1 > z1) && (e2 > z2)) {
      const int cmp = kv_compare(wormleaf_kv_at_ih(leaf, *e1), wormleaf_kv_at_ih(leaf, *e2));
      if (cmp < 0)
        *(eo--) = *(e2--);
      else if (cmp > 0)
        *(eo--) = *(e1--);
      else
        debug_die();

      if (eo == e1)
        break;
    }
    if (eo > e1) // copy the remaining buffered entries back
      memcpy(e1 + 1, et, sizeof(*eo) * (size_t)(eo - e1));
  }
}

// qsort_r comparators for ss entries; the two variants exist because the
// glibc and BSD/Apple qsort_r ABIs place the context argument (priv == the
// leaf) at opposite ends of the parameter list.
#if defined(__linux__)
  static int
wormleaf_ss_cmp(const void * const p1, const void * const p2, void * priv)
{
  const struct kv * const k1 = wormleaf_kv_at_ih(priv, *(const u8 *)p1);
  const struct kv * const k2 = wormleaf_kv_at_ih(priv, *(const u8 *)p2);
  return kv_compare(k1, k2);
}
#else // (FreeBSD and APPLE only)
  static int
wormleaf_ss_cmp(void * priv, const void * const p1, const void * const p2)
{
  const struct kv * const k1 = wormleaf_kv_at_ih(priv, *(const u8 *)p1);
  const struct kv * const k2 = wormleaf_kv_at_ih(priv, *(const u8 *)p2);
  return kv_compare(k1, k2);
}
#endif // __linux__

// Sort ss[i0 .. i0+nr) by key using the platform's qsort_r.
  static inline void
wormleaf_sort_range(struct wormleaf * const leaf, const u32 i0, const u32 nr)
{
#if defined(__linux__)
  qsort_r(&(leaf->ss[i0]), nr, sizeof(leaf->ss[0]), wormleaf_ss_cmp, leaf);
#else // (FreeBSD and APPLE only)
qsort_r(&(leaf->ss[i0]), nr, sizeof(leaf->ss[0]), leaf, wormleaf_ss_cmp); +#endif // __linux__ +} + +// make sure all keys are sorted in a leaf node + static void +wormleaf_sync_sorted(struct wormleaf * const leaf) +{ + const u32 s = leaf->nr_sorted; + const u32 n = leaf->nr_keys; + if (s == n) + return; + + wormleaf_sort_range(leaf, s, n - s); + // merge-sort inplace + wormleaf_sort_m2(leaf, s, n - s); + leaf->nr_sorted = n; +} + +// shift a sequence of entries on hs and update the corresponding ss values + static void +wormleaf_shift_inc(struct wormleaf * const leaf, const u32 to, const u32 from, const u32 nr) +{ + debug_assert(to == (from+1)); + struct entry13 * const hs = leaf->hs; + memmove(&(hs[to]), &(hs[from]), sizeof(hs[0]) * nr); + +#if defined(__x86_64__) + // TODO: avx512 +#if defined(__AVX2__) + const m256 ones = _mm256_set1_epi8(1); + const m256 addx = _mm256_set1_epi8((char)(u8)(INT8_MAX + 1 - from - nr)); + const m256 cmpx = _mm256_set1_epi8((char)(u8)(INT8_MAX - nr)); + for (u32 i = 0; i < leaf->nr_keys; i += sizeof(m256)) { + const m256 sv = _mm256_load_si256((m256 *)(leaf->ss+i)); + const m256 add1 = _mm256_and_si256(_mm256_cmpgt_epi8(_mm256_add_epi8(sv, addx), cmpx), ones); + _mm256_store_si256((m256 *)(leaf->ss+i), _mm256_add_epi8(sv, add1)); + } +#else // SSE4.2 + const m128 ones = _mm_set1_epi8(1); + const m128 addx = _mm_set1_epi8((char)(u8)(INT8_MAX + 1 - from - nr)); + const m128 cmpx = _mm_set1_epi8((char)(u8)(INT8_MAX - nr)); + for (u32 i = 0; i < leaf->nr_keys; i += sizeof(m128)) { + const m128 sv = _mm_load_si128((m128 *)(leaf->ss+i)); + const m128 add1 = _mm_and_si128(_mm_cmpgt_epi8(_mm_add_epi8(sv, addx), cmpx), ones); + _mm_store_si128((m128 *)(leaf->ss+i), _mm_add_epi8(sv, add1)); + } +#endif // __AVX2__ +#elif defined(__aarch64__) // __x86_64__ + // aarch64 + const m128 subx = vdupq_n_u8((u8)from); + const m128 cmpx = vdupq_n_u8((u8)nr); + for (u32 i = 0; i < leaf->nr_keys; i += sizeof(m128)) { + const m128 sv = 
vld1q_u8(leaf->ss+i); + const m128 add1 = vshrq_n_u8(vcltq_u8(vsubq_u8(sv, subx), cmpx), 7); + vst1q_u8(leaf->ss+i, vaddq_u8(sv, add1)); + } +#endif // __x86_64__ +} + + static void +wormleaf_shift_dec(struct wormleaf * const leaf, const u32 to, const u32 from, const u32 nr) +{ + debug_assert(to == (from-1)); + struct entry13 * const hs = leaf->hs; + memmove(&(hs[to]), &(hs[from]), sizeof(hs[0]) * nr); + +#if defined(__x86_64__) + // TODO: avx512 +#if defined(__AVX2__) + const m256 ones = _mm256_set1_epi8(1); + const m256 addx = _mm256_set1_epi8((char)(u8)(INT8_MAX + 1 - from - nr)); + const m256 cmpx = _mm256_set1_epi8((char)(u8)(INT8_MAX - nr)); + for (u32 i = 0; i < leaf->nr_keys; i += sizeof(m256)) { + const m256 sv = _mm256_load_si256((m256 *)(leaf->ss+i)); + const m256 add1 = _mm256_and_si256(_mm256_cmpgt_epi8(_mm256_add_epi8(sv, addx), cmpx), ones); + _mm256_store_si256((m256 *)(leaf->ss+i), _mm256_sub_epi8(sv, add1)); + } +#else // SSE4.2 + const m128 ones = _mm_set1_epi8(1); + const m128 addx = _mm_set1_epi8((char)(u8)(INT8_MAX + 1 - from - nr)); + const m128 cmpx = _mm_set1_epi8((char)(u8)(INT8_MAX - nr)); + for (u32 i = 0; i < leaf->nr_keys; i += 16) { + const m128 sv = _mm_load_si128((m128 *)(leaf->ss+i)); + const m128 add1 = _mm_and_si128(_mm_cmpgt_epi8(_mm_add_epi8(sv, addx), cmpx), ones); + _mm_store_si128((m128 *)(leaf->ss+i), _mm_sub_epi8(sv, add1)); + } +#endif // __AVX2__ +#elif defined(__aarch64__) // __x86_64__ + // aarch64 + const m128 subx = vdupq_n_u8((u8)from); + const m128 cmpx = vdupq_n_u8((u8)nr); + for (u32 i = 0; i < leaf->nr_keys; i += sizeof(m128)) { + const m128 sv = vld1q_u8(leaf->ss+i); + const m128 add1 = vshrq_n_u8(vcltq_u8(vsubq_u8(sv, subx), cmpx), 7); + vst1q_u8(leaf->ss+i, vsubq_u8(sv, add1)); + } +#endif // __x86_64__ +} + +// insert hs and also shift ss + static u32 +wormleaf_insert_hs(struct wormleaf * const leaf, const struct entry13 e) +{ + struct entry13 * const hs = leaf->hs; + const u16 pkey = e.e1; + const u32 i0 = 
pkey / WH_HDIV;
+ if (hs[i0].e1 == 0) { // insert
+ hs[i0] = e;
+ return i0;
+ }
+
+ // find left-most insertion point
+ u32 i = i0;
+ while (i && hs[i-1].e1 && (hs[i-1].e1 >= pkey))
+ i--;
+ while ((i < WH_KPN) && hs[i].e1 && (hs[i].e1 < pkey)) // stop at >= or empty
+ i++;
+ const u32 il = --i; // i in [0, KPN]
+
+ // find left empty slot
+ if (i > (i0 - 1))
+ i = i0 - 1;
+ while ((i < WH_KPN) && hs[i].e1)
+ i--;
+ const u32 el = i; // el < i0 or el is invalid (>= KPN)
+
+ // find right-most insertion point.
+ i = il + 1;
+ while ((i < WH_KPN) && hs[i].e1 && (hs[i].e1 == pkey))
+ i++;
+ const u32 ir = i; // ir >= il, in [0, KPN]
+
+ // find right empty slot
+ if (i < (i0 + 1))
+ i = i0 + 1;
+ while ((i < WH_KPN) && hs[i].e1)
+ i++;
+ const u32 er = i; // er > i0 or er is invalid (>= KPN)
+
+ // el <= il < ir <= er (if < WH_KPN)
+ const u32 dl = (el < WH_KPN) ? (il - el) : WH_KPN;
+ const u32 dr = (er < WH_KPN) ? (er - ir) : WH_KPN;
+ if (dl <= dr) { // push left
+ debug_assert(dl < WH_KPN);
+ if (dl)
+ wormleaf_shift_dec(leaf, el, el+1, dl);
+ hs[il] = e;
+ return il;
+ } else {
+ debug_assert(dr < WH_KPN);
+ if (dr)
+ wormleaf_shift_inc(leaf, ir+1, ir, dr);
+ hs[ir] = e;
+ return ir;
+ }
+}
+
+ static void
+wormleaf_insert_e13(struct wormleaf * const leaf, const struct entry13 e)
+{
+ // insert to hs and fix all existing is
+ const u32 ih = wormleaf_insert_hs(leaf, e);
+ debug_assert(ih < WH_KPN);
+ // append the new is
+ leaf->ss[leaf->nr_keys] = (u8)ih;
+ // fix nr
+ leaf->nr_keys++;
+}
+
+ static void
+wormleaf_insert(struct wormleaf * const leaf, const struct kv * const new)
+{
+ debug_assert(new->hash == kv_crc32c_extend(kv_crc32c(new->kv, new->klen)));
+ debug_assert(leaf->nr_keys < WH_KPN);
+
+ // insert
+ const struct entry13 e = entry13(wormhole_pkey(new->hashlo), ptr_to_u64(new));
+ const u32 nr0 = leaf->nr_keys;
+ wormleaf_insert_e13(leaf, e);
+
+ // optimize for seq insertion
+ if (nr0 == leaf->nr_sorted) {
+ if (nr0) {
+ const struct kv * const kvn 
= wormleaf_kv_at_is(leaf, nr0 - 1); + if (kv_compare(new, kvn) > 0) + leaf->nr_sorted = nr0 + 1; + } else { + leaf->nr_sorted = 1; + } + } +} + + static void +wormleaf_pull_ih(struct wormleaf * const leaf, const u32 ih) +{ + struct entry13 * const hs = leaf->hs; + // try left + u32 i = ih - 1; + while ((i < WH_KPN) && hs[i].e1 && ((hs[i].e1 / WH_HDIV) > i)) + i--; + + if ((++i) < ih) { + wormleaf_shift_inc(leaf, i+1, i, ih - i); + leaf->hs[i].v64 = 0; + return; + } + + // try right + i = ih + 1; + while ((i < WH_KPN) && hs[i].e1 && ((hs[i].e1 / WH_HDIV) < i)) + i++; + + if ((--i) > ih) { + wormleaf_shift_dec(leaf, ih, ih+1, i - ih); + hs[i].v64 = 0; + } + // hs[ih] may still be 0 +} + +// internal only + static struct kv * +wormleaf_remove(struct wormleaf * const leaf, const u32 ih, const u32 is) +{ + // ss + leaf->ss[is] = leaf->ss[leaf->nr_keys - 1]; + if (leaf->nr_sorted > is) + leaf->nr_sorted = is; + + // ret + struct kv * const victim = wormleaf_kv_at_ih(leaf, ih); + // hs + leaf->hs[ih].v64 = 0; + leaf->nr_keys--; + // use magnet + wormleaf_pull_ih(leaf, ih); + return victim; +} + +// remove key from leaf but do not call free + static struct kv * +wormleaf_remove_ih(struct wormleaf * const leaf, const u32 ih) +{ + // remove from ss + const u32 is = wormleaf_search_is(leaf, (u8)ih); + debug_assert(is < leaf->nr_keys); + return wormleaf_remove(leaf, ih, is); +} + + static struct kv * +wormleaf_remove_is(struct wormleaf * const leaf, const u32 is) +{ + return wormleaf_remove(leaf, leaf->ss[is], is); +} + +// for delr (delete-range) + static void +wormleaf_delete_range(struct wormhole * const map, struct wormleaf * const leaf, + const u32 i0, const u32 end) +{ + debug_assert(leaf->nr_keys == leaf->nr_sorted); + for (u32 i = end; i > i0; i--) { + const u32 ir = i - 1; + struct kv * const victim = wormleaf_remove_is(leaf, ir); + map->mm.free(victim, map->mm.priv); + } +} + +// return the old kv; the caller should free the old kv + static struct kv * 
+wormleaf_update(struct wormleaf * const leaf, const u32 ih, const struct kv * const new) +{ + debug_assert(new->hash == kv_crc32c_extend(kv_crc32c(new->kv, new->klen))); + // search entry in ss (is) + struct kv * const old = wormleaf_kv_at_ih(leaf, ih); + debug_assert(old); + + entry13_update_e3(&leaf->hs[ih], (u64)new); + return old; +} +// }}} leaf-write + +// leaf-split {{{ +// It only works correctly in cut_search +// quickly tell if a cut between k1 and k2 can achieve a specific anchor-key length + static bool +wormhole_split_cut_alen_check(const u32 alen, const struct kv * const k1, const struct kv * const k2) +{ + debug_assert(k2->klen >= alen); + return (k1->klen < alen) || (k1->kv[alen - 1] != k2->kv[alen - 1]); +} + +// return the number of keys that should go to leaf1 +// assert(r > 0 && r <= nr_keys) +// (1) r < is1, anchor key is ss[r-1]:ss[r] +// (2) r == is1: anchor key is ss[r-1]:new +// (3) r == is1+1: anchor key is new:ss[r-1] (ss[r-1] is the ss[r] on the logically sorted array) +// (4) r > is1+1: anchor key is ss[r-2]:ss[r-1] (ss[r-2] is the [r-1] on the logically sorted array) +// edge cases: +// (case 2) is1 == nr_keys: r = nr_keys; ss[r-1]:new +// (case 3) is1 == 0, r == 1; new:ss[0] +// return 1..WH_KPN + static u32 +wormhole_split_cut_search1(struct wormleaf * const leaf, u32 l, u32 h, const u32 is1, const struct kv * const new) +{ + debug_assert(leaf->nr_keys == leaf->nr_sorted); + debug_assert(leaf->nr_keys); + debug_assert(l < h && h <= leaf->nr_sorted); + + const struct kv * const kl0 = wormleaf_kv_at_is1(leaf, l, is1, new); + const struct kv * const kh0 = wormleaf_kv_at_is1(leaf, h, is1, new); + const u32 alen = kv_key_lcp(kl0, kh0) + 1; + if (unlikely(alen > UINT16_MAX)) + return WH_KPN2; + + const u32 target = leaf->next ? 
WH_MID : WH_KPN_MRG; + while ((l + 1) < h) { + const u32 m = (l + h + 1) >> 1; + if (m <= target) { // try right + const struct kv * const k1 = wormleaf_kv_at_is1(leaf, m, is1, new); + const struct kv * const k2 = wormleaf_kv_at_is1(leaf, h, is1, new); + if (wormhole_split_cut_alen_check(alen, k1, k2)) + l = m; + else + h = m; + } else { // try left + const struct kv * const k1 = wormleaf_kv_at_is1(leaf, l, is1, new); + const struct kv * const k2 = wormleaf_kv_at_is1(leaf, m, is1, new); + if (wormhole_split_cut_alen_check(alen, k1, k2)) + h = m; + else + l = m; + } + } + return h; +} + + static void +wormhole_split_leaf_move1(struct wormleaf * const leaf1, struct wormleaf * const leaf2, + const u32 cut, const u32 is1, const struct kv * const new) +{ + const u32 nr_keys = leaf1->nr_keys; + const struct entry13 e1 = entry13(wormhole_pkey(new->hashlo), ptr_to_u64(new)); + struct entry13 es[WH_KPN]; + + if (cut <= is1) { // e1 goes to leaf2 + // leaf2 + for (u32 i = cut; i < is1; i++) + wormleaf_insert_e13(leaf2, leaf1->hs[leaf1->ss[i]]); + + wormleaf_insert_e13(leaf2, e1); + + for (u32 i = is1; i < nr_keys; i++) + wormleaf_insert_e13(leaf2, leaf1->hs[leaf1->ss[i]]); + + // leaf1 + for (u32 i = 0; i < cut; i++) + es[i] = leaf1->hs[leaf1->ss[i]]; + + } else { // e1 goes to leaf1 + // leaf2 + for (u32 i = cut - 1; i < nr_keys; i++) + wormleaf_insert_e13(leaf2, leaf1->hs[leaf1->ss[i]]); + + // leaf1 + for (u32 i = 0; i < is1; i++) + es[i] = leaf1->hs[leaf1->ss[i]]; + + es[is1] = e1; + + for (u32 i = is1 + 1; i < cut; i++) + es[i] = leaf1->hs[leaf1->ss[i - 1]]; + } + + leaf2->nr_sorted = leaf2->nr_keys; + + memset(leaf1->hs, 0, sizeof(leaf1->hs[0]) * WH_KPN); + leaf1->nr_keys = 0; + for (u32 i = 0; i < cut; i++) + wormleaf_insert_e13(leaf1, es[i]); + leaf1->nr_sorted = cut; + debug_assert((leaf1->nr_sorted + leaf2->nr_sorted) == (nr_keys + 1)); +} + +// create an anchor for leaf-split + static struct kv * +wormhole_split_alloc_anchor(const struct kv * const key1, const 
struct kv * const key2) +{ + const u32 alen = kv_key_lcp(key1, key2) + 1; + debug_assert(alen <= key2->klen); + + struct kv * const anchor = wormhole_alloc_akey(alen); + if (anchor) + kv_refill(anchor, key2->kv, alen, NULL, 0); + return anchor; +} + +// leaf1 is locked +// split leaf1 into leaf1+leaf2; insert new into leaf1 or leaf2, return leaf2 + static struct wormleaf * +wormhole_split_leaf(struct wormhole * const map, struct wormleaf * const leaf1, struct kv * const new) +{ + wormleaf_sync_sorted(leaf1); + struct kref kref_new; + kref_ref_kv(&kref_new, new); + const u32 is1 = wormleaf_search_ss(leaf1, &kref_new); // new should be inserted at [is1] + const u32 cut = wormhole_split_cut_search1(leaf1, 0, leaf1->nr_keys, is1, new); + if (unlikely(cut == WH_KPN2)) + return NULL; + + // anchor of leaf2 + debug_assert(cut && (cut <= leaf1->nr_keys)); + const struct kv * const key1 = wormleaf_kv_at_is1(leaf1, cut - 1, is1, new); + const struct kv * const key2 = wormleaf_kv_at_is1(leaf1, cut, is1, new); + struct kv * const anchor2 = wormhole_split_alloc_anchor(key1, key2); + if (unlikely(anchor2 == NULL)) // anchor alloc failed + return NULL; + + // create leaf2 with anchor2 + struct wormleaf * const leaf2 = wormleaf_alloc(map, leaf1, leaf1->next, anchor2); + if (unlikely(leaf2 == NULL)) { + wormhole_free_akey(anchor2); + return NULL; + } + + // split_hmap will unlock the leaf nodes; must move now + wormhole_split_leaf_move1(leaf1, leaf2, cut, is1, new); + // leaf1 and leaf2 should be sorted after split + debug_assert(leaf1->nr_keys == leaf1->nr_sorted); + debug_assert(leaf2->nr_keys == leaf2->nr_sorted); + + return leaf2; +} +// }}} leaf-split + +// leaf-merge {{{ +// MERGE is the only operation that deletes a leaf node (leaf2). +// It ALWAYS merges the right node into the left node even if the left is empty. +// This requires both of their writer locks to be acquired. +// This allows iterators to safely probe the next node (but not backwards). 
+// In other words, if either the reader or the writer lock of node X has been acquired: +// X->next (the pointer) cannot be changed by any other thread. +// X->next cannot be deleted. +// But the content in X->next can still be changed. + static bool +wormleaf_merge(struct wormleaf * const leaf1, struct wormleaf * const leaf2) +{ + debug_assert((leaf1->nr_keys + leaf2->nr_keys) <= WH_KPN); + const bool leaf1_sorted = leaf1->nr_keys == leaf1->nr_sorted; + + for (u32 i = 0; i < leaf2->nr_keys; i++) + wormleaf_insert_e13(leaf1, leaf2->hs[leaf2->ss[i]]); + if (leaf1_sorted) + leaf1->nr_sorted += leaf2->nr_sorted; + return true; +} + +// for undoing insertion under split_meta failure; leaf2 is still local +// remove the new key; merge keys in leaf2 into leaf1; free leaf2 + static void +wormleaf_split_undo(struct wormhole * const map, struct wormleaf * const leaf1, + struct wormleaf * const leaf2, struct kv * const new) +{ + if (new) { + const struct entry13 e = entry13(wormhole_pkey(new->hashlo), ptr_to_u64(new)); + const u32 im1 = wormleaf_search_ih(leaf1, e); + if (im1 < WH_KPN) { + (void)wormleaf_remove_ih(leaf1, im1); + } else { // not found in leaf1; search leaf2 + const u32 im2 = wormleaf_search_ih(leaf2, e); + debug_assert(im2 < WH_KPN); + (void)wormleaf_remove_ih(leaf2, im2); + } + } + // this merge must succeed + if (!wormleaf_merge(leaf1, leaf2)) + debug_die(); + // Keep this to avoid triggering false alarm in wormleaf_free + leaf2->leaflock.opaque = 0; + wormleaf_free(map->slab_leaf, leaf2); +} +// }}} leaf-merge + +// get/probe {{{ + struct kv * +wormhole_get(struct wormref * const ref, const struct kref * const key, struct kv * const out) +{ + struct wormleaf * const leaf = wormhole_jump_leaf_read(ref, key); + const u32 i = wormleaf_match_hs(leaf, key); + struct kv * const tmp = (i < WH_KPN) ? 
ref->map->mm.out(wormleaf_kv_at_ih(leaf, i), out) : NULL; + wormleaf_unlock_read(leaf); + return tmp; +} + + struct kv * +whsafe_get(struct wormref * const ref, const struct kref * const key, struct kv * const out) +{ + wormhole_resume(ref); + struct kv * const ret = wormhole_get(ref, key, out); + wormhole_park(ref); + return ret; +} + + struct kv * +whunsafe_get(struct wormhole * const map, const struct kref * const key, struct kv * const out) +{ + struct wormleaf * const leaf = wormhole_jump_leaf(map->hmap, key); + const u32 i = wormleaf_match_hs(leaf, key); + return (i < WH_KPN) ? map->mm.out(wormleaf_kv_at_ih(leaf, i), out) : NULL; +} + + bool +wormhole_probe(struct wormref * const ref, const struct kref * const key) +{ + struct wormleaf * const leaf = wormhole_jump_leaf_read(ref, key); + const u32 i = wormleaf_match_hs(leaf, key); + wormleaf_unlock_read(leaf); + return i < WH_KPN; +} + + bool +whsafe_probe(struct wormref * const ref, const struct kref * const key) +{ + wormhole_resume(ref); + const bool r = wormhole_probe(ref, key); + wormhole_park(ref); + return r; +} + + bool +whunsafe_probe(struct wormhole * const map, const struct kref * const key) +{ + struct wormleaf * const leaf = wormhole_jump_leaf(map->hmap, key); + return wormleaf_match_hs(leaf, key) < WH_KPN; +} +// }}} get/probe + +// meta-split {{{ +// duplicate from meta1; only has one bit but will soon add a new bit + static struct wormmeta * +wormmeta_expand(struct wormhmap * const hmap, struct wormmeta * const meta1) +{ + struct wormmeta * const meta2 = slab_alloc_unsafe(hmap->slab2); + if (meta2 == NULL) + return NULL; + + memcpy(meta2, meta1, sizeof(*meta1)); + for (u32 i = 0; i < WH_BMNR; i++) + meta2->bitmap[i] = 0; + const u32 bitmin = wormmeta_bitmin_load(meta1); + debug_assert(bitmin == wormmeta_bitmax_load(meta1)); + debug_assert(bitmin < WH_FO); + // set the only bit + meta2->bitmap[bitmin >> 6u] |= (1lu << (bitmin & 0x3fu)); + + wormhmap_replace(hmap, meta1, meta2); + 
slab_free_unsafe(hmap->slab1, meta1);
+ return meta2;
+}
+
+ static struct wormmeta *
+wormmeta_bm_set_helper(struct wormhmap * const hmap, struct wormmeta * const meta, const u32 id)
+{
+ debug_assert(id < WH_FO);
+ const u32 bitmin = wormmeta_bitmin_load(meta);
+ const u32 bitmax = wormmeta_bitmax_load(meta);
+ if (bitmin < bitmax) { // already in full size
+ wormmeta_bm_set(meta, id);
+ return meta;
+ } else if (id == bitmin) { // do nothing
+ return meta;
+ } else if (bitmin == WH_FO) { // add the first bit
+ wormmeta_bitmin_store(meta, id);
+ wormmeta_bitmax_store(meta, id);
+ return meta;
+ } else { // need to expand
+ struct wormmeta * const meta2 = wormmeta_expand(hmap, meta);
+ wormmeta_bm_set(meta2, id);
+ return meta2;
+ }
+}
+
+// get-or-create the meta node for mkey; set its child bit and adjust lmost/rmost for the new leaf
+ static void
+wormmeta_split_touch(struct wormhmap * const hmap, struct kv * const mkey,
+ struct wormleaf * const leaf, const u32 alen)
+{
+ struct wormmeta * meta = wormhmap_get(hmap, mkey);
+ if (meta) {
+ if (mkey->klen < alen)
+ meta = wormmeta_bm_set_helper(hmap, meta, mkey->kv[mkey->klen]);
+ if (wormmeta_lmost_load(meta) == leaf->next)
+ wormmeta_lmost_store(meta, leaf);
+ else if (wormmeta_rmost_load(meta) == leaf->prev)
+ wormmeta_rmost_store(meta, leaf);
+ } else { // create new node
+ const u32 bit = (mkey->klen < alen) ? 
mkey->kv[mkey->klen] : WH_FO; + meta = wormmeta_alloc(hmap, leaf, mkey, alen, bit); + debug_assert(meta); + wormhmap_set(hmap, meta); + } +} + + static void +wormmeta_lpath_update(struct wormhmap * const hmap, const struct kv * const a1, const struct kv * const a2, + struct wormleaf * const lpath) +{ + struct kv * const pbuf = hmap->pbuf; + kv_dup2_key(a2, pbuf); + + // only need to update a2's own branch + u32 i = kv_key_lcp(a1, a2) + 1; + debug_assert(i <= pbuf->klen); + wormhole_prefix(pbuf, i); + while (i < a2->klen) { + debug_assert(i <= hmap->maxplen); + struct wormmeta * const meta = wormhmap_get(hmap, pbuf); + debug_assert(meta); + wormmeta_lpath_store(meta, lpath); + + i++; + wormhole_prefix_inc1(pbuf); + } +} + +// for leaf1, a leaf2 is already linked at its right side. +// this function updates the meta-map by moving leaf1 and hooking leaf2 at correct positions + static void +wormmeta_split(struct wormhmap * const hmap, struct wormleaf * const leaf, + struct kv * const mkey) +{ + // left branches + struct wormleaf * const prev = leaf->prev; + struct wormleaf * const next = leaf->next; + u32 i = next ? 
kv_key_lcp(prev->anchor, next->anchor) : 0; + const u32 alen = leaf->anchor->klen; + + // save klen + const u32 mklen = mkey->klen; + wormhole_prefix(mkey, i); + do { + wormmeta_split_touch(hmap, mkey, leaf, alen); + if (i >= alen) + break; + i++; + wormhole_prefix_inc1(mkey); + } while (true); + + // adjust maxplen; i is the plen of the last _touch() + if (i > hmap->maxplen) + hmap->maxplen = i; + debug_assert(i <= UINT16_MAX); + + // restore klen + mkey->klen = mklen; + + if (next) + wormmeta_lpath_update(hmap, leaf->anchor, next->anchor, leaf); +} + +// all locks will be released before returning + static bool +wormhole_split_meta(struct wormref * const ref, struct wormleaf * const leaf2) +{ + struct kv * const mkey = wormhole_alloc_mkey(leaf2->anchor->klen); + if (unlikely(mkey == NULL)) + return false; + kv_dup2_key(leaf2->anchor, mkey); + + struct wormhole * const map = ref->map; + // metalock + wormhmap_lock(map, ref); + + // check slab reserve + const bool sr = wormhole_slab_reserve(map, mkey->klen); + if (unlikely(!sr)) { + wormhmap_unlock(map); + wormhole_free_mkey(mkey); + return false; + } + + struct wormhmap * const hmap0 = wormhmap_load(map); + struct wormhmap * const hmap1 = wormhmap_switch(map, hmap0); + + // link + struct wormleaf * const leaf1 = leaf2->prev; + leaf1->next = leaf2; + if (leaf2->next) + leaf2->next->prev = leaf2; + + // update versions + const u64 v1 = wormhmap_version_load(hmap0) + 1; + wormleaf_version_store(leaf1, v1); + wormleaf_version_store(leaf2, v1); + wormhmap_version_store(hmap1, v1); + + wormmeta_split(hmap1, leaf2, mkey); + + qsbr_update(&ref->qref, v1); + + // switch hmap + wormhmap_store(map, hmap1); + + wormleaf_unlock_write(leaf1); + wormleaf_unlock_write(leaf2); + + qsbr_wait(map->qsbr, v1); + + wormmeta_split(hmap0, leaf2, mkey); + + wormhmap_unlock(map); + + if (mkey->refcnt == 0) // this is possible + wormhole_free_mkey(mkey); + return true; +} + +// all locks (metalock + leaflocks) will be released before 
returning +// leaf1->lock (write) is already taken + static bool +wormhole_split_insert(struct wormref * const ref, struct wormleaf * const leaf1, + struct kv * const new) +{ + struct wormleaf * const leaf2 = wormhole_split_leaf(ref->map, leaf1, new); + if (unlikely(leaf2 == NULL)) { + wormleaf_unlock_write(leaf1); + return false; + } + + rwlock_lock_write(&(leaf2->leaflock)); + const bool rsm = wormhole_split_meta(ref, leaf2); + if (unlikely(!rsm)) { + // undo insertion & merge; free leaf2 + wormleaf_split_undo(ref->map, leaf1, leaf2, new); + wormleaf_unlock_write(leaf1); + } + return rsm; +} + + static bool +whunsafe_split_meta(struct wormhole * const map, struct wormleaf * const leaf2) +{ + struct kv * const mkey = wormhole_alloc_mkey(leaf2->anchor->klen); + if (unlikely(mkey == NULL)) + return false; + kv_dup2_key(leaf2->anchor, mkey); + + const bool sr = wormhole_slab_reserve(map, mkey->klen); + if (unlikely(!sr)) { + wormhmap_unlock(map); + wormhole_free_mkey(mkey); + return false; + } + + // link + leaf2->prev->next = leaf2; + if (leaf2->next) + leaf2->next->prev = leaf2; + + for (u32 i = 0; i < 2; i++) + if (map->hmap2[i].pmap) + wormmeta_split(&(map->hmap2[i]), leaf2, mkey); + if (mkey->refcnt == 0) // this is possible + wormhole_free_mkey(mkey); + return true; +} + + static bool +whunsafe_split_insert(struct wormhole * const map, struct wormleaf * const leaf1, + struct kv * const new) +{ + struct wormleaf * const leaf2 = wormhole_split_leaf(map, leaf1, new); + if (unlikely(leaf2 == NULL)) + return false; + + const bool rsm = whunsafe_split_meta(map, leaf2); + if (unlikely(!rsm)) // undo insertion, merge, free leaf2 + wormleaf_split_undo(map, leaf1, leaf2, new); + + return rsm; +} +// }}} meta-split + +// meta-merge {{{ +// now it only contains one bit + static struct wormmeta * +wormmeta_shrink(struct wormhmap * const hmap, struct wormmeta * const meta2) +{ + debug_assert(wormmeta_bitmin_load(meta2) == wormmeta_bitmax_load(meta2)); + struct wormmeta * 
const meta1 = slab_alloc_unsafe(hmap->slab1); + if (meta1 == NULL) + return NULL; + + memcpy(meta1, meta2, sizeof(*meta1)); + + wormhmap_replace(hmap, meta2, meta1); + slab_free_unsafe(hmap->slab2, meta2); + return meta1; +} + + static void +wormmeta_bm_clear_helper(struct wormhmap * const hmap, struct wormmeta * const meta, const u32 id) +{ + if (wormmeta_bitmin_load(meta) == wormmeta_bitmax_load(meta)) { + debug_assert(wormmeta_bitmin_load(meta) < WH_FO); + wormmeta_bitmin_store(meta, WH_FO); + wormmeta_bitmax_store(meta, WH_FO); + } else { // has more than 1 bit + wormmeta_bm_clear(meta, id); + if (wormmeta_bitmin_load(meta) == wormmeta_bitmax_load(meta)) + wormmeta_shrink(hmap, meta); + } +} + +// all locks held + static void +wormmeta_merge(struct wormhmap * const hmap, struct wormleaf * const leaf) +{ + // leaf->next is the new next after merge, which can be NULL + struct wormleaf * const prev = leaf->prev; + struct wormleaf * const next = leaf->next; + struct kv * const pbuf = hmap->pbuf; + kv_dup2_key(leaf->anchor, pbuf); + u32 i = (prev && next) ? 
kv_key_lcp(prev->anchor, next->anchor) : 0; + const u32 alen = leaf->anchor->klen; + wormhole_prefix(pbuf, i); + struct wormmeta * parent = NULL; + do { + debug_assert(i <= hmap->maxplen); + struct wormmeta * meta = wormhmap_get(hmap, pbuf); + if (wormmeta_lmost_load(meta) == wormmeta_rmost_load(meta)) { // delete single-child + debug_assert(wormmeta_lmost_load(meta) == leaf); + const u32 bitmin = wormmeta_bitmin_load(meta); + wormhmap_del(hmap, meta); + wormmeta_free(hmap, meta); + if (parent) { + wormmeta_bm_clear_helper(hmap, parent, pbuf->kv[i-1]); + parent = NULL; + } + if (bitmin == WH_FO) // no child + break; + } else { // adjust lmost rmost + if (wormmeta_lmost_load(meta) == leaf) + wormmeta_lmost_store(meta, next); + else if (wormmeta_rmost_load(meta) == leaf) + wormmeta_rmost_store(meta, prev); + parent = meta; + } + + if (i >= alen) + break; + i++; + wormhole_prefix_inc1(pbuf); + } while (true); + + if (next) + wormmeta_lpath_update(hmap, leaf->anchor, next->anchor, prev); +} + +// all locks (metalock + two leaflock) will be released before returning +// merge leaf2 to leaf1, removing all metadata to leaf2 and leaf2 itself + static void +wormhole_meta_merge(struct wormref * const ref, struct wormleaf * const leaf1, + struct wormleaf * const leaf2, const bool unlock_leaf1) +{ + debug_assert(leaf1->next == leaf2); + debug_assert(leaf2->prev == leaf1); + struct wormhole * const map = ref->map; + + wormhmap_lock(map, ref); + + struct wormhmap * const hmap0 = wormhmap_load(map); + struct wormhmap * const hmap1 = wormhmap_switch(map, hmap0); + const u64 v1 = wormhmap_version_load(hmap0) + 1; + + leaf1->next = leaf2->next; + if (leaf2->next) + leaf2->next->prev = leaf1; + + wormleaf_version_store(leaf1, v1); + wormleaf_version_store(leaf2, v1); + wormhmap_version_store(hmap1, v1); + + wormmeta_merge(hmap1, leaf2); + + qsbr_update(&ref->qref, v1); + + // switch hmap + wormhmap_store(map, hmap1); + + if (unlock_leaf1) + wormleaf_unlock_write(leaf1); + 
wormleaf_unlock_write(leaf2); + + qsbr_wait(map->qsbr, v1); + + wormmeta_merge(hmap0, leaf2); + // leaf2 is now safe to be removed + wormleaf_free(map->slab_leaf, leaf2); + wormhmap_unlock(map); +} + +// caller must acquire leaf->wlock and next->wlock +// all locks will be released when this function returns + static bool +wormhole_meta_leaf_merge(struct wormref * const ref, struct wormleaf * const leaf) +{ + struct wormleaf * const next = leaf->next; + debug_assert(next); + + // double check + if ((leaf->nr_keys + next->nr_keys) <= WH_KPN) { + if (wormleaf_merge(leaf, next)) { + wormhole_meta_merge(ref, leaf, next, true); + return true; + } + } + // merge failed but it's fine + wormleaf_unlock_write(leaf); + wormleaf_unlock_write(next); + return false; +} + + static void +whunsafe_meta_leaf_merge(struct wormhole * const map, struct wormleaf * const leaf1, + struct wormleaf * const leaf2) +{ + debug_assert(leaf1->next == leaf2); + debug_assert(leaf2->prev == leaf1); + if (!wormleaf_merge(leaf1, leaf2)) + return; + + leaf1->next = leaf2->next; + if (leaf2->next) + leaf2->next->prev = leaf1; + for (u32 i = 0; i < 2; i++) + if (map->hmap2[i].pmap) + wormmeta_merge(&(map->hmap2[i]), leaf2); + wormleaf_free(map->slab_leaf, leaf2); +} +// }}} meta-merge + +// put {{{ + bool +wormhole_put(struct wormref * const ref, struct kv * const kv) +{ + // we always allocate a new item on SET + // future optimizations may perform in-place update + struct wormhole * const map = ref->map; + struct kv * const new = map->mm.in(kv, map->mm.priv); + if (unlikely(new == NULL)) + return false; + const struct kref kref = kv_kref(new); + + struct wormleaf * const leaf = wormhole_jump_leaf_write(ref, &kref); + // update + const u32 im = wormleaf_match_hs(leaf, &kref); + if (im < WH_KPN) { + struct kv * const old = wormleaf_update(leaf, im, new); + wormleaf_unlock_write(leaf); + map->mm.free(old, map->mm.priv); + return true; + } + + // insert + if (likely(leaf->nr_keys < WH_KPN)) { // just 
insert + wormleaf_insert(leaf, new); + wormleaf_unlock_write(leaf); + return true; + } + + // split_insert changes hmap + // all locks should be released in wormhole_split_insert() + const bool rsi = wormhole_split_insert(ref, leaf, new); + if (!rsi) + map->mm.free(new, map->mm.priv); + return rsi; +} + + bool +whsafe_put(struct wormref * const ref, struct kv * const kv) +{ + wormhole_resume(ref); + const bool r = wormhole_put(ref, kv); + wormhole_park(ref); + return r; +} + + bool +whunsafe_put(struct wormhole * const map, struct kv * const kv) +{ + struct kv * const new = map->mm.in(kv, map->mm.priv); + if (unlikely(new == NULL)) + return false; + const struct kref kref = kv_kref(new); + + struct wormleaf * const leaf = wormhole_jump_leaf(map->hmap, &kref); + // update + const u32 im = wormleaf_match_hs(leaf, &kref); + if (im < WH_KPN) { // overwrite + struct kv * const old = wormleaf_update(leaf, im, new); + map->mm.free(old, map->mm.priv); + return true; + } + + // insert + if (likely(leaf->nr_keys < WH_KPN)) { // just insert + wormleaf_insert(leaf, new); + return true; + } + + // split_insert changes hmap + const bool rsi = whunsafe_split_insert(map, leaf, new); + if (!rsi) + map->mm.free(new, map->mm.priv); + return rsi; +} + + bool +wormhole_merge(struct wormref * const ref, const struct kref * const kref, + kv_merge_func uf, void * const priv) +{ + struct wormhole * const map = ref->map; + struct wormleaf * const leaf = wormhole_jump_leaf_write(ref, kref); + // update + const u32 im = wormleaf_match_hs(leaf, kref); + if (im < WH_KPN) { // update + struct kv * const kv0 = wormleaf_kv_at_ih(leaf, im); + struct kv * const kv = uf(kv0, priv); + if ((kv == kv0) || (kv == NULL)) { // no replacement + wormleaf_unlock_write(leaf); + return true; + } + + struct kv * const new = map->mm.in(kv, map->mm.priv); + if (unlikely(new == NULL)) { // mm error + wormleaf_unlock_write(leaf); + return false; + } + + struct kv * const old = wormleaf_update(leaf, im, new); + 
wormleaf_unlock_write(leaf); + map->mm.free(old, map->mm.priv); + return true; + } + + struct kv * const kv = uf(NULL, priv); + if (kv == NULL) { // nothing to be inserted + wormleaf_unlock_write(leaf); + return true; + } + + struct kv * const new = map->mm.in(kv, map->mm.priv); + if (unlikely(new == NULL)) { // mm error + wormleaf_unlock_write(leaf); + return false; + } + + // insert + if (likely(leaf->nr_keys < WH_KPN)) { // just insert + wormleaf_insert(leaf, new); + wormleaf_unlock_write(leaf); + return true; + } + + // split_insert changes hmap + // all locks should be released in wormhole_split_insert() + const bool rsi = wormhole_split_insert(ref, leaf, new); + if (!rsi) + map->mm.free(new, map->mm.priv); + return rsi; +} + + bool +whsafe_merge(struct wormref * const ref, const struct kref * const kref, + kv_merge_func uf, void * const priv) +{ + wormhole_resume(ref); + const bool r = wormhole_merge(ref, kref, uf, priv); + wormhole_park(ref); + return r; +} + + bool +whunsafe_merge(struct wormhole * const map, const struct kref * const kref, + kv_merge_func uf, void * const priv) +{ + struct wormleaf * const leaf = wormhole_jump_leaf(map->hmap, kref); + // update + const u32 im = wormleaf_match_hs(leaf, kref); + if (im < WH_KPN) { // update + struct kv * const kv0 = wormleaf_kv_at_ih(leaf, im); + struct kv * const kv = uf(kv0, priv); + if ((kv == kv0) || (kv == NULL)) + return true; + + struct kv * const new = map->mm.in(kv, map->mm.priv); + if (unlikely(new == NULL)) + return false; + + struct kv * const old = wormleaf_update(leaf, im, new); + map->mm.free(old, map->mm.priv); + return true; + } + + struct kv * const kv = uf(NULL, priv); + if (kv == NULL) // nothing to be inserted + return true; + + struct kv * const new = map->mm.in(kv, map->mm.priv); + if (unlikely(new == NULL)) // mm error + return false; + + // insert + if (likely(leaf->nr_keys < WH_KPN)) { // just insert + wormleaf_insert(leaf, new); + return true; + } + + // split_insert changes hmap + 
const bool rsi = whunsafe_split_insert(map, leaf, new); + if (!rsi) + map->mm.free(new, map->mm.priv); + return rsi; +} +// }}} put + +// inplace {{{ + bool +wormhole_inpr(struct wormref * const ref, const struct kref * const key, + kv_inp_func uf, void * const priv) +{ + struct wormleaf * const leaf = wormhole_jump_leaf_read(ref, key); + const u32 im = wormleaf_match_hs(leaf, key); + if (im < WH_KPN) { + uf(wormleaf_kv_at_ih(leaf, im), priv); + wormleaf_unlock_read(leaf); + return true; + } else { + uf(NULL, priv); + wormleaf_unlock_read(leaf); + return false; + } +} + + bool +wormhole_inpw(struct wormref * const ref, const struct kref * const key, + kv_inp_func uf, void * const priv) +{ + struct wormleaf * const leaf = wormhole_jump_leaf_write(ref, key); + const u32 im = wormleaf_match_hs(leaf, key); + if (im < WH_KPN) { + uf(wormleaf_kv_at_ih(leaf, im), priv); + wormleaf_unlock_write(leaf); + return true; + } else { + uf(NULL, priv); + wormleaf_unlock_write(leaf); + return false; + } +} + + bool +whsafe_inpr(struct wormref * const ref, const struct kref * const key, + kv_inp_func uf, void * const priv) +{ + wormhole_resume(ref); + const bool r = wormhole_inpr(ref, key, uf, priv); + wormhole_park(ref); + return r; +} + + bool +whsafe_inpw(struct wormref * const ref, const struct kref * const key, + kv_inp_func uf, void * const priv) +{ + wormhole_resume(ref); + const bool r = wormhole_inpw(ref, key, uf, priv); + wormhole_park(ref); + return r; +} + + bool +whunsafe_inp(struct wormhole * const map, const struct kref * const key, + kv_inp_func uf, void * const priv) +{ + struct wormleaf * const leaf = wormhole_jump_leaf(map->hmap, key); + const u32 im = wormleaf_match_hs(leaf, key); + if (im < WH_KPN) { // overwrite + uf(wormleaf_kv_at_ih(leaf, im), priv); + return true; + } else { + uf(NULL, priv); + return false; + } +} +// }}} put + +// del {{{ + static void +wormhole_del_try_merge(struct wormref * const ref, struct wormleaf * const leaf) +{ + struct wormleaf * 
const next = leaf->next; + if (next && ((leaf->nr_keys == 0) || ((leaf->nr_keys + next->nr_keys) < WH_KPN_MRG))) { + // try merge, it may fail if size becomes larger after locking + wormleaf_lock_write(next, ref); + (void)wormhole_meta_leaf_merge(ref, leaf); + // locks are already released; immediately return + } else { + wormleaf_unlock_write(leaf); + } +} + + bool +wormhole_del(struct wormref * const ref, const struct kref * const key) +{ + struct wormleaf * const leaf = wormhole_jump_leaf_write(ref, key); + const u32 im = wormleaf_match_hs(leaf, key); + if (im < WH_KPN) { // found + struct kv * const kv = wormleaf_remove_ih(leaf, im); + wormhole_del_try_merge(ref, leaf); + debug_assert(kv); + // free after releasing locks + struct wormhole * const map = ref->map; + map->mm.free(kv, map->mm.priv); + return true; + } else { + wormleaf_unlock_write(leaf); + return false; + } +} + + bool +whsafe_del(struct wormref * const ref, const struct kref * const key) +{ + wormhole_resume(ref); + const bool r = wormhole_del(ref, key); + wormhole_park(ref); + return r; +} + + static void +whunsafe_del_try_merge(struct wormhole * const map, struct wormleaf * const leaf) +{ + const u32 n0 = leaf->prev ? leaf->prev->nr_keys : WH_KPN; + const u32 n1 = leaf->nr_keys; + const u32 n2 = leaf->next ? 
leaf->next->nr_keys : WH_KPN; + + if ((leaf->prev && (n1 == 0)) || ((n0 + n1) < WH_KPN_MRG)) { + whunsafe_meta_leaf_merge(map, leaf->prev, leaf); + } else if ((leaf->next && (n1 == 0)) || ((n1 + n2) < WH_KPN_MRG)) { + whunsafe_meta_leaf_merge(map, leaf, leaf->next); + } +} + + bool +whunsafe_del(struct wormhole * const map, const struct kref * const key) +{ + struct wormleaf * const leaf = wormhole_jump_leaf(map->hmap, key); + const u32 im = wormleaf_match_hs(leaf, key); + if (im < WH_KPN) { // found + struct kv * const kv = wormleaf_remove_ih(leaf, im); + debug_assert(kv); + + whunsafe_del_try_merge(map, leaf); + map->mm.free(kv, map->mm.priv); + return true; + } + return false; +} + + u64 +wormhole_delr(struct wormref * const ref, const struct kref * const start, + const struct kref * const end) +{ + struct wormleaf * const leafa = wormhole_jump_leaf_write(ref, start); + wormleaf_sync_sorted(leafa); + const u32 ia = wormleaf_seek(leafa, start); + const u32 iaz = end ? wormleaf_seek_end(leafa, end) : leafa->nr_keys; + if (iaz < ia) { // do nothing if end < start + wormleaf_unlock_write(leafa); + return 0; + } + u64 ndel = iaz - ia; + struct wormhole * const map = ref->map; + wormleaf_delete_range(map, leafa, ia, iaz); + if (leafa->nr_keys > ia) { // end hit; done + wormhole_del_try_merge(ref, leafa); + return ndel; + } + + while (leafa->next) { + struct wormleaf * const leafx = leafa->next; + wormleaf_lock_write(leafx, ref); + // two leaf nodes locked + wormleaf_sync_sorted(leafx); + const u32 iz = end ? 
wormleaf_seek_end(leafx, end) : leafx->nr_keys; + ndel += iz; + wormleaf_delete_range(map, leafx, 0, iz); + if (leafx->nr_keys == 0) { // removed all + // must hold leaf1's lock for the next iteration + wormhole_meta_merge(ref, leafa, leafx, false); + } else { // partially removed; done + (void)wormhole_meta_leaf_merge(ref, leafa); + return ndel; + } + } + wormleaf_unlock_write(leafa); + return ndel; +} + + u64 +whsafe_delr(struct wormref * const ref, const struct kref * const start, + const struct kref * const end) +{ + wormhole_resume(ref); + const u64 ret = wormhole_delr(ref, start, end); + wormhole_park(ref); + return ret; +} + + u64 +whunsafe_delr(struct wormhole * const map, const struct kref * const start, + const struct kref * const end) +{ + // first leaf + struct wormhmap * const hmap = map->hmap; + struct wormleaf * const leafa = wormhole_jump_leaf(hmap, start); + wormleaf_sync_sorted(leafa); + // last leaf + struct wormleaf * const leafz = end ? wormhole_jump_leaf(hmap, end) : NULL; + + // select start/end on leafa + const u32 ia = wormleaf_seek(leafa, start); + const u32 iaz = end ? 
wormleaf_seek_end(leafa, end) : leafa->nr_keys; + if (iaz < ia) + return 0; + + wormleaf_delete_range(map, leafa, ia, iaz); + u64 ndel = iaz - ia; + + if (leafa == leafz) { // one node only + whunsafe_del_try_merge(map, leafa); + return ndel; + } + + // 0 or more nodes between leafa and leafz + while (leafa->next != leafz) { + struct wormleaf * const leafx = leafa->next; + ndel += leafx->nr_keys; + for (u32 i = 0; i < leafx->nr_keys; i++) + map->mm.free(wormleaf_kv_at_is(leafx, i), map->mm.priv); + leafx->nr_keys = 0; + leafx->nr_sorted = 0; + whunsafe_meta_leaf_merge(map, leafa, leafx); + } + // delete the smaller keys in leafz + if (leafz) { + wormleaf_sync_sorted(leafz); + const u32 iz = wormleaf_seek_end(leafz, end); + wormleaf_delete_range(map, leafz, 0, iz); + ndel += iz; + whunsafe_del_try_merge(map, leafa); + } + return ndel; +} +// }}} del + +// iter {{{ +// safe iter: safe sort with read-lock acquired +// unsafe iter: allow concurrent seek/skip + static void +wormhole_iter_leaf_sync_sorted(struct wormleaf * const leaf) +{ + if (unlikely(leaf->nr_keys != leaf->nr_sorted)) { + spinlock_lock(&(leaf->sortlock)); + wormleaf_sync_sorted(leaf); + spinlock_unlock(&(leaf->sortlock)); + } +} + + struct wormhole_iter * +wormhole_iter_create(struct wormref * const ref) +{ + struct wormhole_iter * const iter = malloc(sizeof(*iter)); + if (iter == NULL) + return NULL; + iter->ref = ref; + iter->map = ref->map; + iter->leaf = NULL; + iter->is = 0; + return iter; +} + + static void +wormhole_iter_fix(struct wormhole_iter * const iter) +{ + if (!wormhole_iter_valid(iter)) + return; + + while (unlikely(iter->is >= iter->leaf->nr_sorted)) { + struct wormleaf * const next = iter->leaf->next; + if (likely(next != NULL)) { + struct wormref * const ref = iter->ref; + wormleaf_lock_read(next, ref); + wormleaf_unlock_read(iter->leaf); + + wormhole_iter_leaf_sync_sorted(next); + } else { + wormleaf_unlock_read(iter->leaf); + } + iter->leaf = next; + iter->is = 0; + if 
(!wormhole_iter_valid(iter)) + return; + } +} + + void +wormhole_iter_seek(struct wormhole_iter * const iter, const struct kref * const key) +{ + debug_assert(key); + if (iter->leaf) + wormleaf_unlock_read(iter->leaf); + + struct wormleaf * const leaf = wormhole_jump_leaf_read(iter->ref, key); + wormhole_iter_leaf_sync_sorted(leaf); + + iter->leaf = leaf; + iter->is = wormleaf_seek(leaf, key); + wormhole_iter_fix(iter); +} + + void +whsafe_iter_seek(struct wormhole_iter * const iter, const struct kref * const key) +{ + wormhole_resume(iter->ref); + wormhole_iter_seek(iter, key); +} + + bool +wormhole_iter_valid(struct wormhole_iter * const iter) +{ + return iter->leaf != NULL; +} + + static struct kv * +wormhole_iter_current(struct wormhole_iter * const iter) +{ + if (wormhole_iter_valid(iter)) { + debug_assert(iter->is < iter->leaf->nr_sorted); + struct kv * const kv = wormleaf_kv_at_is(iter->leaf, iter->is); + return kv; + } + return NULL; +} + + struct kv * +wormhole_iter_peek(struct wormhole_iter * const iter, struct kv * const out) +{ + struct kv * const kv = wormhole_iter_current(iter); + if (kv) { + struct kv * const ret = iter->map->mm.out(kv, out); + return ret; + } + return NULL; +} + + bool +wormhole_iter_kref(struct wormhole_iter * const iter, struct kref * const kref) +{ + struct kv * const kv = wormhole_iter_current(iter); + if (kv) { + kref_ref_kv(kref, kv); + return true; + } + return false; +} + + bool +wormhole_iter_kvref(struct wormhole_iter * const iter, struct kvref * const kvref) +{ + struct kv * const kv = wormhole_iter_current(iter); + if (kv) { + kvref_ref_kv(kvref, kv); + return true; + } + return false; +} + + void +wormhole_iter_skip1(struct wormhole_iter * const iter) +{ + if (wormhole_iter_valid(iter)) { + iter->is++; + wormhole_iter_fix(iter); + } +} + + void +wormhole_iter_skip(struct wormhole_iter * const iter, const u32 nr) +{ + u32 todo = nr; + while (todo && wormhole_iter_valid(iter)) { + const u32 cap = iter->leaf->nr_sorted - 
iter->is; + const u32 nskip = (cap < todo) ? cap : todo; + iter->is += nskip; + wormhole_iter_fix(iter); + todo -= nskip; + } +} + + struct kv * +wormhole_iter_next(struct wormhole_iter * const iter, struct kv * const out) +{ + struct kv * const ret = wormhole_iter_peek(iter, out); + wormhole_iter_skip1(iter); + return ret; +} + + bool +wormhole_iter_inp(struct wormhole_iter * const iter, kv_inp_func uf, void * const priv) +{ + struct kv * const kv = wormhole_iter_current(iter); + uf(kv, priv); // call uf even if (kv == NULL) + return kv != NULL; +} + + void +wormhole_iter_park(struct wormhole_iter * const iter) +{ + if (iter->leaf) { + wormleaf_unlock_read(iter->leaf); + iter->leaf = NULL; + } +} + + void +whsafe_iter_park(struct wormhole_iter * const iter) +{ + wormhole_iter_park(iter); + wormhole_park(iter->ref); +} + + void +wormhole_iter_destroy(struct wormhole_iter * const iter) +{ + if (iter->leaf) + wormleaf_unlock_read(iter->leaf); + free(iter); +} + + void +whsafe_iter_destroy(struct wormhole_iter * const iter) +{ + wormhole_park(iter->ref); + wormhole_iter_destroy(iter); +} +// }}} iter + +// unsafe iter {{{ + struct wormhole_iter * +whunsafe_iter_create(struct wormhole * const map) +{ + struct wormhole_iter * const iter = malloc(sizeof(*iter)); + if (iter == NULL) + return NULL; + iter->ref = NULL; + iter->map = map; + iter->leaf = NULL; + iter->is = 0; + whunsafe_iter_seek(iter, kref_null()); + return iter; +} + + static void +whunsafe_iter_fix(struct wormhole_iter * const iter) +{ + if (!wormhole_iter_valid(iter)) + return; + + while (unlikely(iter->is >= iter->leaf->nr_sorted)) { + struct wormleaf * const next = iter->leaf->next; + if (likely(next != NULL)) + wormhole_iter_leaf_sync_sorted(next); + iter->leaf = next; + iter->is = 0; + if (!wormhole_iter_valid(iter)) + return; + } +} + + void +whunsafe_iter_seek(struct wormhole_iter * const iter, const struct kref * const key) +{ + struct wormleaf * const leaf = wormhole_jump_leaf(iter->map->hmap, 
key); + wormhole_iter_leaf_sync_sorted(leaf); + + iter->leaf = leaf; + iter->is = wormleaf_seek(leaf, key); + whunsafe_iter_fix(iter); +} + + void +whunsafe_iter_skip1(struct wormhole_iter * const iter) +{ + if (wormhole_iter_valid(iter)) { + iter->is++; + whunsafe_iter_fix(iter); + } +} + + void +whunsafe_iter_skip(struct wormhole_iter * const iter, const u32 nr) +{ + u32 todo = nr; + while (todo && wormhole_iter_valid(iter)) { + const u32 cap = iter->leaf->nr_sorted - iter->is; + const u32 nskip = (cap < todo) ? cap : todo; + iter->is += nskip; + whunsafe_iter_fix(iter); + todo -= nskip; + } +} + + struct kv * +whunsafe_iter_next(struct wormhole_iter * const iter, struct kv * const out) +{ + struct kv * const ret = wormhole_iter_peek(iter, out); + whunsafe_iter_skip1(iter); + return ret; +} + + void +whunsafe_iter_destroy(struct wormhole_iter * const iter) +{ + free(iter); +} +// }}} unsafe iter + +// misc {{{ + struct wormref * +wormhole_ref(struct wormhole * const map) +{ + struct wormref * const ref = malloc(sizeof(*ref)); + if (ref == NULL) + return NULL; + ref->map = map; + if (qsbr_register(map->qsbr, &(ref->qref)) == false) { + free(ref); + return NULL; + } + return ref; +} + + struct wormref * +whsafe_ref(struct wormhole * const map) +{ + struct wormref * const ref = wormhole_ref(map); + if (ref) + wormhole_park(ref); + return ref; +} + + struct wormhole * +wormhole_unref(struct wormref * const ref) +{ + struct wormhole * const map = ref->map; + qsbr_unregister(map->qsbr, &(ref->qref)); + free(ref); + return map; +} + + inline void +wormhole_park(struct wormref * const ref) +{ + qsbr_park(&(ref->qref)); +} + + inline void +wormhole_resume(struct wormref * const ref) +{ + qsbr_resume(&(ref->qref)); +} + + inline void +wormhole_refresh_qstate(struct wormref * const ref) +{ + qsbr_update(&(ref->qref), wormhmap_version_load(wormhmap_load(ref->map))); +} + + static void +wormhole_clean_hmap(struct wormhole * const map) +{ + for (u32 x = 0; x < 2; x++) { + if 
(map->hmap2[x].pmap == NULL) + continue; + struct wormhmap * const hmap = &(map->hmap2[x]); + const u64 nr_slots = ((u64)(hmap->mask)) + 1; + struct wormmbkt * const pmap = hmap->pmap; + for (u64 s = 0; s < nr_slots; s++) { + struct wormmbkt * const slot = &(pmap[s]); + for (u32 i = 0; i < WH_BKT_NR; i++) + if (slot->e[i]) + wormmeta_keyref_release(slot->e[i]); + } + + slab_free_all(hmap->slab1); + slab_free_all(hmap->slab2); + memset(hmap->pmap, 0, hmap->msize); + hmap->maxplen = 0; + } +} + + static void +wormhole_free_leaf_keys(struct wormhole * const map, struct wormleaf * const leaf) +{ + const u32 nr = leaf->nr_keys; + for (u32 i = 0; i < nr; i++) { + void * const curr = wormleaf_kv_at_is(leaf, i); + debug_assert(curr); + map->mm.free(curr, map->mm.priv); + } + wormhole_free_akey(leaf->anchor); +} + + static void +wormhole_clean_helper(struct wormhole * const map) +{ + wormhole_clean_hmap(map); + for (struct wormleaf * leaf = map->leaf0; leaf; leaf = leaf->next) + wormhole_free_leaf_keys(map, leaf); + slab_free_all(map->slab_leaf); + map->leaf0 = NULL; +} + +// unsafe + void +wormhole_clean(struct wormhole * const map) +{ + wormhole_clean_helper(map); + wormhole_create_leaf0(map); +} + + void +wormhole_destroy(struct wormhole * const map) +{ + wormhole_clean_helper(map); + for (u32 i = 0; i < 2; i++) { + struct wormhmap * const hmap = &map->hmap2[i]; + if (hmap->slab1) + slab_destroy(hmap->slab1); + if (hmap->slab2) + slab_destroy(hmap->slab2); + wormhmap_deinit(hmap); + } + qsbr_destroy(map->qsbr); + slab_destroy(map->slab_leaf); + free(map->pbuf); + free(map); +} + + void +wormhole_fprint(struct wormhole * const map, FILE * const out) +{ + const u64 nr_slab_ul = slab_get_nalloc(map->slab_leaf); + const u64 nr_slab_um11 = slab_get_nalloc(map->hmap2[0].slab1); + const u64 nr_slab_um12 = slab_get_nalloc(map->hmap2[0].slab2); + const u64 nr_slab_um21 = map->hmap2[1].slab1 ? 
slab_get_nalloc(map->hmap2[1].slab1) : 0; + const u64 nr_slab_um22 = map->hmap2[1].slab2 ? slab_get_nalloc(map->hmap2[1].slab2) : 0; + fprintf(out, "%s L-SLAB %lu M-SLAB [0] %lu+%lu [1] %lu+%lu\n", + __func__, nr_slab_ul, nr_slab_um11, nr_slab_um12, nr_slab_um21, nr_slab_um22); +} +// }}} misc + +// api {{{ +const struct kvmap_api kvmap_api_wormhole = { + .hashkey = true, + .ordered = true, + .threadsafe = true, + .unique = true, + .refpark = true, + .put = (void *)wormhole_put, + .get = (void *)wormhole_get, + .probe = (void *)wormhole_probe, + .del = (void *)wormhole_del, + .inpr = (void *)wormhole_inpr, + .inpw = (void *)wormhole_inpw, + .merge = (void *)wormhole_merge, + .delr = (void *)wormhole_delr, + .iter_create = (void *)wormhole_iter_create, + .iter_seek = (void *)wormhole_iter_seek, + .iter_valid = (void *)wormhole_iter_valid, + .iter_peek = (void *)wormhole_iter_peek, + .iter_kref = (void *)wormhole_iter_kref, + .iter_kvref = (void *)wormhole_iter_kvref, + .iter_skip1 = (void *)wormhole_iter_skip1, + .iter_skip = (void *)wormhole_iter_skip, + .iter_next = (void *)wormhole_iter_next, + .iter_inp = (void *)wormhole_iter_inp, + .iter_park = (void *)wormhole_iter_park, + .iter_destroy = (void *)wormhole_iter_destroy, + .ref = (void *)wormhole_ref, + .unref = (void *)wormhole_unref, + .park = (void *)wormhole_park, + .resume = (void *)wormhole_resume, + .clean = (void *)wormhole_clean, + .destroy = (void *)wormhole_destroy, + .fprint = (void *)wormhole_fprint, +}; + +const struct kvmap_api kvmap_api_whsafe = { + .hashkey = true, + .ordered = true, + .threadsafe = true, + .unique = true, + .put = (void *)whsafe_put, + .get = (void *)whsafe_get, + .probe = (void *)whsafe_probe, + .del = (void *)whsafe_del, + .inpr = (void *)whsafe_inpr, + .inpw = (void *)whsafe_inpw, + .merge = (void *)whsafe_merge, + .delr = (void *)whsafe_delr, + .iter_create = (void *)wormhole_iter_create, + .iter_seek = (void *)whsafe_iter_seek, + .iter_valid = (void *)wormhole_iter_valid, 
+ .iter_peek = (void *)wormhole_iter_peek, + .iter_kref = (void *)wormhole_iter_kref, + .iter_kvref = (void *)wormhole_iter_kvref, + .iter_skip1 = (void *)wormhole_iter_skip1, + .iter_skip = (void *)wormhole_iter_skip, + .iter_next = (void *)wormhole_iter_next, + .iter_inp = (void *)wormhole_iter_inp, + .iter_park = (void *)whsafe_iter_park, + .iter_destroy = (void *)whsafe_iter_destroy, + .ref = (void *)whsafe_ref, + .unref = (void *)wormhole_unref, + .clean = (void *)wormhole_clean, + .destroy = (void *)wormhole_destroy, + .fprint = (void *)wormhole_fprint, +}; + +const struct kvmap_api kvmap_api_whunsafe = { + .hashkey = true, + .ordered = true, + .unique = true, + .put = (void *)whunsafe_put, + .get = (void *)whunsafe_get, + .probe = (void *)whunsafe_probe, + .del = (void *)whunsafe_del, + .inpr = (void *)whunsafe_inp, + .inpw = (void *)whunsafe_inp, + .merge = (void *)whunsafe_merge, + .delr = (void *)whunsafe_delr, + .iter_create = (void *)whunsafe_iter_create, + .iter_seek = (void *)whunsafe_iter_seek, + .iter_valid = (void *)wormhole_iter_valid, + .iter_peek = (void *)wormhole_iter_peek, + .iter_kref = (void *)wormhole_iter_kref, + .iter_kvref = (void *)wormhole_iter_kvref, + .iter_skip1 = (void *)whunsafe_iter_skip1, + .iter_skip = (void *)whunsafe_iter_skip, + .iter_next = (void *)whunsafe_iter_next, + .iter_inp = (void *)wormhole_iter_inp, + .iter_destroy = (void *)whunsafe_iter_destroy, + .clean = (void *)wormhole_clean, + .destroy = (void *)wormhole_destroy, + .fprint = (void *)wormhole_fprint, +}; + + static void * +wormhole_kvmap_api_create(const char * const name, const struct kvmap_mm * const mm, char ** args) +{ + (void)args; + if ((!strcmp(name, "wormhole")) || (!strcmp(name, "whsafe"))) { + return wormhole_create(mm); + } else if (!strcmp(name, "whunsafe")) { + return whunsafe_create(mm); + } else { + return NULL; + } +} + +__attribute__((constructor)) + static void +wormhole_kvmap_api_init(void) +{ + kvmap_api_register(0, "wormhole", "", 
wormhole_kvmap_api_create, &kvmap_api_wormhole); + kvmap_api_register(0, "whsafe", "", wormhole_kvmap_api_create, &kvmap_api_whsafe); + kvmap_api_register(0, "whunsafe", "", wormhole_kvmap_api_create, &kvmap_api_whunsafe); +} +// }}} api + +// wh {{{ +// Users often don't enjoy dealing with struct kv/kref and just want to use plain buffers. +// No problem! +// This example library shows you how to use Wormhole efficiently in the most intuitive way. + +// Use the worry-free api +static const struct kvmap_api * const wh_api = &kvmap_api_whsafe; + +// You can change the wh_api to kvmap_api_wormhole with a one-line replacement +// The standard Wormhole api can give you ~5% boost; read README for thread-safety tips +//static const struct kvmap_api * const wh_api = &kvmap_api_wormhole; + + struct wormhole * +wh_create(void) +{ + // kvmap_mm_ndf (kv.h) will let the caller allocate the kv when inserting + // This can avoid a memcpy if the caller does not have the data in a struct kv + return wormhole_create(&kvmap_mm_ndf); +} + + struct wormref * +wh_ref(struct wormhole * const wh) +{ + return wh_api->ref(wh); +} + + void +wh_unref(struct wormref * const ref) +{ + (void)wh_api->unref(ref); +} + + void +wh_park(struct wormref * const ref) +{ + if (wh_api->park) + wh_api->park(ref); +} + + void +wh_resume(struct wormref * const ref) +{ + if (wh_api->resume) + wh_api->resume(ref); +} + + void +wh_clean(struct wormhole * const map) +{ + wh_api->clean(map); +} + + void +wh_destroy(struct wormhole * const map) +{ + wh_api->destroy(map); +} + +// Do set/put with explicit kv buffers + bool +wh_put(struct wormref * const ref, const void * const kbuf, const u32 klen, + const void * const vbuf, const u32 vlen) +{ + struct kv * const newkv = kv_create(kbuf, klen, vbuf, vlen); + if (newkv == NULL) + return false; + // must use with kvmap_mm_ndf (see below) + // the newkv will be saved in the Wormhole and freed by Wormhole when upon deletion + return wh_api->put(ref, newkv); +} + +// 
delete a key + bool +wh_del(struct wormref * const ref, const void * const kbuf, const u32 klen) +{ + struct kref kref; + kref_ref_hash32(&kref, kbuf, klen); + return wh_api->del(ref, &kref); +} + +// test if the key exists in Wormhole + bool +wh_probe(struct wormref * const ref, const void * const kbuf, const u32 klen) +{ + struct kref kref; + kref_ref_hash32(&kref, kbuf, klen); + return wh_api->probe(ref, &kref); +} + +// for wh_get() +struct wh_inp_info { void * vbuf_out; u32 * vlen_out; u32 vbuf_size; }; + +// a kv_inp_func; use this to retrieve the KV's data without unnecessary memory copying + static void +wh_inp_copy_value(struct kv * const curr, void * const priv) +{ + if (curr) { // found + struct wh_inp_info * const info = (typeof(info))priv; + // copy the value data out + const u32 copy_size = info->vbuf_size < curr->vlen ? info->vbuf_size : curr->vlen; + memcpy(info->vbuf_out, kv_vptr_c(curr), copy_size); + // copy the vlen out + *info->vlen_out = curr->vlen; + } +} + +// returns a boolean value indicating whether the key is found.
+// the value's data will be written to *vlen_out and vbuf_out if the key is found +// if vbuf_size < vlen, then only the first vbuf_size bytes is copied to the buffer +// a small vbuf_size can be used to reduce memcpy cost when only the first a few bytes are needed + bool +wh_get(struct wormref * const ref, const void * const kbuf, const u32 klen, + void * const vbuf_out, const u32 vbuf_size, u32 * const vlen_out) +{ + struct kref kref; + kref_ref_hash32(&kref, kbuf, klen); + struct wh_inp_info info = {vbuf_out, vlen_out, vbuf_size}; + // use the inplace read function to get the value if it exists + return wh_api->inpr(ref, &kref, wh_inp_copy_value, &info); +} + + bool +wh_inpr(struct wormref * const ref, const void * const kbuf, const u32 klen, + kv_inp_func uf, void * const priv) +{ + struct kref kref; + kref_ref_hash32(&kref, kbuf, klen); + return wh_api->inpr(ref, &kref, uf, priv); +} + +// inplace update KV's value with a user-defined hook function +// the update should only modify the data in the value; It should not change the value size + bool +wh_inpw(struct wormref * const ref, const void * const kbuf, const u32 klen, + kv_inp_func uf, void * const priv) +{ + struct kref kref; + kref_ref_hash32(&kref, kbuf, klen); + return wh_api->inpw(ref, &kref, uf, priv); +} + +// merge existing KV with updates with a user-defined hook function + bool +wh_merge(struct wormref * const ref, const void * const kbuf, const u32 klen, + kv_merge_func uf, void * const priv) +{ + struct kref kref; + kref_ref_hash32(&kref, kbuf, klen); + return wh_api->merge(ref, &kref, uf, priv); +} + +// remove a range of KVs from start (inclusive) to end (exclusive); [start, end) + u64 +wh_delr(struct wormref * const ref, const void * const kbuf_start, const u32 klen_start, + const void * const kbuf_end, const u32 klen_end) +{ + struct kref kref_start, kref_end; + kref_ref_hash32(&kref_start, kbuf_start, klen_start); + kref_ref_hash32(&kref_end, kbuf_end, klen_end); + return 
wh_api->delr(ref, &kref_start, &kref_end); +} + + struct wormhole_iter * +wh_iter_create(struct wormref * const ref) +{ + return wh_api->iter_create(ref); +} + + void +wh_iter_seek(struct wormhole_iter * const iter, const void * const kbuf, const u32 klen) +{ + struct kref kref; + kref_ref_hash32(&kref, kbuf, klen); + wh_api->iter_seek(iter, &kref); +} + + bool +wh_iter_valid(struct wormhole_iter * const iter) +{ + return wh_api->iter_valid(iter); +} + +// for wh_iter_peek() +// the out ptrs must be provided in pairs; use a pair of NULLs to ignore the key or value +struct wh_iter_inp_info { void * kbuf_out; void * vbuf_out; u32 kbuf_size; u32 vbuf_size; u32 * klen_out; u32 * vlen_out; }; + +// a kv_inp_func; use this to retrieve the KV's data without unnecessary memory copying + static void +inp_copy_kv_cb(struct kv * const curr, void * const priv) +{ + if (curr) { // found + struct wh_iter_inp_info * const info = (typeof(info))priv; + + // copy the key + if (info->kbuf_out) { // it assumes klen_out is also not NULL + // copy the key data out + const u32 clen = curr->klen < info->kbuf_size ? curr->klen : info->kbuf_size; + memcpy(info->kbuf_out, kv_kptr_c(curr), clen); + // copy the klen out + *info->klen_out = curr->klen; + } + + // copy the value + if (info->vbuf_out) { // it assumes vlen_out is also not NULL + // copy the value data out + const u32 clen = curr->vlen < info->vbuf_size ?
curr->vlen : info->vbuf_size; + memcpy(info->vbuf_out, kv_vptr_c(curr), clen); + // copy the vlen out + *info->vlen_out = curr->vlen; + } + } +} + +// seek is similar to get + bool +wh_iter_peek(struct wormhole_iter * const iter, + void * const kbuf_out, const u32 kbuf_size, u32 * const klen_out, + void * const vbuf_out, const u32 vbuf_size, u32 * const vlen_out) +{ + struct wh_iter_inp_info info = {kbuf_out, vbuf_out, kbuf_size, vbuf_size, klen_out, vlen_out}; + return wh_api->iter_inp(iter, inp_copy_kv_cb, &info); +} + + void +wh_iter_skip1(struct wormhole_iter * const iter) +{ + wh_api->iter_skip1(iter); +} + + void +wh_iter_skip(struct wormhole_iter * const iter, const u32 nr) +{ + wh_api->iter_skip(iter, nr); +} + + bool +wh_iter_inp(struct wormhole_iter * const iter, kv_inp_func uf, void * const priv) +{ + return wh_api->iter_inp(iter, uf, priv); +} + + void +wh_iter_park(struct wormhole_iter * const iter) +{ + wh_api->iter_park(iter); +} + + void +wh_iter_destroy(struct wormhole_iter * const iter) +{ + wh_api->iter_destroy(iter); +} +// }}} wh + +// vim:fdm=marker diff --git a/MassTrie-beta/wormhole/wh.h b/MassTrie-beta/wormhole/wh.h new file mode 100644 index 00000000..bd17b38d --- /dev/null +++ b/MassTrie-beta/wormhole/wh.h @@ -0,0 +1,313 @@ +/* + * Copyright (c) 2016--2021 Wu, Xingbo + * + * All rights reserved. No warranty, explicit or implicit, provided. + */ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +struct wormhole; +struct wormref; + +// wormhole {{{ +// the wh created by wormhole_create() can work with all of safe/unsafe operations. + extern struct wormhole * +wormhole_create(const struct kvmap_mm * const mm); + +// the wh created by whunsafe_create() can only work with the unsafe operations. 
+ extern struct wormhole * +whunsafe_create(const struct kvmap_mm * const mm); + + extern struct kv * +wormhole_get(struct wormref * const ref, const struct kref * const key, struct kv * const out); + + extern bool +wormhole_probe(struct wormref * const ref, const struct kref * const key); + + extern bool +wormhole_put(struct wormref * const ref, struct kv * const kv); + + extern bool +wormhole_merge(struct wormref * const ref, const struct kref * const kref, + kv_merge_func uf, void * const priv); + + extern bool +wormhole_inpr(struct wormref * const ref, const struct kref * const key, + kv_inp_func uf, void * const priv); + + extern bool +wormhole_inpw(struct wormref * const ref, const struct kref * const key, + kv_inp_func uf, void * const priv); + + extern bool +wormhole_del(struct wormref * const ref, const struct kref * const key); + + extern u64 +wormhole_delr(struct wormref * const ref, const struct kref * const start, + const struct kref * const end); + + extern struct wormhole_iter * +wormhole_iter_create(struct wormref * const ref); + + extern void +wormhole_iter_seek(struct wormhole_iter * const iter, const struct kref * const key); + + extern bool +wormhole_iter_valid(struct wormhole_iter * const iter); + + extern struct kv * +wormhole_iter_peek(struct wormhole_iter * const iter, struct kv * const out); + + extern bool +wormhole_iter_kref(struct wormhole_iter * const iter, struct kref * const kref); + + extern bool +wormhole_iter_kvref(struct wormhole_iter * const iter, struct kvref * const kvref); + + extern void +wormhole_iter_skip1(struct wormhole_iter * const iter); + + extern void +wormhole_iter_skip(struct wormhole_iter * const iter, const u32 nr); + + extern struct kv * +wormhole_iter_next(struct wormhole_iter * const iter, struct kv * const out); + + extern bool +wormhole_iter_inp(struct wormhole_iter * const iter, kv_inp_func uf, void * const priv); + + extern void +wormhole_iter_park(struct wormhole_iter * const iter); + + extern void 
+wormhole_iter_destroy(struct wormhole_iter * const iter); + + extern struct wormref * +wormhole_ref(struct wormhole * const map); + + extern struct wormhole * +wormhole_unref(struct wormref * const ref); + + extern void +wormhole_park(struct wormref * const ref); + + extern void +wormhole_resume(struct wormref * const ref); + + extern void +wormhole_refresh_qstate(struct wormref * const ref); + +// clean with more threads + extern void +wormhole_clean_th(struct wormhole * const map, const u32 nr_threads); + + extern void +wormhole_clean(struct wormhole * const map); + + extern void +wormhole_destroy(struct wormhole * const map); + +// safe API (no need to refresh qstate) + + extern struct kv * +whsafe_get(struct wormref * const ref, const struct kref * const key, struct kv * const out); + + extern bool +whsafe_probe(struct wormref * const ref, const struct kref * const key); + + extern bool +whsafe_put(struct wormref * const ref, struct kv * const kv); + + extern bool +whsafe_merge(struct wormref * const ref, const struct kref * const kref, + kv_merge_func uf, void * const priv); + + extern bool +whsafe_inpr(struct wormref * const ref, const struct kref * const key, + kv_inp_func uf, void * const priv); + + extern bool +whsafe_inpw(struct wormref * const ref, const struct kref * const key, + kv_inp_func uf, void * const priv); + + extern bool +whsafe_del(struct wormref * const ref, const struct kref * const key); + + extern u64 +whsafe_delr(struct wormref * const ref, const struct kref * const start, + const struct kref * const end); + +// use wormhole_iter_create + extern void +whsafe_iter_seek(struct wormhole_iter * const iter, const struct kref * const key); + + extern struct kv * +whsafe_iter_peek(struct wormhole_iter * const iter, struct kv * const out); + +// use wormhole_iter_valid +// use wormhole_iter_peek +// use wormhole_iter_kref +// use wormhole_iter_kvref +// use wormhole_iter_skip1 +// use wormhole_iter_skip +// use wormhole_iter_next +// use 
wormhole_iter_inp + + extern void +whsafe_iter_park(struct wormhole_iter * const iter); + + extern void +whsafe_iter_destroy(struct wormhole_iter * const iter); + + extern struct wormref * +whsafe_ref(struct wormhole * const map); + +// use wormhole_unref + +// unsafe API + + extern struct kv * +whunsafe_get(struct wormhole * const map, const struct kref * const key, struct kv * const out); + + extern bool +whunsafe_probe(struct wormhole * const map, const struct kref * const key); + + extern bool +whunsafe_put(struct wormhole * const map, struct kv * const kv); + + extern bool +whunsafe_merge(struct wormhole * const map, const struct kref * const kref, + kv_merge_func uf, void * const priv); + + extern bool +whunsafe_inp(struct wormhole * const map, const struct kref * const key, + kv_inp_func uf, void * const priv); + + extern bool +whunsafe_del(struct wormhole * const map, const struct kref * const key); + + extern u64 +whunsafe_delr(struct wormhole * const map, const struct kref * const start, + const struct kref * const end); + + extern struct wormhole_iter * +whunsafe_iter_create(struct wormhole * const map); + + extern void +whunsafe_iter_seek(struct wormhole_iter * const iter, const struct kref * const key); + +// unsafe iter_valid: use wormhole_iter_valid +// unsafe iter_peek: use wormhole_iter_peek +// unsafe iter_kref: use wormhole_iter_kref + + extern void +whunsafe_iter_skip1(struct wormhole_iter * const iter); + + extern void +whunsafe_iter_skip(struct wormhole_iter * const iter, const u32 nr); + + extern struct kv * +whunsafe_iter_next(struct wormhole_iter * const iter, struct kv * const out); + +// unsafe iter_inp: use wormhole_iter_inp + + extern void +whunsafe_iter_destroy(struct wormhole_iter * const iter); + + extern void +wormhole_fprint(struct wormhole * const map, FILE * const out); + +extern const struct kvmap_api kvmap_api_wormhole; +extern const struct kvmap_api kvmap_api_whsafe; +extern const struct kvmap_api kvmap_api_whunsafe; +// }}} 
wormhole + +// wh {{{ + extern struct wormhole * +wh_create(void); + + extern struct wormref * +wh_ref(struct wormhole * const wh); + + extern void +wh_unref(struct wormref * const ref); + + extern void +wh_park(struct wormref * const ref); + + extern void +wh_resume(struct wormref * const ref); + + extern void +wh_clean(struct wormhole * const map); + + extern void +wh_destroy(struct wormhole * const map); + + extern bool +wh_put(struct wormref * const ref, const void * const kbuf, const u32 klen, + const void * const vbuf, const u32 vlen); + + extern bool +wh_del(struct wormref * const ref, const void * const kbuf, const u32 klen); + + extern bool +wh_probe(struct wormref * const ref, const void * const kbuf, const u32 klen); + + extern bool +wh_get(struct wormref * const ref, const void * const kbuf, const u32 klen, + void * const vbuf_out, const u32 vbuf_size, u32 * const vlen_out); + + extern bool +wh_inpr(struct wormref * const ref, const void * const kbuf, const u32 klen, + kv_inp_func uf, void * const priv); + + extern bool +wh_inpw(struct wormref * const ref, const void * const kbuf, const u32 klen, + kv_inp_func uf, void * const priv); + + extern bool +wh_merge(struct wormref * const ref, const void * const kbuf, const u32 klen, + kv_merge_func uf, void * const priv); + + extern u64 +wh_delr(struct wormref * const ref, const void * const kbuf_start, const u32 klen_start, + const void * const kbuf_end, const u32 klen_end); + + extern struct wormhole_iter * +wh_iter_create(struct wormref * const ref); + + extern void +wh_iter_seek(struct wormhole_iter * const iter, const void * const kbuf, const u32 klen); + + extern bool +wh_iter_valid(struct wormhole_iter * const iter); + + extern bool +wh_iter_peek(struct wormhole_iter * const iter, + void * const kbuf_out, const u32 kbuf_size, u32 * const klen_out, + void * const vbuf_out, const u32 vbuf_size, u32 * const vlen_out); + + extern void +wh_iter_skip1(struct wormhole_iter * const iter); + + extern void 
+wh_iter_skip(struct wormhole_iter * const iter, const u32 nr); + + extern bool +wh_iter_inp(struct wormhole_iter * const iter, kv_inp_func uf, void * const priv); + + extern void +wh_iter_park(struct wormhole_iter * const iter); + + extern void +wh_iter_destroy(struct wormhole_iter * const iter); +// }}} wh + +#ifdef __cplusplus +} +#endif +// vim:fdm=marker diff --git a/MassTrie-beta/wormhole/wh.py b/MassTrie-beta/wormhole/wh.py new file mode 100644 index 00000000..e744cec8 --- /dev/null +++ b/MassTrie-beta/wormhole/wh.py @@ -0,0 +1,192 @@ +#!/usr/bin/python3 + +# +# Copyright (c) 2016--2021 Wu, Xingbo +# +# All rights reserved. No warranty, explicit or implicit, provided. +# + +import msgpack +from ctypes import * # CDLL and c_xxx types + +# libwh {{{ +# Change this path when necessary +libwh = CDLL("./libwh.so") + +# create +libwh.wh_create.argtypes = [] +libwh.wh_create.restype = c_void_p + +# close (no return value) +libwh.wh_destroy.argtypes = [c_void_p] + +# ref +libwh.wh_ref.argtypes = [c_void_p] +libwh.wh_ref.restype = c_void_p + +# unref +libwh.wh_unref.argtypes = [c_void_p] + +# put +libwh.wh_put.argtypes = [c_void_p, c_char_p, c_uint, c_char_p, c_uint] +libwh.wh_put.restype = c_bool + +# get +libwh.wh_get.argtypes = [c_void_p, c_char_p, c_uint, c_char_p, c_uint, c_void_p] +libwh.wh_get.restype = c_bool + +# probe +libwh.wh_probe.argtypes = [c_void_p, c_char_p, c_uint] +libwh.wh_probe.restype = c_bool + +# del +libwh.wh_del.argtypes = [c_void_p, c_char_p, c_uint] +libwh.wh_del.restype = c_bool + +# iter_create +libwh.wh_iter_create.argtypes = [c_void_p] +libwh.wh_iter_create.restype = c_void_p + +# iter_seek +libwh.wh_iter_seek.argtypes = [c_void_p, c_char_p, c_uint] + +# iter_valid +libwh.wh_iter_valid.argtypes = [c_void_p] +libwh.wh_iter_valid.restype = c_bool + +# iter_skip1 +libwh.wh_iter_skip1.argtypes = [c_void_p] + +# iter_skip +libwh.wh_iter_skip.argtypes = [c_void_p, c_uint] + +# iter_peek +libwh.wh_iter_peek.argtypes = [c_void_p, c_char_p, 
c_uint, c_void_p, c_char_p, c_uint, c_void_p] +libwh.wh_iter_peek.restype = c_bool + +# iter_park +libwh.wh_iter_park.argtypes = [c_void_p] + +# iter_destroy +libwh.wh_iter_destroy.argtypes = [c_void_p] +# }}} libwh + +# class {{{ +class Wh: + def __init__(self, maxklen=256, maxvlen=8192): + self.whptr = libwh.wh_create() + self.kbufsz = maxklen + self.vbufsz = maxvlen + + # user must call explicitly + def destroy(self): + libwh.wh_destroy(self.whptr) + + def ref(self): + return WhRef(self.whptr, self.kbufsz, self.vbufsz) + +class WhRef: + def __init__(self, whptr, kbufsz, vbufsz): + self.refptr = libwh.wh_ref(whptr) + self.kbufsz = kbufsz + self.vbufsz = vbufsz + self.vbuf = create_string_buffer(self.vbufsz) + + # user must call explicitly + def unref(self): + libwh.wh_unref(self.refptr) + + def iter(self): + return WhIter(self.refptr, self.kbufsz, self.vbufsz) + + # key: python string; value: any (hierarchical) python object + def put(self, key, value): + binkey = key.encode() + binvalue = msgpack.packb(value) + return libwh.wh_put(self.refptr, binkey, c_uint(len(binkey)), binvalue, c_uint(len(binvalue))) + + # return the value as a python object + def get(self, key): + binkey = key.encode() + vlen = c_uint() + ret = libwh.wh_get(self.refptr, binkey, len(binkey), self.vbuf, self.vbufsz, byref(vlen)) + if ret and vlen.value <= self.vbufsz: + return msgpack.unpackb(self.vbuf.value) + else: + return None + + def delete(self, key): + binkey = key.encode() + return libwh.wh_del(self.refptr, binkey, c_uint(len(binkey))) + + def probe(self, key): + binkey = key.encode() + return libwh.wh_probe(self.refptr, binkey, c_uint(len(binkey))) + +class WhIter: + def __init__(self, refptr, kbufsz, vbufsz): + self.iptr = libwh.wh_iter_create(refptr) + self.kbufsz = kbufsz + self.vbufsz = vbufsz + self.kbuf = create_string_buffer(kbufsz) + self.vbuf = create_string_buffer(vbufsz) + + # user must call explicitly + def destroy(self): + libwh.wh_iter_destroy(self.iptr) + + def 
seek(self, key): + if key is None: + libwh.wh_iter_seek(self.iptr, None, c_uint(0)) + else: + binkey = key.encode() + libwh.wh_iter_seek(self.iptr, binkey, c_uint(len(binkey))) + + def valid(self): + return libwh.wh_iter_valid(self.iptr) + + def skip1(self): + libwh.wh_iter_skip1(self.iptr) + + def skip(self, nr): + libwh.wh_iter_skip(self.iptr, c_uint(nr)) + + # return (key, value) pair or None + def peek(self): + klen = c_uint() + vlen = c_uint() + ret = libwh.wh_iter_peek(self.iptr, self.kbuf, self.kbufsz, byref(klen), self.vbuf, self.vbufsz, byref(vlen)) + if ret and klen.value <= self.kbufsz and vlen.value <= self.vbufsz: + self.kbuf[klen.value] = b'\x00' + return (self.kbuf.value.decode(), klen.value, msgpack.unpackb(self.vbuf.value), vlen.value) + else: + return None + +# }}} class + +# examples +wh1 = Wh(32, 1024) +ref1 = wh1.ref() # take a ref for kv operations + +ref1.put("Hello", "pywh") +ref1.put("key1", "value1") +ref1.put("key2", "value2") +ref1.put("key3", {"xxx":"valuex", "yyy":"valuey"}) +ref1.delete("key2") + +rget = ref1.get("Hello") +print(rget) + +# don't use ref when iterating +iter1 = ref1.iter() +iter1.seek(None) +while iter1.valid(): + r = iter1.peek() + print(r) + iter1.skip1() + +iter1.destroy() # must destroy all iters before unref +ref1.unref() # must unref all refs before close() +wh1.destroy() + +# vim:fdm=marker diff --git a/MassTrie-beta/wormhole/wh.strip b/MassTrie-beta/wormhole/wh.strip new file mode 100644 index 00000000..e7b3971f --- /dev/null +++ b/MassTrie-beta/wormhole/wh.strip @@ -0,0 +1,161 @@ +-K key_size +-K key_size_align +-K kref_compare +-K kref_kv_compare +-K kref_kv_match +-K kref_lcp +-K kref_match +-K kref_null +-K kref_ref_hash32 +-K kref_ref_kv +-K kref_ref_kv_hash32 +-K kref_ref_raw +-K kref_update_hash32 +-K kv_compare +-K kv_compare_ptrs +-K kv_crc32c +-K kv_crc32c_extend +-K kv_create +-K kv_create_kref +-K kv_create_str +-K kv_create_str_str +-K kv_dup +-K kv_dup2 +-K kv_dup2_key +-K kv_dup2_key_prefix +-K 
kv_dup_key +-K kv_key_lcp +-K kv_kptr +-K kv_kptr_c +-K kv_kref +-K kvmap_api_whsafe +-K kvmap_api_whunsafe +-K kvmap_api_wormhole +-K kvmap_dump_keys +-K kvmap_inp_steal_kv +-K kvmap_kv_del +-K kvmap_kv_delr +-K kvmap_kv_get +-K kvmap_kv_inpr +-K kvmap_kv_inpw +-K kvmap_kv_iter_seek +-K kvmap_kv_merge +-K kvmap_kv_probe +-K kvmap_kv_put +-K kvmap_mm_dup +-K kvmap_mm_free_free +-K kvmap_mm_free_noop +-K kvmap_mm_in_dup +-K kvmap_mm_in_noop +-K kvmap_mm_ndf +-K kvmap_mm_out_dup +-K kvmap_mm_out_noop +-K kvmap_raw_del +-K kvmap_raw_get +-K kvmap_raw_inpr +-K kvmap_raw_inpw +-K kvmap_raw_iter_seek +-K kvmap_raw_probe +-K kvmap_ref +-K kvmap_unref +-K kv_match +-K kv_match_full +-K kv_null +-K kv_print +-K kv_qsort +-K kvref_dup2_key +-K kvref_dup2_kv +-K kv_refill +-K kv_refill_hex32 +-K kv_refill_hex64 +-K kv_refill_hex64_klen +-K kv_refill_kref +-K kv_refill_kref_v +-K kv_refill_str +-K kv_refill_str_str +-K kv_refill_u64 +-K kv_refill_value +-K kvref_kv_compare +-K kvref_ref_kv +-K kv_size +-K kv_size_align +-K kv_update_hash +-K kv_vptr +-K kv_vptr_c +-K wh_clean +-K wh_create +-K wh_del +-K wh_delr +-K wh_destroy +-K wh_get +-K wh_inpr +-K wh_inpw +-K wh_iter_create +-K wh_iter_destroy +-K wh_iter_inp +-K wh_iter_park +-K wh_iter_peek +-K wh_iter_seek +-K wh_iter_skip +-K wh_iter_valid +-K wh_merge +-K wh_park +-K wh_probe +-K wh_ref +-K wh_resume +-K whsafe_del +-K whsafe_delr +-K whsafe_get +-K whsafe_inpr +-K whsafe_inpw +-K whsafe_iter_destroy +-K whsafe_iter_park +-K whsafe_iter_seek +-K whsafe_merge +-K whsafe_probe +-K whsafe_ref +-K whsafe_put +-K wh_put +-K wh_unref +-K whunsafe_create +-K whunsafe_del +-K whunsafe_delr +-K whunsafe_get +-K whunsafe_inp +-K whunsafe_iter_create +-K whunsafe_iter_destroy +-K whunsafe_iter_next +-K whunsafe_iter_seek +-K whunsafe_iter_skip +-K whunsafe_merge +-K whunsafe_probe +-K whunsafe_put +-K wormhole_clean +-K wormhole_create +-K wormhole_del +-K wormhole_delr +-K wormhole_destroy +-K wormhole_fprint +-K wormhole_get 
+-K wormhole_inpr +-K wormhole_inpw +-K wormhole_iter_create +-K wormhole_iter_destroy +-K wormhole_iter_inp +-K wormhole_iter_kref +-K wormhole_iter_kvref +-K wormhole_iter_next +-K wormhole_iter_park +-K wormhole_iter_peek +-K wormhole_iter_seek +-K wormhole_iter_skip +-K wormhole_iter_valid +-K wormhole_kvmap_api_create +-K wormhole_merge +-K wormhole_park +-K wormhole_probe +-K wormhole_ref +-K wormhole_refresh_qstate +-K wormhole_resume +-K wormhole_put +-K wormhole_unref diff --git a/README.md b/README.md index 8f74a3fe..48ce0c53 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ $ sudo apt install g++-7 1. Clone the git repository ```bash -$ git clone https://github.com/readablesystems/sto.git +$ git clone -b masstrie https://github.com/roeeash/sto.git $ cd sto ``` @@ -63,7 +63,31 @@ $ cd sto $ git submodule update --init --recursive ``` -3. Execute configuration scripts +3. Set system variables +```bash +$ cd MassTrie-beta/wormhole +$ export LD_LIBRARY_PATH=`pwd` +$ cd ../ +$ cd ../ +``` +OR if you're on the tcsh shell + +```bash +$ cd MassTrie-beta/wormhole +$ setenv LD_LIBRARY_PATH=`pwd` +$ cd ../ +$ cd ../ +``` + +4. Additional system setup: +If you do not have autoconf and python-is-python3 installed, run: + +```bash +$ sudo apt install python-is-python3 +$ sudo apt install autoconf +``` + +5. Execute configuration scripts ```bash $ ./bootstrap.sh $ ./configure @@ -74,7 +98,7 @@ enable it for STO by running `./configure CC=gcc-7 CXX=g++-7`. (Note: if you use macOS you should probably run `./configure CXX='clang++ -stdlib=libc++'`) -4. Build +6. Build ```bash $ make -jN # launch N parallel build jobs ``` @@ -90,6 +114,19 @@ by continuous integration. - `make micro_bench`: Build the array-based microbenchmark. - `make clean`: You know what it does. + +7. 
Build (specifically for benchmark files) + +```bash +$ make -jN unit-test_MTrie # launch N parallel build jobs +$ ./unit-test_MTrie +``` + +```bash +$ make -jN unit-dboindex # launch N parallel build jobs +$ ./unit-dboindex +``` + See [Wiki](https://github.com/readablesystems/sto/wiki) for advanced buid options. ## IDE Support & cmake diff --git a/benchmark/DB_oindexMTrie.hh b/benchmark/DB_oindexMTrie.hh new file mode 100644 index 00000000..d454287b --- /dev/null +++ b/benchmark/DB_oindexMTrie.hh @@ -0,0 +1,3053 @@ +#pragma once + + + +#include "DB_index.hh" + + + +#include "../MassTrie-beta/MassTrie.hh" + + + +namespace bench { + +template + +class MTrie_ordered_index : public TObject { + +public: + + typedef K key_type; + + typedef V value_type; + + typedef commutators::Commutator comm_type; + + + + //typedef typename get_occ_version::type occ_version_type; + + typedef typename get_version::type version_type; + + + + using accessor_t = typename index_common::accessor_t; + + + + static constexpr typename version_type::type invalid_bit = TransactionTid::user_bit; + + static constexpr TransItem::flags_type insert_bit = TransItem::user0_bit; + + static constexpr TransItem::flags_type delete_bit = TransItem::user0_bit << 1u; + + static constexpr TransItem::flags_type row_update_bit = TransItem::user0_bit << 2u; + + static constexpr TransItem::flags_type row_cell_bit = TransItem::user0_bit << 3u; + + static constexpr uintptr_t internode_bit = 1; + + // TicToc node version bit + + static constexpr uintptr_t ttnv_bit = 1 << 1u; + + + + typedef typename value_type::NamedColumn NamedColumn; + + typedef IndexValueContainer value_container_type; + + + + static constexpr bool value_is_small = is_small::value; + + + + static constexpr bool index_read_my_write = DBParams::RdMyWr; + + + + struct internal_elem { + + key_type key; + + value_container_type row_container; + + bool deleted; + + + + internal_elem(const key_type& k, const value_type& v, bool valid) + + : key(k), + + 
row_container((valid ? Sto::initialized_tid() : (Sto::initialized_tid() | invalid_bit)), + + !valid, v), + + deleted(false) {} + + + + version_type& version() { + + return row_container.row_version(); + + } + + + + bool valid() { + + return !(version().value() & invalid_bit); + + } + + }; + + + + struct table_params : public Masstree::nodeparams<15,15> { + + typedef internal_elem* value_type; + + typedef Masstree::value_print value_print_type; + + typedef threadinfo threadinfo_type; + + + + static constexpr bool track_nodes = (DBParams::NodeTrack && DBParams::TicToc); + + typedef std::conditional_t aux_tracker_type; + + }; + + + + typedef Masstree::Str Str; + + typedef Masstree::basic_table table_type; + + typedef Masstree::unlocked_tcursor unlocked_cursor_type; + + typedef Masstree::tcursor cursor_type; + + typedef Masstree::leaf leaf_type; + + + + typedef typename table_type::node_type node_type; + + typedef typename unlocked_cursor_type::nodeversion_value_type nodeversion_value_type; + + + + typedef MassTrie* MTrie_table_type; + + + + using column_access_t = typename split_version_helpers>::column_access_t; + + using item_key_t = typename split_version_helpers>::item_key_t; + + template + + static constexpr auto column_to_cell_accesses + + = split_version_helpers>::template column_to_cell_accesses; + + template + + static constexpr auto extract_item_list + + = split_version_helpers>::template extract_item_list; + + + + typedef std::tuple sel_return_type; + + typedef std::tuple ins_return_type; + + typedef std::tuple del_return_type; + + typedef std::tuple> sel_split_return_type; + + + + static __thread typename table_params::threadinfo_type *ti; + + + + MTrie_ordered_index(size_t init_size) { + + this->table_init(); + + (void)init_size; + + } + + MTrie_ordered_index() { + + this->table_init(); + + } + + + + void table_init() { + + if (ti == nullptr) + + ti = threadinfo::make(threadinfo::TI_MAIN, -1); + + //table_.initialize(*ti); + + key_gen_ = 0; + + + + 
//MTrie init + + if(!this->MTrie_table) + + this->MTrie_table = new MassTrie(); + + + + } + + + + static void thread_init() { + + if (ti == nullptr) + + ti = threadinfo::make(threadinfo::TI_PROCESS, TThread::id()); + + Transaction::tinfo[TThread::id()].trans_start_callback = []() { + + ti->rcu_start(); + + }; + + Transaction::tinfo[TThread::id()].trans_end_callback = []() { + + ti->rcu_stop(); + + }; + + } + + + + uint64_t gen_key() { + + return fetch_and_add(&key_gen_, 1); + + } + + + +#if 0 + + sel_return_type + + select_row(const key_type& key, RowAccess acc) { + + unlocked_cursor_type lp(table_, key); + + bool found = lp.find_unlocked(*ti); + + internal_elem *e = lp.value(); + + if (found) { + + return select_row(reinterpret_cast(e), acc); + + } else { + + if (!register_internode_version(lp.node(), lp.full_version_value())) + + return {false, false, 0, UniRecordAccessor(nullptr)}; + + return {true, false, 0, UniRecordAccessor(nullptr)}; + + } + + } + +#endif + + + + sel_split_return_type + + select_split_row(const key_type& key, std::initializer_list accesses) { + + + + + + + + bool MTrie_found =MTrie_table->probe(&key,sizeof(key)) ; + + + + + + + + + + + + if (MTrie_found) { + + + + void * vbuf_out = MTrie_table->get(MTrie_table->ref,&key,sizeof(key)); + + + + internal_elem ** e = reinterpret_cast(vbuf_out); + + + + return select_split_row(reinterpret_cast(*e), accesses); + + } + + + + + + //else, if key is not in MassTrie + + + + void * res = MTrie_table->find_closest(&key); + + + + + + + + void * vbuf_out = MTrie_table->get(MTrie_table->ref,res,sizeof(res)); + + + + internal_elem ** e = reinterpret_cast(vbuf_out); + + + + + + //cout<<" register_internode_version(*e) = "<(nullptr) + + }; + + } + + + +#if 0 + + sel_return_type + + select_row(uintptr_t rid, RowAccess access) { + + auto e = reinterpret_cast(rid); + + bool ok = true; + + TransProxy row_item = Sto::item(this, item_key_t::row_item_key(e)); + + + + if (is_phantom(e, row_item)) + + goto abort; + + + 
+ if (index_read_my_write) { + + if (has_delete(row_item)) { + + return sel_return_type(true, false, 0, nullptr); + + } + + if (has_row_update(row_item)) { + + value_type *vptr; + + if (has_insert(row_item)) + + vptr = &e->row_container.row; + + else + + vptr = row_item.template raw_write_value(); + + return sel_return_type(true, true, rid, vptr); + + } + + } + + + + switch (access) { + + case RowAccess::UpdateValue: + + ok = version_adapter::select_for_update(row_item, e->version()); + + row_item.add_flags(row_update_bit); + + break; + + case RowAccess::ObserveExists: + + case RowAccess::ObserveValue: + + ok = row_item.observe(e->version()); + + break; + + default: + + break; + + } + + + + if (!ok) + + goto abort; + + + + return sel_return_type(true, true, rid, &(e->row_container.row)); + + + + abort: + + return sel_return_type(false, false, 0, nullptr); + + } + +#endif + + + + sel_split_return_type + + select_split_row(uintptr_t rid, std::initializer_list accesses) { + + auto e = reinterpret_cast(rid); + + TransProxy row_item = Sto::item(this, item_key_t::row_item_key(e)); + + + + // Translate from column accesses to cell accesses + + // all buffered writes are only stored in the wdata_ of the row item (to avoid redundant copies) + + auto cell_accesses = column_to_cell_accesses(accesses); + + + + std::array cell_items {}; + + bool any_has_write; + + bool ok; + + std::tie(any_has_write, cell_items) = extract_item_list(cell_accesses, this, e); + + + + if (is_phantom(e, row_item)) + + goto abort; + + + + if (index_read_my_write) { + + if (has_delete(row_item)) { + + return {true, false, 0, UniRecordAccessor(nullptr)}; + + } + + if (any_has_write || has_row_update(row_item)) { + + value_type *vptr; + + if (has_insert(row_item)) + + vptr = &e->row_container.row; + + else + + vptr = row_item.template raw_write_value(); + + return {true, true, rid, UniRecordAccessor(vptr)}; + + } + + } + + + + ok = access_all(cell_accesses, cell_items, e->row_container); + + if (!ok) + 
+ goto abort; + + + + return {true, true, rid, UniRecordAccessor(&(e->row_container.row))}; + + + + abort: + + return {false, false, 0, UniRecordAccessor(nullptr)}; + + } + + + + void update_row(uintptr_t rid, value_type *new_row) { + + auto e = reinterpret_cast(rid); + + auto row_item = Sto::item(this, item_key_t::row_item_key(e)); + + if (value_is_small) { + + row_item.acquire_write(e->version(), *new_row); + + } else { + + row_item.acquire_write(e->version(), new_row); + + } + + } + + + + void update_row(uintptr_t rid, const comm_type &comm) { + + assert(&comm); + + auto row_item = Sto::item(this, item_key_t::row_item_key(reinterpret_cast(rid))); + + row_item.add_commute(comm); + + } + + + + // insert assumes common case where the row doesn't exist in the table + + // if a row already exists, then use select (FOR UPDATE) instead + + ins_return_type + + insert_row(const key_type& key, value_type *vptr, bool overwrite = false) { + + //cursor_type lp(table_, key); + + //bool found = lp.find_insert(*ti); + + + + bool MTrie_found =MTrie_table->probe(&key,sizeof(&key)) ; + + + + + + if (MTrie_found) { + + // NB: the insert method only manipulates the row_item. It is possible + + // this insert is overwriting some previous updates on selected columns + + // The expected behavior is that this row-level operation should overwrite + + // all changes made by previous updates (in the same transaction) on this + + // row. We achieve this by granting this row_item a higher priority. + + // During the install phase, if we notice that the row item has already + + // been locked then we simply ignore installing any changes made by cell items. + + // It should be trivial for a cell item to find the corresponding row item + + // and figure out if the row-level version is locked. 
+ + void * vbuf_out = MTrie_table->get(MTrie_table->ref,&key,sizeof(key)); + + + + internal_elem ** e = reinterpret_cast(vbuf_out); + + + + internal_elem *MTrie_e = *e; + + //lp.finish(0, *ti); + + + + TransProxy row_item = Sto::item(this, item_key_t::row_item_key(MTrie_e)); + + + + if (is_phantom(MTrie_e, row_item)) + + goto abort; + + + + if (index_read_my_write) { + + if (has_delete(row_item)) { + + auto proxy = row_item.clear_flags(delete_bit).clear_write(); + + + + if (value_is_small) + + proxy.add_write(*vptr); + + else + + proxy.add_write(vptr); + + + + return ins_return_type(true, false); + + } + + } + + + + if (overwrite) { + + bool ok; + + if (value_is_small) + + ok = version_adapter::select_for_overwrite(row_item, MTrie_e->version(), *vptr); + + else + + ok = version_adapter::select_for_overwrite(row_item, MTrie_e->version(), vptr); + + if (!ok) + + goto abort; + + if (index_read_my_write) { + + if (has_insert(row_item)) { + + copy_row(MTrie_e, vptr); + + } + + } + + } else { + + // observes that the row exists, but nothing more + + if (!row_item.observe(MTrie_e->version())) + + goto abort; + + } + + + + } else { + + + + + + + + auto e = new internal_elem(key, vptr ? 
*vptr : value_type(), + + false /*!valid*/); + + + + //put in mtrie table + + MTrie_table->put(&key,sizeof(&key), + + reinterpret_cast(&e),sizeof(reinterpret_cast(&e))); + + + + + + //cout<<"MTrie_table ="<version()); + + row_item.add_flags(insert_bit); + + + + // update the node version already in the read set and modified by split + + //if (!update_internode_version(node, orig_nv, new_nv)) + + // goto abort; + + } + + + + return ins_return_type(true, MTrie_found); + + + + abort: + + return ins_return_type(false, false); + + } + + + + del_return_type + + delete_row(const key_type& key) { + + + + bool MTrie_found =MTrie_table->probe(&key,sizeof(&key)); + + + + //cout<<"MTrie_found in delete = "<get(MTrie_table->ref,&key,sizeof(key)); + + + + internal_elem ** MTrie_e = reinterpret_cast(vbuf_out); + + + + internal_elem *e = *MTrie_e; + + + + TransProxy row_item = Sto::item(this, item_key_t::row_item_key(e)); + + + + if (is_phantom(e, row_item)) { + + goto abort; + + } + + + + if (index_read_my_write) { + + if (has_delete(row_item)) + + return del_return_type(true, false); + + + + if (!e->valid() && has_insert(row_item)) { + + row_item.add_flags(delete_bit); + + return del_return_type(true, true); + + } + + } + + + + // Register a TicToc write to the leaf node when necessary. 
+ + //ttnv_register_node_write(lp.node()); + + + + + + // select_for_update will register an observation and set the write bit of + + // the TItem + + if (!version_adapter::select_for_update(row_item, e->version())) { + + goto abort; + + } + + fence(); + + if (e->deleted) { + + goto abort; + + } + + row_item.add_flags(delete_bit); + + + + + + + + } else { + + + + //else, if key is not in MassTrie + + void * res = MTrie_table->find_closest(&key); + + + + bool r ; + + + + + + + + + + void * vbuf_out = MTrie_table->get(MTrie_table->ref,res,sizeof(res)); + + + + internal_elem ** e = reinterpret_cast(vbuf_out); + + + + r=register_internode_version(*e); + + + + if (!r) { + + goto abort; + + } + + + + } + + + + + + return del_return_type(true, MTrie_found); + + + + abort: + + return del_return_type(false, false); + + } + + + + template + + bool range_scan(const key_type& begin, const key_type& end, Callback callback, + + std::initializer_list accesses, bool phantom_protection = true, int limit = -1) { + + //instructed to be an empty function + + return true; + + } + + + + template + + bool range_scan(const key_type& begin, const key_type& end, Callback callback, + + RowAccess access, bool phantom_protection = true, int limit = -1) { + + //instructed to be an empty function + + return true; + + } + + + + value_type *nontrans_get(const key_type& k) { + + + + bool MTrie_found =MTrie_table->probe(&k,sizeof(&k)); + + + + //cout<<"MTrie found = "<< MTrie_found<get(MTrie_table->ref,&k,sizeof(k)); + + + + + + internal_elem **MTrie_e = reinterpret_cast(vbuf_out); + + + + + + + + return &((*MTrie_e)->row_container.row); + + } else + + return nullptr; + + } + + + + void nontrans_put(const key_type& k, const value_type& v) { + + + + + + bool MTrie_found =MTrie_table->probe(&k,sizeof(&k)) ; + + + + + + + + if (MTrie_found ) { + + + + + + void * vbuf_out = MTrie_table->get(MTrie_table->ref,&k,sizeof(k)); + + + + + + internal_elem **e = reinterpret_cast(vbuf_out); + + + + + + + + if 
(value_is_small) + + (*e)->row_container.row = v; + + else + + copy_row(*e, &v); + + + + //put in MTrie_table + + MTrie_table->put(&k,sizeof(k), + + reinterpret_cast(e),sizeof(reinterpret_cast(e))); + + + + + + } else { + + + + + + + + + + + + + + internal_elem *e = new internal_elem(k, v, true); + + + + + + + + //put in MTrie_table + + MTrie_table->put(&k,sizeof(k), + + reinterpret_cast(&e),sizeof(reinterpret_cast(&e))); + + + + + + //cout<<"MTrie_table = "<(n)->get_aux_tracker()); + + } + + } + + auto key = item.key(); + + auto e = key.internal_elem_ptr(); + + if (key.is_row_item()) + + return txn.try_lock(item, e->version()); + + else + + return txn.try_lock(item, e->row_container.version_at(key.cell_num())); + + } + + + + bool check(TransItem& item, Transaction& txn) override { + + if (is_internode(item)) { + + node_type *n = get_internode_address(item); + + auto curr_nv = static_cast(n)->full_version_value(); + + auto read_nv = item.template read_value(); + + return (curr_nv == read_nv); + + } else { + + if constexpr (table_params::track_nodes) { + + if (is_ttnv(item)) { + + auto n = get_internode_address(item); + + return static_cast(n)->get_aux_tracker()->cp_check_version(txn, item); + + } + + } + + auto key = item.key(); + + auto e = key.internal_elem_ptr(); + + if (key.is_row_item()) + + return e->version().cp_check_version(txn, item); + + else + + return e->row_container.version_at(key.cell_num()).cp_check_version(txn, item); + + } + + } + + + + void install(TransItem& item, Transaction& txn) override { + + assert(!is_internode(item)); + + + + if constexpr (table_params::track_nodes) { + + if (is_ttnv(item)) { + + auto n = get_internode_address(item); + + txn.set_version_unlock(*static_cast(n)->get_aux_tracker(), item); + + return; + + } + + } + + + + auto key = item.key(); + + auto e = key.internal_elem_ptr(); + + + + if (key.is_row_item()) { + + //assert(e->version.is_locked()); + + if (has_delete(item)) { + + assert(e->valid() && !e->deleted); + + 
e->deleted = true; + + txn.set_version(e->version()); + + return; + + } + + + + if (!has_insert(item)) { + + if (item.has_commute()) { + + comm_type &comm = item.write_value(); + + if (has_row_update(item)) { + + copy_row(e, comm); + + } else if (has_row_cell(item)) { + + e->row_container.install_cell(comm); + + } + + } else { + + value_type *vptr; + + if (value_is_small) { + + vptr = &(item.write_value()); + + } else { + + vptr = item.write_value(); + + } + + + + if (has_row_update(item)) { + + if (value_is_small) { + + e->row_container.row = *vptr; + + } else { + + copy_row(e, vptr); + + } + + } else if (has_row_cell(item)) { + + // install only the difference part + + // not sure if works when there are more than 1 minor version fields + + // should still work + + e->row_container.install_cell(0, vptr); + + } + + } + + } + + txn.set_version_unlock(e->version(), item); + + } else { + + // skip installation if row-level update is present + + auto row_item = Sto::item(this, item_key_t::row_item_key(e)); + + if (!has_row_update(row_item)) { + + if (row_item.has_commute()) { + + comm_type &comm = row_item.template write_value(); + + assert(&comm); + + e->row_container.install_cell(comm); + + } else { + + value_type *vptr; + + if (value_is_small) + + vptr = &(row_item.template raw_write_value()); + + else + + vptr = row_item.template raw_write_value(); + + + + e->row_container.install_cell(key.cell_num(), vptr); + + } + + } + + + + txn.set_version_unlock(e->row_container.version_at(key.cell_num()), item); + + } + + } + + + + void unlock(TransItem& item) override { + + assert(!is_internode(item)); + + if constexpr (table_params::track_nodes) { + + if (is_ttnv(item)) { + + auto n = get_internode_address(item); + + static_cast(n)->get_aux_tracker()->cp_unlock(item); + + return; + + } + + } + + auto key = item.key(); + + auto e = key.internal_elem_ptr(); + + if (key.is_row_item()) + + e->version().cp_unlock(item); + + else + + 
e->row_container.version_at(key.cell_num()).cp_unlock(item); + + } + + + + void cleanup(TransItem& item, bool committed) override { + + if (committed ? has_delete(item) : has_insert(item)) { + + auto key = item.key(); + + assert(key.is_row_item()); + + internal_elem *e = key.internal_elem_ptr(); + + bool ok = _remove(e->key); + + + + + + if (!ok) { + + std::cout << "committed=" << committed << ", " + + << "has_delete=" << has_delete(item) << ", " + + << "has_insert=" << has_insert(item) << ", " + + << "locked_at_commit=" << item.locked_at_commit() << std::endl; + + always_assert(false, "insert-bit exclusive ownership violated"); + + } + + item.clear_needs_unlock(); + + } + + } + + + +protected: + + template + + class range_scanner { + + public: + + range_scanner(const Str upper, NodeCallback ncb, ValueCallback vcb, int limit) : + + boundary_(upper), boundary_compar_(false), scan_succeeded_(true), limit_(limit), scancount_(0), + + node_callback_(ncb), value_callback_(vcb) {} + + + + template + + void check(const ITER& iter, const KEY& key) { + + int min = std::min(boundary_.length(), key.prefix_length()); + + int cmp = memcmp(boundary_.data(), key.full_string().data(), min); + + if (!Reverse) { + + if (cmp < 0 || (cmp == 0 && boundary_.length() <= key.prefix_length())) + + boundary_compar_ = true; + + else if (cmp == 0) { + + uint64_t last_ikey = iter.node()->ikey0_[iter.permutation()[iter.permutation().size() - 1]]; + + uint64_t slice = string_slice::make_comparable(boundary_.data() + key.prefix_length(), + + std::min(boundary_.length() - key.prefix_length(), 8)); + + boundary_compar_ = (slice <= last_ikey); + + } + + } else { + + if (cmp >= 0) + + boundary_compar_ = true; + + } + + } + + + + template + + void visit_leaf(const ITER& iter, const Masstree::key& key, threadinfo&) { + + if (!node_callback_(iter.node(), iter.full_version_value())) { + + scan_succeeded_ = false; + + } + + if (this->boundary_) { + + check(iter, key); + + } + + } + + + + bool 
visit_value(const Masstree::key& key, internal_elem *e, threadinfo&) { + + if (this->boundary_compar_) { + + if ((Reverse && (boundary_ >= key.full_string())) || + + (!Reverse && (boundary_ <= key.full_string()))) + + return false; + + } + + bool visited = false; + + bool count = true; + + if (!value_callback_(key.full_string(), e, visited, count)) { + + scan_succeeded_ = false; + + if (count) {++scancount_;} + + return false; + + } else { + + if (!visited) + + scan_succeeded_ = false; + + if (count) {++scancount_;} + + if (limit_ > 0 && scancount_ >= limit_) { + + return false; + + } + + return visited; + + } + + } + + + + Str boundary_; + + bool boundary_compar_; + + bool scan_succeeded_; + + int limit_; + + int scancount_; + + + + NodeCallback node_callback_; + + ValueCallback value_callback_; + + }; + + + +private: + + MTrie_table_type MTrie_table; + + //table_type table_; + + uint64_t key_gen_; + + + + + + static bool + + access_all(std::array& cell_accesses, std::array& cell_items, value_container_type& row_container) { + + for (size_t idx = 0; idx < cell_accesses.size(); ++idx) { + + auto& access = cell_accesses[idx]; + + auto proxy = TransProxy(*Sto::transaction(), *cell_items[idx]); + + if (static_cast(access) & static_cast(access_t::read)) { + + if (!proxy.observe(row_container.version_at(idx))) + + return false; + + } + + if (static_cast(access) & static_cast(access_t::write)) { + + if (!proxy.acquire_write(row_container.version_at(idx))) + + return false; + + if (proxy.item().key().is_row_item()) { + + proxy.item().add_flags(row_cell_bit); + + } + + } + + } + + return true; + + } + + + + static bool has_insert(const TransItem& item) { + + return (item.flags() & insert_bit) != 0; + + } + + static bool has_delete(const TransItem& item) { + + return (item.flags() & delete_bit) != 0; + + } + + static bool has_row_update(const TransItem& item) { + + return (item.flags() & row_update_bit) != 0; + + } + + static bool has_row_cell(const TransItem& item) { + + 
return (item.flags() & row_cell_bit) != 0; + + } + + static bool is_phantom(internal_elem *e, const TransItem& item) { + + return (!e->valid() && !has_insert(item)); + + } + + + + bool register_internode_version(node_type *node, unlocked_cursor_type& cursor) { + + if constexpr (table_params::track_nodes) { + + return ttnv_register_node_read_with_snapshot(node, *cursor.get_aux_tracker()); + + } else { + + TransProxy item = Sto::item(this, get_internode_key(node)); + + if constexpr (DBParams::Opaque) { + + return item.add_read_opaque(cursor.full_version_value()); + + } else { + + return item.add_read(cursor.full_version_value()); + + } + + } + + } + + + + bool register_internode_version(internal_elem * e) { + + TransProxy row_item = Sto::item(this, item_key_t::row_item_key(e)); + + return row_item.add_read(e->version()); + + + + } + + + + // Used in scan helpers to track leaf node timestamps for phantom protection. + + bool scan_track_node_version(node_type *node, nodeversion_value_type nodeversion) { + + if constexpr (table_params::track_nodes) { + + (void)nodeversion; + + return ttnv_register_node_read(node); + + } else { + + TransProxy item = Sto::item(this, get_internode_key(node)); + + if constexpr (DBParams::Opaque) { + + return item.add_read_opaque(nodeversion); + + } else { + + return item.add_read(nodeversion); + + } + + } + + } + + + + bool update_internode_version(node_type *node, + + nodeversion_value_type prev_nv, nodeversion_value_type new_nv) { + + ttnv_register_node_write(node); + + TransProxy item = Sto::item(this, get_internode_key(node)); + + if (!item.has_read()) { + + return true; + + } + + if (prev_nv == item.template read_value()) { + + item.update_read(prev_nv, new_nv); + + return true; + + } + + return false; + + } + + + + void ttnv_register_node_write(node_type* node) { + + (void)node; + + if constexpr (table_params::track_nodes) { + + static_assert(DBParams::TicToc, "Node tracking requires TicToc."); + + always_assert(node->isleaf(), 
"Tracking non-leaf node!!"); + + auto tt_item = Sto::item(this, get_ttnv_key(node)); + + tt_item.acquire_write(*static_cast(node)->get_aux_tracker()); + + } + + } + + + + bool ttnv_register_node_read_with_snapshot(node_type* node, typename table_params::aux_tracker_type& snapshot) { + + (void)node; (void)snapshot; + + if constexpr (table_params::track_nodes) { + + static_assert(DBParams::TicToc, "Node tracking requires TicToc."); + + always_assert(node->isleaf(), "Tracking non-leaf node!!"); + + auto tt_item = Sto::item(this, get_ttnv_key(node)); + + return tt_item.observe(*static_cast(node)->get_aux_tracker(), snapshot); + + } else { + + return true; + + } + + } + + + + bool ttnv_register_node_read(node_type* node) { + + (void)node; + + if constexpr (table_params::track_nodes) { + + static_assert(DBParams::TicToc, "Node tracking requires TicToc."); + + always_assert(node->isleaf(), "Tracking non-leaf node!!"); + + auto tt_item = Sto::item(this, get_ttnv_key(node)); + + return tt_item.observe(*static_cast(node)->get_aux_tracker()); + + } else { + + return true; + + } + + } + + + + bool _remove(const key_type& key) { + + //cursor_type lp(table_, key); + + //bool found = lp.find_locked(*ti); + + + + bool MTrie_found =MTrie_table->probe(&key,sizeof(key)) ; + + + + + + if (MTrie_found) { + + + + void * vbuf_out = MTrie_table->get(MTrie_table->ref,&key,sizeof(key)); + + + + internal_elem ** e = reinterpret_cast(vbuf_out); + + + + + + internal_elem *el = *e; + + + + //remove from MTrie_table + + MTrie_table->del(&el->key,sizeof(&el->key)); + + + + Transaction::rcu_delete(el); + + } else { + + // XXX is this correct? 
+ + + + } + + return MTrie_found; + + } + + + + static uintptr_t get_internode_key(node_type* node) { + + return reinterpret_cast(node) | internode_bit; + + } + + static bool is_internode(TransItem& item) { + + return (item.key() & internode_bit) != 0; + + } + + static node_type *get_internode_address(TransItem& item) { + + if (is_internode(item)) { + + return reinterpret_cast(item.key() & ~internode_bit); + + } else if (is_ttnv(item)) { + + return reinterpret_cast(item.key() & ~ttnv_bit); + + } + + assert(false); + + return nullptr; + + } + + + + static uintptr_t get_ttnv_key(node_type* node) { + + return reinterpret_cast(node) | ttnv_bit; + + } + + static bool is_ttnv(TransItem& item) { + + return (item.key() & ttnv_bit); + + } + + + + static void copy_row(internal_elem *e, comm_type &comm) { + + comm.operate(e->row_container.row); + + } + + static void copy_row(internal_elem *e, const value_type *new_row) { + + if (new_row == nullptr) + + return; + + e->row_container.row = *new_row; + + } + +}; + + + +template + +__thread typename MTrie_ordered_index::table_params::threadinfo_type* MTrie_ordered_index::ti; + + + +template + +class MTrie_mvcc_ordered_index : public TObject { + +public: + + typedef K key_type; + + typedef V value_type; + + typedef commutators::Commutator comm_type; + + + + static constexpr bool Commute = DBParams::Commute; + + + + static constexpr TransItem::flags_type insert_bit = TransItem::user0_bit; + + static constexpr TransItem::flags_type delete_bit = TransItem::user0_bit << 1u; + + static constexpr TransItem::flags_type row_update_bit = TransItem::user0_bit << 2u; + + static constexpr TransItem::flags_type row_cell_bit = TransItem::user0_bit << 3u; + + static constexpr uintptr_t internode_bit = 1; + + + + typedef typename value_type::NamedColumn NamedColumn; + + + + static constexpr bool index_read_my_write = DBParams::RdMyWr; + + + + typedef typename index_common::MvInternalElement internal_elem; + + + + struct table_params : public 
Masstree::nodeparams<15,15> { + + typedef internal_elem* value_type; + + typedef Masstree::value_print value_print_type; + + typedef threadinfo threadinfo_type; + + }; + + + + typedef Masstree::Str Str; + + typedef Masstree::basic_table table_type; + + typedef Masstree::unlocked_tcursor unlocked_cursor_type; + + typedef Masstree::tcursor cursor_type; + + typedef Masstree::leaf leaf_type; + + + + typedef typename table_type::node_type node_type; + + typedef typename unlocked_cursor_type::nodeversion_value_type nodeversion_value_type; + + + + typedef MassTrie* MTrie_table_type; + + + + using accessor_t = typename index_common::accessor_t; + + + + typedef std::tuple sel_return_type; + + typedef std::tuple ins_return_type; + + typedef std::tuple del_return_type; + + typedef std::tuple> sel_split_return_type; + + + + using index_t = MTrie_mvcc_ordered_index; + + using column_access_t = typename split_version_helpers::column_access_t; + + using item_key_t = typename split_version_helpers::item_key_t; + + template static constexpr auto mvcc_column_to_cell_accesses = + + split_version_helpers::template mvcc_column_to_cell_accesses; + + template static constexpr auto extract_item_list = + + split_version_helpers::template extract_item_list; + + using MvSplitAccessAll = typename split_version_helpers::template MvSplitAccessAll>; + + + + static __thread typename table_params::threadinfo_type *ti; + + + + MTrie_mvcc_ordered_index(size_t init_size) { + + this->table_init(); + + (void)init_size; + + } + + MTrie_mvcc_ordered_index() { + + this->table_init(); + + } + + + + void table_init() { + + static_assert(DBParams::Opaque, "MVCC must operate in opaque mode."); + + if (ti == nullptr) + + ti = threadinfo::make(threadinfo::TI_MAIN, -1); + + table_.initialize(*ti); + + key_gen_ = 0; + + } + + + + static void thread_init() { + + if (ti == nullptr) + + ti = threadinfo::make(threadinfo::TI_PROCESS, TThread::id()); + + Transaction::tinfo[TThread::id()].trans_start_callback = []() { + 
+ ti->rcu_start(); + + }; + + Transaction::tinfo[TThread::id()].trans_end_callback = []() { + + ti->rcu_stop(); + + }; + + } + + + + uint64_t gen_key() { + + return fetch_and_add(&key_gen_, 1); + + } + + + + sel_return_type + + select_row(const key_type& key, RowAccess acc) { + + unlocked_cursor_type lp(table_, key); + + bool found = lp.find_unlocked(*ti); + + internal_elem *e = lp.value(); + + if (found) { + + return select_row(reinterpret_cast(e), acc); + + } else { + + if (!register_internode_version(lp.node(), lp.full_version_value())) + + goto abort; + + return sel_return_type(true, false, 0, nullptr); + + } + + + + abort: + + return sel_return_type(false, false, 0, nullptr); + + } + + + + sel_return_type + + select_row(const key_type& key, std::initializer_list accesses) { + + unlocked_cursor_type lp(table_, key); + + bool found = lp.find_unlocked(*ti); + + internal_elem *e = lp.value(); + + if (found) { + + return select_row(reinterpret_cast(e), accesses); + + } else { + + if (!register_internode_version(lp.node(), lp.full_version_value())) + + return sel_return_type(false, false, 0, nullptr); + + return sel_return_type(true, false, 0, nullptr); + + } + + + + return sel_return_type(false, false, 0, nullptr); + + } + + + + // Split version select row + + sel_split_return_type + + select_split_row(const key_type& key, std::initializer_list accesses) { + + unlocked_cursor_type lp(table_, key); + + bool found = lp.find_unlocked(*ti); + + internal_elem *e = lp.value(); + + + + + + if (found) { + + return select_splits(reinterpret_cast(e), accesses); + + } else { + + return { + + register_internode_version(lp.node(), lp.full_version_value()), + + false, + + 0, + + SplitRecordAccessor({ nullptr }) + + }; + + } + + } + + + + sel_split_return_type + + select_splits(uintptr_t rid, std::initializer_list accesses) { + + using split_params = SplitParams; + + auto e = reinterpret_cast(rid); + + auto cell_accesses = mvcc_column_to_cell_accesses(accesses); + + bool found; + 
+ auto result = MvSplitAccessAll::run_select(&found, cell_accesses, this, e); + + return {true, found, rid, SplitRecordAccessor(result)}; + + } + + + + void update_row(uintptr_t rid, value_type* new_row) { + + // Update entire row using overwrite. + + // In timestamp-split tables, this will add a write set item to each "cell item". + + MvSplitAccessAll::run_update(this, reinterpret_cast(rid), new_row); + + } + + + + void update_row(uintptr_t rid, const comm_type &comm) { + + // Update row using commutatively. + + // In timestamp-split tables, this will add a commutator to each "cell item". The + + // per-cell commutators should be supplied by the user (defined for each split) and + + // they should be subclasses of the row commutator. + + // Internally this run_update() implementation below uses a down-cast to convert + + // row commutators to cell commutators. + + MvSplitAccessAll::run_update(this, reinterpret_cast(rid), comm); + + } + + + + // insert assumes common case where the row doesn't exist in the table + + // if a row already exists, then use select (FOR UPDATE) instead + + ins_return_type + + insert_row(const key_type& key, value_type *vptr, bool overwrite = false) { + + cursor_type lp(table_, key); + + bool found = lp.find_insert(*ti); + + bool should_abort = false; + + internal_elem *e; + + if (!found) { + + e = new internal_elem(this, key); + + lp.value() = e; + + + + node_type *node; + + nodeversion_value_type orig_nv; + + nodeversion_value_type new_nv; + + + + bool split_right = (lp.node() != lp.original_node()); + + if (split_right) { + + node = lp.original_node(); + + orig_nv = lp.original_version_value(); + + new_nv = lp.updated_version_value(); + + } else { + + node = lp.node(); + + orig_nv = lp.previous_full_version_value(); + + new_nv = lp.next_full_version_value(1); + + } + + + + fence(); + + lp.finish(1, *ti); + + //fence(); + + + + // update the node version already in the read set and modified by split + + should_abort = 
!update_internode_version(node, orig_nv, new_nv); + + } else { + + e = lp.value(); + + lp.finish(0, *ti); + + } + + + + if (!should_abort) { + + // NB: the insert method only manipulates the row_item. It is possible + + // this insert is overwriting some previous updates on selected columns + + // The expected behavior is that this row-level operation should overwrite + + // all changes made by previous updates (in the same transaction) on this + + // row. We achieve this by granting this row_item a higher priority. + + // During the install phase, if we notice that the row item has already + + // been locked then we simply ignore installing any changes made by cell items. + + // It should be trivial for a cell item to find the corresponding row item + + // and figure out if the row-level version is locked. + + + + // Use cell-id 0 to represent the row item. + + auto row_item = Sto::item(this, item_key_t(e, 0)); + + + + auto h = e->template chain_at<0>()->find(txn_read_tid()); + + found = !h->status_is(DELETED); + + if (is_phantom(h, row_item)) { + + MvAccess::read(row_item, h); + + auto val_ptrs = TxSplitInto(vptr); + + for (size_t cell_id = 0; cell_id < SplitParams::num_splits; ++cell_id) { + + TransProxy cell_item = Sto::item(this, item_key_t(e, cell_id)); + + cell_item.add_write(val_ptrs[cell_id]); + + cell_item.add_flags(insert_bit); + + } + + return ins_return_type(true, false); + + } + + + + if (index_read_my_write) { + + if (has_delete(row_item)) { + + auto proxy = row_item.clear_flags(delete_bit).clear_write(); + + proxy.add_write(*vptr); + + return ins_return_type(true, false); + + } + + } + + + + if (overwrite) { + + for (size_t i = 0; i < SplitParams::num_splits; ++i) { + + auto item = Sto::item(this, item_key_t(e, i)); + + item.add_write(); + + } + + this->update_row(reinterpret_cast(e), vptr); + + } else { + + // TODO: This now acts like a full read of the value + + // at rtid. Once we add predicates we can change it to + + // something else. 
+ + MvAccess::read(row_item, h); + + } + + + + return ins_return_type(true, found); + + } + + + + return ins_return_type(false, false); + + } + + + + del_return_type + + delete_row(const key_type& key) { + + unlocked_cursor_type lp(table_, key); + + bool found = lp.find_unlocked(*ti); + + if (found) { + + internal_elem *e = lp.value(); + + // Use cell 0 to probe for existence of the row. + + auto row_item = Sto::item(this, item_key_t(e, 0)); + + auto h = e->template chain_at<0>()->find(txn_read_tid()); + + + + if (is_phantom(h, row_item)) { + + MvAccess::read(row_item, h); + + return del_return_type(true, false); + + } + + + + if (index_read_my_write) { + + if (has_delete(row_item)) + + return del_return_type(true, false); + + if (h->status_is(DELETED) && has_insert(row_item)) { + + for (size_t i = 0; i < SplitParams::num_splits; i++) { + + auto item = Sto::item(this, item_key_t(e, i)); + + item.add_flags(delete_bit); + + } + + return del_return_type(true, true); + + } + + } + + + + MvAccess::read(row_item, h); + + if (h->status_is(DELETED)) + + return del_return_type(true, false); + + for (size_t i = 0; i < SplitParams::num_splits; i++) { + + auto item = Sto::item(this, item_key_t(e, i)); + + item.add_write(0); + + item.add_flags(delete_bit); + + } + + } else { + + if (!register_internode_version(lp.node(), lp.full_version_value())) + + goto abort; + + } + + + + return del_return_type(true, found); + + + + abort: + + return del_return_type(false, false); + + } + + + + template + + bool range_scan(const key_type& begin, const key_type& end, Callback callback, + + std::initializer_list accesses, + + bool phantom_protection = true, int limit = -1) { + + //instructed to be an empty function + + return true; + + } + + + + template + + bool range_scan(const key_type& begin, const key_type& end, Callback callback, + + RowAccess access, bool phantom_protection = true, int limit = -1) { + + //instructed to be an empty function + + return true; + + } + + + + bool 
nontrans_get(const key_type& k, value_type* value_out) { + + unlocked_cursor_type lp(table_, k); + + bool found = lp.find_unlocked(*ti); + + if (found) { + + internal_elem *e = lp.value(); + + MvSplitAccessAll::run_nontrans_get(value_out, e); + + return true; + + } else { + + return false; + + } + + } + + + + void nontrans_put(const key_type& k, const value_type& v) { + + cursor_type lp(table_, k); + + bool found = lp.find_insert(*ti); + + if (found) { + + internal_elem *e = lp.value(); + + MvSplitAccessAll::run_nontrans_put(v, e); + + lp.finish(0, *ti); + + } else { + + internal_elem *e = new internal_elem(this, k); + + MvSplitAccessAll::run_nontrans_put(v, e); + + lp.value() = e; + + lp.finish(1, *ti); + + } + + } + + + + template + + bool lock_impl_per_chain(TransItem& item, Transaction& txn, MvObject* chain) { + + return mvcc_chain_operations::lock_impl_per_chain(item, txn, chain); + + } + + template + + bool check_impl_per_chain(TransItem& item, Transaction& txn, MvObject* chain) { + + return mvcc_chain_operations::check_impl_per_chain(item, txn, chain); + + } + + template + + void install_impl_per_chain(TransItem& item, Transaction& txn, MvObject* chain, void (*dcb)(void*)) { + + mvcc_chain_operations::install_impl_per_chain(item, txn, chain, dcb); + + } + + template + + void cleanup_impl_per_chain(TransItem& item, bool committed, MvObject* chain) { + + mvcc_chain_operations::cleanup_impl_per_chain(item, committed, chain); + + } + + + + // TObject interface methods + + bool lock(TransItem& item, Transaction& txn) override { + + assert(!is_internode(item)); + + auto key = item.key(); + + return MvSplitAccessAll::run_lock(key.cell_num(), txn, item, this, key.internal_elem_ptr()); + + } + + + + bool check(TransItem& item, Transaction& txn) override { + + if (is_internode(item)) { + + node_type *n = get_internode_address(item); + + auto curr_nv = static_cast(n)->full_version_value(); + + auto read_nv = item.template read_value(); + + auto result = (curr_nv == 
read_nv); + + TXP_ACCOUNT(txp_tpcc_check_abort1, txn.special_txp && !result); + + return result; + + } else { + + int cell_id = item.key().cell_num(); + + return MvSplitAccessAll::run_check(cell_id, txn, item, this); + + } + + } + + + + void install(TransItem& item, Transaction& txn) override { + + assert(!is_internode(item)); + + auto key = item.key(); + + MvSplitAccessAll::run_install(key.cell_num(), txn, item, this, has_delete(item) ? _delete_cb2 : nullptr); + + } + + + + void unlock(TransItem& item) override { + + (void)item; + + assert(!is_internode(item)); + + } + + + + void cleanup(TransItem& item, bool committed) override { + + assert(!is_internode(item)); + + auto key = item.key(); + + MvSplitAccessAll::run_cleanup(key.cell_num(), item, committed, this); + + } + + + +//protected: + + template + + class range_scanner { + + public: + + range_scanner(const Str upper, NodeCallback ncb, ValueCallback vcb, int limit) : + + boundary_(upper), boundary_compar_(false), scan_succeeded_(true), limit_(limit), scancount_(0), + + node_callback_(ncb), value_callback_(vcb) {} + + + + template + + void check(const ITER& iter, const KEY& key) { + + int min = std::min(boundary_.length(), key.prefix_length()); + + int cmp = memcmp(boundary_.data(), key.full_string().data(), min); + + if (!Reverse) { + + if (cmp < 0 || (cmp == 0 && boundary_.length() <= key.prefix_length())) + + boundary_compar_ = true; + + else if (cmp == 0) { + + uint64_t last_ikey = iter.node()->ikey0_[iter.permutation()[iter.permutation().size() - 1]]; + + uint64_t slice = string_slice::make_comparable(boundary_.data() + key.prefix_length(), + + std::min(boundary_.length() - key.prefix_length(), 8)); + + boundary_compar_ = (slice <= last_ikey); + + } + + } else { + + if (cmp >= 0) + + boundary_compar_ = true; + + } + + } + + + + template + + void visit_leaf(const ITER& iter, const Masstree::key& key, threadinfo&) { + + if (!node_callback_(iter.node(), iter.full_version_value())) { + + scan_succeeded_ = 
false; + + } + + if (this->boundary_) { + + check(iter, key); + + } + + } + + + + bool visit_value(const Masstree::key& key, internal_elem *e, threadinfo&) { + + if (this->boundary_compar_) { + + if ((Reverse && (boundary_ >= key.full_string())) || + + (!Reverse && (boundary_ <= key.full_string()))) + + return false; + + } + + bool visited = false; + + bool count = true; + + if (!value_callback_(key.full_string(), e, visited, count)) { + + scan_succeeded_ = false; + + if (count) {++scancount_;} + + return false; + + } else { + + if (!visited) + + scan_succeeded_ = false; + + if (count) {++scancount_;} + + if (limit_ > 0 && scancount_ >= limit_) { + + return false; + + } + + return visited; + + } + + } + + + + Str boundary_; + + bool boundary_compar_; + + bool scan_succeeded_; + + int limit_; + + int scancount_; + + + + NodeCallback node_callback_; + + ValueCallback value_callback_; + + }; + + + +//private: + + MTrie_table_type MTrie_table; + + table_type table_; + + uint64_t key_gen_; + + + + //static bool + + //access_all(std::array&, std::array&, internal_elem*) { + + // always_assert(false, "Not implemented."); + + // return true; + + //} + + + + static TransactionTid::type txn_read_tid() { + + return Sto::read_tid(); + + } + + + + static bool has_insert(const TransItem& item) { + + return (item.flags() & insert_bit) != 0; + + } + + static bool has_delete(const TransItem& item) { + + return (item.flags() & delete_bit) != 0; + + } + + static bool has_row_update(const TransItem& item) { + + return (item.flags() & row_update_bit) != 0; + + } + + static bool has_row_cell(const TransItem& item) { + + return (item.flags() & row_cell_bit) != 0; + + } + + template + + static bool is_phantom(const MvHistory* h, const TransItem& item) { + + return (h->status_is(DELETED) && !has_insert(item)); + + } + + + + bool register_internode_version(node_type *node, nodeversion_value_type nodeversion) { + + TransProxy item = Sto::item(this, get_internode_key(node)); + + return 
item.add_read(nodeversion); + + } + + + + bool register_internode_version(internal_elem * e) { + + TransProxy item = Sto::item(this, item_key_t::row_item_key(e)); + + return item.add_read(e->version()); + + } + + + + + + bool update_internode_version(node_type *node, + + nodeversion_value_type prev_nv, nodeversion_value_type new_nv) { + + TransProxy item = Sto::item(this, get_internode_key(node)); + + if (!item.has_read()) { + + return true; + + } + + if (prev_nv == item.template read_value()) { + + item.update_read(prev_nv, new_nv); + + return true; + + } + + return false; + + } + + + + static void _delete_cb2(void* history_ptr) { + + using history_type = typename internal_elem::object0_type::history_type; + + auto hp = reinterpret_cast(history_ptr); + + auto obj = hp->object(); + + if (obj->find_latest(false) == hp) { + + auto el = internal_elem::from_chain(obj); + + auto table = reinterpret_cast*>(el->table); + + cursor_type lp(table->table_, el->key); + + if (lp.find_locked(*table->ti) && lp.value() == el) { + + hp->status_poisoned(); + + if (obj->find_latest(true) == hp) { + + lp.finish(-1, *table->ti); + + Transaction::rcu_call(gc_internal_elem, el); + + } else { + + hp->status_unpoisoned(); + + lp.finish(0, *table->ti); + + } + + } else { + + lp.finish(0, *table->ti); + + } + + } + + } + + + + static void gc_internal_elem(void* el_ptr) { + + auto el = reinterpret_cast(el_ptr); + + delete el; + + } + + + + static uintptr_t get_internode_key(node_type* node) { + + return reinterpret_cast(node) | internode_bit; + + } + + static bool is_internode(TransItem& item) { + + return (item.key() & internode_bit) != 0; + + } + + static node_type *get_internode_address(TransItem& item) { + + assert(is_internode(item)); + + + + return reinterpret_cast(item.key() & ~internode_bit); + + } + +}; + + + +template + +__thread typename MTrie_mvcc_ordered_index::table_params::threadinfo_type* MTrie_mvcc_ordered_index::ti; + + + +} // namespace bench \ No newline at end of file 
diff --git a/package-lock.json b/package-lock.json new file mode 100644 index 00000000..48e341a0 --- /dev/null +++ b/package-lock.json @@ -0,0 +1,3 @@ +{ + "lockfileVersion": 1 +} diff --git a/run/MassTrie-beta/MassTrie.hh b/run/MassTrie-beta/MassTrie.hh new file mode 100644 index 00000000..53cfd776 --- /dev/null +++ b/run/MassTrie-beta/MassTrie.hh @@ -0,0 +1,318 @@ +#include + +#include + +#include + +#include + +#include + +#include + +#include "wormhole/lib.h" + +#include "wormhole/kv.h" + +#include "wormhole/wh.h" + +#define NUM_THREADS 64 + +#define MAX_SIZE 64 + +using namespace std; + +//~~~~~~~~~CLASS MASSTRIE~~~~~~~~~~~~~~ + +class MassTrie +{ + +public: + // constructor + + MassTrie() + { + + // creating wh wormhole mapping key to internal_elem (as uintptr_t) + + wh = wh_create(); + + ref = wh_ref(this->wh); + + iter = wh_iter_create(this->ref); + + this->kbuf_out = (void *)malloc(sizeof(char) * MAX_SIZE); + + this->vbuf_out = (void *)malloc(sizeof(char) * MAX_SIZE); + + r = false; + } + + // destructor + + ~MassTrie() + { + + wh_iter_destroy(this->iter); + + wh_unref(this->ref); + + wh_clean(this->wh); + + wh_destroy(this->wh); + + free(kbuf_out); + + free(vbuf_out); + } + + //~~~~~~~~~MASSTRIE FUNCTIONS~~~~~~~~~~~~~~ + + // put function - putting a uintptr_t which is the internal_elem + + bool put(const void *key, int klen, const void *value, int vlen) + { + + return (wh_put(this->ref, key, klen, value, vlen)); + } + + // get function + + void *get(struct wormref *const ref, const void *key, int klen) + { + + // variables + + // bool r; + + u32 vlen_out = 0; + + // get action performed + + r = wh_get(ref, key, klen, vbuf_out, sizeof(vbuf_out), &vlen_out); + + return r ? 
vbuf_out : nullptr; + } + + // delete function + + bool del(const void *key, int klen) + { + + return (wh_del(this->ref, key, klen)); + } + + // probe function - returns true if key exists, false otherwise + + bool probe(const void *key, int klen) + { + + r = (wh_probe(this->ref, key, klen)); + + return r; + } + + // finds the closest pointer currently in the MassTrie + + // to a pointer passed as a parameter + + void *find_closest(const void *key) + { + + // variables + + u32 klen_out = 0; + + u32 vlen_out = 0; + + // bool r; + + int min = INT_MAX; + + int curr; + + void *res = NULL; + + // search loop + + wh_iter_seek(this->iter, NULL, 0); // seek to the head + + // printf("wh_iter_seek closest pointer to key\"\"\n"); + + while (wh_iter_valid(this->iter)) + { + + r = wh_iter_peek(this->iter, kbuf_out, MAX_SIZE, &klen_out, vbuf_out, MAX_SIZE, &vlen_out); + + if (r) + { + + // calculate disatnce + + curr = abs((long)(reinterpret_cast(kbuf_out)) - (long)(reinterpret_cast(key))); + + if (curr < min) + { + + // perform malloc + + if (!res) + + res = (void *)malloc(sizeof(char) * MAX_SIZE); + + // error handling + + if (res == NULL) + { + + printf("Error! memory not allocated."); + + exit(1); + } + + min = curr; + + // cout<<"curr = "<iter); + + memset(kbuf_out, 0, sizeof(kbuf_out)); + + memset(vbuf_out, 0, sizeof(vbuf_out)); + } + + return (res != NULL) ? 
res : nullptr; + } + + // deletes all from MassTrie + + void delete_all() + { + + // variables + + u32 klen_out = 0; + + u32 vlen_out = 0; + + // bool + + // search loop + + wh_iter_seek(this->iter, NULL, 0); // seek to the head + + // printf("wh_iter_seek closest pointer to key\"\"\n"); + + while (wh_iter_valid(this->iter)) + { + + r = wh_iter_peek(this->iter, kbuf_out, MAX_SIZE, &klen_out, vbuf_out, MAX_SIZE, &vlen_out); + + if (r) + { + + // delete key + + this->del(kbuf_out, sizeof(kbuf_out)); + } + + else + { + + printf("ERROR!\n"); + } + + wh_iter_skip1(this->iter); + + memset(kbuf_out, 0, sizeof(kbuf_out)); + + memset(vbuf_out, 0, sizeof(vbuf_out)); + } + } + + // data members + + struct wormhole *wh; + + struct wormref *ref; + + struct wormhole_iter *iter; + + void *kbuf_out; + + void *vbuf_out; + + bool r; + +}; // class MassTrie + +/** + +//override the << operation + + + +ostream& operator<<(ostream &os, MassTrie* m){ + + + +u32 klen_out = 0; + + char kbuf_out[MAX_SIZE] = {}; + + u32 vlen_out = 0; + + char vbuf_out[MAX_SIZE] = {}; + + bool r; + + + + wh_iter_seek(m->iter, NULL, 0); // seek to the head + + printf("wh_iter_seek \"\"\n"); + + while (wh_iter_valid(m->iter)) { + + r = wh_iter_peek(m->iter, kbuf_out, MAX_SIZE, &klen_out, vbuf_out, MAX_SIZE, &vlen_out); + + if (r) { + + os << "wh_iter_peek: key = "<(kbuf_out)<<" , klen = "<< klen_out<<" , "<< + + " value= "<(vbuf_out) << ", vlen= "<< vlen_out<iter); + + + + memset(kbuf_out,0,sizeof(kbuf_out)); + + memset(vbuf_out,0,sizeof(vbuf_out)); + + } + + return os; + +} + + + +**/ diff --git a/run/MassTrie-beta/wormhole/LICENSE b/run/MassTrie-beta/wormhole/LICENSE new file mode 100644 index 00000000..f288702d --- /dev/null +++ b/run/MassTrie-beta/wormhole/LICENSE @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. 
+ Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. 
+ + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. 
+ + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. 
+ + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. 
The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. 
+ + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. 
+ + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. 
This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. 
For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. 
Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. 
+ + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. 
Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. 
+ + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + Copyright (C) + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +. 
diff --git a/run/MassTrie-beta/wormhole/Makefile b/run/MassTrie-beta/wormhole/Makefile new file mode 100644 index 00000000..f00e6b59 --- /dev/null +++ b/run/MassTrie-beta/wormhole/Makefile @@ -0,0 +1,45 @@ +# Makefile +# rules (always with .out) +# SRC-X.out += abc # extra source: abc.c +# MOD-X.out += abc # extra module: abc.c abc.h +# ASM-X.out += abc # extra assembly: abc.S +# DEP-X.out += abc # extra dependency: abc +# FLG-X.out += -finline # extra flags +# LIB-X.out += abc # extra -labc options + +# X.out : xyz.h xyz.c # for extra dependences that are to be compiled/linked. + +# X => X.out +TARGETS += easydemo concbench stresstest +# X => X.c only +SOURCES += +# X => X.S only +ASSMBLY += +# X => X.c X.h +MODULES += lib kv wh +# X => X.h +HEADERS += ctypes + +FLG += +LIB += m + +UNAME_S := $(shell uname -s) +ifeq ($(UNAME_S),FreeBSD) +LIB += execinfo +endif + +# when $ make FORKER_PAPI=y +ifeq ($(strip $(FORKER_PAPI)),y) +LIB += papi +FLG += -DFORKER_PAPI +endif + +bin : libwh.so +libwh.so : Makefile Makefile.common lib.c lib.h kv.c kv.h wh.c wh.h wh.strip + $(eval ALLFLG := $(CSTD) $(EXTRA) $(FLG) -shared -fPIC) + $(eval ALLLIB := $(addprefix -l,$(LIB) $(LIB-$@))) + $(CCC) $(ALLFLG) -o $@ lib.c kv.c wh.c $(ALLLIB) + strip --strip-all --discard-all @wh.strip $@ + + +include Makefile.common diff --git a/run/MassTrie-beta/wormhole/Makefile.common b/run/MassTrie-beta/wormhole/Makefile.common new file mode 100644 index 00000000..ecd761e7 --- /dev/null +++ b/run/MassTrie-beta/wormhole/Makefile.common @@ -0,0 +1,216 @@ +#usage: include Makefile.common at the end of your Makefile + +# no builtin rules/vars (CC, CXX, etc. 
are still defined but will be empty) +MAKEFLAGS += -r -R + +HDR = $(addsuffix .h,$(MODULES) $(HEADERS)) +SRC = $(addsuffix .c,$(MODULES) $(SOURCES)) +ASM = $(addsuffix .S,$(ASSMBLY)) +OBJ = $(addsuffix .o,$(MODULES) $(SOURCES) $(ASSEMBLY)) +DEP = Makefile.common Makefile $(HDR) $(EXTERNDEP) $(EXTERNSRC) +BIN = $(addsuffix .out,$(TARGETS)) +DIS = $(addsuffix .dis,$(TARGETS)) + +# clang: +# EXTRA="-Rpass=loop-vectorize" # IDs loops that were successfully V-ed +# EXTRA="-Rpass-missed=loop-vectorize" # IDs loops that failed V +# EXTRA="-Rpass-analysis=loop-vectorize" # IDs the statements that caused V to fail +# EXTRA="-Rpass=\ *" # remarks for all passes +# other passes: https://llvm.org/docs/Passes.html + +O ?= rg + +# predefined OPT: make O={rg,r,0g,3g,p,0s,3s,cov,mc,hc,wn,stk} +ifeq ($O,rg) # make O=rg +OPT ?= -DNDEBUG -g3 -O3 -flto -fno-stack-protector +else ifeq ($O,r) # make O=r (for release) +OPT ?= -DNDEBUG -O3 -flto -fno-stack-protector +else ifeq ($O,ns) # make O=ns (no signal handlers) +OPT ?= -DNDEBUG -O3 -flto -fno-stack-protector -DNOSIGNAL +else ifeq ($O,0g) # make O=0g +OPT ?= -g3 -O0 -fno-inline +else ifeq ($O,2g) # make O=2g +OPT ?= -g3 -O2 +else ifeq ($O,3g) # make O=3g +OPT ?= -g3 -O3 -flto -fno-inline +else ifeq ($O,p) # make O=p (profiling: rg+noinline) +OPT ?= -DNDEBUG -g3 -O3 -flto -fno-stack-protector -fno-inline +else ifeq ($O,0s) # make O=0s (address sanitizer) +OPT ?= -g3 -O0 -fno-inline -fsanitize=address -fno-omit-frame-pointer -fno-optimize-sibling-calls -DHEAPCHECKING +else ifeq ($O,3s) # make O=3s (address sanitizer) +OPT ?= -g3 -O3 -fno-inline -fsanitize=address -fno-omit-frame-pointer -fno-optimize-sibling-calls -DHEAPCHECKING +else ifeq ($O,t) # make O=0t (thread sanitizer) +OPT ?= -g3 -O1 -fno-inline -fsanitize=thread -fno-stack-protector +else ifeq ($O,cov) # make O=cov (for gcov) +OPT ?= -g3 -DNDEBUG -O0 --coverage +CCC = gcc +else ifeq ($O,mc) # make O=mc (for valgrind memcheck) +OPT ?= -g3 -O1 -fno-inline -DHEAPCHECKING +ARCH 
?= broadwell +else ifeq ($O,hc) # make O=hc (for gperftools heapcheck) +OPT ?= -g3 -O1 -fno-inline +LIB += tcmalloc +else ifeq ($O,wn) # more warning +OPT ?= -g3 -O3 -Wvla -Wformat=2 -Wconversion -Wstrict-prototypes -Wmissing-prototypes +else ifeq ($O,stk) # check stack usage with gcc +OPT ?= -g3 -O3 -DNDEBUG -fstack-usage +CCC = gcc +endif + +# malloc: g:glibc, t:tcmalloc, j:jemalloc +M ?= g + +ifeq ($M,t) + LIB += tcmalloc + FLG += -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free +else ifeq ($M,j) + LIB += jemalloc +endif + +UNAME_S := $(shell uname -s) +ifeq ($(UNAME_S),Linux) + CHECK_S := -D__linux__ + LIB += rt +else ifeq ($(UNAME_S),FreeBSD) + CHECK_S := -D__FreeBSD__ + FLG += -I/usr/local/include -L/usr/local/lib + LIB += rt + LIB += execinfo + TPUT := /usr/local/bin/tput +else ifeq ($(UNAME_S),Darwin) + CHECK_S := -D__APPLE__ -D__MACH__ + # do nothing +else + $(error "Supported Platforms: Linux, FreeBSD, Darwin") +endif +TPUT ?= tput + +CCC ?= clang +CSTD = -std=gnu18 +XCC ?= clang++ +XSTD = -std=gnu++17 + +UNAME_M := $(shell uname -m) +ifeq ($(UNAME_M),aarch64) # "native" does not work for clang@aarch64 + CHECK_M := -D__aarch64__ + ARCH ?= armv8-a+crc +else ifeq ($(UNAME_M),arm64) # "native" does not work for clang@aarch64 + CHECK_M := -D__aarch64__ + ARCH ?= armv8-a+crc +else ifeq ($(UNAME_M),x86_64) + CHECK_M := -D__x86_64__ + ARCH ?= native +else ifeq ($(UNAME_M),amd64) # freebsd + CHECK_M := -D__x86_64__ + ARCH ?= native +else + $(error "Supported Platforms: aarch64, x86_64") +endif + +TUNE ?= native + +NBI += memcpy memmove memcmp + +# minimal requirement on x86_64: -march=nehalem +# minimal requirement on aarch64: -march=armv8-a+crc +FLG += -march=$(ARCH) -mtune=$(TUNE) +FLG += -pthread -Wall -Wextra -Wshadow #-Weverything +FLG += $(addprefix -fno-builtin-,$(NBI)) +FLG += $(OPT) + +ifneq ($(shell $(CCC) --version 2>/dev/null | grep clang),) +FLG += -ferror-limit=3 +CCCTYPE := clang +else ifneq ($(shell $(CCC) --version 
2>/dev/null | grep gcc),) +FLG += -fmax-errors=3 +FLG += -Wno-unknown-pragmas +CCCTYPE := gcc +else + $(error "Supported Compilers: clang, gcc") +endif + +ifeq ($(CCCTYPE),clang) + CCINST = /usr/lib/clang/$(shell $(CCC) --version 2>/dev/null | awk '/^clang/ { print $$3 }') + CCINC = $(CCINST)/include +else ifeq ($(CCCTYPE),gcc) + CCINST = /usr/lib/gcc/$(shell $(CCC) -dumpmachine)/$(shell $(CCC) -dumpversion) + CCINC = $(CCINST)/include $(CCINST)/include-fixed +endif +CCINC = /usr/include /usr/local/include + +ifneq ($(shell find $(CCINC) -name backtrace-supported.h 2>/dev/null),) + LIB += backtrace + FLG += -DBACKTRACE +endif + +ifneq ($(shell find $(CCINC) -name liburing.h 2>/dev/null),) + LIB += uring + FLG += -DLIBURING +endif + + +uniq = $(if $1,$(firstword $1) $(call uniq,$(filter-out $(firstword $1),$1))) +magentatxt := $(shell $(TPUT) setaf 5) +greentxt := $(shell $(TPUT) setaf 2) +bluetxt := $(shell $(TPUT) setaf 4) +normaltxt := $(shell $(TPUT) sgr0) + +.PHONY : bin dis def clean cleanx check tags + +bin : $(BIN) +dis : $(DIS) bin +.DEFAULT_GOAL = bin +.SECONDEXPANSION: + +ifeq ($(J),o) +# DANGER. Don't use unless it works! 
+# build from .o files but target-specific flags are missing in %.o : %.x +%.out : %.o $(OBJ) $$(addsuffix .o,$$(SRC-$$@) $$(MOD-$$@) $$(ASM-$$@)) + $(eval ALLFLG := $(CSTD) $(EXTRA) $(FLG) $(FLG-$*) $(FLG-$*.o) $(FLG-$@) -rdynamic) + $(eval ALLLIB := $(addprefix -l,$(LIB) $(LIB-$@))) + $(CCC) $(ALLFLG) -o $@ $^ $(ALLLIB) +# +else # default: all-in-one command +%.out : %.c $(SRC) $(ASM) $(DEP) $$(DEP-$$@) $$(addsuffix .c,$$(SRC-$$@) $$(MOD-$$@)) $$(addsuffix .h,$$(HDR-$$@) $$(MOD-$$@)) $$(addsuffix .S,$$(ASM-$$@)) + $(eval ALLSRC := $(SRC) $(addsuffix .c,$(SRC-$@) $(MOD-$@)) $(ASM) $(addsuffix .S,$(ASM-$@))) + $(eval UNIQSRC := $(call uniq,$(ALLSRC))) + $(eval ALLFLG := $(CSTD) $(EXTRA) $(FLG) $(FLG-$@) -rdynamic) + $(eval ALLLIB := $(addprefix -l,$(LIB) $(LIB-$@))) + @printf '$(bluetxt)$@$(magentatxt) <= $(greentxt)$< $(UNIQSRC)$(normaltxt)\n' + $(CCC) $(ALLFLG) -o $@ $< $(UNIQSRC) $(ALLLIB) +# +endif + + +%.dis : %.out + objdump -SlwtC $< 1>$@ 2>/dev/null + +%.o : %.cc $(DEP) $$(DEP-$$@) $$(addsuffix .h,$$(HDR-$$@) $$(MOD-$$@)) + $(XCC) $(XSTD) $(EXTRA) $(FLG) $(FLG-$*) $(FLG-$@) -o $@ -c $< + +%.o : %.c $(DEP) $$(DEP-$$@) $$(addsuffix .h,$$(HDR-$$@) $$(MOD-$$@)) + $(CCC) $(CSTD) $(EXTRA) $(FLG) $(FLG-$*) $(FLG-$@) -o $@ -c $< + +%.o : %.S $(DEP) $$(DEP-$$@) $$(addsuffix .h,$$(HDR-$$@) $$(MOD-$$@)) + $(CCC) $(CSTD) $(EXTRA) $(FLG) $(FLG-$*) $(FLG-$@) -o $@ -c $< + +%.s : %.c $(DEP) $$(DEP-$$@) $$(addsuffix .h,$$(HDR-$$@) $$(MOD-$$@)) + $(CCC) $(CSTD) $(EXTRA) $(FLG) $(FLG-$*) $(FLG-$*.o) -S -o $@ -c $< + +def : + $(CCC) $(FLG) -dM -E - = "h"; the iter will be placed on "hello" + r = wh_iter_valid(iter); // r == true; You should always check if iter is valid after a seek() and skip() + r = wh_iter_peek(iter, buf, 6, &len_out, NULL, 0, NULL); // only need the key: will get "hello" and 5 + r = wh_iter_peek(iter, NULL, 0, NULL, buf, 6, &len_out); // only need the value: will get "world!" 
and 6 + // (you can also get both key and value using one call with two buffers) + wh_iter_skip1(iter); // skip the current key; equivalent to wh_iter_skip(iter, 1); + r = wh_iter_valid(iter); // r == false; already passed the end of the dataset + wh_iter_park(iter); // an iter may hold locks; It's a good manner to "park" the iter before sleep. + sleep(10); // not interacting with the wormhole instance. + wh_iter_seek(iter, NULL, 0); // need to do another seek to reactivate the iter + r = wh_iter_valid(iter); // r == true; on the zero-sized key now + wh_iter_destroy(iter); // now we're done with the iter + wh_del(ref, "hello", 5); // delete a key + wh_del(ref, NULL, NULL); // delete the zero-sized key + wh_unref(ref); // the current thread is no longer interested in accessing the index + wh_destroy(wh); // fully destroy the index; all references should have been released before calling this +} +``` + +## Integer keys + +Wormhole supports binary keys, which means you don't need to print integers into text when using Wormhole to index integer keys. +Here are some quick examples for using Wormhole as an integer-key index. A little-endian CPU is assumed. + +```C +{ + // 32-bit unsigned integer keys + u32 key = __builtin_bswap32(1000); // reverse byte order of key 1000 + wh_put(ref, &key, 4, NULL, 0); + key = __builtin_bswap32(2000); // reverse byte order of key 2000 +    wh_put(ref, &key, 4, NULL, 0); + struct wormhole_iter * iter = wh_iter_create(ref); + key = __builtin_bswap32(999); + wh_iter_seek(iter, &key, 4); // seek 999 + u32 key_out, len_out; + r = wh_iter_peek(iter, &key_out, 4, &len_out, NULL, 0, NULL); // see 1000 in key_out in reversed byte order + wh_iter_skip1(iter); + r = wh_iter_peek(iter, &key_out, 4, &len_out, NULL, 0, NULL); // see 2000 in key_out in reversed byte order +} +``` + +# Advanced APIs + +If the simple and thread-safe `wh_*` interface already meets your performance requirements, You don't need to read the following sections. 
+Using the `wormhole_*` and `whunsafe_*` APIs can maximize the efficiency of your code with a roughly 5%-10% speedup. +However, inefficient use of these APIs, such as repeatedly calling malloc() to prepare the key buffer, can easily hurt the performance. + +## `struct kv` and `struct kref` + +There are a handful of helper functions (`kv_*` and `kref_*` functions) at the beginning of wh.h. +It's worth noting that the *key's hash* (`hash` of `struct kv` and `hash32` of `struct kref`) +must be up-to-date before passed to wormhole. +The `kv_refill*` helper functions internally update the hash after filling the kv contents. +In a more general case, `kv_update_hash` directly updates a `struct kv`'s hash. +Similarly, `kref_refill_hash32()` calculates the 32-bit hash for `struct kref`. +Performing the hash calculation at the client side can achieve the best efficiency on the server (the index operations). + +## The Wormhole API + +`concbench.c` and `stresstest.c` are examples of how to use a Wormhole index. +There are three sets of Wormhole API: `whsafe`, `wormhole`, and `whunsafe`. +* `whsafe`: The *worry-free* thread-safe API. If you use Wormhole in a concurrent environment and want minimal complexity in your code, you should use `whsafe`. +* `wormhole`: The standard thread-safe API. It offers better efficiency than `whsafe` but requires some extra effort for blocking prevention. +* `whunsafe`: the thread-unsafe API. It offers the best speed and efficiency but does not perform internal concurrency control. +External synchronization should be employed when accessing `whunsafe` in a concurrent environment. + +The functions of each API can be found near the end of `wh.c` (search `kvmap_api_whsafe`, `kvmap_api_wormhole`, and `kvmap_api_whunsafe`). +Note that each API contains a mix of `whsafe_*`, `wormhole_*`, and `whunsafe_*` functions. + +### The `whsafe` API +The `whsafe` API functions are listed in the `kvmap_api_whsafe` structure in `wh.c`. 
The API consists of a mix of `wormhole_*` and `whsafe_*` functions. + +The index operations (GET, SET, UPDATE, DEL, PROBE, INPLACE, MERGE, and SCAN (`wormhole_iter_*` functions)) are all *thread safe*. +A thread needs to hold a reference of the index (_wormref_) to perform safe index operations. + +An example of using point-query operations using the `whsafe` API. + +```C +{ + wh = wormhole_create(NULL); // use NULL here unless you want to change the allocator. + ref = whsafe_ref(wh); + for (...) { + whsafe_put(ref, ...); + whsafe_get(ref, ...); + whsafe_del(ref, ...); + ... // other safe operations + } + ... // other safe operations + wormhole_unref(ref); + wormhole_destroy(wh); +} +``` + +An example of range-query operations: + +```C +{ + ref = whsafe_ref(wh); + // ... assume we already have a valid ref + iter = wormhole_iter_create(ref); + for (...) { + whsafe_iter_seek(iter, key); + wormhole_iter_peek(iter, buf); + wormhole_iter_skip(iter, 1); + wormhole_iter_peek(iter, buf); + wormhole_iter_skip(iter, 3); + wormhole_iter_inp(iter, uf, priv); + // other iter operations + } + // An active iterator is likely holding a lock. + whsafe_iter_park(iter); // Release resources to avoid blocking other threads + // it's now safe to do something such as sleep() or waitpid() + // ... start using the iterator again + whsafe_iter_seek(iter, key2); + // ... other iter operations + whsafe_iter_destroy(iter); + // ... do something + // must destroy iterators before unref() + wormhole_unref(ref); +} +``` + +### The `wormhole` API +Similar to `whsafe`, `wormhole` is also thread safe. It's often faster than `whsafe` but requires extra caution when using it. + +An example of using point-query operations using the `wormhole` API. + +```C +{ + wh = wormhole_create(NULL); // use NULL here unless you want to change the allocator. + ref = wormhole_ref(wh); + for (...) { + wormhole_put(ref, ...); + wormhole_get(ref, ...); + wormhole_del(ref, ...); + ... // other safe operations + } + ... 
// other safe operations + wormhole_unref(ref); + wormhole_destroy(wh); +} +``` + +An example of range-query operations: + +```C +{ + ref = wormhole_ref(wh); + // ... assume we already have a valid ref + iter = wormhole_iter_create(ref); + for (...) { + wormhole_iter_seek(iter, key); + wormhole_iter_peek(iter, buf); + wormhole_iter_skip(iter, 1); + wormhole_iter_peek(iter, buf); + wormhole_iter_skip(iter, 3); + wormhole_iter_inp(iter, uf, priv); + // other iter operations + } + // An active iterator is likely holding a lock. + wormhole_iter_park(iter); // Release resources to avoid blocking other threads + while (condition not met) { // See below for explanation + wormhole_refresh_qstate(ref); + } + // ... start using the iterator again + wormhole_iter_seek(iter, key2); + // ... other iter operations + wormhole_iter_destroy(iter); + // ... do something + // must destroy iterators before unref() + wormhole_unref(ref); +} +``` + +### Avoid blocking writers when using the `wormhole` API +Wormhole internally uses QSBR RCU to synchronize readers/writers so every holder of a reference (`ref`) +needs to actively perform index operations. +An ref-holder, if not actively performing index operations, may block a writer thread that is performing split/merge operations. +(because of not periodically announcing its quiescent state). +If a ref-holder is about to become inactive from Wormhole's perspective (doing something else or just sleeping), +it is recommended that the holder temporarily releases the `ref` before entering the inactive status (such as calling `sleep(10)`), +and reactivate the `ref` before performing the next index operation. 
+ +```C +{ + // assume we already have an active ref + wormhole_park(ref); // this will avoid blocking any other threads + sleep(10); + wormhole_resume(ref); // this will reactivate the ref + // continue to perform index operations +} +``` + +A common scenario of dead-locking is acquiring locks with an active wormhole reference, +The following example could cause deadlock between two threads. + +```C +// Thread A has an active ref and try to lock() +{ + struct wormref * ref = wormhole_ref(wh); + lock(just_a_lock); // << block here forever +} + +// Thread B already acquired the lock and wants to insert a key to wh +{ + lock(just_a_lock); + wormhole_put(ref, kv); << block here forever +} +``` + +To avoid this scenario, thread A should either call `wormhole_park(ref)` before acquiring the lock, or keep updating the qstate of the ref: +```C +// Solution A.1: use wormhole_park() +{ + struct wormref * ref = wormhole_ref(wh); + wormhole_park(ref); + lock(just_a_lock); + wormhole_resume(ref); // can use ref afterward +} + +// Solution A.2: use try_lock and wormhole_refresh_qstate() +{ + struct wormref * ref = wormhole_ref(wh); + while (!try_lock(just_a_lock)) { + wormhole_refresh_qstate(ref); + } + // continue to use ref +} +``` + +The above issues with QSBR are specific to the `wormhole` API. `whsafe` does not have these issues. + +### The `whunsafe` API +A set of *thread-unsafe* functions are also provided. See the functions with _prefix_ `whunsafe`. +The thread-unsafe functions don't use the reference (_wormref_). +Simply feed them with the pointer to the wormhole index: + +```C +{ + wh = whunsafe_create(NULL); + for (...) { + whunsafe_put(wh, ...); + whunsafe_get(wh, ...); + whunsafe_del(wh, ...); + ... // other unsafe operations + } + ... // other unsafe operations + wormhole_destroy(wh); +} +``` + +### In-place update with user-defined function +`wormhole_inp` executes a user-defined function on an existing key-value item. 
+If the key does not exist, a NULL pointer will be passed to the user-defined function. +A simple example would be incrementing a counter stored in a key-value pair. + +```C +{ + // user-defined in-place update function + void myadd1(struct kv * kv, void * priv) { + if (kv != NULL) { + assert(kv->vlen >= sizeof(u64)); + u64 * pvalue = kv_vptr(kv); + (*pvalue)++; + } + } + + // create the counter + u64 zero = 0; + struct kv * tmp = kv_create("counter", 7, &zero, 8); // malloc-ed + wormhole_put(ref, tmp); + + // perform +1 on the stored value + struct kref kref = kv_ref(tmp); // create a kref of tmp + wormhole_inp(ref, &kref, myadd1, NULL); +} +``` + +Note that the user-defined function should ONLY change the value's content, and nothing else. +Otherwise, the index can be corrupted. +A similar mechanism is also provided for iterators (`wormhole_iter_inp`). + +The inplace function can also be used to retrieve key-value data. For example: + +```C +{ + void inplace_getu64(struct kv * kv, void * priv) { + if (kv != NULL) { + assert(kv->vlen >= sizeof(u64)); + u64 * pvalue = kv_vptr(kv); + *(u64 *)priv = *pvalue; + } else { + *(u64 *)priv = 0; + } + } + ... + struct kref kref = ... + u64 val; + wormhole_inp(ref, &kref, inplace_getu64, &val); +} +``` + +### `merge`: atomic Read-Modify-Write +The `wormhole_merge` and `whsafe_merge` functions perform atomic Read-Modify-Write (RMW) operations. +In a RMW operation, if the search key is found, the KV pair will be passed to a user-defined callback function `uf` (short for user function). +Otherwise, a NULL pointer is passed to `uf`. +`uf` could update the KV in-place if it does not require any memory reallocation. +In such a case, `uf` should return the KV's pointer back and the merge function will do nothing else. +If `uf` want to replace the KV with something new, it should return a pointer that is different than the original KV pointer. +The `uf` should not make memory allocation by itself. 
+Instead, the `merge` function will copy the returned KV and replace the existing KV with the newly created one. +`uf` should not return NULL unless the key was not found. + +### Iterator +The `wormhole_iter_{seek,peek,skip,next,inp}` functions provide range-search functionalities. +If the search key does not exist, the `seek` operation will put the cursor on the item that is greater than the search-key. +`next` will return the item under the current cursor and move the cursor forward. +`peek` is similar but does not move the cursor. For example, with keys `{1,3,5}`, `seek(2); r = next()` will see `r == 3`. + +Currently Wormhole does not provide `seek_for_less_equal()` and `prev()` for backward scanning. This feature will be added in the future. + +# Memory management + +By default, Wormhole manages all the key-value data internally and only copies to or from a user-supplied +buffer (a `struct kv` object). +This draws a clear boundary in the memory space between the index structure and its users. +After a call to any of the index operations, the caller can immediately free +the buffer holding the key-reference or the key-value data. +This also allows users to use stack-allocated variables to interact with Wormhole. + +The memory manager of the internal key-value objects can be customized when creating a new Wormhole (see `wormhole_create`). +The customization will _only_ affect the internal `struct kv` objects. +Actually, the memory manager can be configured to directly use the caller's `struct kv` object and store it in Wormhole. +This `struct kvmap_mm` structure shows an example: + +```C +{ + const struct kvmap_mm kvmap_mm_ualloc { + .in = kvmap_mm_in_noop, // in wormhole_put(), store caller's kv in wh + .out = kvmap_mm_out_dup, // but still make a copy in wormhole_get() + .free = kvmap_mm_free_free, // call free() for delete/update + }; + ... + struct wormhole * wh = wormhole_create(&kvmap_mm_ualloc); + struct wormref * ref = wormhole_ref(wh); + ... 
+ struct kv * newkv = malloc(size); + ... + wormhole_put(ref, newkv); + // Don't free newkv! it's now managed by wh +} +``` + +Each of the in/out/free functions can be freely customized. +A few `kvmap_mm_*` functions are already provided for common scenarios. +`kvmap_mm_ndf` is identical to the `kvmap_mm_ualloc` structure in the above example. + +## Hugepages +Wormhole uses hugepages when available. To reserve some hugepages in Linux (10000 * 2MB): + + # echo 10000 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages + +# Tuning + +A few macros in `wh.c` can be tuned. + +* `WH_SLABLEAF_SIZE` controls the slab size for leaf node allocation. +The default is `((1lu << 21))` (2MB slabs). If 1GB hugepages are available, `WH_SLABLEAF_SIZE` can be set to `((1lu << 30))` to utilize 1GB hugepages. +Using 1GB hugepages can improve search performance on a large dataset. + +* `WH_KPN` controls "Keys Per (leaf-)Node". The default value is 128. +Compared to the default, `WH_KPN=256` can offer 5-10%+ higher point query and update speed. +However, range queries prefer a smaller node size such as 64. + + +* `QSBR_STATES_NR` and `QSBR_SHARDS_NR` control the capacity (number of active references) of the QSBR RCU. +The product of the two values is the capacity. For efficiency, `QSBR_STATES_NR` can be set to 23, 39, and 55, and `QSBR_SHARDS_NR` must be 2^n, n<=6. +The defaults are 23 and 32, respectively. The QSBR registry can run out of space if there are a few hundred of threads, which is not a problem in practice. + +# Limitations + +## Key Patterns +A **split** operation will fail when **129** (`WH_KPN + 1`) keys share a common prefix of 65535+ bytes. +In Wormhole, the maximum _anchor-key_ length is 65535 (2^16) bytes, which is shorter than the maximum key-length (2^32). + +## Memory Allocation +Insertions/updates can fail and return false when a memory allocation fails. +On memory-allocation failure, the hash-table expansion function will block and wait for available memory. 
+ +# Performance +Some benchmarking results with some real-world datasets: See [this](https://github.com/wuxb45/wormhole/issues/5) page for more information. + +![Concurrent GET](https://user-images.githubusercontent.com/564235/112712778-704d7200-8e9f-11eb-9f4d-795de46772d1.png) diff --git a/run/MassTrie-beta/wormhole/README.txt b/run/MassTrie-beta/wormhole/README.txt new file mode 100644 index 00000000..e70108ef --- /dev/null +++ b/run/MassTrie-beta/wormhole/README.txt @@ -0,0 +1,31 @@ +To set up the project: + +If you're not already in the folder 'wormhole', perform: + +1. cd wormhole + +Once you're there, set the variable LD_LIBRARY_PATH to the +current working directory using: + +2. setenv LD_LIBRARY_PATH `pwd` + +You can check (optionally) that this operation was executed properly using: + +3. echo $LD_LIBRARY_PATH + + +Then, do: + +4. cd sto + +5. ./bootstrap.sh + +6. ./configure + +To run the test file do: + +7. make unit-test_MTrie + +Then run it using: + +8. ./unit-test_MTrie diff --git a/run/MassTrie-beta/wormhole/concbench.c b/run/MassTrie-beta/wormhole/concbench.c new file mode 100644 index 00000000..f18abde9 --- /dev/null +++ b/run/MassTrie-beta/wormhole/concbench.c @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2018-2019 Wu, Xingbo + * + * All rights reserved. No warranty, explicit or implicit, provided. + */ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include "lib.h" +#include "kv.h" +#include "wh.h" + +atomic_uint_least64_t __seqno = 0; +u64 __nth = 0; +struct kv ** __samples = NULL; +u64 __nkeys = 0; +atomic_uint_least64_t __tot = 0; +u64 __endtime = 0; + + static void * +kv_load_worker(struct wormhole * const wh) +{ + srandom_u64(time_nsec() * time_nsec()); + struct wormref * const ref = wormhole_ref(wh); + const u64 seq = atomic_fetch_add(&__seqno, 1); + const u64 n0 = __nkeys / __nth * seq; + const u64 nz = (seq == (__nth - 1)) ? 
__nkeys : (__nkeys / __nth * (seq + 1)); + printf("load worker %lu %lu\n", n0, nz); + for (u64 i = n0; i < nz; i++) + wormhole_put(ref, __samples[i]); + wormhole_unref(ref); + return NULL; +} + + static void * +kv_probe_worker(struct wormhole * const wh) +{ + struct wormref * const ref = wormhole_ref(wh); + struct kv * next = __samples[random_u64() % __nkeys]; + u64 rnext = random_u64() % __nkeys; + u64 count = 0; + u64 succ = 0; +#define BATCH ((10000)) + do { + for (u64 i = 0; i < BATCH; i++) { + // reading kv samples leads to unnecessary cache misses + // use prefetch to minimize overhead on workload generation + struct kv * const key = next; + next = __samples[rnext]; + __builtin_prefetch(next, 0, 0); + __builtin_prefetch(((u8 *)next) + 64, 0, 0); + rnext = random_u64() % __nkeys; + __builtin_prefetch(&(__samples[rnext])); + + // do probe + // customize your benchmark: do a mix of wh operations with switch-cases + const struct kref kref = kv_kref(key); + if (wormhole_probe(ref, &kref)) + succ++; + } + count += BATCH; + } while (time_nsec() < __endtime); + if (count != succ) + printf("count %lu success %lu\n", count, succ); + (void)atomic_fetch_add(&__tot, count); + wormhole_unref(ref); + return NULL; +} + + int +main(int argc, char ** argv) +{ + if (argc < 3) { + printf("usage: <#keys> <#threads>\n"); + printf(" Get words.txt: wget https://github.com/dwyl/english-words/raw/master/words.txt\n"); + printf(" Example: %s words.txt 1000000 4\n", argv[0]); + printf(" Better to use only one numa node with numactl -N 0\n"); + printf(" Better to run X thread on X cores\n"); + return 0; + } + + char ** const words = malloc(sizeof(char *) * 1000000); // or `wc -l words.txt` + u64 nr_words = 0; + char * buf = malloc(8192); + size_t bufsize = 8192; + FILE * const fwords = fopen(argv[1], "r"); + if (fwords == NULL) { + printf("open words file failed\n"); + return 0; + } + + // read all words to words + while (getline(&buf, &bufsize, fwords) > 0) { + buf[strlen(buf)-1] = 
'\0'; + words[nr_words] = strdup(buf); + nr_words++; + } + fclose(fwords); + + // generate keys + const u64 nkeys = strtoull(argv[2], NULL, 10); + struct kv ** const samples = malloc(sizeof(struct kv *) * nkeys); + char * ss[6]; + for (u64 i = 0; i < nkeys; i++) { + for (u64 j = 0; j < 6; j++) + ss[j] = words[random() % nr_words]; + sprintf(buf, "%s %s %s %s %s %s!", ss[0], ss[1], ss[2], ss[3], ss[4], ss[5]); + samples[i] = kv_create_str(buf, NULL, 0); + } + // free words & buf + for (u64 i = 0; i < nr_words; i++) + free(words[i]); + free(words); + free(buf); + + // load (4) + __samples = samples; + __nkeys = nkeys; + struct wormhole * const wh = wormhole_create(NULL); + __nth = 4; + const u64 dtl = thread_fork_join(4, (void *)kv_load_worker, false, (void *)wh); + printf("load x4 %.2lf mops\n", ((double)nkeys) * 1e3 / ((double)dtl)); + + const u64 nth = strtoull(argv[3], NULL, 10); + printf("probe with %lu threads. each round takes 3 seconds\n", nth); + for (u64 i = 0; i < 3; i++) { + __tot = 0; + __endtime = time_nsec() + 3e9; // 3 sec + const u64 dt = thread_fork_join(nth, (void *)kv_probe_worker, false, (void *)wh); + const double mops = ((double)__tot) * 1e3 / ((double)dt); + printf("probe x%lu %.2lf mops\n", nth, mops); + sleep(1); + } + + // final clean up for valgrind + for (u64 i = 0; i < nkeys; i++) + free(samples[i]); + free(samples); + wormhole_destroy(wh); + return 0; +} diff --git a/run/MassTrie-beta/wormhole/concbench.out b/run/MassTrie-beta/wormhole/concbench.out new file mode 100644 index 00000000..ee87ca31 Binary files /dev/null and b/run/MassTrie-beta/wormhole/concbench.out differ diff --git a/run/MassTrie-beta/wormhole/ctypes.h b/run/MassTrie-beta/wormhole/ctypes.h new file mode 100644 index 00000000..314ca5dc --- /dev/null +++ b/run/MassTrie-beta/wormhole/ctypes.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2016--2021 Wu, Xingbo + * + * All rights reserved. No warranty, explicit or implicit, provided. 
+ */ +#pragma once + +// C types only; C++ source code don't use this + +#include +#include + +/* C11 atomic types */ +typedef atomic_bool abool; + +typedef atomic_uchar au8; +typedef atomic_ushort au16; +typedef atomic_uint au32; +typedef atomic_ulong au64; +static_assert(sizeof(au8) == 1, "sizeof(au8)"); +static_assert(sizeof(au16) == 2, "sizeof(au16)"); +static_assert(sizeof(au32) == 4, "sizeof(au32)"); +static_assert(sizeof(au64) == 8, "sizeof(au64)"); + +typedef atomic_char as8; +typedef atomic_short as16; +typedef atomic_int as32; +typedef atomic_long as64; +static_assert(sizeof(as8) == 1, "sizeof(as8)"); +static_assert(sizeof(as16) == 2, "sizeof(as16)"); +static_assert(sizeof(as32) == 4, "sizeof(as32)"); +static_assert(sizeof(as64) == 8, "sizeof(as64)"); + +// shorten long names +#define MO_RELAXED memory_order_relaxed +#define MO_CONSUME memory_order_consume +#define MO_ACQUIRE memory_order_acquire +#define MO_RELEASE memory_order_release +#define MO_ACQ_REL memory_order_acq_rel +#define MO_SEQ_CST memory_order_seq_cst diff --git a/run/MassTrie-beta/wormhole/easydemo.c b/run/MassTrie-beta/wormhole/easydemo.c new file mode 100644 index 00000000..f095a6ac --- /dev/null +++ b/run/MassTrie-beta/wormhole/easydemo.c @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2021 Wu, Xingbo + * + * All rights reserved. No warranty, explicit or implicit, provided. 
 */
// easydemo.c — minimal single-threaded walkthrough of the wh_* convenience
// API: create/ref a wormhole, point ops (put/del/probe/get), park/resume,
// and forward iteration.
#define _GNU_SOURCE
#include
// NOTE(review): the target of the #include above was lost in this paste
// (angle brackets stripped); presumably <stdio.h> for printf — confirm
// against the upstream wormhole easydemo.c.

#include "lib.h"
#include "kv.h"
#include "wh.h"

  int
main(int argc, char ** argv)
{
  (void)argc;
  (void)argv;
  // create the index, then take a reference; all wh_* calls below
  // operate on the reference, not on the wormhole handle itself
  struct wormhole * const wh = wh_create();
  struct wormref * const ref = wh_ref(wh);

  bool r;

  // point operations; each wh_* call reports success as a bool,
  // printed here as 'T'/'F'
  r = wh_put(ref, "wormhole", 8, "easy", 4);
  printf("wh_put wormhole easy %c\n", r?'T':'F');

  r = wh_put(ref, "time_travel", 11, "impossible", 10);
  printf("wh_put time_travel impossible %c\n", r?'T':'F');

  r = wh_del(ref, "time_travel", 11);
  printf("wh_del time_travel %c\n", r?'T':'F');

  r = wh_probe(ref, "time_travel", 11);
  printf("wh_probe time_travel %c\n", r?'T':'F');

  // caller-provided out-buffers (8 bytes each); lengths come back via
  // the *len_out pointers
  u32 klen_out = 0;
  char kbuf_out[8] = {};
  u32 vlen_out = 0;
  char vbuf_out[8] = {};
  r = wh_get(ref, "wormhole", 8, vbuf_out, 8, &vlen_out);
  printf("wh_get wormhole %c %u %.*s\n", r?'T':'F', vlen_out, vlen_out, vbuf_out);

  // in a concurrent environment, the kvmap_api_wormhole need park&resume when a thread is about to go idle
  // don't need park&resume if you're using the default kvmap_api_whsafe in whwh.c!
  wh_park(ref);
  usleep(10);
  wh_resume(ref);

  // prepare a few keys for range ops
  wh_put(ref, "00", 2, "0_value", 7);
  wh_put(ref, "11", 2, "1_value", 7);
  wh_put(ref, "22", 2, "2_value", 7);

  struct wormhole_iter * const iter = wh_iter_create(ref);

  // full scan: seek to the head, then walk forward with skip1 until the
  // iterator reports invalid
  wh_iter_seek(iter, NULL, 0); // seek to the head
  printf("wh_iter_seek \"\"\n");
  while (wh_iter_valid(iter)) {
    r = wh_iter_peek(iter, kbuf_out, 8, &klen_out, vbuf_out, 8, &vlen_out);
    if (r) {
      printf("wh_iter_peek klen=%u key=%.*s vlen=%u value=%.*s\n",
          klen_out, klen_out, kbuf_out, vlen_out, vlen_out, vbuf_out);
    } else {
      printf("ERROR!\n");
    }
    wh_iter_skip1(iter);
  }

  // call iter_park if you will go idle but want to use the iter later
  // don't need to call iter_park if you're actively using iter
  wh_iter_park(iter);
  usleep(10);

  // re-seek using "0" as the seek key; passing NULL/0 for the value buffer
  // asks peek to copy only the key
  wh_iter_seek(iter, "0", 1);
  printf("wh_iter_seek \"0\"\n");
  // this time we don't want to copy the value
  r = wh_iter_peek(iter, kbuf_out, 8, &klen_out, NULL, 0, NULL);
  if (r){
    printf("wh_iter_peek klen=%u key=%.*s\n", klen_out, klen_out, kbuf_out);
  } else {
    printf("ERROR: iter_peek failed\n");
  }

  wh_iter_destroy(iter);
  // there must be no active iter when calling unref()
  wh_unref(ref);

  // unsafe operations: should have released all references
  wh_clean(wh); // just for demonstration
  wh_destroy(wh); // destroy also calls clean internally
  return 0;
}
diff --git a/run/MassTrie-beta/wormhole/easydemo.out b/run/MassTrie-beta/wormhole/easydemo.out
new file mode 100644
index 00000000..32521210
Binary files /dev/null and b/run/MassTrie-beta/wormhole/easydemo.out differ
diff --git a/run/MassTrie-beta/wormhole/kv.c b/run/MassTrie-beta/wormhole/kv.c
new file mode 100644
index 00000000..a1720e88
--- /dev/null
+++ b/run/MassTrie-beta/wormhole/kv.c
@@ -0,0 +1,1131 @@
/*
 * Copyright (c) 2016--2021 Wu, Xingbo
 *
 * All rights reserved. No warranty, explicit or implicit, provided.
 */
// kv.c (part 1) — struct kv construction, hashing, duplication, and
// key-matching helpers. struct kv stores klen/vlen, a 64-bit hash, and the
// key bytes immediately followed by the value bytes in kv->kv[].
#define _GNU_SOURCE

// headers {{{
#include // static_assert
#include
// NOTE(review): both #include targets above were lost in this paste (angle
// brackets stripped); the first is commented "static_assert" so presumably
// <assert.h> — confirm against the upstream wormhole kv.c.
#include "lib.h"
#include "ctypes.h"
#include "kv.h"
// }}} headers

// crc32c {{{
// crc32c of an arbitrary byte range, seeded with KV_CRC32C_SEED
  inline u32
kv_crc32c(const void * const ptr, u32 len)
{
  return crc32c_inc((const u8 *)ptr, len, KV_CRC32C_SEED);
}

// widen a 32-bit crc to 64 bits: high word is the bitwise complement of lo
  inline u64
kv_crc32c_extend(const u32 lo)
{
  const u64 hi = (u64)(~lo);
  return (hi << 32) | ((u64)lo);
}
// }}} crc32c

// kv {{{

// size {{{
// total bytes of a kv object: header + key bytes + value bytes
  inline size_t
kv_size(const struct kv * const kv)
{
  return sizeof(*kv) + kv->klen + kv->vlen;
}

// kv_size rounded up to a power-of-two alignment (align must be 2^k)
  inline size_t
kv_size_align(const struct kv * const kv, const u64 align)
{
  debug_assert(align && ((align & (align - 1)) == 0));
  return (sizeof(*kv) + kv->klen + kv->vlen + (align - 1)) & (~(align - 1));
}

// bytes of a key-only object (value excluded)
  inline size_t
key_size(const struct kv *const key)
{
  return sizeof(*key) + key->klen;
}

// key_size rounded up to a power-of-two alignment (align must be 2^k)
  inline size_t
key_size_align(const struct kv *const key, const u64 align)
{
  debug_assert(align && ((align & (align - 1)) == 0));
  return (sizeof(*key) + key->klen + (align - 1)) & (~(align - 1));
}
// }}} size

// construct {{{
// recompute kv->hash from the current key bytes (crc32c extended to 64 bits)
  inline void
kv_update_hash(struct kv * const kv)
{
  const u32 lo = kv_crc32c((const void *)kv->kv, kv->klen);
  kv->hash = kv_crc32c_extend(lo);
}

// overwrite the value portion (placed right after the key) and vlen;
// the key and hash are left untouched
  inline void
kv_refill_value(struct kv * const kv, const void * const value, const u32 vlen)
{
  debug_assert((vlen == 0) || value);
  memcpy(&(kv->kv[kv->klen]), value, vlen);
  kv->vlen = vlen;
}

// fill key+value into a caller-allocated kv and refresh the hash;
// kv must already have room for klen+vlen bytes
  inline void
kv_refill(struct kv * const kv, const void * const key, const u32 klen,
    const void * const value, const u32 vlen)
{
  debug_assert(kv);
  kv->klen = klen;
  memcpy(&(kv->kv[0]), key, klen);
  kv_refill_value(kv, value, vlen);
  kv_update_hash(kv);
}

// convenience: NUL-terminated string key
  inline void
kv_refill_str(struct kv * const kv, const char * const key,
    const void * const value, const u32 vlen)
{
  kv_refill(kv, key, (u32)strlen(key), value, vlen);
}

// convenience: NUL-terminated string key and string value
  inline void
kv_refill_str_str(struct kv * const kv, const char * const key,
    const char * const value)
{
  kv_refill(kv, key, (u32)strlen(key), value, (u32)strlen(value));
}

// the u64 key is filled in big-endian byte order for correct ordering
  inline void
kv_refill_u64(struct kv * const kv, const u64 key, const void * const value, const u32 vlen)
{
  kv->klen = sizeof(u64);
  *(u64 *)(kv->kv) = __builtin_bswap64(key); // bswap on little endian
  kv_refill_value(kv, value, vlen);
  kv_update_hash(kv);
}

// key is the 8-char hex rendering of a u32
  inline void
kv_refill_hex32(struct kv * const kv, const u32 hex, const void * const value, const u32 vlen)
{
  kv->klen = 8;
  strhex_32(kv->kv, hex);
  kv_refill_value(kv, value, vlen);
  kv_update_hash(kv);
}

// key is the 16-char hex rendering of a u64
  inline void
kv_refill_hex64(struct kv * const kv, const u64 hex, const void * const value, const u32 vlen)
{
  kv->klen = 16;
  strhex_64(kv->kv, hex);
  kv_refill_value(kv, value, vlen);
  kv_update_hash(kv);
}

// hex64 key padded with '!' up to klen when klen > 16; otherwise klen = 16
  inline void
kv_refill_hex64_klen(struct kv * const kv, const u64 hex,
    const u32 klen, const void * const value, const u32 vlen)
{
  strhex_64(kv->kv, hex);
  if (klen > 16) {
    kv->klen = klen;
    memset(kv->kv + 16, '!', klen - 16);
  } else {
    kv->klen = 16;
  }
  kv_refill_value(kv, value, vlen);
  kv_update_hash(kv);
}

// build a key-only kv from a kref; reuses kref's precomputed hash32
// (memmove: kref->ptr may alias kv's own buffer)
  inline void
kv_refill_kref(struct kv * const kv, const struct kref * const kref)
{
  kv->klen = kref->len;
  kv->vlen = 0;
  kv->hash = kv_crc32c_extend(kref->hash32);
  memmove(kv->kv, kref->ptr, kref->len);
}

// build a kv from a kref plus a value; key copy uses memmove (may alias),
// value copy uses memcpy (must not)
  inline void
kv_refill_kref_v(struct kv * const kv, const struct kref * const kref,
    const void * const value, const u32 vlen)
{
  kv->klen = kref->len;
  kv->vlen = vlen;
  kv->hash = kv_crc32c_extend(kref->hash32);
  memmove(kv->kv, kref->ptr, kref->len);
  memcpy(kv->kv + kv->klen, value, vlen);
}

// borrow a non-owning kref view of a kv's key (no copy)
  inline struct kref
kv_kref(const struct kv * const key)
{
  return (struct kref){.ptr = key->kv, .len = key->klen, .hash32 = key->hashlo};
}

// heap-allocate and fill a kv; returns NULL on malloc failure
  inline struct kv *
kv_create(const void * const key, const u32 klen, const void * const value, const u32 vlen)
{
  struct kv * const kv = malloc(sizeof(*kv) + klen + vlen);
  if (kv)
    kv_refill(kv, key, klen, value, vlen);
  return kv;
}

  inline struct kv *
kv_create_str(const char * const key, const void * const value, const u32 vlen)
{
  return kv_create(key, (u32)strlen(key), value, vlen);
}

  inline struct kv *
kv_create_str_str(const char * const key, const char * const value)
{
  return kv_create(key, (u32)strlen(key), value, (u32)strlen(value));
}

  inline struct kv *
kv_create_kref(const struct kref * const kref, const void * const value, const u32 vlen)
{
  return kv_create(kref->ptr, kref->len, value, vlen);
}

// the shared empty-key singleton returned by kv_null()
static struct kv __kv_null = {};

// hash the empty key once at program startup
__attribute__((constructor))
  static void
kv_null_init(void)
{
  kv_update_hash(&__kv_null);
}

// a static kv with klen == 0 and a valid hash
  inline const struct kv *
kv_null(void)
{
  return &__kv_null;
}
// }}} construct

// dup {{{
// heap-copy a whole kv (key + value); NULL in, NULL out
  inline struct kv *
kv_dup(const struct kv * const kv)
{
  if (kv == NULL)
    return NULL;

  const size_t sz = kv_size(kv);
  struct kv * const new = malloc(sz);
  if (new)
    memcpy(new, kv, sz);
  return new;
}

// heap-copy only the key portion; the copy's vlen is forced to 0
  inline struct kv *
kv_dup_key(const struct kv * const kv)
{
  if (kv == NULL)
    return NULL;

  const size_t sz = key_size(kv);
  struct kv * const new = malloc(sz);
  if (new) {
    memcpy(new, kv, sz);
    new->vlen = 0;
  }
  return new;
}

// copy into caller buffer 'to' when provided, else malloc a fresh copy
  inline struct kv *
kv_dup2(const struct kv * const from, struct kv * const to)
{
  if (from == NULL)
    return NULL;
  const size_t sz = kv_size(from);
  struct kv * const new = to ? to : malloc(sz);
  if (new)
    memcpy(new, from, sz);
  return new;
}

// key-only variant of kv_dup2; the copy's vlen is forced to 0
  inline struct kv *
kv_dup2_key(const struct kv * const from, struct kv * const to)
{
  if (from == NULL)
    return NULL;
  const size_t sz = key_size(from);
  struct kv * const new = to ? to : malloc(sz);
  if (new) {
    memcpy(new, from, sz);
    new->vlen = 0;
  }
  return new;
}

// copy only the first plen bytes of the key (plen <= from->klen) and
// rehash, since the truncated key has a different hash
  inline struct kv *
kv_dup2_key_prefix(const struct kv * const from, struct kv * const to, const u32 plen)
{
  if (from == NULL)
    return NULL;
  debug_assert(plen <= from->klen);
  const size_t sz = key_size(from) - from->klen + plen;
  struct kv * const new = to ? to : malloc(sz);
  if (new) {
    new->klen = plen;
    memcpy(new->kv, from->kv, plen);
    new->vlen = 0;
    kv_update_hash(new);
  }
  return new;
}
// }}} dup

// compare {{{
// three-way compare of key lengths; tie-breaker after memcmp of the prefix
  static inline int
klen_compare(const u32 len1, const u32 len2)
{
  if (len1 < len2)
    return -1;
  else if (len1 > len2)
    return 1;
  else
    return 0;
}

// compare whether the two keys are identical
// optimistic: do not check hash
  inline bool
kv_match(const struct kv * const key1, const struct kv * const key2)
{
  //cpu_prefetch0(((u8 *)key2) + 64);
  //return (key1->hash == key2->hash)
  //  && (key1->klen == key2->klen)
  //  && (!memcmp(key1->kv, key2->kv, key1->klen));
  return (key1->klen == key2->klen) && (!memcmp(key1->kv, key2->kv, key1->klen));
}

// compare whether the two keys are identical
// check hash first
// pessimistic: return false quickly if their hashes mismatch
  inline bool
kv_match_hash(const struct kv * const key1, const struct kv * const key2)
{
  return (key1->hash == key2->hash)
    && (key1->klen == key2->klen)
    && (!memcmp(key1->kv, key2->kv, key1->klen));
}

// full equality: klen+vlen (as one u64) and every byte of key and value
  inline bool
kv_match_full(const struct kv * const kv1, const struct kv * const kv2)
{
  return (kv1->kvlen == kv2->kvlen)
    && (!memcmp(kv1, kv2, sizeof(*kv1) + kv1->klen + kv1->vlen));
}

// match a kv's key against a varint-encoded kv128 (klen, vlen, key, value)
  bool
kv_match_kv128(const struct kv * const sk, const u8 * const kv128)
{
  debug_assert(sk);
  debug_assert(kv128);

  u32 klen128 = 0;
  u32 vlen128 = 0;
  const u8 * const pdata = vi128_decode_u32(vi128_decode_u32(kv128, &klen128), &vlen128);
  (void)vlen128;
  return (sk->klen == klen128) && (!memcmp(sk->kv, pdata, klen128));
}
+ inline int +kv_compare(const struct kv * const kv1, const struct kv * const kv2) +{ + const u32 len = kv1->klen < kv2->klen ? kv1->klen : kv2->klen; + const int cmp = memcmp(kv1->kv, kv2->kv, (size_t)len); + return cmp ? cmp : klen_compare(kv1->klen, kv2->klen); +} + +// for qsort and bsearch + static int +kv_compare_ptrs(const void * const p1, const void * const p2) +{ + const struct kv * const * const pp1 = (typeof(pp1))p1; + const struct kv * const * const pp2 = (typeof(pp2))p2; + return kv_compare(*pp1, *pp2); +} + + int +kv_k128_compare(const struct kv * const sk, const u8 * const k128) +{ + debug_assert(sk); + const u32 klen1 = sk->klen; + u32 klen2 = 0; + const u8 * const ptr2 = vi128_decode_u32(k128, &klen2); + debug_assert(ptr2); + const u32 len = (klen1 < klen2) ? klen1 : klen2; + const int cmp = memcmp(sk->kv, ptr2, len); + return cmp ? cmp : klen_compare(klen1, klen2); +} + + int +kv_kv128_compare(const struct kv * const sk, const u8 * const kv128) +{ + debug_assert(sk); + const u32 klen1 = sk->klen; + u32 klen2 = 0; + u32 vlen2 = 0; + const u8 * const ptr2 = vi128_decode_u32(vi128_decode_u32(kv128, &klen2), &vlen2); + const u32 len = (klen1 < klen2) ? klen1 : klen2; + const int cmp = memcmp(sk->kv, ptr2, len); + return cmp ? cmp : klen_compare(klen1, klen2); +} + + inline void +kv_qsort(struct kv ** const kvs, const size_t nr) +{ + qsort(kvs, nr, sizeof(kvs[0]), kv_compare_ptrs); +} + +// return the length of longest common prefix of the two keys + inline u32 +kv_key_lcp(const struct kv * const key1, const struct kv * const key2) +{ + const u32 max = (key1->klen < key2->klen) ? key1->klen : key2->klen; + return memlcp(key1->kv, key2->kv, max); +} + +// return the length of longest common prefix of the two keys with a known lcp0 + inline u32 +kv_key_lcp_skip(const struct kv * const key1, const struct kv * const key2, const u32 lcp0) +{ + const u32 max = (key1->klen < key2->klen) ? 
key1->klen : key2->klen; + debug_assert(max >= lcp0); + return lcp0 + memlcp(key1->kv+lcp0, key2->kv+lcp0, max-lcp0); +} +// }}} + +// psort {{{ + static inline void +kv_psort_exchange(struct kv ** const kvs, const u64 i, const u64 j) +{ + if (i != j) { + struct kv * const tmp = kvs[i]; + kvs[i] = kvs[j]; + kvs[j] = tmp; + } +} + + static u64 +kv_psort_partition(struct kv ** const kvs, const u64 lo, const u64 hi) +{ + if (lo >= hi) + return lo; + + const u64 p = (lo+hi) >> 1; + kv_psort_exchange(kvs, lo, p); + u64 i = lo; + u64 j = hi + 1; + do { + while (kv_compare(kvs[++i], kvs[lo]) < 0 && i < hi); + while (kv_compare(kvs[--j], kvs[lo]) > 0); + if (i >= j) + break; + kv_psort_exchange(kvs, i, j); + } while (true); + kv_psort_exchange(kvs, lo, j); + return j; +} + + static void +kv_psort_rec(struct kv ** const kvs, const u64 lo, const u64 hi, const u64 tlo, const u64 thi) +{ + if (lo >= hi) + return; + const u64 c = kv_psort_partition(kvs, lo, hi); + + if (c > tlo) // go left + kv_psort_rec(kvs, lo, c-1, tlo, thi); + + if (c < thi) // go right + kv_psort_rec(kvs, c+1, hi, tlo, thi); +} + + inline void +kv_psort(struct kv ** const kvs, const u64 nr, const u64 tlo, const u64 thi) +{ + debug_assert(tlo <= thi); + debug_assert(thi < nr); + kv_psort_rec(kvs, 0, nr-1, tlo, thi); +} +// }}} psort + +// ptr {{{ + inline void * +kv_vptr(struct kv * const kv) +{ + return (void *)(&(kv->kv[kv->klen])); +} + + inline void * +kv_kptr(struct kv * const kv) +{ + return (void *)(&(kv->kv[0])); +} + + inline const void * +kv_vptr_c(const struct kv * const kv) +{ + return (const void *)(&(kv->kv[kv->klen])); +} + + inline const void * +kv_kptr_c(const struct kv * const kv) +{ + return (const void *)(&(kv->kv[0])); +} +// }}} ptr + +// print {{{ +// cmd "KV" K and V can be 's': string, 'x': hex, 'd': dec, or else for not printing. 
+// n for newline after kv + void +kv_print(const struct kv * const kv, const char * const cmd, FILE * const out) +{ + debug_assert(cmd); + const u32 klen = kv->klen; + fprintf(out, "#%016lx k[%3u]", kv->hash, klen); + + switch(cmd[0]) { + case 's': fprintf(out, " %.*s", klen, kv->kv); break; + case 'x': str_print_hex(out, kv->kv, klen); break; + case 'd': str_print_dec(out, kv->kv, klen); break; + default: break; + } + + const u32 vlen = kv->vlen; + switch (cmd[1]) { + case 's': fprintf(out, " v[%4u] %.*s", vlen, vlen, kv->kv+klen); break; + case 'x': fprintf(out, " v[%4u]", vlen); str_print_hex(out, kv->kv+klen, vlen); break; + case 'd': fprintf(out, " v[%4u]", vlen); str_print_dec(out, kv->kv+klen, vlen); break; + default: break; + } + if (strchr(cmd, 'n')) + fprintf(out, "\n"); +} +// }}} print + +// mm {{{ + struct kv * +kvmap_mm_in_noop(struct kv * const kv, void * const priv) +{ + (void)priv; + return kv; +} + +// copy-out + struct kv * +kvmap_mm_out_noop(struct kv * const kv, struct kv * const out) +{ + (void)out; + return kv; +} + + void +kvmap_mm_free_noop(struct kv * const kv, void * const priv) +{ + (void)kv; + (void)priv; +} + +// copy-in + struct kv * +kvmap_mm_in_dup(struct kv * const kv, void * const priv) +{ + (void)priv; + return kv_dup(kv); +} + +// copy-out + struct kv * +kvmap_mm_out_dup(struct kv * const kv, struct kv * const out) +{ + return kv_dup2(kv, out); +} + + void +kvmap_mm_free_free(struct kv * const kv, void * const priv) +{ + (void)priv; + free(kv); +} + +const struct kvmap_mm kvmap_mm_dup = { + .in = kvmap_mm_in_dup, + .out = kvmap_mm_out_dup, + .free = kvmap_mm_free_free, + .priv = NULL, +}; + +const struct kvmap_mm kvmap_mm_ndf = { + .in = kvmap_mm_in_noop, + .out = kvmap_mm_out_dup, + .free = kvmap_mm_free_free, + .priv = NULL, +}; + +// }}} mm + +// kref {{{ + inline void +kref_ref_raw(struct kref * const kref, const u8 * const ptr, const u32 len) +{ + kref->ptr = ptr; + kref->len = len; + kref->hash32 = 0; +} + + inline void 
+kref_ref_hash32(struct kref * const kref, const u8 * const ptr, const u32 len) +{ + kref->ptr = ptr; + kref->len = len; + kref->hash32 = kv_crc32c(ptr, len); +} + + inline void +kref_update_hash32(struct kref * const kref) +{ + kref->hash32 = kv_crc32c(kref->ptr, kref->len); +} + + inline void +kref_ref_kv(struct kref * const kref, const struct kv * const kv) +{ + kref->ptr = kv->kv; + kref->len = kv->klen; + kref->hash32 = kv->hashlo; +} + + inline void +kref_ref_kv_hash32(struct kref * const kref, const struct kv * const kv) +{ + kref->ptr = kv->kv; + kref->len = kv->klen; + kref->hash32 = kv_crc32c(kv->kv, kv->klen); +} + + inline bool +kref_match(const struct kref * const k1, const struct kref * const k2) +{ + return (k1->len == k2->len) && (!memcmp(k1->ptr, k2->ptr, k1->len)); +} + +// match a kref and a key + inline bool +kref_kv_match(const struct kref * const kref, const struct kv * const k) +{ + return (kref->len == k->klen) && (!memcmp(kref->ptr, k->kv, kref->len)); +} + + inline int +kref_compare(const struct kref * const kref1, const struct kref * const kref2) +{ + const u32 len = kref1->len < kref2->len ? kref1->len : kref2->len; + const int cmp = memcmp(kref1->ptr, kref2->ptr, (size_t)len); + return cmp ? cmp : klen_compare(kref1->len, kref2->len); +} + +// compare a kref and a key + inline int +kref_kv_compare(const struct kref * const kref, const struct kv * const k) +{ + debug_assert(kref); + debug_assert(k); + const u32 len = kref->len < k->klen ? kref->len : k->klen; + const int cmp = memcmp(kref->ptr, k->kv, (size_t)len); + return cmp ? cmp : klen_compare(kref->len, k->klen); +} + + inline u32 +kref_lcp(const struct kref * const k1, const struct kref * const k2) +{ + const u32 max = (k1->len < k2->len) ? k1->len : k2->len; + return memlcp(k1->ptr, k2->ptr, max); +} + + inline u32 +kref_kv_lcp(const struct kref * const kref, const struct kv * const kv) +{ + const u32 max = (kref->len < kv->klen) ? 
kref->len : kv->klen; + return memlcp(kref->ptr, kv->kv, max); +} + +// klen, key, ... + inline int +kref_k128_compare(const struct kref * const sk, const u8 * const k128) +{ + debug_assert(sk); + const u32 klen1 = sk->len; + u32 klen2 = 0; + const u8 * const ptr2 = vi128_decode_u32(k128, &klen2); + debug_assert(ptr2); + const u32 len = (klen1 < klen2) ? klen1 : klen2; + const int cmp = memcmp(sk->ptr, ptr2, len); + return cmp ? cmp : klen_compare(klen1, klen2); +} + +// klen, vlen, key, ... + inline int +kref_kv128_compare(const struct kref * const sk, const u8 * const kv128) +{ + debug_assert(sk); + const u32 klen1 = sk->len; + u32 klen2 = 0; + u32 vlen2 = 0; + const u8 * const ptr2 = vi128_decode_u32(vi128_decode_u32(kv128, &klen2), &vlen2); + const u32 len = (klen1 < klen2) ? klen1 : klen2; + const int cmp = memcmp(sk->ptr, ptr2, len); + return cmp ? cmp : klen_compare(klen1, klen2); +} + +static struct kref __kref_null = {.hash32 = KV_CRC32C_SEED}; + + inline const struct kref * +kref_null(void) +{ + return &__kref_null; +} +// }}} kref + +// kvref {{{ + inline void +kvref_ref_kv(struct kvref * const ref, struct kv * const kv) +{ + ref->kptr = kv->kv; + ref->vptr = kv->kv + kv->klen; + ref->hdr = *kv; +} + + struct kv * +kvref_dup2_kv(struct kvref * const ref, struct kv * const to) +{ + if (ref == NULL) + return NULL; + const size_t sz = sizeof(*to) + ref->hdr.klen + ref->hdr.vlen; + struct kv * const new = to ? to : malloc(sz); + if (new == NULL) + return NULL; + + *new = ref->hdr; + memcpy(new->kv, ref->kptr, new->klen); + memcpy(new->kv + new->klen, ref->vptr, new->vlen); + return new; +} + + struct kv * +kvref_dup2_key(struct kvref * const ref, struct kv * const to) +{ + if (ref == NULL) + return NULL; + const size_t sz = sizeof(*to) + ref->hdr.klen; + struct kv * const new = to ? 
to : malloc(sz); + if (new == NULL) + return NULL; + + *new = ref->hdr; + memcpy(new->kv, ref->kptr, new->klen); + return new; +} + + int +kvref_kv_compare(const struct kvref * const ref, const struct kv * const kv) +{ + const u32 len = ref->hdr.klen < kv->klen ? ref->hdr.klen : kv->klen; + const int cmp = memcmp(ref->kptr, kv->kv, (size_t)len); + return cmp ? cmp : klen_compare(ref->hdr.klen, kv->klen); +} +// }}} kvref + +// kv128 {{{ +// estimate the encoded size + inline size_t +kv128_estimate_kv(const struct kv * const kv) +{ + return vi128_estimate_u32(kv->klen) + vi128_estimate_u32(kv->vlen) + kv->klen + kv->vlen; +} + +// create a kv128 from kv + u8 * +kv128_encode_kv(const struct kv * const kv, u8 * const out, size_t * const pesize) +{ + u8 * const ptr = out ? out : malloc(kv128_estimate_kv(kv)); + if (!ptr) + return NULL; + + u8 * const pdata = vi128_encode_u32(vi128_encode_u32(ptr, kv->klen), kv->vlen); + memcpy(pdata, kv->kv, kv->klen + kv->vlen); + + if (pesize) + *pesize = (size_t)(pdata - ptr) + kv->klen + kv->vlen; + return ptr; // return the head of the encoded kv128 +} + +// dup kv128 to a kv + struct kv * +kv128_decode_kv(const u8 * const ptr, struct kv * const out, size_t * const pesize) +{ + u32 klen, vlen; + const u8 * const pdata = vi128_decode_u32(vi128_decode_u32(ptr, &klen), &vlen); + struct kv * const ret = out ? 
out : malloc(sizeof(struct kv) + klen + vlen); + if (ret) + kv_refill(ret, pdata, klen, pdata + klen, vlen); + + if (pesize) + *pesize = (size_t)(pdata - ptr) + klen + vlen; + return ret; // return the kv +} + + inline size_t +kv128_size(const u8 * const ptr) +{ + u32 klen, vlen; + const u8 * const pdata = vi128_decode_u32(vi128_decode_u32(ptr, &klen), &vlen); + return ((size_t)(pdata - ptr)) + klen + vlen; +} +// }}} kv128 + +// }}} kv + +// kvmap {{{ + +// registry {{{ +// increase MAX if need more +#define KVMAP_API_MAX ((32)) +static struct kvmap_api_reg kvmap_api_regs[KVMAP_API_MAX]; +static u64 kvmap_api_regs_nr = 0; + + void +kvmap_api_register(const int nargs, const char * const name, const char * const args_msg, + void * (*create)(const char *, const struct kvmap_mm *, char **), const struct kvmap_api * const api) +{ + if (kvmap_api_regs_nr < KVMAP_API_MAX) { + kvmap_api_regs[kvmap_api_regs_nr].nargs = nargs; + kvmap_api_regs[kvmap_api_regs_nr].name = name; + kvmap_api_regs[kvmap_api_regs_nr].args_msg = args_msg; + kvmap_api_regs[kvmap_api_regs_nr].create = create; + kvmap_api_regs[kvmap_api_regs_nr].api = api; + kvmap_api_regs_nr++; + } else { + fprintf(stderr, "%s failed to register [%s]\n", __func__, name); + } +} + void +kvmap_api_helper_message(void) +{ + fprintf(stderr, "%s Usage: api ...\n", __func__); + for (u64 i = 0; i < kvmap_api_regs_nr; i++) { + fprintf(stderr, "%s example: api %s %s\n", __func__, + kvmap_api_regs[i].name, kvmap_api_regs[i].args_msg); + } +} + + int +kvmap_api_helper(int argc, char ** const argv, const struct kvmap_mm * const mm, + const struct kvmap_api ** const api_out, void ** const map_out) +{ + // "api" "name" "arg1", ... 
+ if (argc < 2 || strcmp(argv[0], "api") != 0) + return -1; + + for (u64 i = 0; i < kvmap_api_regs_nr; i++) { + const struct kvmap_api_reg * const reg = &kvmap_api_regs[i]; + if (0 != strcmp(argv[1], reg->name)) + continue; + + if ((argc - 2) < reg->nargs) + return -1; + + void * const map = reg->create(argv[1], mm, argv + 2); // skip "api" "name" + if (map) { + *api_out = reg->api; + *map_out = map; + return 2 + reg->nargs; + } else { + return -1; + } + } + + // no match + return -1; +} +// }}} registry + +// misc {{{ + void +kvmap_inp_steal_kv(struct kv * const kv, void * const priv) +{ + // steal the kv pointer out so we don't need a dangerous get_key_interanl() + if (priv) + *(struct kv **)priv = kv; +} + + inline void * +kvmap_ref(const struct kvmap_api * const api, void * const map) +{ + return api->ref ? api->ref(map) : map; +} + +// return the original map pointer; usually unused by caller + inline void * +kvmap_unref(const struct kvmap_api * const api, void * const ref) +{ + return api->unref ? 
api->unref(ref) : ref; +} +// }}} misc + +// kvmap_kv_op {{{ + inline struct kv * +kvmap_kv_get(const struct kvmap_api * const api, void * const ref, + const struct kv * const key, struct kv * const out) +{ + const struct kref kref = kv_kref(key); + return api->get(ref, &kref, out); +} + + inline bool +kvmap_kv_probe(const struct kvmap_api * const api, void * const ref, + const struct kv * const key) +{ + const struct kref kref = kv_kref(key); + return api->probe(ref, &kref); +} + + inline bool +kvmap_kv_put(const struct kvmap_api * const api, void * const ref, + struct kv * const kv) +{ + return api->put(ref, kv); +} + + inline bool +kvmap_kv_del(const struct kvmap_api * const api, void * const ref, + const struct kv * const key) +{ + const struct kref kref = kv_kref(key); + return api->del(ref, &kref); +} + + inline bool +kvmap_kv_inpr(const struct kvmap_api * const api, void * const ref, + const struct kv * const key, kv_inp_func uf, void * const priv) +{ + const struct kref kref = kv_kref(key); + return api->inpr(ref, &kref, uf, priv); +} + + inline bool +kvmap_kv_inpw(const struct kvmap_api * const api, void * const ref, + const struct kv * const key, kv_inp_func uf, void * const priv) +{ + const struct kref kref = kv_kref(key); + return api->inpw(ref, &kref, uf, priv); +} + + inline bool +kvmap_kv_merge(const struct kvmap_api * const api, void * const ref, + const struct kv * const key, kv_merge_func uf, void * const priv) +{ + const struct kref kref = kv_kref(key); + return api->merge(ref, &kref, uf, priv); +} + + inline u64 +kvmap_kv_delr(const struct kvmap_api * const api, void * const ref, + const struct kv * const start, const struct kv * const end) +{ + const struct kref kref0 = kv_kref(start); + if (end) { + const struct kref krefz = kv_kref(end); + return api->delr(ref, &kref0, &krefz); + } else { + return api->delr(ref, &kref0, NULL); + } +} + + inline void +kvmap_kv_iter_seek(const struct kvmap_api * const api, void * const iter, + const struct kv * 
const key) +{ + const struct kref kref = kv_kref(key); + api->iter_seek(iter, &kref); +} +// }}} kvmap_kv_op + +// kvmap_raw_op {{{ + inline struct kv * +kvmap_raw_get(const struct kvmap_api * const api, void * const ref, + const u32 len, const u8 * const ptr, struct kv * const out) +{ + const struct kref kref = {.ptr = ptr, .len = len, + .hash32 = api->hashkey ? kv_crc32c(ptr, len) : 0}; + return api->get(ref, &kref, out); +} + + inline bool +kvmap_raw_probe(const struct kvmap_api * const api, void * const ref, + const u32 len, const u8 * const ptr) +{ + const struct kref kref = {.ptr = ptr, .len = len, + .hash32 = api->hashkey ? kv_crc32c(ptr, len) : 0}; + return api->probe(ref, &kref); +} + + inline bool +kvmap_raw_del(const struct kvmap_api * const api, void * const ref, + const u32 len, const u8 * const ptr) +{ + const struct kref kref = {.ptr = ptr, .len = len, + .hash32 = api->hashkey ? kv_crc32c(ptr, len) : 0}; + return api->del(ref, &kref); +} + + inline bool +kvmap_raw_inpr(const struct kvmap_api * const api, void * const ref, + const u32 len, const u8 * const ptr, kv_inp_func uf, void * const priv) +{ + const struct kref kref = {.ptr = ptr, .len = len, + .hash32 = api->hashkey ? kv_crc32c(ptr, len) : 0}; + return api->inpr(ref, &kref, uf, priv); +} + + inline bool +kvmap_raw_inpw(const struct kvmap_api * const api, void * const ref, + const u32 len, const u8 * const ptr, kv_inp_func uf, void * const priv) +{ + const struct kref kref = {.ptr = ptr, .len = len, + .hash32 = api->hashkey ? kv_crc32c(ptr, len) : 0}; + return api->inpw(ref, &kref, uf, priv); +} + + inline void +kvmap_raw_iter_seek(const struct kvmap_api * const api, void * const iter, + const u32 len, const u8 * const ptr) +{ + const struct kref kref = {.ptr = ptr, .len = len, + .hash32 = api->hashkey ? 
kv_crc32c(ptr, len) : 0}; + api->iter_seek(iter, &kref); +} +// }}}} kvmap_raw_op + +// dump {{{ + u64 +kvmap_dump_keys(const struct kvmap_api * const api, void * const map, const int fd) +{ + void * const ref = kvmap_ref(api, map); + void * const iter = api->iter_create(ref); + api->iter_seek(iter, kref_null()); + u64 i = 0; + while (api->iter_valid(iter)) { + struct kvref kvref; + api->iter_kvref(iter, &kvref); + dprintf(fd, "%010lu [%3u] %.*s [%u]\n", i, kvref.hdr.klen, kvref.hdr.klen, kvref.kptr, kvref.hdr.vlen); + i++; + api->iter_skip1(iter); + } + api->iter_destroy(iter); + kvmap_unref(api, ref); + return i; +} +// }}} dump + +// kv64 {{{ +struct kv64 { // internal only + struct kv kv; + u64 key_be; // must be in big endian + u64 value; +}; + + inline bool +kvmap_kv64_get(const struct kvmap_api * const api, void * const ref, + const u64 key, u64 * const out) +{ + struct kv64 keybuf, kvout; + struct kref kref; + keybuf.key_be = __builtin_bswap64(key); + kref_ref_hash32(&kref, keybuf.kv.kv, sizeof(keybuf.key_be)); + struct kv * const ret = api->get(ref, &kref, &kvout.kv); + if (ret) { + *out = kvout.value; + return true; + } else { + return false; + } +} + + inline bool +kvmap_kv64_probe(const struct kvmap_api * const api, void * const ref, + const u64 key) +{ + struct kv64 keybuf; + struct kref kref; + keybuf.key_be = __builtin_bswap64(key); + kref_ref_hash32(&kref, keybuf.kv.kv, sizeof(keybuf.key_be)); + return api->probe(ref, &kref); +} + + inline bool +kvmap_kv64_put(const struct kvmap_api * const api, void * const ref, + const u64 key, const u64 value) +{ + struct kv64 kv; + kv.key_be = __builtin_bswap64(key); + kv.value = value; + kv.kv.klen = sizeof(key); + kv.kv.vlen = sizeof(value); + if (api->hashkey) + kv_update_hash(&kv.kv); + + return api->put(ref, &kv.kv); +} + + inline bool +kvmap_kv64_del(const struct kvmap_api * const api, void * const ref, + const u64 key) +{ + struct kv64 keybuf; + struct kref kref; + keybuf.key_be = __builtin_bswap64(key); 
+ kref_ref_hash32(&kref, keybuf.kv.kv, sizeof(keybuf.key_be)); + return api->del(ref, &kref); +} + + inline void +kvmap_kv64_iter_seek(const struct kvmap_api * const api, void * const iter, + const u64 key) +{ + struct kv64 keybuf; + struct kref kref; + keybuf.key_be = __builtin_bswap64(key); + kref_ref_hash32(&kref, keybuf.kv.kv, sizeof(keybuf.key_be)); + api->iter_seek(iter, &kref); +} + + inline bool +kvmap_kv64_iter_peek(const struct kvmap_api * const api, void * const iter, + u64 * const key_out, u64 * const value_out) +{ + struct kv64 kvout; + struct kv * const ret = api->iter_peek(iter, &kvout.kv); + if (key_out) + *key_out = __builtin_bswap64(kvout.key_be); // to LE + if (value_out) + *value_out = kvout.value; + return ret != NULL; +} +// }}} kv64 + +// }}} kvmap + +// vim:fdm=marker diff --git a/run/MassTrie-beta/wormhole/kv.h b/run/MassTrie-beta/wormhole/kv.h new file mode 100644 index 00000000..1e251e58 --- /dev/null +++ b/run/MassTrie-beta/wormhole/kv.h @@ -0,0 +1,554 @@ +/* + * Copyright (c) 2016--2021 Wu, Xingbo + * + * All rights reserved. No warranty, explicit or implicit, provided. 
+ */ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +// crc32c {{{ +#define KV_CRC32C_SEED ((0xDEADBEEFu)) + + extern u32 +kv_crc32c(const void * const ptr, u32 len); + + extern u64 +kv_crc32c_extend(const u32 crc32c); +// }}} crc32c + +// kv {{{ + +// struct {{{ +/* + * Some internal union names can be ignored: + * struct kv { + * u32 klen; + * u32 vlen; + * u64 hash; + * u8 kv[]; + * }; + */ +struct kv { + union { // the first u64 + u64 kvlen; + struct { + u32 klen; + union { u32 vlen; u32 refcnt; }; + }; + }; + union { + u64 hash; // hashvalue of the key + u64 priv; // can hide a value here if hash is not used + void * privptr; + struct { u32 hashlo; u32 hashhi; }; // little endian + struct { u32 privlo; u32 privhi; }; + }; + u8 kv[0]; // len(kv) == klen + vlen +} __attribute__((packed)); + +struct kref { + u32 len; + union { u32 hash32; u32 priv; }; + const u8 * ptr; +} __attribute__((packed)); + +struct kvref { + const u8 * kptr; // read-only + const u8 * vptr; // read-only + struct kv hdr; // hdr.kv[] is invalid +}; +// }}} struct + +// kv {{{ +typedef int (*kv_kv_cmp_func)(const struct kv *, const struct kv *); + + extern size_t +kv_size(const struct kv * const kv); + + extern size_t +kv_size_align(const struct kv * const kv, const u64 align); + + extern size_t +key_size(const struct kv * const key); + + extern size_t +key_size_align(const struct kv * const key, const u64 align); + + extern void +kv_update_hash(struct kv * const kv); + + extern void +kv_refill_value(struct kv * const kv, const void * const value, const u32 vlen); + + extern void +kv_refill(struct kv * const kv, const void * const key, const u32 klen, + const void * const value, const u32 vlen); + + extern void +kv_refill_str(struct kv * const kv, const char * const key, + const void * const value, const u32 vlen); + + extern void +kv_refill_str_str(struct kv * const kv, const char * const key, + const char * const value); + +// the u64 key is filled in big-endian byte order + 
extern void +kv_refill_u64(struct kv * const kv, const u64 key, const void * const value, const u32 vlen); + + extern void +kv_refill_hex32(struct kv * const kv, const u32 hex, const void * const value, const u32 vlen); + + extern void +kv_refill_hex64(struct kv * const kv, const u64 hex, const void * const value, const u32 vlen); + + extern void +kv_refill_hex64_klen(struct kv * const kv, const u64 hex, const u32 klen, + const void * const value, const u32 vlen); + + extern void +kv_refill_kref(struct kv * const kv, const struct kref * const kref); + + extern void +kv_refill_kref_v(struct kv * const kv, const struct kref * const kref, + const void * const value, const u32 vlen); + + extern struct kref +kv_kref(const struct kv * const key); + + extern struct kv * +kv_create(const void * const key, const u32 klen, const void * const value, const u32 vlen); + + extern struct kv * +kv_create_str(const char * const key, const void * const value, const u32 vlen); + + extern struct kv * +kv_create_str_str(const char * const key, const char * const value); + + extern struct kv * +kv_create_kref(const struct kref * const kref, const void * const value, const u32 vlen); + +// a static kv with klen == 0 + extern const struct kv * +kv_null(void); + + extern struct kv * +kv_dup(const struct kv * const kv); + + extern struct kv * +kv_dup_key(const struct kv * const kv); + + extern struct kv * +kv_dup2(const struct kv * const from, struct kv * const to); + + extern struct kv * +kv_dup2_key(const struct kv * const from, struct kv * const to); + + extern struct kv * +kv_dup2_key_prefix(const struct kv * const from, struct kv * const to, const u32 plen); + + extern bool +kv_match(const struct kv * const key1, const struct kv * const key2); + + extern bool +kv_match_hash(const struct kv * const key1, const struct kv * const key2); + + extern bool +kv_match_full(const struct kv * const kv1, const struct kv * const kv2); + + extern bool +kv_match_kv128(const struct kv * const sk, 
const u8 * const kv128); + + extern int +kv_compare(const struct kv * const kv1, const struct kv * const kv2); + + extern int +kv_k128_compare(const struct kv * const sk, const u8 * const k128); + + extern int +kv_kv128_compare(const struct kv * const sk, const u8 * const kv128); + + extern void +kv_qsort(struct kv ** const kvs, const size_t nr); + + extern u32 +kv_key_lcp(const struct kv * const key1, const struct kv * const key2); + + extern u32 +kv_key_lcp_skip(const struct kv * const key1, const struct kv * const key2, const u32 lcp0); + + extern void +kv_psort(struct kv ** const kvs, const u64 nr, const u64 tlo, const u64 thi); + + extern void * +kv_vptr(struct kv * const kv); + + extern void * +kv_kptr(struct kv * const kv); + + extern const void * +kv_vptr_c(const struct kv * const kv); + + extern const void * +kv_kptr_c(const struct kv * const kv); + + extern void +kv_print(const struct kv * const kv, const char * const cmd, FILE * const out); +// }}} kv + +// mm {{{ +typedef struct kv * (* kvmap_mm_in_func)(struct kv * kv, void * priv); +typedef struct kv * (* kvmap_mm_out_func)(struct kv * kv, struct kv * out); +typedef void (* kvmap_mm_free_func)(struct kv * kv, void * priv); + +// manage internal kv data of kvmap +struct kvmap_mm { + // to create a private copy of "kv" + // see put() functions + kvmap_mm_in_func in; + // to duplicate a private copy of "kv" to "out" + // see get() and iter_peek() functions + kvmap_mm_out_func out; + // to free a kv + // see del() and put() functions + kvmap_mm_free_func free; + void * priv; +}; + + extern struct kv * +kvmap_mm_in_noop(struct kv * const kv, void * const priv); + + extern struct kv * +kvmap_mm_out_noop(struct kv * const kv, struct kv * const out); + + extern void +kvmap_mm_free_noop(struct kv * const kv, void * const priv); + + extern struct kv * +kvmap_mm_in_dup(struct kv * const kv, void * const priv); + + extern struct kv * +kvmap_mm_out_dup(struct kv * const kv, struct kv * const out); + + extern void 
+kvmap_mm_free_free(struct kv * const kv, void * const priv); + +// the default mm +extern const struct kvmap_mm kvmap_mm_dup; // in:Dup, out:Dup, free:Free +extern const struct kvmap_mm kvmap_mm_ndf; // in:Noop, out:Dup, free:Free +// }}} mm + +// ref {{{ +typedef int (*kref_kv_cmp_func)(const struct kref *, const struct kv *); + +// ptr and len only + extern void +kref_ref_raw(struct kref * const kref, const u8 * const ptr, const u32 len); + +// this calculates hash32 + extern void +kref_ref_hash32(struct kref * const kref, const u8 * const ptr, const u32 len); + + extern void +kref_update_hash32(struct kref * const kref); + + extern void +kref_ref_kv(struct kref * const kref, const struct kv * const kv); + + extern void +kref_ref_kv_hash32(struct kref * const kref, const struct kv * const kv); + + extern bool +kref_match(const struct kref * const k1, const struct kref * const k2); + + extern bool +kref_kv_match(const struct kref * const kref, const struct kv * const k); + + extern int +kref_compare(const struct kref * const kref1, const struct kref * const kref2); + + extern int +kref_kv_compare(const struct kref * const kref, const struct kv * const k); + + extern u32 +kref_lcp(const struct kref * const k1, const struct kref * const k2); + + extern u32 +kref_kv_lcp(const struct kref * const kref, const struct kv * const kv); + + extern int +kref_k128_compare(const struct kref * const sk, const u8 * const k128); + + extern int +kref_kv128_compare(const struct kref * const sk, const u8 * const kv128); + + extern const struct kref * +kref_null(void); + + extern void +kvref_ref_kv(struct kvref * const ref, struct kv * const kv); + + extern struct kv * +kvref_dup2_kv(struct kvref * const ref, struct kv * const to); + + extern struct kv * +kvref_dup2_key(struct kvref * const ref, struct kv * const to); + + extern int +kvref_kv_compare(const struct kvref * const ref, const struct kv * const kv); +// }}} ref + +// kv128 {{{ + extern size_t +kv128_estimate_kv(const 
struct kv * const kv); + + extern u8 * +kv128_encode_kv(const struct kv * const kv, u8 * const out, size_t * const pesize); + + extern struct kv * +kv128_decode_kv(const u8 * const ptr, struct kv * const out, size_t * const pesize); + + extern size_t +kv128_size(const u8 * const ptr); +// }}} kv128 + +// }}} kv + +// kvmap {{{ + +// kvmap_api {{{ +typedef void (* kv_inp_func)(struct kv * const curr, void * const priv); + +// the merge function should: +// 1: return NULL if the origin kv is not changed at all +// 2: return kv0 if updates has been applied in-place +// 3: return a different kv if the original kv must be replaced +// In an in-memory kvmap, 2==1 and no further action is needed +// In a persistent kv store with a memtable, 2 will need an insertion if kv0 is not from the memtable +typedef struct kv * (* kv_merge_func)(struct kv * const kv0, void * const priv); + +struct kvmap_api { + // feature bits + bool hashkey; // true: caller needs to provide correct hash in kv/kref + bool ordered; // true: has iter_seek + bool threadsafe; // true: support thread_safe access + bool readonly; // true: no put() and del() + bool irefsafe; // true: iter's kref/kvref can be safely accessed after iter_seek/iter_skip/iter_park + bool unique; // provide unique keys, especially for iterators + bool refpark; // ref has park() and resume() + bool async; // XXX for testing KVell + + // put (aka put/upsert): return true on success; false on error + // mm.in() controls how things move into the kvmap; the default mm make a copy with malloc() + // mm.free() controls how old kv get disposed when replaced + bool (* put) (void * const ref, struct kv * const kv); + // get: search and return a kv if found, or NULL if not + // with the default mm: malloc() if out == NULL; otherwise, use out as buffer + // with custom kvmap_mm: mm.out() controls buffer; use with caution + // caller should use the returned ptr even if out is provided + struct kv * (* get) (void * const ref, const struct 
kref * const key, struct kv * const out); + // probe: return true on found, false on not found + bool (* probe) (void * const ref, const struct kref * const key); + // del: return true on something deleted, false on not found + // mm.free() controls how old kv get disposed when replaced + bool (* del) (void * const ref, const struct kref * const key); + // inp: inplace operation if key exists; otherwise return false; uf() is always executed even with NULL key + // inpr/inpw acquires r/w locks respectively. + // Note that in inpw() you can only change the value. + bool (* inpr) (void * const ref, const struct kref * const key, kv_inp_func uf, void * const priv); + bool (* inpw) (void * const ref, const struct kref * const key, kv_inp_func uf, void * const priv); + // merge: put+callback on old/new keys; another name: read-modify-write + // return true if successfull; return false on error + bool (* merge) (void * const ref, const struct kref * const key, kv_merge_func uf, void * const priv); + // delete-range: delete all keys from start (inclusive) to end (exclusive) + u64 (* delr) (void * const ref, const struct kref * const start, const struct kref * const end); + // make everything persist; for persistent maps only + void (* sync) (void * const ref); + + // general guidelines for thread-safe iters: + // - it is assumed that the key under the cursor is locked/freezed/immutable + // - once created one must call iter_seek to make it valid + // - the ownership of ref is given to the iter so ref should not be used until iter_destroy + // - creating and use more than one iter based on a ref can cause deadlocks + void * (* iter_create) (void * const ref); + // move the cursor to the first key >= search-key; + void (* iter_seek) (void * const iter, const struct kref * const key); + // check if the cursor points to a valid key + bool (* iter_valid) (void * const iter); + // return the current key; copy to out if (out != NULL) + // mm.out() controls copy-out + struct kv * 
(* iter_peek) (void * const iter, struct kv * const out); + // similar to peek but does not copy; return false if iter is invalid + bool (* iter_kref) (void * const iter, struct kref * const kref); + // similar to iter_kref but also provide the value + bool (* iter_kvref) (void * const iter, struct kvref * const kvref); + // iter_retain makes kref or kvref of the current iter remain valid until released + // the returned opaque pointer should be provided when releasing the hold + u64 (* iter_retain) (void * const iter); + void (* iter_release) (void * const iter, const u64 opaque); + // skip one element + void (* iter_skip1) (void * const iter); + // skip nr elements + void (* iter_skip) (void * const iter, const u32 nr); + // iter_next == iter_peek + iter_skip1 + struct kv * (* iter_next) (void * const iter, struct kv * const out); + // perform inplace opeation if the current key is valid; return false if no current key + // the uf() is always executed even with NULL key + bool (* iter_inp) (void * const iter, kv_inp_func uf, void * const priv); + // invalidate the iter to release any resources or locks + // afterward, must call seek() again before accessing data + void (* iter_park) (void * const iter); + // destroy iter + void (* iter_destroy) (void * const iter); + + // misc: + // create refs for maps if required; always use use kvmap_ref() and kvmap_unref() + // if there are ref/unref functions, ref-ptr should be used as map for all kv operations + void * (* ref) (void * map); + // return the original map + void * (* unref) (void * ref); + // pause access without unref; must call resume later before access index again + void (* park) (void * ref); + // resume access of ref; must be paired with a park() + void (* resume) (void * ref); + + // UNSAFE functions: + // empty the map + void (* clean) (void * map); + // erase everything + void (* destroy) (void * map); + // for debugging + void (* fprint) (void * map, FILE * const out); +}; + +// registry +struct 
kvmap_api_reg { + int nargs; // number of arguments after name + const char * name; + const char * args_msg; // see ...helper_message + // multiple apis may share one create function + // arguments: name (e.g., "rdb"), mm (usually NULL), the remaining args + void * (*create)(const char *, const struct kvmap_mm *, char **); + const struct kvmap_api * api; +}; + +// call this function to register a kvmap_api + extern void +kvmap_api_register(const int nargs, const char * const name, const char * const args_msg, + void * (*create)(const char *, const struct kvmap_mm *, char **), const struct kvmap_api * const api); + + extern void +kvmap_api_helper_message(void); + + extern int +kvmap_api_helper(int argc, char ** const argv, const struct kvmap_mm * const mm, + const struct kvmap_api ** const api_out, void ** const map_out); +// }}} kvmap_api + +// helpers {{{ + extern void +kvmap_inp_steal_kv(struct kv * const kv, void * const priv); + + extern void * +kvmap_ref(const struct kvmap_api * const api, void * const map); + + extern void * +kvmap_unref(const struct kvmap_api * const api, void * const ref); + + extern struct kv * +kvmap_kv_get(const struct kvmap_api * const api, void * const ref, + const struct kv * const key, struct kv * const out); + + extern bool +kvmap_kv_probe(const struct kvmap_api * const api, void * const ref, + const struct kv * const key); + + extern bool +kvmap_kv_put(const struct kvmap_api * const api, void * const ref, + struct kv * const kv); + + extern bool +kvmap_kv_del(const struct kvmap_api * const api, void * const ref, + const struct kv * const key); + + extern bool +kvmap_kv_inpr(const struct kvmap_api * const api, void * const ref, + const struct kv * const key, kv_inp_func uf, void * const priv); + + extern bool +kvmap_kv_inpw(const struct kvmap_api * const api, void * const ref, + const struct kv * const key, kv_inp_func uf, void * const priv); + + extern bool +kvmap_kv_merge(const struct kvmap_api * const api, void * const ref, + 
const struct kv * const key, kv_merge_func uf, void * const priv); + + extern u64 +kvmap_kv_delr(const struct kvmap_api * const api, void * const ref, + const struct kv * const start, const struct kv * const end); + + extern void +kvmap_kv_iter_seek(const struct kvmap_api * const api, void * const iter, + const struct kv * const key); + + extern struct kv * +kvmap_raw_get(const struct kvmap_api * const api, void * const ref, + const u32 len, const u8 * const ptr, struct kv * const out); + + extern bool +kvmap_raw_probe(const struct kvmap_api * const api, void * const ref, + const u32 len, const u8 * const ptr); + + extern bool +kvmap_raw_del(const struct kvmap_api * const api, void * const ref, + const u32 len, const u8 * const ptr); + + extern bool +kvmap_raw_inpr(const struct kvmap_api * const api, void * const ref, + const u32 len, const u8 * const ptr, kv_inp_func uf, void * const priv); + + extern bool +kvmap_raw_inpw(const struct kvmap_api * const api, void * const ref, + const u32 len, const u8 * const ptr, kv_inp_func uf, void * const priv); + + extern void +kvmap_raw_iter_seek(const struct kvmap_api * const api, void * const iter, + const u32 len, const u8 * const ptr); + + extern u64 +kvmap_dump_keys(const struct kvmap_api * const api, void * const map, const int fd); + + extern bool +kvmap_kv64_get(const struct kvmap_api * const api, void * const ref, + const u64 key, u64 * const out); + + extern bool +kvmap_kv64_probe(const struct kvmap_api * const api, void * const ref, + const u64 key); + + extern bool +kvmap_kv64_put(const struct kvmap_api * const api, void * const ref, + const u64 key, const u64 value); + + extern bool +kvmap_kv64_del(const struct kvmap_api * const api, void * const ref, + const u64 key); + + extern void +kvmap_kv64_iter_seek(const struct kvmap_api * const api, void * const iter, + const u64 key); + + extern bool +kvmap_kv64_iter_peek(const struct kvmap_api * const api, void * const iter, + u64 * const key_out, u64 * const 
value_out); +// }}} helpers + +// }}} kvmap + +#ifdef __cplusplus +} +#endif +// vim:fdm=marker diff --git a/run/MassTrie-beta/wormhole/lib.c b/run/MassTrie-beta/wormhole/lib.c new file mode 100644 index 00000000..06d45f6d --- /dev/null +++ b/run/MassTrie-beta/wormhole/lib.c @@ -0,0 +1,3026 @@ +/* + * Copyright (c) 2016--2021 Wu, Xingbo + * + * All rights reserved. No warranty, explicit or implicit, provided. + */ +#define _GNU_SOURCE + +// headers {{{ +#include "lib.h" +#include "ctypes.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include // va_start + +#if defined(__linux__) +#include +#include // malloc_usable_size +#elif defined(__APPLE__) && defined(__MACH__) +#include +#include +#elif defined(__FreeBSD__) +#include +#include +#endif // OS + +#if defined(__FreeBSD__) +#include +#endif +// }}} headers + +// math {{{ + inline u64 +mhash64(const u64 v) +{ + return v * 11400714819323198485lu; +} + + inline u32 +mhash32(const u32 v) +{ + return v * 2654435761u; +} + +// From Daniel Lemire's blog (2013, lemire.me) + u64 +gcd64(u64 a, u64 b) +{ + if (a == 0) + return b; + if (b == 0) + return a; + + const u32 shift = (u32)__builtin_ctzl(a | b); + a >>= __builtin_ctzl(a); + do { + b >>= __builtin_ctzl(b); + if (a > b) { + const u64 t = b; + b = a; + a = t; + } + b = b - a; + } while (b); + return a << shift; +} +// }}} math + +// random {{{ +// Lehmer's generator is 2x faster than xorshift +/** + * D. H. Lehmer, Mathematical methods in large-scale computing units. + * Proceedings of a Second Symposium on Large Scale Digital Calculating + * Machinery; + * Annals of the Computation Laboratory, Harvard Univ. 26 (1951), pp. 141-146. + * + * P L'Ecuyer, Tables of linear congruential generators of different sizes and + * good lattice structure. Mathematics of Computation of the American + * Mathematical + * Society 68.225 (1999): 249-260. 
+ */ +struct lehmer_u64 { + union { + u128 v128; + u64 v64[2]; + }; +}; + +static __thread struct lehmer_u64 rseed_u128 = {.v64 = {4294967291, 1549556881}}; + + static inline u64 +lehmer_u64_next(struct lehmer_u64 * const s) +{ + const u64 r = s->v64[1]; + s->v128 *= 0xda942042e4dd58b5lu; + return r; +} + + static inline void +lehmer_u64_seed(struct lehmer_u64 * const s, const u64 seed) +{ + s->v128 = (((u128)(~seed)) << 64) | (seed | 1); + (void)lehmer_u64_next(s); +} + + inline u64 +random_u64(void) +{ + return lehmer_u64_next(&rseed_u128); +} + + inline void +srandom_u64(const u64 seed) +{ + lehmer_u64_seed(&rseed_u128, seed); +} + + inline double +random_double(void) +{ + // random between [0.0 - 1.0] + const u64 r = random_u64(); + return ((double)r) * (1.0 / ((double)(~0lu))); +} +// }}} random + +// timing {{{ + inline u64 +time_nsec(void) +{ + struct timespec ts; + // MONO_RAW is 5x to 10x slower than MONO + clock_gettime(CLOCK_MONOTONIC, &ts); + return ((u64)ts.tv_sec) * 1000000000lu + ((u64)ts.tv_nsec); +} + + inline double +time_sec(void) +{ + const u64 nsec = time_nsec(); + return ((double)nsec) * 1.0e-9; +} + + inline u64 +time_diff_nsec(const u64 last) +{ + return time_nsec() - last; +} + + inline double +time_diff_sec(const double last) +{ + return time_sec() - last; +} + +// need char str[64] + void +time_stamp(char * str, const size_t size) +{ + time_t now; + struct tm nowtm; + time(&now); + localtime_r(&now, &nowtm); + strftime(str, size, "%F %T %z", &nowtm); +} + + void +time_stamp2(char * str, const size_t size) +{ + time_t now; + struct tm nowtm; + time(&now); + localtime_r(&now, &nowtm); + strftime(str, size, "%F-%H-%M-%S%z", &nowtm); +} +// }}} timing + +// cpucache {{{ + inline void +cpu_pause(void) +{ +#if defined(__x86_64__) + _mm_pause(); +#elif defined(__aarch64__) + // nop +#endif +} + + inline void +cpu_mfence(void) +{ + atomic_thread_fence(MO_SEQ_CST); +} + +// compiler fence + inline void +cpu_cfence(void) +{ + 
atomic_thread_fence(MO_ACQ_REL); +} + + inline void +cpu_prefetch0(const void * const ptr) +{ + __builtin_prefetch(ptr, 0, 0); +} + + inline void +cpu_prefetch1(const void * const ptr) +{ + __builtin_prefetch(ptr, 0, 1); +} + + inline void +cpu_prefetch2(const void * const ptr) +{ + __builtin_prefetch(ptr, 0, 2); +} + + inline void +cpu_prefetch3(const void * const ptr) +{ + __builtin_prefetch(ptr, 0, 3); +} + + inline void +cpu_prefetchw(const void * const ptr) +{ + __builtin_prefetch(ptr, 1, 0); +} +// }}} cpucache + +// crc32c {{{ + inline u32 +crc32c_u8(const u32 crc, const u8 v) +{ +#if defined(__x86_64__) + return _mm_crc32_u8(crc, v); +#elif defined(__aarch64__) + return __crc32cb(crc, v); +#endif +} + + inline u32 +crc32c_u16(const u32 crc, const u16 v) +{ +#if defined(__x86_64__) + return _mm_crc32_u16(crc, v); +#elif defined(__aarch64__) + return __crc32ch(crc, v); +#endif +} + + inline u32 +crc32c_u32(const u32 crc, const u32 v) +{ +#if defined(__x86_64__) + return _mm_crc32_u32(crc, v); +#elif defined(__aarch64__) + return __crc32cw(crc, v); +#endif +} + + inline u32 +crc32c_u64(const u32 crc, const u64 v) +{ +#if defined(__x86_64__) + return (u32)_mm_crc32_u64(crc, v); +#elif defined(__aarch64__) + return (u32)__crc32cd(crc, v); +#endif +} + + inline u32 +crc32c_inc_123(const u8 * buf, u32 nr, u32 crc) +{ + if (nr == 1) + return crc32c_u8(crc, buf[0]); + + crc = crc32c_u16(crc, *(u16 *)buf); + return (nr == 2) ? crc : crc32c_u8(crc, buf[2]); +} + + inline u32 +crc32c_inc_x4(const u8 * buf, u32 nr, u32 crc) +{ + //debug_assert((nr & 3) == 0); + const u32 nr8 = nr >> 3; +#pragma nounroll + for (u32 i = 0; i < nr8; i++) + crc = crc32c_u64(crc, ((u64*)buf)[i]); + + if (nr & 4u) + crc = crc32c_u32(crc, ((u32*)buf)[nr8<<1]); + return crc; +} + + u32 +crc32c_inc(const u8 * buf, u32 nr, u32 crc) +{ + crc = crc32c_inc_x4(buf, nr, crc); + const u32 nr123 = nr & 3u; + return nr123 ? 
crc32c_inc_123(buf + nr - nr123, nr123, crc) : crc; +} +// }}} crc32c + +// debug {{{ + void +debug_break(void) +{ + usleep(100); +} + +static u64 * debug_watch_u64 = NULL; + + static void +watch_u64_handler(const int sig) +{ + (void)sig; + const u64 v = debug_watch_u64 ? (*debug_watch_u64) : 0; + fprintf(stderr, "[USR1] %lu (0x%lx)\n", v, v); +} + + void +watch_u64_usr1(u64 * const ptr) +{ + debug_watch_u64 = ptr; + struct sigaction sa = {}; + sa.sa_handler = watch_u64_handler; + sigemptyset(&(sa.sa_mask)); + sa.sa_flags = SA_RESTART; + if (sigaction(SIGUSR1, &sa, NULL) == -1) { + fprintf(stderr, "Failed to set signal handler for SIGUSR1\n"); + } else { + fprintf(stderr, "to watch> kill -s SIGUSR1 %d\n", getpid()); + } +} + +static void * debug_bt_state = NULL; +#if defined(BACKTRACE) && defined(__linux__) +// TODO: get exec path on MacOS and FreeBSD + +#include +static char debug_filepath[1024] = {}; + + static void +debug_bt_error_cb(void * const data, const char * const msg, const int errnum) +{ + (void)data; + if (msg) + dprintf(2, "libbacktrace: %s %s\n", msg, strerror(errnum)); +} + + static int +debug_bt_print_cb(void * const data, const uintptr_t pc, + const char * const file, const int lineno, const char * const func) +{ + u32 * const plevel = (typeof(plevel))data; + if (file || func || lineno) { + dprintf(2, "[%u]0x%012lx " TERMCLR(35) "%s" TERMCLR(31) ":" TERMCLR(34) "%d" TERMCLR(0)" %s\n", + *plevel, pc, file ? file : "???", lineno, func ? 
func : "???"); + } else if (pc) { + dprintf(2, "[%u]0x%012lx ??\n", *plevel, pc); + } + (*plevel)++; + return 0; +} + +__attribute__((constructor)) + static void +debug_backtrace_init(void) +{ + const ssize_t len = readlink("/proc/self/exe", debug_filepath, 1023); + // disable backtrace + if (len < 0 || len >= 1023) + return; + + debug_filepath[len] = '\0'; + debug_bt_state = backtrace_create_state(debug_filepath, 1, debug_bt_error_cb, NULL); +} +#endif // BACKTRACE + + static void +debug_wait_gdb(void * const bt_state) +{ + if (bt_state) { +#if defined(BACKTRACE) + dprintf(2, "Backtrace :\n"); + u32 level = 0; + backtrace_full(debug_bt_state, 1, debug_bt_print_cb, debug_bt_error_cb, &level); +#endif // BACKTRACE + } else { // fallback to execinfo if no backtrace or initialization failed + void *array[64]; + const int size = backtrace(array, 64); + dprintf(2, "Backtrace (%d):\n", size - 1); + backtrace_symbols_fd(array + 1, size - 1, 2); + } + + abool v = true; + char timestamp[32]; + time_stamp(timestamp, 32); + char threadname[32] = {}; + thread_get_name(pthread_self(), threadname, 32); + strcat(threadname, "(!!)"); + thread_set_name(pthread_self(), threadname); + char hostname[32]; + gethostname(hostname, 32); + + const char * const pattern = "[Waiting GDB] %1$s %2$s @ %3$s\n" + " Attach me: " TERMCLR(31) "sudo -Hi gdb -p %4$d" TERMCLR(0) "\n"; + char buf[256]; + sprintf(buf, pattern, timestamp, threadname, hostname, getpid()); + write(2, buf, strlen(buf)); + + // to continue: gdb> set var v = 0 + // to kill from shell: $ kill %pid; kill -CONT %pid + + // uncomment this line to surrender the shell on error + // kill(getpid(), SIGSTOP); // stop burning cpu, once + + static au32 nr_waiting = 0; + const u32 seq = atomic_fetch_add_explicit(&nr_waiting, 1, MO_RELAXED); + if (seq == 0) { + sprintf(buf, "/run/user/%u/.debug_wait_gdb_pid", getuid()); + const int pidfd = open(buf, O_CREAT|O_TRUNC|O_WRONLY, 00644); + if (pidfd >= 0) { + dprintf(pidfd, "%u", getpid()); + 
close(pidfd); + } + } + +#pragma nounroll + while (atomic_load_explicit(&v, MO_CONSUME)) + sleep(1); +} + +#ifndef NDEBUG + void +debug_assert(const bool v) +{ + if (!v) + debug_wait_gdb(debug_bt_state); +} +#endif + +__attribute__((noreturn)) + void +debug_die(void) +{ + debug_wait_gdb(debug_bt_state); + exit(0); +} + +__attribute__((noreturn)) + void +debug_die_perror(void) +{ + perror(NULL); + debug_die(); +} + +#if !defined(NOSIGNAL) +// signal handler for wait_gdb on fatal errors + static void +wait_gdb_handler(const int sig, siginfo_t * const info, void * const context) +{ + (void)info; + (void)context; + char buf[64] = "[SIGNAL] "; + strcat(buf, strsignal(sig)); + write(2, buf, strlen(buf)); + debug_wait_gdb(NULL); +} + +// setup hooks for catching fatal errors +__attribute__((constructor)) + static void +debug_init(void) +{ + void * stack = pages_alloc_4kb(16); + //fprintf(stderr, "altstack %p\n", stack); + stack_t ss = {.ss_sp = stack, .ss_flags = 0, .ss_size = PGSZ*16}; + if (sigaltstack(&ss, NULL)) + fprintf(stderr, "sigaltstack failed\n"); + + struct sigaction sa = {.sa_sigaction = wait_gdb_handler, .sa_flags = SA_SIGINFO | SA_ONSTACK}; + sigemptyset(&(sa.sa_mask)); + const int fatals[] = {SIGSEGV, SIGFPE, SIGILL, SIGBUS, 0}; + for (int i = 0; fatals[i]; i++) { + if (sigaction(fatals[i], &sa, NULL) == -1) { + fprintf(stderr, "Failed to set signal handler for %s\n", strsignal(fatals[i])); + fflush(stderr); + } + } +} + +__attribute__((destructor)) + static void +debug_exit(void) +{ + // to get rid of valgrind warnings + stack_t ss = {.ss_flags = SS_DISABLE}; + stack_t oss = {}; + sigaltstack(&ss, &oss); + if (oss.ss_sp) + pages_unmap(oss.ss_sp, PGSZ * 16); +} +#endif // !defined(NOSIGNAL) + + void +debug_dump_maps(FILE * const out) +{ + FILE * const in = fopen("/proc/self/smaps", "r"); + char * line0 = yalloc(1024); + size_t size0 = 1024; + while (!feof(in)) { + const ssize_t r1 = getline(&line0, &size0, in); + if (r1 < 0) break; + fprintf(out, "%s", 
line0); + } + free(line0); + fflush(out); + fclose(in); +} + +static pid_t perf_pid = 0; + +#if defined(__linux__) +__attribute__((constructor)) + static void +debug_perf_init(void) +{ + const pid_t ppid = getppid(); + char tmp[256] = {}; + sprintf(tmp, "/proc/%d/cmdline", ppid); + FILE * const fc = fopen(tmp, "r"); + const size_t nr = fread(tmp, 1, sizeof(tmp) - 1, fc); + fclose(fc); + // look for "perf record" + if (nr < 12) + return; + tmp[nr] = '\0'; + for (u64 i = 0; i < nr; i++) + if (tmp[i] == 0) + tmp[i] = ' '; + + char * const perf = strstr(tmp, "perf record"); + if (perf) { + fprintf(stderr, "%s: perf detected\n", __func__); + perf_pid = ppid; + } +} +#endif // __linux__ + + bool +debug_perf_switch(void) +{ + if (perf_pid > 0) { + kill(perf_pid, SIGUSR2); + return true; + } else { + return false; + } +} +// }}} debug + +// mm {{{ +#ifdef ALLOCFAIL + bool +alloc_fail(void) +{ +#define ALLOCFAIL_RECP ((64lu)) +#define ALLOCFAIL_MAGIC ((ALLOCFAIL_RECP / 3lu)) + return ((random_u64() % ALLOCFAIL_RECP) == ALLOCFAIL_MAGIC); +} + +#ifdef MALLOCFAIL +extern void * __libc_malloc(size_t size); + void * +malloc(size_t size) +{ + if (alloc_fail()) + return NULL; + return __libc_malloc(size); +} + +extern void * __libc_calloc(size_t nmemb, size_t size); + void * +calloc(size_t nmemb, size_t size) +{ + if (alloc_fail()) + return NULL; + return __libc_calloc(nmemb, size); +} + +extern void *__libc_realloc(void *ptr, size_t size); + + void * +realloc(void *ptr, size_t size) +{ + if (alloc_fail()) + return NULL; + return __libc_realloc(ptr, size); +} +#endif // MALLOC_FAIL +#endif // ALLOC_FAIL + + void * +xalloc(const size_t align, const size_t size) +{ +#ifdef ALLOCFAIL + if (alloc_fail()) + return NULL; +#endif + void * p; + return (posix_memalign(&p, align, size) == 0) ? 
p : NULL; +} + +// alloc cache-line aligned address + void * +yalloc(const size_t size) +{ +#ifdef ALLOCFAIL + if (alloc_fail()) + return NULL; +#endif + void * p; + return (posix_memalign(&p, 64, size) == 0) ? p : NULL; +} + + void ** +malloc_2d(const size_t nr, const size_t size) +{ + const size_t size1 = nr * sizeof(void *); + const size_t size2 = nr * size; + void ** const mem = malloc(size1 + size2); + u8 * const mem2 = ((u8 *)mem) + size1; + for (size_t i = 0; i < nr; i++) + mem[i] = mem2 + (i * size); + return mem; +} + + inline void ** +calloc_2d(const size_t nr, const size_t size) +{ + void ** const ret = malloc_2d(nr, size); + memset(ret[0], 0, nr * size); + return ret; +} + + inline void +pages_unmap(void * const ptr, const size_t size) +{ +#ifndef HEAPCHECKING + munmap(ptr, size); +#else + (void)size; + free(ptr); +#endif +} + + void +pages_lock(void * const ptr, const size_t size) +{ + static bool use_mlock = true; + if (use_mlock) { + const int ret = mlock(ptr, size); + if (ret != 0) { + use_mlock = false; + fprintf(stderr, "%s: mlock disabled\n", __func__); + } + } +} + +#ifndef HEAPCHECKING + static void * +pages_do_alloc(const size_t size, const int flags) +{ + // vi /etc/security/limits.conf + // * - memlock unlimited + void * const p = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0); + if (p == MAP_FAILED) + return NULL; + + pages_lock(p, size); + return p; +} + +#if defined(__linux__) && defined(MAP_HUGETLB) + +#if defined(MAP_HUGE_SHIFT) +#define PAGES_FLAGS_1G ((MAP_HUGETLB | (30 << MAP_HUGE_SHIFT))) +#define PAGES_FLAGS_2M ((MAP_HUGETLB | (21 << MAP_HUGE_SHIFT))) +#else // MAP_HUGE_SHIFT +#define PAGES_FLAGS_1G ((MAP_HUGETLB)) +#define PAGES_FLAGS_2M ((MAP_HUGETLB)) +#endif // MAP_HUGE_SHIFT + +#else +#define PAGES_FLAGS_1G ((0)) +#define PAGES_FLAGS_2M ((0)) +#endif // __linux__ + +#endif // HEAPCHECKING + + inline void * +pages_alloc_1gb(const size_t nr_1gb) +{ + const u64 sz = nr_1gb << 30; +#ifndef HEAPCHECKING + return 
pages_do_alloc(sz, MAP_PRIVATE | MAP_ANONYMOUS | PAGES_FLAGS_1G); +#else + void * const p = xalloc(1lu << 21, sz); // Warning: valgrind fails with 30 + if (p) + memset(p, 0, sz); + return p; +#endif +} + + inline void * +pages_alloc_2mb(const size_t nr_2mb) +{ + const u64 sz = nr_2mb << 21; +#ifndef HEAPCHECKING + return pages_do_alloc(sz, MAP_PRIVATE | MAP_ANONYMOUS | PAGES_FLAGS_2M); +#else + void * const p = xalloc(1lu << 21, sz); + if (p) + memset(p, 0, sz); + return p; +#endif +} + + inline void * +pages_alloc_4kb(const size_t nr_4kb) +{ + const size_t sz = nr_4kb << 12; +#ifndef HEAPCHECKING + return pages_do_alloc(sz, MAP_PRIVATE | MAP_ANONYMOUS); +#else + void * const p = xalloc(1lu << 12, sz); + if (p) + memset(p, 0, sz); + return p; +#endif +} + + void * +pages_alloc_best(const size_t size, const bool try_1gb, u64 * const size_out) +{ +#ifdef ALLOCFAIL + if (alloc_fail()) + return NULL; +#endif + // 1gb huge page: at least 0.25GB + if (try_1gb) { + if (size >= (1lu << 28)) { + const size_t nr_1gb = bits_round_up(size, 30) >> 30; + void * const p1 = pages_alloc_1gb(nr_1gb); + if (p1) { + *size_out = nr_1gb << 30; + return p1; + } + } + } + + // 2mb huge page: at least 0.5MB + if (size >= (1lu << 19)) { + const size_t nr_2mb = bits_round_up(size, 21) >> 21; + void * const p2 = pages_alloc_2mb(nr_2mb); + if (p2) { + *size_out = nr_2mb << 21; + return p2; + } + } + + const size_t nr_4kb = bits_round_up(size, 12) >> 12; + void * const p3 = pages_alloc_4kb(nr_4kb); + if (p3) + *size_out = nr_4kb << 12; + return p3; +} +// }}} mm + +// process/thread {{{ +static u32 process_ncpu; +#if defined(__FreeBSD__) +typedef cpuset_t cpu_set_t; +#elif defined(__APPLE__) && defined(__MACH__) +typedef u64 cpu_set_t; +#define CPU_SETSIZE ((64)) +#define CPU_COUNT(__cpu_ptr__) (__builtin_popcountl(*__cpu_ptr__)) +#define CPU_ISSET(__cpu_idx__, __cpu_ptr__) (((*__cpu_ptr__) >> __cpu_idx__) & 1lu) +#define CPU_ZERO(__cpu_ptr__) ((*__cpu_ptr__) = 0) +#define CPU_SET(__cpu_idx__, 
__cpu_ptr__) ((*__cpu_ptr__) |= (1lu << __cpu_idx__)) +#define CPU_CLR(__cpu_idx__, __cpu_ptr__) ((*__cpu_ptr__) &= ~(1lu << __cpu_idx__)) +#define pthread_attr_setaffinity_np(...) ((void)0) +#endif + +__attribute__((constructor)) + static void +process_init(void) +{ + // Linux's default is 1024 cpus + process_ncpu = (u32)sysconf(_SC_NPROCESSORS_CONF); + if (process_ncpu > CPU_SETSIZE) { + fprintf(stderr, "%s: can use only %zu cores\n", + __func__, (size_t)CPU_SETSIZE); + process_ncpu = CPU_SETSIZE; + } + thread_set_name(pthread_self(), "main"); +} + + static inline int +thread_getaffinity_set(cpu_set_t * const cpuset) +{ +#if defined(__linux__) + return sched_getaffinity(0, sizeof(*cpuset), cpuset); +#elif defined(__FreeBSD__) + return cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, sizeof(*cpuset), cpuset); +#elif defined(__APPLE__) && defined(__MACH__) + *cpuset = (1lu << process_ncpu) - 1; + return (int)process_ncpu; // TODO +#endif // OS +} + + static inline int +thread_setaffinity_set(const cpu_set_t * const cpuset) +{ +#if defined(__linux__) + return sched_setaffinity(0, sizeof(*cpuset), cpuset); +#elif defined(__FreeBSD__) + return cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, sizeof(*cpuset), cpuset); +#elif defined(__APPLE__) && defined(__MACH__) + (void)cpuset; // TODO + return 0; +#endif // OS +} + + void +thread_get_name(const pthread_t pt, char * const name, const size_t len) +{ +#if defined(__linux__) + pthread_getname_np(pt, name, len); +#elif defined(__FreeBSD__) + pthread_get_name_np(pt, name, len); +#elif defined(__APPLE__) && defined(__MACH__) + (void)pt; + (void)len; + strcpy(name, "unknown"); // TODO +#endif // OS +} + + void +thread_set_name(const pthread_t pt, const char * const name) +{ +#if defined(__linux__) + pthread_setname_np(pt, name); +#elif defined(__FreeBSD__) + pthread_set_name_np(pt, name); +#elif defined(__APPLE__) && defined(__MACH__) + (void)pt; + (void)name; // TODO +#endif // OS +} + +// kB + long 
+process_get_rss(void) +{ + struct rusage rs; + getrusage(RUSAGE_SELF, &rs); + return rs.ru_maxrss; +} + + u32 +process_affinity_count(void) +{ + cpu_set_t set; + if (thread_getaffinity_set(&set) != 0) + return process_ncpu; + + const u32 nr = (u32)CPU_COUNT(&set); + return nr ? nr : process_ncpu; +} + + u32 +process_getaffinity_list(const u32 max, u32 * const cores) +{ + memset(cores, 0, max * sizeof(cores[0])); + cpu_set_t set; + if (thread_getaffinity_set(&set) != 0) + return 0; + + const u32 nr_affinity = (u32)CPU_COUNT(&set); + const u32 nr = nr_affinity < max ? nr_affinity : max; + u32 j = 0; + for (u32 i = 0; i < process_ncpu; i++) { + if (CPU_ISSET(i, &set)) + cores[j++] = i; + + if (j >= nr) + break; + } + return j; +} + + void +thread_setaffinity_list(const u32 nr, const u32 * const list) +{ + cpu_set_t set; + CPU_ZERO(&set); + for (u32 i = 0; i < nr; i++) + if (list[i] < process_ncpu) + CPU_SET(list[i], &set); + thread_setaffinity_set(&set); +} + + void +thread_pin(const u32 cpu) +{ + cpu_set_t set; + CPU_ZERO(&set); + CPU_SET(cpu % process_ncpu, &set); + thread_setaffinity_set(&set); +} + + u64 +process_cpu_time_usec(void) +{ + struct rusage rs; + getrusage(RUSAGE_SELF, &rs); + const u64 usr = (((u64)rs.ru_utime.tv_sec) * 1000000lu) + ((u64)rs.ru_utime.tv_usec); + const u64 sys = (((u64)rs.ru_stime.tv_sec) * 1000000lu) + ((u64)rs.ru_stime.tv_usec); + return usr + sys; +} + +struct fork_join_info { + u32 total; + u32 ncores; + u32 * cores; + void *(*func)(void *); + bool args; + union { + void * arg1; + void ** argn; + }; + union { + struct { au32 ferr, jerr; }; + au64 xerr; + }; +}; + +// DON'T CHANGE! 
+#define FORK_JOIN_RANK_BITS ((16)) // 16 +#define FORK_JOIN_MAX ((1u << FORK_JOIN_RANK_BITS)) + +/* + * fj(6): T0 + * / \ + * T0 T4 + * / \ / + * T0 T2 T4 + * / \ / \ / \ + * t0 t1 t2 t3 t4 t5 + */ + +// recursive tree fork-join + static void * +thread_do_fork_join_worker(void * const ptr) +{ + struct entry13 fjp = {.ptr = ptr}; + // GCC: Without explicitly casting from fjp.fji (a 45-bit u64 value), + // the high bits will get truncated, which is always CORRECT in gcc. + // Don't use gcc. + struct fork_join_info * const fji = u64_to_ptr(fjp.e3); + const u32 rank = (u32)fjp.e1; + + const u32 nchild = (u32)__builtin_ctz(rank ? rank : bits_p2_up_u32(fji->total)); + debug_assert(nchild <= FORK_JOIN_RANK_BITS); + pthread_t tids[FORK_JOIN_RANK_BITS]; + if (nchild) { + cpu_set_t set; + CPU_ZERO(&set); + pthread_attr_t attr; + pthread_attr_init(&attr); + //pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); // Joinable by default + // fork top-down + for (u32 i = nchild - 1; i < nchild; i--) { + const u32 cr = rank + (1u << i); // child's rank + if (cr >= fji->total) + continue; // should not break + const u32 core = fji->cores[(cr < fji->ncores) ? cr : (cr % fji->ncores)]; + CPU_SET(core, &set); + pthread_attr_setaffinity_np(&attr, sizeof(set), &set); + fjp.e1 = (u16)cr; + const int r = pthread_create(&tids[i], &attr, thread_do_fork_join_worker, fjp.ptr); + CPU_CLR(core, &set); + if (unlikely(r)) { // fork failed + memset(&tids[0], 0, sizeof(tids[0]) * (i+1)); + u32 nmiss = (1u << (i + 1)) - 1; + if ((rank + nmiss) >= fji->total) + nmiss = fji->total - 1 - rank; + (void)atomic_fetch_add_explicit(&fji->ferr, nmiss, MO_RELAXED); + break; + } + } + pthread_attr_destroy(&attr); + } + + char thname0[16]; + char thname1[16]; + thread_get_name(pthread_self(), thname0, 16); + snprintf(thname1, 16, "%.8s_%u", thname0, rank); + thread_set_name(pthread_self(), thname1); + + void * const ret = fji->func(fji->args ? 
fji->argn[rank] : fji->arg1); + + thread_set_name(pthread_self(), thname0); + // join bottom-up + for (u32 i = 0; i < nchild; i++) { + const u32 cr = rank + (1u << i); // child rank + if (cr >= fji->total) + break; // safe to break + if (tids[i]) { + const int r = pthread_join(tids[i], NULL); + if (unlikely(r)) { // error + //fprintf(stderr, "pthread_join %u..%u = %d: %s\n", rank, cr, r, strerror(r)); + (void)atomic_fetch_add_explicit(&fji->jerr, 1, MO_RELAXED); + } + } + } + return ret; +} + + u64 +thread_fork_join(u32 nr, void *(*func) (void *), const bool args, void * const argx) +{ + if (unlikely(nr > FORK_JOIN_MAX)) { + fprintf(stderr, "%s reduce nr to %u\n", __func__, FORK_JOIN_MAX); + nr = FORK_JOIN_MAX; + } + + u32 cores[CPU_SETSIZE]; + u32 ncores = process_getaffinity_list(process_ncpu, cores); + if (unlikely(ncores == 0)) { // force to use all cores + ncores = process_ncpu; + for (u32 i = 0; i < process_ncpu; i++) + cores[i] = i; + } + if (unlikely(nr == 0)) + nr = ncores; + + // the compiler does not know fji can change since we cast &fji into fjp + struct fork_join_info fji = {.total = nr, .cores = cores, .ncores = ncores, + .func = func, .args = args, .arg1 = argx}; + const struct entry13 fjp = entry13(0, (u64)(&fji)); + + // save current affinity + cpu_set_t set0; + thread_getaffinity_set(&set0); + + // master thread shares thread0's core + cpu_set_t set; + CPU_ZERO(&set); + CPU_SET(fji.cores[0], &set); + thread_setaffinity_set(&set); + + const u64 t0 = time_nsec(); + (void)thread_do_fork_join_worker(fjp.ptr); + const u64 dt = time_diff_nsec(t0); + + // restore original affinity + thread_setaffinity_set(&set0); + + // check and report errors (unlikely) + if (atomic_load_explicit(&fji.xerr, MO_CONSUME)) + fprintf(stderr, "%s errors: fork %u join %u\n", __func__, fji.ferr, fji.jerr); + return dt; +} + + int +thread_create_at(const u32 cpu, pthread_t * const thread, + void *(*start_routine) (void *), void * const arg) +{ + const u32 cpu_id = (cpu < 
process_ncpu) ? cpu : (cpu % process_ncpu); + pthread_attr_t attr; + pthread_attr_init(&attr); + //pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); + cpu_set_t set; + + CPU_ZERO(&set); + CPU_SET(cpu_id, &set); + pthread_attr_setaffinity_np(&attr, sizeof(set), &set); + const int r = pthread_create(thread, &attr, start_routine, arg); + pthread_attr_destroy(&attr); + return r; +} +// }}} process/thread + +// locking {{{ + +// spinlock {{{ +#if defined(__linux__) +#define SPINLOCK_PTHREAD +#endif // __linux__ + +#if defined(SPINLOCK_PTHREAD) +static_assert(sizeof(pthread_spinlock_t) <= sizeof(spinlock), "spinlock size"); +#else // SPINLOCK_PTHREAD +static_assert(sizeof(au32) <= sizeof(spinlock), "spinlock size"); +#endif // SPINLOCK_PTHREAD + + void +spinlock_init(spinlock * const lock) +{ +#if defined(SPINLOCK_PTHREAD) + pthread_spinlock_t * const p = (typeof(p))lock; + pthread_spin_init(p, PTHREAD_PROCESS_PRIVATE); +#else // SPINLOCK_PTHREAD + au32 * const p = (typeof(p))lock; + atomic_store_explicit(p, 0, MO_RELEASE); +#endif // SPINLOCK_PTHREAD +} + + inline void +spinlock_lock(spinlock * const lock) +{ +#if defined(CORR) +#pragma nounroll + while (!spinlock_trylock(lock)) + corr_yield(); +#else // CORR +#if defined(SPINLOCK_PTHREAD) + pthread_spinlock_t * const p = (typeof(p))lock; + pthread_spin_lock(p); // return value ignored +#else // SPINLOCK_PTHREAD + au32 * const p = (typeof(p))lock; +#pragma nounroll + do { + if (atomic_fetch_sub_explicit(p, 1, MO_ACQUIRE) == 0) + return; +#pragma nounroll + do { + cpu_pause(); + } while (atomic_load_explicit(p, MO_CONSUME)); + } while (true); +#endif // SPINLOCK_PTHREAD +#endif // CORR +} + + inline bool +spinlock_trylock(spinlock * const lock) +{ +#if defined(SPINLOCK_PTHREAD) + pthread_spinlock_t * const p = (typeof(p))lock; + return !pthread_spin_trylock(p); +#else // SPINLOCK_PTHREAD + au32 * const p = (typeof(p))lock; + return atomic_fetch_sub_explicit(p, 1, MO_ACQUIRE) == 0; +#endif // SPINLOCK_PTHREAD 
+} + + inline void +spinlock_unlock(spinlock * const lock) +{ +#if defined(SPINLOCK_PTHREAD) + pthread_spinlock_t * const p = (typeof(p))lock; + pthread_spin_unlock(p); // return value ignored +#else // SPINLOCK_PTHREAD + au32 * const p = (typeof(p))lock; + atomic_store_explicit(p, 0, MO_RELEASE); +#endif // SPINLOCK_PTHREAD +} +// }}} spinlock + +// pthread mutex {{{ +static_assert(sizeof(pthread_mutex_t) <= sizeof(mutex), "mutexlock size"); + inline void +mutex_init(mutex * const lock) +{ + pthread_mutex_t * const p = (typeof(p))lock; + pthread_mutex_init(p, NULL); +} + + inline void +mutex_lock(mutex * const lock) +{ +#if defined(CORR) +#pragma nounroll + while (!mutex_trylock(lock)) + corr_yield(); +#else + pthread_mutex_t * const p = (typeof(p))lock; + pthread_mutex_lock(p); // return value ignored +#endif +} + + inline bool +mutex_trylock(mutex * const lock) +{ + pthread_mutex_t * const p = (typeof(p))lock; + return !pthread_mutex_trylock(p); // return value ignored +} + + inline void +mutex_unlock(mutex * const lock) +{ + pthread_mutex_t * const p = (typeof(p))lock; + pthread_mutex_unlock(p); // return value ignored +} + + inline void +mutex_deinit(mutex * const lock) +{ + pthread_mutex_t * const p = (typeof(p))lock; + pthread_mutex_destroy(p); +} +// }}} pthread mutex + +// rwdep {{{ +// poor man's lockdep for rwlock +// per-thread lock list +// it calls debug_die() when local double-(un)locking is detected +// cyclic dependencies can be manually identified by looking at the two lists below in gdb +#ifdef RWDEP +#define RWDEP_NR ((16)) +__thread const rwlock * rwdep_readers[RWDEP_NR] = {}; +__thread const rwlock * rwdep_writers[RWDEP_NR] = {}; + + static void +rwdep_check(const rwlock * const lock) +{ + debug_assert(lock); + for (u64 i = 0; i < RWDEP_NR; i++) { + if (rwdep_readers[i] == lock) + debug_die(); + if (rwdep_writers[i] == lock) + debug_die(); + } +} +#endif // RWDEP + + static void +rwdep_lock_read(const rwlock * const lock) +{ +#ifdef RWDEP + 
rwdep_check(lock); + for (u64 i = 0; i < RWDEP_NR; i++) { + if (rwdep_readers[i] == NULL) { + rwdep_readers[i] = lock; + return; + } + } +#else + (void)lock; +#endif // RWDEP +} + + static void +rwdep_unlock_read(const rwlock * const lock) +{ +#ifdef RWDEP + for (u64 i = 0; i < RWDEP_NR; i++) { + if (rwdep_readers[i] == lock) { + rwdep_readers[i] = NULL; + return; + } + } + debug_die(); +#else + (void)lock; +#endif // RWDEP +} + + static void +rwdep_lock_write(const rwlock * const lock) +{ +#ifdef RWDEP + rwdep_check(lock); + for (u64 i = 0; i < RWDEP_NR; i++) { + if (rwdep_writers[i] == NULL) { + rwdep_writers[i] = lock; + return; + } + } +#else + (void)lock; +#endif // RWDEP +} + + static void +rwdep_unlock_write(const rwlock * const lock) +{ +#ifdef RWDEP + for (u64 i = 0; i < RWDEP_NR; i++) { + if (rwdep_writers[i] == lock) { + rwdep_writers[i] = NULL; + return; + } + } + debug_die(); +#else + (void)lock; +#endif // RWDEP +} +// }}} rwlockdep + +// rwlock {{{ +typedef au32 lock_t; +typedef u32 lock_v; +static_assert(sizeof(lock_t) == sizeof(lock_v), "lock size"); +static_assert(sizeof(lock_t) <= sizeof(rwlock), "lock size"); + +#define RWLOCK_WSHIFT ((sizeof(lock_t) * 8 - 1)) +#define RWLOCK_WBIT ((((lock_v)1) << RWLOCK_WSHIFT)) + + inline void +rwlock_init(rwlock * const lock) +{ + lock_t * const pvar = (typeof(pvar))lock; + atomic_store_explicit(pvar, 0, MO_RELEASE); +} + + inline bool +rwlock_trylock_read(rwlock * const lock) +{ + lock_t * const pvar = (typeof(pvar))lock; + if ((atomic_fetch_add_explicit(pvar, 1, MO_ACQUIRE) >> RWLOCK_WSHIFT) == 0) { + rwdep_lock_read(lock); + return true; + } else { + atomic_fetch_sub_explicit(pvar, 1, MO_RELAXED); + return false; + } +} + + inline bool +rwlock_trylock_read_lp(rwlock * const lock) +{ + lock_t * const pvar = (typeof(pvar))lock; + if (atomic_load_explicit(pvar, MO_CONSUME) >> RWLOCK_WSHIFT) { + cpu_pause(); + return false; + } + return rwlock_trylock_read(lock); +} + +// actually nr + 1 + inline bool 
+rwlock_trylock_read_nr(rwlock * const lock, u16 nr) +{ + lock_t * const pvar = (typeof(pvar))lock; + if ((atomic_fetch_add_explicit(pvar, 1, MO_ACQUIRE) >> RWLOCK_WSHIFT) == 0) { + rwdep_lock_read(lock); + return true; + } + +#pragma nounroll + do { // someone already locked; wait for a little while + cpu_pause(); + if ((atomic_load_explicit(pvar, MO_CONSUME) >> RWLOCK_WSHIFT) == 0) { + rwdep_lock_read(lock); + return true; + } + } while (nr--); + + atomic_fetch_sub_explicit(pvar, 1, MO_RELAXED); + return false; +} + + inline void +rwlock_lock_read(rwlock * const lock) +{ + lock_t * const pvar = (typeof(pvar))lock; +#pragma nounroll + do { + if (rwlock_trylock_read(lock)) + return; +#pragma nounroll + do { +#if defined(CORR) + corr_yield(); +#else + cpu_pause(); +#endif + } while (atomic_load_explicit(pvar, MO_CONSUME) >> RWLOCK_WSHIFT); + } while (true); +} + + inline void +rwlock_unlock_read(rwlock * const lock) +{ + rwdep_unlock_read(lock); + lock_t * const pvar = (typeof(pvar))lock; + atomic_fetch_sub_explicit(pvar, 1, MO_RELEASE); +} + + inline bool +rwlock_trylock_write(rwlock * const lock) +{ + lock_t * const pvar = (typeof(pvar))lock; + lock_v v0 = atomic_load_explicit(pvar, MO_CONSUME); + if ((v0 == 0) && atomic_compare_exchange_weak_explicit(pvar, &v0, RWLOCK_WBIT, MO_ACQUIRE, MO_RELAXED)) { + rwdep_lock_write(lock); + return true; + } else { + return false; + } +} + +// actually nr + 1 + inline bool +rwlock_trylock_write_nr(rwlock * const lock, u16 nr) +{ +#pragma nounroll + do { + if (rwlock_trylock_write(lock)) + return true; + cpu_pause(); + } while (nr--); + return false; +} + + inline void +rwlock_lock_write(rwlock * const lock) +{ + lock_t * const pvar = (typeof(pvar))lock; +#pragma nounroll + do { + if (rwlock_trylock_write(lock)) + return; +#pragma nounroll + do { +#if defined(CORR) + corr_yield(); +#else + cpu_pause(); +#endif + } while (atomic_load_explicit(pvar, MO_CONSUME)); + } while (true); +} + + inline bool 
+rwlock_trylock_write_hp(rwlock * const lock) +{ + lock_t * const pvar = (typeof(pvar))lock; + lock_v v0 = atomic_load_explicit(pvar, MO_CONSUME); + if (v0 >> RWLOCK_WSHIFT) + return false; + + if (atomic_compare_exchange_weak_explicit(pvar, &v0, v0|RWLOCK_WBIT, MO_ACQUIRE, MO_RELAXED)) { + rwdep_lock_write(lock); + // WBIT successfully marked; must wait for readers to leave + if (v0) { // saw active readers +#pragma nounroll + while (atomic_load_explicit(pvar, MO_CONSUME) != RWLOCK_WBIT) { +#if defined(CORR) + corr_yield(); +#else + cpu_pause(); +#endif + } + } + return true; + } else { + return false; + } +} + + inline bool +rwlock_trylock_write_hp_nr(rwlock * const lock, u16 nr) +{ +#pragma nounroll + do { + if (rwlock_trylock_write_hp(lock)) + return true; + cpu_pause(); + } while (nr--); + return false; +} + + inline void +rwlock_lock_write_hp(rwlock * const lock) +{ +#pragma nounroll + while (!rwlock_trylock_write_hp(lock)) { +#if defined(CORR) + corr_yield(); +#else + cpu_pause(); +#endif + } +} + + inline void +rwlock_unlock_write(rwlock * const lock) +{ + rwdep_unlock_write(lock); + lock_t * const pvar = (typeof(pvar))lock; + atomic_fetch_sub_explicit(pvar, RWLOCK_WBIT, MO_RELEASE); +} + + inline void +rwlock_write_to_read(rwlock * const lock) +{ + rwdep_unlock_write(lock); + rwdep_lock_read(lock); + lock_t * const pvar = (typeof(pvar))lock; + // +R -W + atomic_fetch_add_explicit(pvar, ((lock_v)1) - RWLOCK_WBIT, MO_ACQ_REL); +} + +#undef RWLOCK_WSHIFT +#undef RWLOCK_WBIT +// }}} rwlock + +// }}} locking + +// coroutine {{{ + +// asm {{{ +#if defined(__x86_64__) +// number pushes in co_switch_stack +#define CO_CONTEXT_SIZE ((6)) + +// for switch/exit: pass a return value to the target +asm ( + ".align 16;" +#if defined(__linux__) || defined(__FreeBSD__) + ".global co_switch_stack;" + ".type co_switch_stack, @function;" + "co_switch_stack:" +#elif defined(__APPLE__) && defined(__MACH__) + ".global _co_switch_stack;" + "_co_switch_stack:" +#else +#error 
Supported platforms: Linux/FreeBSD/Apple +#endif // OS + "push %rbp; push %rbx; push %r12;" + "push %r13; push %r14; push %r15;" + "mov %rsp, (%rdi);" + "mov %rsi, %rsp;" + "pop %r15; pop %r14; pop %r13;" + "pop %r12; pop %rbx; pop %rbp;" + "mov %rdx, %rax;" + "retq;" + ); + +#elif defined(__aarch64__) +// number pushes in co_switch_stack +#define CO_CONTEXT_SIZE ((20)) +asm ( + ".align 16;" +#if defined(__linux__) || defined(__FreeBSD__) + ".global co_switch_stack;" + ".type co_switch_stack, @function;" + "co_switch_stack:" +#elif defined(__APPLE__) && defined(__MACH__) + ".global _co_switch_stack;" + "_co_switch_stack:" +#else +#error supported platforms: Linux/FreeBSD/Apple +#endif // OS + "sub x8, sp, 160;" + "str x8, [x0];" + "stp x30, x19, [x8]; ldp x30, x19, [x1];" + "stp x20, x21, [x8, 16]; ldp x20, x21, [x1, 16];" + "stp x22, x23, [x8, 32]; ldp x22, x23, [x1, 32];" + "stp x24, x25, [x8, 48]; ldp x24, x25, [x1, 48];" + "stp x26, x27, [x8, 64]; ldp x26, x27, [x1, 64];" + "stp x28, x29, [x8, 80]; ldp x28, x29, [x1, 80];" + "stp d8, d9, [x8, 96]; ldp d8, d9, [x1, 96];" + "stp d10, d11, [x8, 112]; ldp d10, d11, [x1, 112];" + "stp d12, d13, [x8, 128]; ldp d12, d13, [x1, 128];" + "stp d14, d15, [x8, 144]; ldp d14, d15, [x1, 144];" + "add sp, x1, 160;" + "mov x0, x2;" + "br x30;" + ); + +extern void co_entry_aarch64(void); +asm ( + ".align 16;" +#if defined(__linux__) || defined(__FreeBSD__) + ".global co_entry_aarch64;" + ".type co_entry_aarch64, @function;" + "co_entry_aarch64:" +#elif defined(__APPLE__) && defined(__MACH__) + ".global _co_entry_aarch64;" + "_co_entry_aarch64:" +#else +#error supported platforms: Linux/FreeBSD/Apple +#endif // OS + "ldr x8, [sp, 0];" + "blr x8;" + "ldr x8, [sp, 8];" + "blr x8;" + "ldr x8, [sp, 16];" + "blr x8;" + ); +#else +#error supported CPUs: x86_64 or AArch64 +#endif // co_switch_stack x86_64 and aarch64 +// }}} asm + +// co {{{ +struct co { + u64 rsp; + void * priv; + u64 * host; // set host to NULL to exit + size_t stksz; 
+}; + +// not atomic: no concurrent access +// volatile: avoid caching of co_curr +static __thread struct co * volatile co_curr = NULL; // NULL in host + +// the stack sits under the struct co + static void +co_init(struct co * const co, void * func, void * priv, u64 * const host, + const u64 stksz, void * func_exit) +{ + debug_assert((stksz & 0x3f) == 0); // a multiple of 64 bytes + u64 * rsp = ((u64 *)co) - 4; + rsp[0] = (u64)func; + rsp[1] = (u64)func_exit; + rsp[2] = (u64)debug_die; + rsp[3] = 0; + + rsp -= CO_CONTEXT_SIZE; + +#if defined(__aarch64__) + rsp[0] = (u64)co_entry_aarch64; +#endif + + co->rsp = (u64)rsp; + co->priv = priv; + co->host = host; + co->stksz = stksz; +} + + static void +co_exit0(void) +{ + co_exit(0); +} + + struct co * +co_create(const u64 stacksize, void * func, void * priv, u64 * const host) +{ + const u64 stksz = bits_round_up(stacksize, 6); + const size_t alloc_size = stksz + sizeof(struct co); + u8 * const mem = yalloc(alloc_size); + if (mem == NULL) + return NULL; + +#ifdef CO_STACK_CHECK + memset(mem, 0x5c, stksz); +#endif // CO_STACK_CHECK + + struct co * const co = (typeof(co))(mem + stksz); + co_init(co, func, priv, host, stksz, co_exit0); + return co; +} + + inline void +co_reuse(struct co * const co, void * func, void * priv, u64 * const host) +{ + co_init(co, func, priv, host, co->stksz, co_exit0); +} + + inline struct co * +co_fork(void * func, void * priv) +{ + return co_curr ? co_create(co_curr->stksz, func, priv, co_curr->host) : NULL; +} + + inline void * +co_priv(void) +{ + return co_curr ? co_curr->priv : NULL; +} + +// the host calls this to enter a coroutine. 
+ inline u64 +co_enter(struct co * const to, const u64 retval) +{ + debug_assert(co_curr == NULL); // must entry from the host + debug_assert(to && to->host); + u64 * const save = to->host; + co_curr = to; + const u64 ret = co_switch_stack(save, to->rsp, retval); + co_curr = NULL; + return ret; +} + +// switch from a coroutine to another coroutine +// co_curr must be valid +// the target will resume and receive the retval + inline u64 +co_switch_to(struct co * const to, const u64 retval) +{ + debug_assert(co_curr); + debug_assert(co_curr != to); + debug_assert(to && to->host); + struct co * const save = co_curr; + co_curr = to; + return co_switch_stack(&(save->rsp), to->rsp, retval); +} + +// switch from a coroutine to the host routine +// co_yield is now a c++ keyword... + inline u64 +co_back(const u64 retval) +{ + debug_assert(co_curr); + struct co * const save = co_curr; + co_curr = NULL; + return co_switch_stack(&(save->rsp), *(save->host), retval); +} + +#ifdef CO_STACK_CHECK + static void +co_stack_check(const u8 * const mem, const u64 stksz) +{ + const u64 * const mem64 = (typeof(mem64))mem; + const u64 size64 = stksz / sizeof(u64); + for (u64 i = 0; i < size64; i++) { + if (mem64[i] != 0x5c5c5c5c5c5c5c5clu) { + fprintf(stderr, "%s co stack usage: %lu/%lu\n", __func__, stksz - (i * sizeof(u64)), stksz); + break; + } + } +} +#endif // CO_STACK_CHECK + +// return to host and set host to NULL +__attribute__((noreturn)) + void +co_exit(const u64 retval) +{ + debug_assert(co_curr); +#ifdef CO_STACK_CHECK + const u64 stksz = co_curr->stksz; + u8 * const mem = ((u8 *)co_curr) - stksz; + co_stack_check(mem, stksz); +#endif // CO_STACK_CHECK + const u64 hostrsp = *(co_curr->host); + co_curr->host = NULL; + struct co * const save = co_curr; + co_curr = NULL; + (void)co_switch_stack(&(save->rsp), hostrsp, retval); + // return to co_enter + debug_die(); +} + +// host is set to NULL on exit + inline bool +co_valid(struct co * const co) +{ + return co->host != NULL; +} + 
+// return NULL on host + inline struct co * +co_self(void) +{ + return co_curr; +} + + inline void +co_destroy(struct co * const co) +{ + u8 * const mem = ((u8 *)co) - co->stksz; + free(mem); +} +// }}} co + +// corr {{{ +struct corr { + struct co co; + struct corr * next; + struct corr * prev; +}; + +// initial and link guest to the run-queue + struct corr * +corr_create(const u64 stacksize, void * func, void * priv, u64 * const host) +{ + const u64 stksz = bits_round_up(stacksize, 6); + const size_t alloc_size = stksz + sizeof(struct corr); + u8 * const mem = yalloc(alloc_size); + if (mem == NULL) + return NULL; + +#ifdef CO_STACK_CHECK + memset(mem, 0x5c, stksz); +#endif // CO_STACK_CHECK + + struct corr * const co = (typeof(co))(mem + stksz); + co_init(&(co->co), func, priv, host, stksz, corr_exit); + co->next = co; + co->prev = co; + return co; +} + + struct corr * +corr_link(const u64 stacksize, void * func, void * priv, struct corr * const prev) +{ + const u64 stksz = bits_round_up(stacksize, 6); + const size_t alloc_size = stksz + sizeof(struct corr); + u8 * const mem = yalloc(alloc_size); + if (mem == NULL) + return NULL; + +#ifdef CO_STACK_CHECK + memset(mem, 0x5c, stksz); +#endif // CO_STACK_CHECK + + struct corr * const co = (typeof(co))(mem + stksz); + co_init(&(co->co), func, priv, prev->co.host, stksz, corr_exit); + co->next = prev->next; + co->prev = prev; + co->prev->next = co; + co->next->prev = co; + return co; +} + + inline void +corr_reuse(struct corr * const co, void * func, void * priv, u64 * const host) +{ + co_init(&(co->co), func, priv, host, co->co.stksz, corr_exit); + co->next = co; + co->prev = co; +} + + inline void +corr_relink(struct corr * const co, void * func, void * priv, struct corr * const prev) +{ + co_init(&(co->co), func, priv, prev->co.host, co->co.stksz, corr_exit); + co->next = prev->next; + co->prev = prev; + co->prev->next = co; + co->next->prev = co; +} + + inline void +corr_enter(struct corr * const co) +{ + 
(void)co_enter(&(co->co), 0); +} + + inline void +corr_yield(void) +{ + struct corr * const curr = (typeof(curr))co_curr; + if (curr && (curr->next != curr)) + (void)co_switch_to(&(curr->next->co), 0); +} + +__attribute__((noreturn)) + inline void +corr_exit(void) +{ + debug_assert(co_curr); +#ifdef CO_STACK_CHECK + const u64 stksz = co_curr->stksz; + const u8 * const mem = ((u8 *)(co_curr)) - stksz; + co_stack_check(mem, stksz); +#endif // CO_STACK_CHECK + + struct corr * const curr = (typeof(curr))co_curr; + if (curr->next != curr) { // have more corr + struct corr * const next = curr->next; + struct corr * const prev = curr->prev; + next->prev = prev; + prev->next = next; + curr->next = NULL; + curr->prev = NULL; + curr->co.host = NULL; // invalidate + (void)co_switch_to(&(next->co), 0); + } else { // the last corr + co_exit0(); + } + debug_die(); +} + + inline void +corr_destroy(struct corr * const co) +{ + co_destroy(&(co->co)); +} +// }}} corr + +// }}} co + +// bits {{{ + inline u32 +bits_reverse_u32(const u32 v) +{ + const u32 v2 = __builtin_bswap32(v); + const u32 v3 = ((v2 & 0xf0f0f0f0u) >> 4) | ((v2 & 0x0f0f0f0fu) << 4); + const u32 v4 = ((v3 & 0xccccccccu) >> 2) | ((v3 & 0x33333333u) << 2); + const u32 v5 = ((v4 & 0xaaaaaaaau) >> 1) | ((v4 & 0x55555555u) << 1); + return v5; +} + + inline u64 +bits_reverse_u64(const u64 v) +{ + const u64 v2 = __builtin_bswap64(v); + const u64 v3 = ((v2 & 0xf0f0f0f0f0f0f0f0lu) >> 4) | ((v2 & 0x0f0f0f0f0f0f0f0flu) << 4); + const u64 v4 = ((v3 & 0xcccccccccccccccclu) >> 2) | ((v3 & 0x3333333333333333lu) << 2); + const u64 v5 = ((v4 & 0xaaaaaaaaaaaaaaaalu) >> 1) | ((v4 & 0x5555555555555555lu) << 1); + return v5; +} + + inline u64 +bits_rotl_u64(const u64 v, const u8 n) +{ + const u8 sh = n & 0x3f; + return (v << sh) | (v >> (64 - sh)); +} + + inline u64 +bits_rotr_u64(const u64 v, const u8 n) +{ + const u8 sh = n & 0x3f; + return (v >> sh) | (v << (64 - sh)); +} + + inline u32 +bits_rotl_u32(const u32 v, const u8 n) +{ + 
const u8 sh = n & 0x1f; + return (v << sh) | (v >> (32 - sh)); +} + + inline u32 +bits_rotr_u32(const u32 v, const u8 n) +{ + const u8 sh = n & 0x1f; + return (v >> sh) | (v << (32 - sh)); +} + + inline u64 +bits_p2_up_u64(const u64 v) +{ + // clz(0) is undefined + return (v > 1) ? (1lu << (64 - __builtin_clzl(v - 1lu))) : v; +} + + inline u32 +bits_p2_up_u32(const u32 v) +{ + // clz(0) is undefined + return (v > 1) ? (1u << (32 - __builtin_clz(v - 1u))) : v; +} + + inline u64 +bits_p2_down_u64(const u64 v) +{ + return v ? (1lu << (63 - __builtin_clzl(v))) : v; +} + + inline u32 +bits_p2_down_u32(const u32 v) +{ + return v ? (1u << (31 - __builtin_clz(v))) : v; +} + + inline u64 +bits_round_up(const u64 v, const u8 power) +{ + return (v + (1lu << power) - 1lu) >> power << power; +} + + inline u64 +bits_round_up_a(const u64 v, const u64 a) +{ + return (v + a - 1) / a * a; +} + + inline u64 +bits_round_down(const u64 v, const u8 power) +{ + return v >> power << power; +} + + inline u64 +bits_round_down_a(const u64 v, const u64 a) +{ + return v / a * a; +} +// }}} bits + +// vi128 {{{ +#if defined(__GNUC__) && __GNUC__ >= 7 +#define FALLTHROUGH __attribute__ ((fallthrough)) +#else +#define FALLTHROUGH ((void)0) +#endif /* __GNUC__ >= 7 */ + + inline u32 +vi128_estimate_u32(const u32 v) +{ + static const u8 t[] = {5,5,5,5, + 4,4,4,4,4,4,4, 3,3,3,3,3,3,3, + 2,2,2,2,2,2,2, 1,1,1,1,1,1,1}; + return v ? 
t[__builtin_clz(v)] : 2; + // 0 -> [0x80 0x00] the first byte is non-zero + + // nz bit range -> enc length offset in t[] + // 0 -> 2 special case + // 1 to 7 -> 1 31 to 25 + // 8 to 14 -> 2 24 to 18 + // 15 to 21 -> 3 17 to 11 + // 22 to 28 -> 4 10 to 4 + // 29 to 32 -> 5 3 to 0 +} + + u8 * +vi128_encode_u32(u8 * dst, u32 v) +{ + switch (vi128_estimate_u32(v)) { + case 5: + *(dst++) = (u8)(v | 0x80); v >>= 7; FALLTHROUGH; + case 4: + *(dst++) = (u8)(v | 0x80); v >>= 7; FALLTHROUGH; + case 3: + *(dst++) = (u8)(v | 0x80); v >>= 7; FALLTHROUGH; + case 2: + *(dst++) = (u8)(v | 0x80); v >>= 7; FALLTHROUGH; + case 1: + *(dst++) = (u8)v; + break; + default: + debug_die(); + break; + } + return dst; +} + + const u8 * +vi128_decode_u32(const u8 * src, u32 * const out) +{ + debug_assert(*src); + u32 r = 0; + for (u32 shift = 0; shift < 32; shift += 7) { + const u8 byte = *(src++); + r |= (((u32)(byte & 0x7f)) << shift); + if ((byte & 0x80) == 0) { // No more bytes to consume + *out = r; + return src; + } + } + *out = 0; + return NULL; // invalid +} + + inline u32 +vi128_estimate_u64(const u64 v) +{ + static const u8 t[] = {10, + 9,9,9,9,9,9,9, 8,8,8,8,8,8,8, 7,7,7,7,7,7,7, + 6,6,6,6,6,6,6, 5,5,5,5,5,5,5, 4,4,4,4,4,4,4, + 3,3,3,3,3,3,3, 2,2,2,2,2,2,2, 1,1,1,1,1,1,1}; + return v ? 
t[__builtin_clzl(v)] : 2; +} + +// return ptr after the generated bytes + u8 * +vi128_encode_u64(u8 * dst, u64 v) +{ + switch (vi128_estimate_u64(v)) { + case 10: + *(dst++) = (u8)(v | 0x80); v >>= 7; FALLTHROUGH; + case 9: + *(dst++) = (u8)(v | 0x80); v >>= 7; FALLTHROUGH; + case 8: + *(dst++) = (u8)(v | 0x80); v >>= 7; FALLTHROUGH; + case 7: + *(dst++) = (u8)(v | 0x80); v >>= 7; FALLTHROUGH; + case 6: + *(dst++) = (u8)(v | 0x80); v >>= 7; FALLTHROUGH; + case 5: + *(dst++) = (u8)(v | 0x80); v >>= 7; FALLTHROUGH; + case 4: + *(dst++) = (u8)(v | 0x80); v >>= 7; FALLTHROUGH; + case 3: + *(dst++) = (u8)(v | 0x80); v >>= 7; FALLTHROUGH; + case 2: + *(dst++) = (u8)(v | 0x80); v >>= 7; FALLTHROUGH; + case 1: + *(dst++) = (u8)v; + break; + default: + debug_die(); + break; + } + return dst; +} + +// return ptr after the consumed bytes + const u8 * +vi128_decode_u64(const u8 * src, u64 * const out) +{ + u64 r = 0; + for (u32 shift = 0; shift < 64; shift += 7) { + const u8 byte = *(src++); + r |= (((u64)(byte & 0x7f)) << shift); + if ((byte & 0x80) == 0) { // No more bytes to consume + *out = r; + return src; + } + } + *out = 0; + return NULL; // invalid +} + +#undef FALLTHROUGH +// }}} vi128 + +// misc {{{ + inline struct entry13 +entry13(const u16 e1, const u64 e3) +{ + debug_assert((e3 >> 48) == 0); + return (struct entry13){.v64 = (e3 << 16) | e1}; +} + + inline void +entry13_update_e3(struct entry13 * const e, const u64 e3) +{ + debug_assert((e3 >> 48) == 0); + *e = entry13(e->e1, e3); +} + + inline void * +u64_to_ptr(const u64 v) +{ + return (void *)v; +} + + inline u64 +ptr_to_u64(const void * const ptr) +{ + return (u64)ptr; +} + +// portable malloc_usable_size + inline size_t +m_usable_size(void * const ptr) +{ +#if defined(__linux__) || defined(__FreeBSD__) + const size_t sz = malloc_usable_size(ptr); +#elif defined(__APPLE__) && defined(__MACH__) + const size_t sz = malloc_size(ptr); +#endif // OS + +#ifndef HEAPCHECKING + // valgrind and asan may return unaligned 
usable size + debug_assert((sz & 0x7lu) == 0); +#endif // HEAPCHECKING + + return sz; +} + + inline size_t +fdsize(const int fd) +{ + struct stat st; + st.st_size = 0; + if (fstat(fd, &st) != 0) + return 0; + + if (S_ISBLK(st.st_mode)) { +#if defined(__linux__) + ioctl(fd, BLKGETSIZE64, &st.st_size); +#elif defined(__APPLE__) && defined(__MACH__) + u64 blksz = 0; + u64 nblks = 0; + ioctl(fd, DKIOCGETBLOCKSIZE, &blksz); + ioctl(fd, DKIOCGETBLOCKCOUNT, &nblks); + st.st_size = (ssize_t)(blksz * nblks); +#elif defined(__FreeBSD__) + ioctl(fd, DIOCGMEDIASIZE, &st.st_size); +#endif // OS + } + + return (size_t)st.st_size; +} + + u32 +memlcp(const u8 * const p1, const u8 * const p2, const u32 max) +{ + const u32 max64 = max & (~7u); + u32 clen = 0; + while (clen < max64) { + const u64 v1 = *(const u64 *)(p1+clen); + const u64 v2 = *(const u64 *)(p2+clen); + const u64 x = v1 ^ v2; + if (x) + return clen + (u32)(__builtin_ctzl(x) >> 3); + + clen += sizeof(u64); + } + + if ((clen + sizeof(u32)) <= max) { + const u32 v1 = *(const u32 *)(p1+clen); + const u32 v2 = *(const u32 *)(p2+clen); + const u32 x = v1 ^ v2; + if (x) + return clen + (u32)(__builtin_ctz(x) >> 3); + + clen += sizeof(u32); + } + + while ((clen < max) && (p1[clen] == p2[clen])) + clen++; + return clen; +} + +static double logger_t0 = 0.0; + +__attribute__((constructor)) + static void +logger_init(void) +{ + logger_t0 = time_sec(); +} + +__attribute__ ((format (printf, 2, 3))) + void +logger_printf(const int fd, const char * const fmt, ...) 
+{ + char buf[4096]; + va_list ap; + va_start(ap, fmt); + vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + dprintf(fd, "%010.3lf %08x %s", time_diff_sec(logger_t0), crc32c_u64(0x12345678, (u64)pthread_self()), buf); +} +// }}} misc + +// astk {{{ +// atomic stack +struct acell { struct acell * next; }; + +// extract ptr from m value + static inline struct acell * +astk_ptr(const u64 m) +{ + return (struct acell *)(m >> 16); +} + +// calculate the new magic + static inline u64 +astk_m1(const u64 m0, struct acell * const ptr) +{ + return ((m0 + 1) & 0xfffflu) | (((u64)ptr) << 16); +} + +// calculate the new magic + static inline u64 +astk_m1_unsafe(struct acell * const ptr) +{ + return ((u64)ptr) << 16; +} + + static bool +astk_try_push(au64 * const pmagic, struct acell * const first, struct acell * const last) +{ + u64 m0 = atomic_load_explicit(pmagic, MO_CONSUME); + last->next = astk_ptr(m0); + const u64 m1 = astk_m1(m0, first); + return atomic_compare_exchange_weak_explicit(pmagic, &m0, m1, MO_RELEASE, MO_RELAXED); +} + + static void +astk_push_safe(au64 * const pmagic, struct acell * const first, struct acell * const last) +{ + while (!astk_try_push(pmagic, first, last)); +} + + static void +astk_push_unsafe(au64 * const pmagic, struct acell * const first, + struct acell * const last) +{ + const u64 m0 = atomic_load_explicit(pmagic, MO_CONSUME); + last->next = astk_ptr(m0); + const u64 m1 = astk_m1_unsafe(first); + atomic_store_explicit(pmagic, m1, MO_RELAXED); +} + +//// can fail for two reasons: (1) NULL: no available object; (2) ~0lu: contention +// static void * +//astk_try_pop(au64 * const pmagic) +//{ +// u64 m0 = atomic_load_explicit(pmagic, MO_CONSUME); +// struct acell * const ret = astk_ptr(m0); +// if (ret == NULL) +// return NULL; +// +// const u64 m1 = astk_m1(m0, ret->next); +// if (atomic_compare_exchange_weak_explicit(pmagic, &m0, m1, MO_ACQUIRE, MO_RELAXED)) +// return ret; +// else +// return (void *)(~0lu); +//} + + static void * 
+astk_pop_safe(au64 * const pmagic) +{ + do { + u64 m0 = atomic_load_explicit(pmagic, MO_CONSUME); + struct acell * const ret = astk_ptr(m0); + if (ret == NULL) + return NULL; + + const u64 m1 = astk_m1(m0, ret->next); + if (atomic_compare_exchange_weak_explicit(pmagic, &m0, m1, MO_ACQUIRE, MO_RELAXED)) + return ret; + } while (true); +} + + static void * +astk_pop_unsafe(au64 * const pmagic) +{ + const u64 m0 = atomic_load_explicit(pmagic, MO_CONSUME); + struct acell * const ret = astk_ptr(m0); + if (ret == NULL) + return NULL; + + const u64 m1 = astk_m1_unsafe(ret->next); + atomic_store_explicit(pmagic, m1, MO_RELAXED); + return (void *)ret; +} + + static void * +astk_peek_unsafe(au64 * const pmagic) +{ + const u64 m0 = atomic_load_explicit(pmagic, MO_CONSUME); + return astk_ptr(m0); +} +// }}} astk + +// slab {{{ +#define SLAB_OBJ0_OFFSET ((64)) +struct slab { + au64 magic; // hi 48: ptr, lo 16: seq + u64 padding1[7]; + + // 2nd line + struct acell * head_active; // list of blocks in use or in magic + struct acell * head_backup; // list of unused full blocks + u64 nr_ready; // UNSAFE only! 
number of objects under magic + u64 padding2[5]; + + // 3rd line const + u64 obj_size; // const: aligned size of each object + u64 blk_size; // const: size of each memory block + u64 objs_per_slab; // const: number of objects in a slab + u64 obj0_offset; // const: offset of the first object in a block + u64 padding3[4]; + + // 4th line + union { + mutex lock; + u64 padding4[8]; + }; +}; +static_assert(sizeof(struct slab) == 256, "sizeof(struct slab) != 256"); + + static void +slab_add(struct slab * const slab, struct acell * const blk, const bool is_safe) +{ + // insert into head_active + blk->next = slab->head_active; + slab->head_active = blk; + + u8 * const base = ((u8 *)blk) + slab->obj0_offset; + struct acell * iter = (typeof(iter))base; // [0] + for (u64 i = 1; i < slab->objs_per_slab; i++) { + struct acell * const next = (typeof(next))(base + (i * slab->obj_size)); + iter->next = next; + iter = next; + } + + // base points to the first block; iter points to the last block + if (is_safe) { // other threads can poll magic + astk_push_safe(&slab->magic, (struct acell *)base, iter); + } else { // unsafe + astk_push_unsafe(&slab->magic, (struct acell *)base, iter); + slab->nr_ready += slab->objs_per_slab; + } +} + +// critical section; call with lock + static bool +slab_expand(struct slab * const slab, const bool is_safe) +{ + struct acell * const old = slab->head_backup; + if (old) { // pop old from backup and add + slab->head_backup = old->next; + slab_add(slab, old, is_safe); + } else { // more core + size_t blk_size; + struct acell * const new = pages_alloc_best(slab->blk_size, true, &blk_size); + (void)blk_size; + if (new == NULL) + return false; + + slab_add(slab, new, is_safe); + } + return true; +} + +// return 0 on failure; otherwise, obj0_offset + static u64 +slab_check_sizes(const u64 obj_size, const u64 blk_size) +{ + // obj must be non-zero and 8-byte aligned + // blk must be at least of page size and power of 2 + if ((!obj_size) || (obj_size % 8lu) 
|| (blk_size < 4096lu) || (blk_size & (blk_size - 1))) + return 0; + + // each slab should have at least one object + const u64 obj0_offset = (obj_size & (obj_size - 1)) ? SLAB_OBJ0_OFFSET : obj_size; + if (obj0_offset >= blk_size || (blk_size - obj0_offset) < obj_size) + return 0; + + return obj0_offset; +} + + static void +slab_init_internal(struct slab * const slab, const u64 obj_size, const u64 blk_size, const u64 obj0_offset) +{ + memset(slab, 0, sizeof(*slab)); + slab->obj_size = obj_size; + slab->blk_size = blk_size; + slab->objs_per_slab = (blk_size - obj0_offset) / obj_size; + debug_assert(slab->objs_per_slab); // >= 1 + slab->obj0_offset = obj0_offset; + mutex_init(&(slab->lock)); +} + + struct slab * +slab_create(const u64 obj_size, const u64 blk_size) +{ + const u64 obj0_offset = slab_check_sizes(obj_size, blk_size); + if (!obj0_offset) + return NULL; + + struct slab * const slab = yalloc(sizeof(*slab)); + if (slab == NULL) + return NULL; + + slab_init_internal(slab, obj_size, blk_size, obj0_offset); + return slab; +} + +// unsafe + bool +slab_reserve_unsafe(struct slab * const slab, const u64 nr) +{ + while (slab->nr_ready < nr) + if (!slab_expand(slab, false)) + return false; + return true; +} + + void * +slab_alloc_unsafe(struct slab * const slab) +{ + void * ret = astk_pop_unsafe(&slab->magic); + if (ret == NULL) { + if (!slab_expand(slab, false)) + return NULL; + ret = astk_pop_unsafe(&slab->magic); + } + debug_assert(ret); + slab->nr_ready--; + return ret; +} + + void * +slab_alloc_safe(struct slab * const slab) +{ + void * ret = astk_pop_safe(&slab->magic); + if (ret) + return ret; + + mutex_lock(&slab->lock); + do { + ret = astk_pop_safe(&slab->magic); // may already have new objs + if (ret) + break; + if (!slab_expand(slab, true)) + break; + } while (true); + mutex_unlock(&slab->lock); + return ret; +} + + void +slab_free_unsafe(struct slab * const slab, void * const ptr) +{ + debug_assert(ptr); + astk_push_unsafe(&slab->magic, ptr, ptr); + 
slab->nr_ready++; +} + + void +slab_free_safe(struct slab * const slab, void * const ptr) +{ + astk_push_safe(&slab->magic, ptr, ptr); +} + +// UNSAFE + void +slab_free_all(struct slab * const slab) +{ + slab->magic = 0; + slab->nr_ready = 0; // backup does not count + + if (slab->head_active) { + struct acell * iter = slab->head_active; + while (iter->next) + iter = iter->next; + // now iter points to the last blk + iter->next = slab->head_backup; // active..backup + slab->head_backup = slab->head_active; // backup gets all + slab->head_active = NULL; // empty active + } +} + +// unsafe + u64 +slab_get_nalloc(struct slab * const slab) +{ + struct acell * iter = slab->head_active; + u64 n = 0; + while (iter) { + n++; + iter = iter->next; + } + n *= slab->objs_per_slab; + + iter = astk_peek_unsafe(&slab->magic); + while (iter) { + n--; + iter = iter->next; + } + return n; +} + + static void +slab_deinit(struct slab * const slab) +{ + debug_assert(slab); + struct acell * iter = slab->head_active; + while (iter) { + struct acell * const next = iter->next; + pages_unmap(iter, slab->blk_size); + iter = next; + } + iter = slab->head_backup; + while (iter) { + struct acell * const next = iter->next; + pages_unmap(iter, slab->blk_size); + iter = next; + } +} + + void +slab_destroy(struct slab * const slab) +{ + slab_deinit(slab); + free(slab); +} +// }}} slab + +// string {{{ +static union { u16 v16; u8 v8[2]; } strdec_table[100]; + +__attribute__((constructor)) + static void +strdec_init(void) +{ + for (u8 i = 0; i < 100; i++) { + const u8 hi = (typeof(hi))('0' + (i / 10)); + const u8 lo = (typeof(lo))('0' + (i % 10)); + strdec_table[i].v8[0] = hi; + strdec_table[i].v8[1] = lo; + } +} + +// output 10 bytes + void +strdec_32(void * const out, const u32 v) +{ + u32 vv = v; + u16 * const ptr = (typeof(ptr))out; + for (u64 i = 4; i <= 4; i--) { // x5 + ptr[i] = strdec_table[vv % 100].v16; + vv /= 100u; + } +} + +// output 20 bytes + void +strdec_64(void * const out, const u64 
v) +{ + u64 vv = v; + u16 * const ptr = (typeof(ptr))out; + for (u64 i = 9; i <= 9; i--) { // x10 + ptr[i] = strdec_table[vv % 100].v16; + vv /= 100; + } +} + +static const u8 strhex_table_16[16] = {'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'}; + +#if defined(__x86_64__) + static inline m128 +strhex_helper(const u64 v) +{ + static const u8 mask1[16] = {15,7,14,6,13,5,12,4,11,3,10,2,9,1,8,0}; + + const m128 tmp = _mm_set_epi64x((s64)(v>>4), (s64)v); // mm want s64 + const m128 hilo = _mm_and_si128(tmp, _mm_set1_epi8(0xf)); + const m128 bin = _mm_shuffle_epi8(hilo, _mm_load_si128((void *)mask1)); + const m128 str = _mm_shuffle_epi8(_mm_load_si128((const void *)strhex_table_16), bin); + return str; +} +#elif defined(__aarch64__) + static inline m128 +strhex_helper(const u64 v) +{ + static const u8 mask1[16] = {15,7,14,6,13,5,12,4,11,3,10,2,9,1,8,0}; + u64 v2[2] = {v, v>>4}; + const m128 tmp = vld1q_u8((u8 *)v2); + const m128 hilo = vandq_u8(tmp, vdupq_n_u8(0xf)); + const m128 bin = vqtbl1q_u8(hilo, vld1q_u8(mask1)); + const m128 str = vqtbl1q_u8(vld1q_u8(strhex_table_16), bin); + return str; +} +#else +static u16 strhex_table_256[256]; + +__attribute__((constructor)) + static void +strhex_init(void) +{ + for (u64 i = 0; i < 256; i++) + strhex_table_256[i] = (((u16)strhex_table_16[i & 0xf]) << 8) | (strhex_table_16[i>>4]); +} +#endif // __x86_64__ + +// output 8 bytes + void +strhex_32(void * const out, u32 v) +{ +#if defined(__x86_64__) + const m128 str = strhex_helper((u64)v); + _mm_storel_epi64(out, _mm_srli_si128(str, 8)); +#elif defined(__aarch64__) + const m128 str = strhex_helper((u64)v); + vst1q_lane_u64(out, vreinterpretq_u64_u8(str), 1); +#else + u16 * const ptr = (typeof(ptr))out; + for (u64 i = 0; i < 4; i++) { + ptr[3-i] = strhex_table_256[v & 0xff]; + v >>= 8; + } +#endif +} + +// output 16 bytes // buffer must be aligned by 16B + void +strhex_64(void * const out, u64 v) +{ +#if defined(__x86_64__) + const m128 str = strhex_helper(v); 
+ _mm_storeu_si128(out, str); +#elif defined(__aarch64__) + const m128 str = strhex_helper(v); + vst1q_u8(out, str); +#else + u16 * const ptr = (typeof(ptr))out; + for (u64 i = 0; i < 8; i++) { + ptr[7-i] = strhex_table_256[v & 0xff]; + v >>= 8; + } +#endif +} + +// string to u64 + inline u64 +a2u64(const void * const str) +{ + return strtoull(str, NULL, 10); +} + + inline u32 +a2u32(const void * const str) +{ + return (u32)strtoull(str, NULL, 10); +} + + inline s64 +a2s64(const void * const str) +{ + return strtoll(str, NULL, 10); +} + + inline s32 +a2s32(const void * const str) +{ + return (s32)strtoll(str, NULL, 10); +} + + void +str_print_hex(FILE * const out, const void * const data, const u32 len) +{ + const u8 * const ptr = data; + const u32 strsz = len * 3; + u8 * const buf = malloc(strsz); + for (u32 i = 0; i < len; i++) { + buf[i*3] = ' '; + buf[i*3+1] = strhex_table_16[ptr[i]>>4]; + buf[i*3+2] = strhex_table_16[ptr[i] & 0xf]; + } + fwrite(buf, strsz, 1, out); + free(buf); +} + + void +str_print_dec(FILE * const out, const void * const data, const u32 len) +{ + const u8 * const ptr = data; + const u32 strsz = len * 4; + u8 * const buf = malloc(strsz); + for (u32 i = 0; i < len; i++) { + const u8 v = ptr[i]; + buf[i*4] = ' '; + const u8 v1 = v / 100u; + const u8 v23 = v % 100u; + buf[i*4+1] = (u8)'0' + v1; + buf[i*4+2] = (u8)'0' + (v23 / 10u); + buf[i*4+3] = (u8)'0' + (v23 % 10u); + } + fwrite(buf, strsz, 1, out); + free(buf); +} + +// returns a NULL-terminated list of string tokens. +// After use you only need to free the returned pointer (char **). 
+ char ** +strtoks(const char * const str, const char * const delim) +{ + if (str == NULL) + return NULL; + size_t nptr_alloc = 32; + char ** tokens = malloc(sizeof(tokens[0]) * nptr_alloc); + if (tokens == NULL) + return NULL; + const size_t bufsize = strlen(str) + 1; + char * const buf = malloc(bufsize); + if (buf == NULL) + goto fail_buf; + + memcpy(buf, str, bufsize); + char * saveptr = NULL; + char * tok = strtok_r(buf, delim, &saveptr); + size_t ntoks = 0; + while (tok) { + if (ntoks >= nptr_alloc) { + nptr_alloc += 32; + char ** const r = realloc(tokens, sizeof(tokens[0]) * nptr_alloc); + if (r == NULL) + goto fail_realloc; + + tokens = r; + } + tokens[ntoks] = tok; + ntoks++; + tok = strtok_r(NULL, delim, &saveptr); + } + tokens[ntoks] = NULL; + const size_t nptr = ntoks + 1; // append a NULL + const size_t rsize = (sizeof(tokens[0]) * nptr) + bufsize; + char ** const r = realloc(tokens, rsize); + if (r == NULL) + goto fail_realloc; + + tokens = r; + char * const dest = (char *)(&(tokens[nptr])); + memcpy(dest, buf, bufsize); + for (u64 i = 0; i < ntoks; i++) + tokens[i] += (dest - buf); + + free(buf); + return tokens; + +fail_realloc: + free(buf); +fail_buf: + free(tokens); + return NULL; +} + + u32 +strtoks_count(const char * const * const toks) +{ + if (!toks) + return 0; + u32 n = 0; + while (toks[n++]); + return n; +} +// }}} string + +// qsbr {{{ +#define QSBR_STATES_NR ((23)) // shard capacity; valid values are 3*8-1 == 23; 5*8-1 == 39; 7*8-1 == 55 +#define QSBR_SHARD_BITS ((5)) // 2^n shards +#define QSBR_SHARD_NR (((1u) << QSBR_SHARD_BITS)) +#define QSBR_SHARD_MASK ((QSBR_SHARD_NR - 1)) + +struct qsbr_ref_real { +#ifdef QSBR_DEBUG + pthread_t ptid; // 8 + u32 status; // 4 + u32 nbt; // 4 (number of backtrace frames) +#define QSBR_DEBUG_BTNR ((14)) + void * backtrace[QSBR_DEBUG_BTNR]; +#endif + au64 qstate; // user updates it + au64 * pptr; // internal only + struct qsbr_ref_real * park; +}; + +static_assert(sizeof(struct qsbr_ref) == sizeof(struct 
qsbr_ref_real), "sizeof qsbr_ref"); + +// Quiescent-State-Based Reclamation RCU +struct qsbr { + struct qsbr_ref_real target; + u64 padding0[5]; + struct qshard { + au64 bitmap; + au64 ptrs[QSBR_STATES_NR]; + } shards[QSBR_SHARD_NR]; +}; + + struct qsbr * +qsbr_create(void) +{ + struct qsbr * const q = yalloc(sizeof(*q)); + memset(q, 0, sizeof(*q)); + return q; +} + + static inline struct qshard * +qsbr_shard(struct qsbr * const q, void * const ptr) +{ + const u32 sid = crc32c_u64(0, (u64)ptr) & QSBR_SHARD_MASK; + debug_assert(sid < QSBR_SHARD_NR); + return &(q->shards[sid]); +} + + static inline void +qsbr_write_qstate(struct qsbr_ref_real * const ref, const u64 v) +{ + atomic_store_explicit(&ref->qstate, v, MO_RELAXED); +} + + bool +qsbr_register(struct qsbr * const q, struct qsbr_ref * const qref) +{ + struct qsbr_ref_real * const ref = (typeof(ref))qref; + struct qshard * const shard = qsbr_shard(q, ref); + qsbr_write_qstate(ref, 0); + + do { + u64 bits = atomic_load_explicit(&shard->bitmap, MO_CONSUME); + const u32 pos = (u32)__builtin_ctzl(~bits); + if (unlikely(pos >= QSBR_STATES_NR)) + return false; + + const u64 bits1 = bits | (1lu << pos); + if (atomic_compare_exchange_weak_explicit(&shard->bitmap, &bits, bits1, MO_ACQUIRE, MO_RELAXED)) { + atomic_store_explicit(&shard->ptrs[pos], (u64)ref, MO_RELAXED); + //shard->ptrs[pos] = ref; + + ref->pptr = &(shard->ptrs[pos]); + ref->park = &q->target; +#ifdef QSBR_DEBUG + ref->ptid = (u64)pthread_self(); + ref->tid = 0; + ref->status = 1; + ref->nbt = backtrace(ref->backtrace, QSBR_DEBUG_BTNR); +#endif + return true; + } + } while (true); +} + + void +qsbr_unregister(struct qsbr * const q, struct qsbr_ref * const qref) +{ + struct qsbr_ref_real * const ref = (typeof(ref))qref; + struct qshard * const shard = qsbr_shard(q, ref); + const u32 pos = (u32)(ref->pptr - shard->ptrs); + debug_assert(pos < QSBR_STATES_NR); + debug_assert(shard->bitmap & (1lu << pos)); + + atomic_store_explicit(&shard->ptrs[pos], 
(u64)(&q->target), MO_RELAXED); + //shard->ptrs[pos] = &q->target; + (void)atomic_fetch_and_explicit(&shard->bitmap, ~(1lu << pos), MO_RELEASE); +#ifdef QSBR_DEBUG + ref->tid = 0; + ref->ptid = 0; + ref->status = 0xffff; // unregistered + ref->nbt = 0; +#endif + ref->pptr = NULL; + // wait for qsbr_wait to leave if it's working on the shard + while (atomic_load_explicit(&shard->bitmap, MO_CONSUME) >> 63) + cpu_pause(); +} + + inline void +qsbr_update(struct qsbr_ref * const qref, const u64 v) +{ + struct qsbr_ref_real * const ref = (typeof(ref))qref; + debug_assert((*ref->pptr) == (u64)ref); // must be unparked + // rcu update does not require release or acquire order + qsbr_write_qstate(ref, v); +} + + inline void +qsbr_park(struct qsbr_ref * const qref) +{ + cpu_cfence(); + struct qsbr_ref_real * const ref = (typeof(ref))qref; + atomic_store_explicit(ref->pptr, (u64)ref->park, MO_RELAXED); +#ifdef QSBR_DEBUG + ref->status = 0xfff; // parked +#endif +} + + inline void +qsbr_resume(struct qsbr_ref * const qref) +{ + struct qsbr_ref_real * const ref = (typeof(ref))qref; + atomic_store_explicit(ref->pptr, (u64)ref, MO_RELAXED); +#ifdef QSBR_DEBUG + ref->status = 0xf; // resumed +#endif + cpu_cfence(); +} + +// waiters needs external synchronization + void +qsbr_wait(struct qsbr * const q, const u64 target) +{ + cpu_cfence(); + qsbr_write_qstate(&q->target, target); + u64 cbits = 0; // check-bits; each bit corresponds to a shard + u64 bms[QSBR_SHARD_NR]; // copy of all bitmap + // take an unsafe snapshot of active users + for (u32 i = 0; i < QSBR_SHARD_NR; i++) { + bms[i] = atomic_load_explicit(&q->shards[i].bitmap, MO_CONSUME); + if (bms[i]) + cbits |= (1lu << i); // set to 1 if [i] has ptrs + } + + while (cbits) { + for (u64 ctmp = cbits; ctmp; ctmp &= (ctmp - 1)) { + // shard id + const u32 i = (u32)__builtin_ctzl(ctmp); + struct qshard * const shard = &(q->shards[i]); + const u64 bits1 = atomic_fetch_or_explicit(&(shard->bitmap), 1lu << 63, MO_ACQUIRE); + for (u64 
bits = bms[i]; bits; bits &= (bits - 1)) { + const u64 bit = bits & -bits; // extract lowest bit + if ((bits1 & bit) == 0) { + bms[i] &= ~bit; + } else { + au64 * pptr = &(shard->ptrs[__builtin_ctzl(bit)]); + struct qsbr_ref_real * const ptr = (typeof(ptr))atomic_load_explicit(pptr, MO_RELAXED); + if (atomic_load_explicit(&(ptr->qstate), MO_CONSUME) == target) + bms[i] &= ~bit; + } + } + (void)atomic_fetch_and_explicit(&(shard->bitmap), ~(1lu << 63), MO_RELEASE); + if (bms[i] == 0) + cbits &= ~(1lu << i); + } +#if defined(CORR) + corr_yield(); +#endif + } + debug_assert(cbits == 0); + cpu_cfence(); +} + + void +qsbr_destroy(struct qsbr * const q) +{ + if (q) + free(q); +} +#undef QSBR_STATES_NR +#undef QSBR_BITMAP_NR +// }}} qsbr + +// vim:fdm=marker diff --git a/run/MassTrie-beta/wormhole/lib.h b/run/MassTrie-beta/wormhole/lib.h new file mode 100644 index 00000000..40a2f40d --- /dev/null +++ b/run/MassTrie-beta/wormhole/lib.h @@ -0,0 +1,688 @@ +/* + * Copyright (c) 2016--2021 Wu, Xingbo + * + * All rights reserved. No warranty, explicit or implicit, provided. 
+ */ +#pragma once + +// includes {{{ +// C headers +#include <assert.h> +#include <errno.h> +#include <inttypes.h> +#include <math.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +// POSIX headers +#include <fcntl.h> +#include <pthread.h> +#include <unistd.h> + +// Linux headers +#include <sys/mman.h> +#include <sys/resource.h> +#include <sys/stat.h> +#include <sys/types.h> + +// SIMD +#if defined(__x86_64__) +#include <x86intrin.h> +#elif defined(__aarch64__) +#include <arm_acle.h> +#include <arm_neon.h> +#endif +// }}} includes + +#ifdef __cplusplus +extern "C" { +#endif + +// types {{{ +#ifndef typeof +#define typeof __typeof__ +#endif +#ifndef asm +#define asm __asm__ +#endif +typedef char s8; +typedef short s16; +typedef int s32; +typedef long s64; +typedef __int128_t s128; +static_assert(sizeof(s8) == 1, "sizeof(s8)"); +static_assert(sizeof(s16) == 2, "sizeof(s16)"); +static_assert(sizeof(s32) == 4, "sizeof(s32)"); +static_assert(sizeof(s64) == 8, "sizeof(s64)"); +static_assert(sizeof(s128) == 16, "sizeof(s128)"); + +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; +typedef unsigned long u64; +typedef __uint128_t u128; +static_assert(sizeof(u8) == 1, "sizeof(u8)"); +static_assert(sizeof(u16) == 2, "sizeof(u16)"); +static_assert(sizeof(u32) == 4, "sizeof(u32)"); +static_assert(sizeof(u64) == 8, "sizeof(u64)"); +static_assert(sizeof(u128) == 16, "sizeof(u128)"); + +#if defined(__x86_64__) +typedef __m128i m128; +#if defined(__AVX2__) +typedef __m256i m256; +#endif // __AVX2__ +#if defined(__AVX512F__) +typedef __m512i m512; +#endif // __AVX512F__ +#elif defined(__aarch64__) +typedef uint8x16_t m128; +#else +#error Need x86_64 or AArch64.
+#endif +// }}} types + +// defs {{{ +#define likely(____x____) __builtin_expect(____x____, 1) +#define unlikely(____x____) __builtin_expect(____x____, 0) + +// ansi colors +// 3X:fg; 4X:bg; 9X:light fg; 10X:light bg; +// X can be one of the following colors: +// 0:black; 1:red; 2:green; 3:yellow; +// 4:blue; 5:magenta; 6:cyan; 7:white; +#define TERMCLR(____code____) "\x1b[" #____code____ "m" +// }}} defs + +// const {{{ +#define PGBITS ((12)) +#define PGSZ ((1lu << PGBITS)) +// }}} const + +// math {{{ + extern u64 +mhash64(const u64 v); + + extern u32 +mhash32(const u32 v); + + extern u64 +gcd64(u64 a, u64 b); +// }}} math + +// random {{{ + extern u64 +random_u64(void); + + extern void +srandom_u64(const u64 seed); + + extern double +random_double(void); +// }}} random + +// timing {{{ + extern u64 +time_nsec(void); + + extern double +time_sec(void); + + extern u64 +time_diff_nsec(const u64 last); + + extern double +time_diff_sec(const double last); + + extern void +time_stamp(char * str, const size_t size); + + extern void +time_stamp2(char * str, const size_t size); +// }}} timing + +// cpucache {{{ + extern void +cpu_pause(void); + + extern void +cpu_mfence(void); + + extern void +cpu_cfence(void); + + extern void +cpu_prefetch0(const void * const ptr); + + extern void +cpu_prefetch1(const void * const ptr); + + extern void +cpu_prefetch2(const void * const ptr); + + extern void +cpu_prefetch3(const void * const ptr); + + extern void +cpu_prefetchw(const void * const ptr); +// }}} cpucache + +// crc32c {{{ + extern u32 +crc32c_u8(const u32 crc, const u8 v); + + extern u32 +crc32c_u16(const u32 crc, const u16 v); + + extern u32 +crc32c_u32(const u32 crc, const u32 v); + + extern u32 +crc32c_u64(const u32 crc, const u64 v); + +// 1 <= nr <= 3 + extern u32 +crc32c_inc_123(const u8 * buf, u32 nr, u32 crc); + +// nr % 4 == 0 + extern u32 +crc32c_inc_x4(const u8 * buf, u32 nr, u32 crc); + + extern u32 +crc32c_inc(const u8 * buf, u32 nr, u32 crc); +// }}} crc32c + 
+// debug {{{ + extern void +debug_break(void); + + extern void +debug_backtrace(void); + + extern void +watch_u64_usr1(u64 * const ptr); + +#ifndef NDEBUG + extern void +debug_assert(const bool v); +#else +#define debug_assert(expr) ((void)0) +#endif + +__attribute__((noreturn)) + extern void +debug_die(void); + +__attribute__((noreturn)) + extern void +debug_die_perror(void); + + extern void +debug_dump_maps(FILE * const out); + + extern bool +debug_perf_switch(void); +// }}} debug + +// mm {{{ +#ifdef ALLOCFAIL + extern bool +alloc_fail(void); +#endif + + extern void * +xalloc(const size_t align, const size_t size); + + extern void * +yalloc(const size_t size); + + extern void ** +malloc_2d(const size_t nr, const size_t size); + + extern void ** +calloc_2d(const size_t nr, const size_t size); + + extern void +pages_unmap(void * const ptr, const size_t size); + + extern void +pages_lock(void * const ptr, const size_t size); + +/* hugepages */ +// force posix allocators: -DVALGRIND_MEMCHECK + extern void * +pages_alloc_4kb(const size_t nr_4kb); + + extern void * +pages_alloc_2mb(const size_t nr_2mb); + + extern void * +pages_alloc_1gb(const size_t nr_1gb); + + extern void * +pages_alloc_best(const size_t size, const bool try_1gb, u64 * const size_out); +// }}} mm + +// process/thread {{{ + extern void +thread_get_name(const pthread_t pt, char * const name, const size_t len); + + extern void +thread_set_name(const pthread_t pt, const char * const name); + + extern long +process_get_rss(void); + + extern u32 +process_affinity_count(void); + + extern u32 +process_getaffinity_list(const u32 max, u32 * const cores); + + extern void +thread_setaffinity_list(const u32 nr, const u32 * const list); + + extern void +thread_pin(const u32 cpu); + + extern u64 +process_cpu_time_usec(void); + +// if args == true, argx is void ** +// if args == false, argx is void * + extern u64 +thread_fork_join(u32 nr, void *(*func) (void *), const bool args, void * const argx); + + extern int 
+thread_create_at(const u32 cpu, pthread_t * const thread, void *(*start_routine) (void *), void * const arg); +// }}} process/thread + +// locking {{{ +typedef union { + u32 opaque; +} spinlock; + + extern void +spinlock_init(spinlock * const lock); + + extern void +spinlock_lock(spinlock * const lock); + + extern bool +spinlock_trylock(spinlock * const lock); + + extern void +spinlock_unlock(spinlock * const lock); + +typedef union { + u32 opaque; +} rwlock; + + extern void +rwlock_init(rwlock * const lock); + + extern bool +rwlock_trylock_read(rwlock * const lock); + +// low-priority reader-lock; use with trylock_write_hp + extern bool +rwlock_trylock_read_lp(rwlock * const lock); + + extern bool +rwlock_trylock_read_nr(rwlock * const lock, u16 nr); + + extern void +rwlock_lock_read(rwlock * const lock); + + extern void +rwlock_unlock_read(rwlock * const lock); + + extern bool +rwlock_trylock_write(rwlock * const lock); + + extern bool +rwlock_trylock_write_nr(rwlock * const lock, u16 nr); + + extern void +rwlock_lock_write(rwlock * const lock); + +// writer has higher priority; new readers are blocked + extern bool +rwlock_trylock_write_hp(rwlock * const lock); + + extern bool +rwlock_trylock_write_hp_nr(rwlock * const lock, u16 nr); + + extern void +rwlock_lock_write_hp(rwlock * const lock); + + extern void +rwlock_unlock_write(rwlock * const lock); + + extern void +rwlock_write_to_read(rwlock * const lock); + +typedef union { + u64 opqaue[8]; +} mutex; + + extern void +mutex_init(mutex * const lock); + + extern void +mutex_lock(mutex * const lock); + + extern bool +mutex_trylock(mutex * const lock); + + extern void +mutex_unlock(mutex * const lock); + + extern void +mutex_deinit(mutex * const lock); +// }}} locking + +// coroutine {{{ +extern u64 co_switch_stack(u64 * const saversp, const u64 newrsp, const u64 retval); + +struct co; + + extern struct co * +co_create(const u64 stacksize, void * func, void * priv, u64 * const host); + + extern void 
+co_reuse(struct co * const co, void * func, void * priv, u64 * const host); + + extern struct co * +co_fork(void * func, void * priv); + + extern void * +co_priv(void); + + extern u64 +co_enter(struct co * const to, const u64 retval); + + extern u64 +co_switch_to(struct co * const to, const u64 retval); + + extern u64 +co_back(const u64 retval); + + extern void +co_exit(const u64 retval); + + extern bool +co_valid(struct co * const co); + + extern struct co * +co_self(void); + + extern void +co_destroy(struct co * const co); + +struct corr; + + extern struct corr * +corr_create(const u64 stacksize, void * func, void * priv, u64 * const host); + + extern struct corr * +corr_link(const u64 stacksize, void * func, void * priv, struct corr * const prev); + + extern void +corr_reuse(struct corr * const co, void * func, void * priv, u64 * const host); + + extern void +corr_relink(struct corr * const co, void * func, void * priv, struct corr * const prev); + + extern void +corr_enter(struct corr * const co); + + extern void +corr_yield(void); + + extern void +corr_exit(void); + + extern void +corr_destroy(struct corr * const co); +// }}} coroutine + +// bits {{{ + extern u32 +bits_reverse_u32(const u32 v); + + extern u64 +bits_reverse_u64(const u64 v); + + extern u64 +bits_rotl_u64(const u64 v, const u8 n); + + extern u64 +bits_rotr_u64(const u64 v, const u8 n); + + extern u32 +bits_rotl_u32(const u32 v, const u8 n); + + extern u32 +bits_rotr_u32(const u32 v, const u8 n); + + extern u64 +bits_p2_up_u64(const u64 v); + + extern u32 +bits_p2_up_u32(const u32 v); + + extern u64 +bits_p2_down_u64(const u64 v); + + extern u32 +bits_p2_down_u32(const u32 v); + + extern u64 +bits_round_up(const u64 v, const u8 power); + + extern u64 +bits_round_up_a(const u64 v, const u64 a); + + extern u64 +bits_round_down(const u64 v, const u8 power); + + extern u64 +bits_round_down_a(const u64 v, const u64 a); +// }}} bits + +// vi128 {{{ + extern u32 +vi128_estimate_u32(const u32 v); + + 
extern u8 * +vi128_encode_u32(u8 * dst, u32 v); + + extern const u8 * +vi128_decode_u32(const u8 * src, u32 * const out); + + extern u32 +vi128_estimate_u64(const u64 v); + + extern u8 * +vi128_encode_u64(u8 * dst, u64 v); + + extern const u8 * +vi128_decode_u64(const u8 * src, u64 * const out); +// }}} vi128 + +// misc {{{ +// TODO: only works on little endian? +struct entry13 { // what a beautiful name + union { + u16 e1; + struct { // easy for debugging + u64 e1_64:16; + u64 e3:48; + }; + u64 v64; + void * ptr; + }; +}; + +static_assert(sizeof(struct entry13) == 8, "sizeof(entry13) != 8"); + +// directly access read .e1 and .e3 +// directly write .e1 +// use entry13_update() to update the entire entry + + extern struct entry13 +entry13(const u16 e1, const u64 e3); + + extern void +entry13_update_e3(struct entry13 * const e, const u64 e3); + + extern void * +u64_to_ptr(const u64 v); + + extern u64 +ptr_to_u64(const void * const ptr); + + extern size_t +m_usable_size(void * const ptr); + + extern size_t +fdsize(const int fd); + + extern u32 +memlcp(const u8 * const p1, const u8 * const p2, const u32 max); + +__attribute__ ((format (printf, 2, 3))) + extern void +logger_printf(const int fd, const char * const fmt, ...); +// }}} misc + +// slab {{{ +struct slab; + + extern struct slab * +slab_create(const u64 obj_size, const u64 blk_size); + + extern bool +slab_reserve_unsafe(struct slab * const slab, const u64 nr); + + extern void * +slab_alloc_unsafe(struct slab * const slab); + + extern void * +slab_alloc_safe(struct slab * const slab); + + extern void +slab_free_unsafe(struct slab * const slab, void * const ptr); + + extern void +slab_free_safe(struct slab * const slab, void * const ptr); + + extern void +slab_free_all(struct slab * const slab); + + extern u64 +slab_get_nalloc(struct slab * const slab); + + extern void +slab_destroy(struct slab * const slab); +// }}} slab + +// string {{{ +// XXX strdec_ and strhex_ functions does not append the trailing '\0' to 
the output string +// size of out should be >= 10 + extern void +strdec_32(void * const out, const u32 v); + +// size of out should be >= 20 + extern void +strdec_64(void * const out, const u64 v); + +// size of out should be >= 8 + extern void +strhex_32(void * const out, const u32 v); + +// size of out should be >= 16 + extern void +strhex_64(void * const out, const u64 v); + + extern u64 +a2u64(const void * const str); + + extern u32 +a2u32(const void * const str); + + extern s64 +a2s64(const void * const str); + + extern s32 +a2s32(const void * const str); + + extern void +str_print_hex(FILE * const out, const void * const data, const u32 len); + + extern void +str_print_dec(FILE * const out, const void * const data, const u32 len); + +// user should free returned ptr (and nothing else) after use + extern char ** +strtoks(const char * const str, const char * const delim); + + extern u32 +strtoks_count(const char * const * const toks); +// }}} string + +// qsbr {{{ +// QSBR vs EBR (Quiescent-State vs Epoch Based Reclaimation) +// QSBR: readers just use qsbr_update -> qsbr_update -> ... repeatedly +// EBR: readers use qsbr_update -> qsbr_park -> qsbr_resume -> qsbr_update -> ... 
+// The advantage of EBR is qsbr_park can happen much earlier than the next qsbr_update +// The disadvantage is the extra cost, a pair of park/resume is used in every iteration +struct qsbr; +struct qsbr_ref { +#ifdef QSBR_DEBUG + u64 debug[16]; +#endif + u64 opaque[3]; +}; + + extern struct qsbr * +qsbr_create(void); + +// every READER accessing the shared data must first register itself with the qsbr + extern bool +qsbr_register(struct qsbr * const q, struct qsbr_ref * const qref); + + extern void +qsbr_unregister(struct qsbr * const q, struct qsbr_ref * const qref); + +// For READER: mark the beginning of critical section; like rcu_read_lock() + extern void +qsbr_update(struct qsbr_ref * const qref, const u64 v); + +// temporarily stop access the shared data to avoid blocking writers +// READER can use qsbr_park (like rcu_read_unlock()) in conjunction with qsbr_update +// qsbr_park is roughly equivalent to qsbr_unregister, but faster + extern void +qsbr_park(struct qsbr_ref * const qref); + +// undo the effect of qsbr_park; must use it between qsbr_park and qsbr_update +// qsbr_resume is roughly equivalent to qsbr_register, but faster + extern void +qsbr_resume(struct qsbr_ref * const qref); + +// WRITER: wait until all the readers have announced v=target with qsbr_update + extern void +qsbr_wait(struct qsbr * const q, const u64 target); + + extern void +qsbr_destroy(struct qsbr * const q); +// }}} qsbr + +#ifdef __cplusplus +} +#endif +// vim:fdm=marker diff --git a/run/MassTrie-beta/wormhole/libwh.so b/run/MassTrie-beta/wormhole/libwh.so new file mode 100644 index 00000000..2ecd7e7e Binary files /dev/null and b/run/MassTrie-beta/wormhole/libwh.so differ diff --git a/run/MassTrie-beta/wormhole/stresstest.c b/run/MassTrie-beta/wormhole/stresstest.c new file mode 100644 index 00000000..93fb6f05 --- /dev/null +++ b/run/MassTrie-beta/wormhole/stresstest.c @@ -0,0 +1,354 @@ +/* + * Copyright (c) 2016-2020 Wu, Xingbo + * + * All rights reserved. 
No warranty, explicit or implicit, provided. + */ +#define _GNU_SOURCE + +#include "lib.h" +#include "kv.h" +#include "wh.h" +#include "ctypes.h" + +struct stress_info { + u64 nkeys; + u32 nloader; + u32 nunldr; + u32 nth; + u32 cpt; + bool has_iter; + + au64 seqno; + struct kv ** keys; + + const struct kvmap_api * api; + void * map; + au64 tot; + au64 wfail; + u64 endtime; +}; + + static void * +stress_load_worker(void * ptr) +{ + struct stress_info * const si = (typeof(si))ptr; + srandom_u64(time_nsec() * time_nsec() / time_nsec()); + void * const ref = kvmap_ref(si->api, si->map); + const u64 seq = atomic_fetch_add(&si->seqno, 1); + const u64 n0 = si->nkeys / si->nloader * seq; + const u64 nz = (seq == (si->nloader - 1)) ? si->nkeys : (si->nkeys / si->nloader * (seq + 1)); + //printf("load worker %lu %lu\n", n0, nz-1); + + char * buf = malloc(128); + debug_assert(buf); + u64 * buf64 = (typeof(buf64))buf; + for (u64 i = n0; i < nz; i++) { + const u32 klen = (u32)(random_u64() & 0x3flu) + 8; + const u32 klen8 = (klen + 7) >> 3; + /* + buf64[0] = bswap_64(i); // little endian + for (u64 j = 1; j < klen8; j++) + buf64[j] = random_u64(); + */ + const u64 rkey = random_u64(); + for (u32 j = 0; j < klen8; j++) + buf64[j] = (rkey >> j) & 0x0101010101010101lu; + + si->keys[i] = kv_create(buf, klen, buf, 8); + if (si->keys[i] == NULL) + exit(0); + kvmap_kv_put(si->api, ref, si->keys[i]); + } + free(buf); + kvmap_unref(si->api, ref); + return NULL; +} + + static void * +stress_unload_worker(void * ptr) +{ + struct stress_info * const si = (typeof(si))ptr; + const u64 seq = atomic_fetch_add(&si->seqno, 1); + const u64 n0 = si->nkeys / si->nunldr * seq; + const u64 nz = (seq == (si->nunldr - 1)) ? 
si->nkeys : (si->nkeys / si->nunldr * (seq + 1)); + + void * const ref = kvmap_ref(si->api, si->map); + for (u64 i = n0; i < nz; i++) { + kvmap_kv_del(si->api, ref, si->keys[i]); + free(si->keys[i]); + } + kvmap_unref(si->api, ref); + return NULL; +} + + static void +stress_inp_plus1(struct kv * const kv0, void * const priv) +{ + (void)priv; + if (kv0) { // can be NULL + u64 * ptr = kv_vptr(kv0); + ++(*ptr); + } +} + + static struct kv * +stress_merge_plus1(struct kv * const kv0, void * const priv) +{ + (void)priv; + if (kv0) { // can be NULL + u64 * ptr = kv_vptr(kv0); + ++(*ptr); + return kv0; + } else { + u64 * ptr = kv_vptr((struct kv *)priv); + *ptr = 0; + return priv; + } +} + + static void +stress_func(struct stress_info * const si) +{ + srandom_u64(time_nsec() * time_nsec() / time_nsec()); + const struct kvmap_api * const api = si->api; + void * ref = kvmap_ref(api, si->map); + struct kv * next = si->keys[random_u64() % si->nkeys]; + u64 rnext = random_u64() % si->nkeys; + struct kv * const tmp = malloc(128); + struct kref tmpkref; + struct kvref tmpkvref; + debug_assert(tmp); + void * iter = NULL; + if (api->iter_park) { + iter = api->iter_create(ref); + api->iter_park(iter); + } + u64 wfail1 = 0; + u64 nops = 0; +#define BATCHSIZE ((4096)) + do { + for (u64 i = 0; i < BATCHSIZE; i++) { + // reading kv keys leads to unnecessary cache misses + // use prefetch to minimize overhead on workload generation + struct kv * const key = next; + next = si->keys[rnext]; + cpu_prefetch0(next); + cpu_prefetch0(((u8 *)next) + 64); + rnext = random_u64() % si->nkeys; + cpu_prefetch0(&(si->keys[rnext])); + + // do probe + // customize your benchmark: do a mix of wh operations with switch-cases + const u64 r = random_u64() % 16; + switch (r) { + case 0: + kvmap_kv_probe(api, ref, key); + break; + case 1: + kvmap_kv_get(api, ref, key, tmp); + break; + case 2: + if (si->has_iter) { + if (api->iter_park == NULL) + iter = api->iter_create(ref); + debug_assert(iter); + 
kvmap_kv_iter_seek(api, iter, key); + api->iter_next(iter, tmp); + api->iter_peek(iter, tmp); + api->iter_skip(iter, 2); + // this is unsafe; only reader's lock is acquired + if (api->iter_inp) + api->iter_inp(iter, stress_inp_plus1, NULL); + // kref + if (api->iter_kref) + api->iter_kref(iter, &tmpkref); + // kvref + if (api->iter_kvref) + api->iter_kvref(iter, &tmpkvref); + // done + if (api->iter_park) + api->iter_park(iter); + else + api->iter_destroy(iter); + } + break; + case 3: + if (api->refpark) { + api->park(ref); + api->resume(ref); + } + break; + case 4: + if (api->iter_park) + api->iter_destroy(iter); + (void)kvmap_unref(api, ref); + ref = kvmap_ref(api, si->map); + if (api->iter_park) + iter = api->iter_create(ref); + break; + case 5: + if (api->merge) { + kv_dup2_key(key, tmp); + tmp->vlen = 8; + kvmap_kv_merge(api, ref, key, stress_merge_plus1, tmp); + } + break; + case 6: + if ((random_u64() & 0x7fffu) == 0x22 && api->delr) + (void)kvmap_kv_delr(api, ref, si->keys[rnext], (rnext + 10) < si->nkeys ? 
si->keys[rnext + 10] : NULL); + else + kvmap_kv_probe(api, ref, key); + break; + case 7: case 8: case 9: + (void)kvmap_kv_del(api, ref, key); + break; + case 10: case 11: + if (api->inpw) + kvmap_kv_inpw(api, ref, key, stress_inp_plus1, NULL); + break; + case 12: case 13: case 14: case 15: + if (!kvmap_kv_put(api, ref, key)) + wfail1++; + break; + default: + break; + } + } + nops += BATCHSIZE; + } while (time_nsec() < si->endtime); + si->wfail += wfail1; + if (api->iter_park) + api->iter_destroy(iter); + kvmap_unref(api, ref); + free(tmp); + si->tot += nops; +} + + static void +stress_co_worker(void) +{ + struct stress_info * const si = (typeof(si))co_priv(); + debug_assert(si); + stress_func(si); +} + + static void * +stress_thread_worker(void * ptr) +{ + struct stress_info * const si = (typeof(si))ptr; + if (si->cpt) { + u64 hostrsp = 0; + struct corr * crs[32]; + do { // to work smoothly with ALLOCFAIL + crs[0] = corr_create(16*PGSZ, stress_co_worker, si, &hostrsp); + } while (crs[0] == NULL); + for (u32 j = 1; j < si->cpt; j++) { + do { // to work smoothly with ALLOCFAIL + crs[j] = corr_link(16*PGSZ, stress_co_worker, si, crs[j-1]); + } while (crs[j] == NULL); + } + + corr_enter(crs[0]); + for (u32 j = 0; j < si->cpt; j++) + corr_destroy(crs[j]); + } else { + stress_func(si); + } + return NULL; +} + + int +main(int argc, char ** argv) +{ + struct stress_info si = {.nkeys = 10000, .nloader = 1, .nunldr = 1, .nth = 1, .cpt = 0}; + argc--; + argv++; + int n = -1; + if ((n = kvmap_api_helper(argc, argv, NULL, &si.api, &si.map)) < 0) { + fprintf(stderr, "usage: api ... 
[<#keys>=10000 [<#load-threads>=1 [<#unload-threads>=1 [<#threads>=1 [<#co-per-thread>=0 (disabled) [=1 [=1]]]]]]]\n"); + kvmap_api_helper_message(); + exit(0); + } + argc -= n; + argv += n; + + const bool has_point = si.api->get && si.api->probe && si.api->del && si.api->put; + if (!has_point) { + fprintf(stderr, "api not supported\n"); + exit(0); + } + if (!si.api->inpw) + fprintf(stderr, "api->inpw function not found: ignored\n"); + if (!si.api->merge) + fprintf(stderr, "api->merge function not found: ignored\n"); + if (!si.api->delr) + fprintf(stderr, "api->delr function not found: ignored\n"); + + si.has_iter = si.api->iter_create && si.api->iter_seek && si.api->iter_peek && + si.api->iter_skip && si.api->iter_next && si.api->iter_destroy; + if (!si.has_iter) + fprintf(stderr, "iter functions not complete: ignored\n"); + + // generate keys + if (argc >= 1) + si.nkeys = a2u64(argv[0]); + si.keys = malloc(sizeof(struct kv *) * si.nkeys); + debug_assert(si.keys); + if (argc >= 2) + si.nloader = a2u32(argv[1]); + if (argc >= 3) + si.nunldr = a2u32(argv[2]); + if (argc >= 4) + si.nth = a2u32(argv[3]); + if (argc >= 5) + si.cpt = a2u32(argv[4]); + if (si.cpt > 32) + si.cpt = 32; +#if !defined(CORR) + if (si.cpt > 1) + fprintf(stderr, TERMCLR(35) "CORR not enabled. Compile with -DCORR to enable it.\n" TERMCLR(0)); +#endif // CORR + const u64 nr = (argc >= 6) ? a2u64(argv[5]) : 1; // default 1 + const u64 ne = (argc >= 7) ? 
a2u64(argv[6]) : 1; // default 1 + printf("stresstest: nkeys %lu ldr %u uldr %u th %u cpt %u r %lu e %lu\n", + si.nkeys, si.nloader, si.nunldr, si.nth, si.cpt, nr, ne); + + for (u64 e = 0; e < ne; e++) { + si.seqno = 0; + const u64 dtl = thread_fork_join(si.nloader, (void *)stress_load_worker, false, &si); + printf("load th %u mops %.2lf\n", si.nloader, ((double)si.nkeys) * 1e3 / ((double)dtl)); + if (si.api->fprint) + si.api->fprint(si.map, stdout); + + debug_perf_switch(); + for (u64 r = 0; r < nr; r++) { + si.tot = 0; + si.wfail = 0; + si.endtime = time_nsec() + 2000000000lu; + const u64 dt = thread_fork_join(si.nth, (void *)stress_thread_worker, false, &si); + const double mops = ((double)si.tot) * 1e3 / ((double)dt); + char ts[64]; + time_stamp(ts, 64); + const long rss = process_get_rss(); + printf("%s e %lu r %lu th %u cpt %u tot %lu mops %.2lf rss %ldkB wfail %lu\n", + ts, e, r, si.nth, si.cpt, si.tot, mops, rss, si.wfail); + debug_perf_switch(); + } + si.seqno = 0; + if (si.nunldr == 0) { // use clean + const u64 t0 = time_nsec(); + si.api->clean(si.map); + const u64 dtu = time_diff_nsec(t0); + for (u64 i = 0; i < si.nkeys; i++) + free(si.keys[i]); + printf("clean mops %.2lf\n", ((double)si.nkeys) *1e3 / ((double)dtu)); + } else { + const u64 dtu = thread_fork_join(si.nunldr, (void *)stress_unload_worker, false, &si); + printf("unload th %u mops %.2lf\n", si.nunldr, ((double)si.nkeys) *1e3 / ((double)dtu)); + } + } + + free(si.keys); + si.api->destroy(si.map); + return 0; +} diff --git a/run/MassTrie-beta/wormhole/stresstest.out b/run/MassTrie-beta/wormhole/stresstest.out new file mode 100644 index 00000000..874d359c Binary files /dev/null and b/run/MassTrie-beta/wormhole/stresstest.out differ diff --git a/run/MassTrie-beta/wormhole/wh.c b/run/MassTrie-beta/wormhole/wh.c new file mode 100644 index 00000000..1d31e231 --- /dev/null +++ b/run/MassTrie-beta/wormhole/wh.c @@ -0,0 +1,3876 @@ +/* + * Copyright (c) 2016--2021 Wu, Xingbo + * + * All rights 
reserved. No warranty, explicit or implicit, provided. + */ +#define _GNU_SOURCE + +// headers {{{ +#include // static_assert +#include "lib.h" +#include "ctypes.h" +#include "kv.h" +#include "wh.h" +// }}} headers + +// def {{{ +#define WH_HMAPINIT_SIZE ((1u << 12)) // 10: 16KB/64KB 12: 64KB/256KB 14: 256KB/1MB +#define WH_SLABMETA_SIZE ((1lu << 21)) // 2MB + +#ifndef HEAPCHECKING +#define WH_SLABLEAF_SIZE ((1lu << 21)) // 2MB is ok +#else +#define WH_SLABLEAF_SIZE ((1lu << 21)) // 2MB for valgrind +#endif + +#define WH_KPN ((128u)) // keys per node; power of 2 +#define WH_HDIV (((1u << 16)) / WH_KPN) +#define WH_MID ((WH_KPN >> 1)) // ideal cut point for split, the closer the better +#define WH_BKT_NR ((8)) +#define WH_KPN2 ((WH_KPN + WH_KPN)) + +#define WH_KPN_MRG (((WH_KPN + WH_MID) >> 1 )) // 3/4 + +// FO is fixed at 256. Don't change it +#define WH_FO ((256u)) // index fan-out +// number of bits in a bitmap +#define WH_BMNR ((WH_FO >> 6)) // number of u64 +// }}} def + +// struct {{{ +struct wormmeta { + struct entry13 k13; // kref+klen + struct entry13 l13; // lmost+bitmin+bitmax + struct entry13 r13; // rmost+hash32_lo + struct entry13 p13; // lpath+hash32_hi + u64 bitmap[0]; // 4 if bitmin != bitmax +}; +static_assert(sizeof(struct wormmeta) == 32, "sizeof(wormmeta) != 32"); + +struct wormkv64 { u64 key; void * ptr; }; // u64 keys (whu64) + +struct wormleaf { + // first line + rwlock leaflock; + spinlock sortlock; // to protect the seemingly "read-only" iter_seek + au64 lv; // version (dont use the first u64) + struct wormleaf * prev; // prev leaf + struct wormleaf * next; // next leaf + struct kv * anchor; + + u32 nr_sorted; + u32 nr_keys; + u64 reserved[2]; + + struct entry13 hs[WH_KPN]; // sorted by hashes + u8 ss[WH_KPN]; // sorted by keys +}; + +struct wormslot { u16 t[WH_BKT_NR]; }; +static_assert(sizeof(struct wormslot) == 16, "sizeof(wormslot) != 16"); + +struct wormmbkt { struct wormmeta * e[WH_BKT_NR]; }; +static_assert(sizeof(struct wormmbkt) == 
64, "sizeof(wormmbkt) != 64"); + +struct wormhmap { + au64 hv; + struct wormslot * wmap; + struct wormmbkt * pmap; + u32 mask; + u32 maxplen; + u64 msize; + + struct slab * slab1; + struct slab * slab2; + struct kv * pbuf; +}; +static_assert(sizeof(struct wormhmap) == 64, "sizeof(wormhmap) != 64"); + +struct wormhole { + // 1 line + union { + au64 hmap_ptr; // safe + struct wormhmap * hmap; // unsafe + }; + u64 padding0[6]; + struct wormleaf * leaf0; // usually not used + // 1 line + struct kvmap_mm mm; + struct qsbr * qsbr; + struct slab * slab_leaf; + struct kv * pbuf; + u32 leaftype; + u32 padding1; + // 2 lines + struct wormhmap hmap2[2]; + // fifth line + rwlock metalock; + u32 padding2[15]; +}; + +struct wormhole_iter { + struct wormref * ref; // safe-iter only + struct wormhole * map; + struct wormleaf * leaf; + u32 is; +}; + +struct wormref { + struct wormhole * map; + struct qsbr_ref qref; +}; +// }}} struct + +// helpers {{{ + +// meta {{{ + static inline struct kv * +wormmeta_keyref_load(const struct wormmeta * const meta) +{ + return u64_to_ptr(meta->k13.e3); +} + + static inline u16 +wormmeta_klen_load(const struct wormmeta * const meta) +{ + return meta->k13.e1; +} + + static inline struct wormleaf * +wormmeta_lmost_load(const struct wormmeta * const meta) +{ + return u64_to_ptr(meta->l13.e3 & (~0x3flu)); +} + + static inline u32 +wormmeta_bitmin_load(const struct wormmeta * const meta) +{ + return (u32)(meta->l13.v64 & 0x1fflu); +} + + static inline u32 +wormmeta_bitmax_load(const struct wormmeta * const meta) +{ + return (u32)((meta->l13.v64 >> 9) & 0x1fflu); +} + + static inline u32 +wormmeta_hash32_load(const struct wormmeta * const meta) +{ + return ((u32)meta->r13.e1) | (((u32)meta->p13.e1) << 16); +} + + static inline struct wormleaf * +wormmeta_rmost_load(const struct wormmeta * const meta) +{ + return u64_to_ptr(meta->r13.e3); +} + + static inline struct wormleaf * +wormmeta_lpath_load(const struct wormmeta * const meta) +{ + return 
u64_to_ptr(meta->p13.e3); +} + +// internal + static inline void +wormmeta_lpath_store(struct wormmeta * const meta, struct wormleaf * const leaf) +{ + entry13_update_e3(&meta->p13, ptr_to_u64(leaf)); +} + +// also updates leaf_klen_eq and + static inline void +wormmeta_lmost_store(struct wormmeta * const meta, struct wormleaf * const leaf) +{ + const u64 minmax = meta->l13.v64 & 0x3fffflu; + meta->l13.v64 = (((u64)leaf) << 16) | minmax; + + const bool leaf_klen_eq = leaf->anchor->klen == wormmeta_klen_load(meta); + wormmeta_lpath_store(meta, leaf_klen_eq ? leaf : leaf->prev); +} + + static inline void +wormmeta_bitmin_store(struct wormmeta * const meta, const u32 bitmin) +{ + meta->l13.v64 = (meta->l13.v64 & (~0x1fflu)) | bitmin; +} + + static inline void +wormmeta_bitmax_store(struct wormmeta * const meta, const u32 bitmax) +{ + meta->l13.v64 = (meta->l13.v64 & (~0x3fe00lu)) | (bitmax << 9); +} + + static inline void +wormmeta_rmost_store(struct wormmeta * const meta, struct wormleaf * const leaf) +{ + entry13_update_e3(&meta->r13, ptr_to_u64(leaf)); +} + +// for wormmeta_alloc + static void +wormmeta_init(struct wormmeta * const meta, struct wormleaf * const lrmost, + struct kv * const keyref, const u32 alen, const u32 bit) +{ + keyref->refcnt++; // shared + + const u32 plen = keyref->klen; + debug_assert(plen <= UINT16_MAX); + meta->k13 = entry13((u16)plen, ptr_to_u64(keyref)); + meta->l13.v64 = (ptr_to_u64(lrmost) << 16) | (bit << 9) | bit; + + const u32 hash32 = keyref->hashlo; + meta->r13 = entry13((u16)hash32, ptr_to_u64(lrmost)); + + const bool leaf_klen_eq = alen == plen; + meta->p13 = entry13((u16)(hash32 >> 16), ptr_to_u64(leaf_klen_eq ? 
lrmost : lrmost->prev)); +} +// }}} meta + +// meta-bitmap {{{ + static inline bool +wormmeta_bm_test(const struct wormmeta * const meta, const u32 id) +{ + debug_assert(id < WH_FO); + const u32 bitmin = wormmeta_bitmin_load(meta); + const u32 bitmax = wormmeta_bitmax_load(meta); + if (bitmin == bitmax) { // half node + return bitmin == id; + } else { // full node + return (bool)((meta->bitmap[id >> 6u] >> (id & 0x3fu)) & 1lu); + } +} + +// meta must be a full node + static void +wormmeta_bm_set(struct wormmeta * const meta, const u32 id) +{ + // need to replace meta + u64 * const ptr = &(meta->bitmap[id >> 6u]); + const u64 bit = 1lu << (id & 0x3fu); + if ((*ptr) & bit) + return; + + (*ptr) |= bit; + + // min + if (id < wormmeta_bitmin_load(meta)) + wormmeta_bitmin_store(meta, id); + + // max + const u32 oldmax = wormmeta_bitmax_load(meta); + if (oldmax == WH_FO || id > oldmax) + wormmeta_bitmax_store(meta, id); +} + +// find the lowest bit > id0 +// return WH_FO if not found + static inline u32 +wormmeta_bm_gt(const struct wormmeta * const meta, const u32 id0) +{ + u32 ix = id0 >> 6; + u64 bits = meta->bitmap[ix] & ~((1lu << (id0 & 0x3fu)) - 1lu); + if (bits) + return (ix << 6) + (u32)__builtin_ctzl(bits); + + while (++ix < WH_BMNR) { + bits = meta->bitmap[ix]; + if (bits) + return (ix << 6) + (u32)__builtin_ctzl(bits); + } + + return WH_FO; +} + +// find the highest bit that is lower than the id0 +// return WH_FO if not found + static inline u32 +wormmeta_bm_lt(const struct wormmeta * const meta, const u32 id0) +{ + u32 ix = id0 >> 6; + u64 bits = meta->bitmap[ix] & ((1lu << (id0 & 0x3fu)) - 1lu); + if (bits) + return (ix << 6) + 63u - (u32)__builtin_clzl(bits); + + while (ix--) { + bits = meta->bitmap[ix]; + if (bits) + return (ix << 6) + 63u - (u32)__builtin_clzl(bits); + } + + return WH_FO; +} + +// meta must be a full node + static inline void +wormmeta_bm_clear(struct wormmeta * const meta, const u32 id) +{ + debug_assert(wormmeta_bitmin_load(meta) < 
wormmeta_bitmax_load(meta)); + meta->bitmap[id >> 6u] &= (~(1lu << (id & 0x3fu))); + + // min + if (id == wormmeta_bitmin_load(meta)) + wormmeta_bitmin_store(meta, wormmeta_bm_gt(meta, id)); + + // max + if (id == wormmeta_bitmax_load(meta)) + wormmeta_bitmax_store(meta, wormmeta_bm_lt(meta, id)); +} +// }}} meta-bitmap + +// key/prefix {{{ + static inline u16 +wormhole_pkey(const u32 hash32) +{ + const u16 pkey0 = ((u16)hash32) ^ ((u16)(hash32 >> 16)); + return pkey0 ? pkey0 : 1; +} + + static inline u32 +wormhole_bswap(const u32 hashlo) +{ + return __builtin_bswap32(hashlo); +} + + static inline bool +wormhole_key_meta_match(const struct kv * const key, const struct wormmeta * const meta) +{ + return (key->klen == wormmeta_klen_load(meta)) + && (!memcmp(key->kv, wormmeta_keyref_load(meta)->kv, key->klen)); +} + +// called by get_kref_slot + static inline bool +wormhole_kref_meta_match(const struct kref * const kref, + const struct wormmeta * const meta) +{ + return (kref->len == wormmeta_klen_load(meta)) + && (!memcmp(kref->ptr, wormmeta_keyref_load(meta)->kv, kref->len)); +} + +// called from meta_down ... 
get_kref1_slot +// will access rmost, prefetching is effective here + static inline bool +wormhole_kref1_meta_match(const struct kref * const kref, + const struct wormmeta * const meta, const u8 cid) +{ + const u8 * const keybuf = wormmeta_keyref_load(meta)->kv; + const u32 plen = kref->len; + return ((plen + 1) == wormmeta_klen_load(meta)) + && (!memcmp(kref->ptr, keybuf, plen)) + && (keybuf[plen] == cid); +} + +// warning: be careful with buffer overflow + static inline void +wormhole_prefix(struct kv * const pfx, const u32 klen) +{ + pfx->klen = klen; + kv_update_hash(pfx); +} + +// for split + static inline void +wormhole_prefix_inc1(struct kv * const pfx) +{ + pfx->hashlo = crc32c_u8(pfx->hashlo, pfx->kv[pfx->klen]); + pfx->klen++; +} + +// meta_lcp only + static inline void +wormhole_kref_inc(struct kref * const kref, const u32 len0, + const u32 crc, const u32 inc) +{ + kref->hash32 = crc32c_inc(kref->ptr + len0, inc, crc); + kref->len = len0 + inc; +} + +// meta_lcp only + static inline void +wormhole_kref_inc_123(struct kref * const kref, const u32 len0, + const u32 crc, const u32 inc) +{ + kref->hash32 = crc32c_inc_123(kref->ptr + len0, inc, crc); + kref->len = len0 + inc; +} +// }}} key/prefix + +// alloc {{{ + static inline struct kv * +wormhole_alloc_akey(const size_t klen) +{ +#ifdef ALLOCFAIL + if (alloc_fail()) + return NULL; +#endif + return malloc(sizeof(struct kv) + klen); +} + + static inline void +wormhole_free_akey(struct kv * const akey) +{ + free(akey); +} + + static inline struct kv * +wormhole_alloc_mkey(const size_t klen) +{ +#ifdef ALLOCFAIL + if (alloc_fail()) + return NULL; +#endif + return malloc(sizeof(struct kv) + klen); +} + + static inline void +wormhole_free_mkey(struct kv * const mkey) +{ + free(mkey); +} + + static struct wormleaf * +wormleaf_alloc(struct wormhole * const map, struct wormleaf * const prev, + struct wormleaf * const next, struct kv * const anchor) +{ + struct wormleaf * const leaf = 
slab_alloc_safe(map->slab_leaf); + if (leaf == NULL) + return NULL; + + rwlock_init(&(leaf->leaflock)); + spinlock_init(&(leaf->sortlock)); + + // keep the old version; new version will be assigned by split functions + //leaf->lv = 0; + + leaf->prev = prev; + leaf->next = next; + leaf->anchor = anchor; + + leaf->nr_keys = 0; + leaf->nr_sorted = 0; + + // hs requires zero init. + memset(leaf->hs, 0, sizeof(leaf->hs[0]) * WH_KPN); + return leaf; +} + + static void +wormleaf_free(struct slab * const slab, struct wormleaf * const leaf) +{ + debug_assert(leaf->leaflock.opaque == 0); + wormhole_free_akey(leaf->anchor); + slab_free_safe(slab, leaf); +} + + static struct wormmeta * +wormmeta_alloc(struct wormhmap * const hmap, struct wormleaf * const lrmost, + struct kv * const keyref, const u32 alen, const u32 bit) +{ + debug_assert(alen <= UINT16_MAX); + debug_assert(lrmost && keyref); + + struct wormmeta * const meta = slab_alloc_unsafe(hmap->slab1); + if (meta == NULL) + return NULL; + + wormmeta_init(meta, lrmost, keyref, alen, bit); + return meta; +} + + static inline bool +wormhole_slab_reserve(struct wormhole * const map, const u32 nr) +{ +#ifdef ALLOCFAIL + if (alloc_fail()) + return false; +#endif + for (u32 i = 0; i < 2; i++) { + if (!(map->hmap2[i].slab1 && map->hmap2[i].slab2)) + continue; + if (!slab_reserve_unsafe(map->hmap2[i].slab1, nr)) + return false; + if (!slab_reserve_unsafe(map->hmap2[i].slab2, nr)) + return false; + } + return true; +} + + static void +wormmeta_keyref_release(struct wormmeta * const meta) +{ + struct kv * const keyref = wormmeta_keyref_load(meta); + debug_assert(keyref->refcnt); + keyref->refcnt--; + if (keyref->refcnt == 0) + wormhole_free_mkey(keyref); +} + + static void +wormmeta_free(struct wormhmap * const hmap, struct wormmeta * const meta) +{ + wormmeta_keyref_release(meta); + slab_free_unsafe(hmap->slab1, meta); +} +// }}} alloc + +// lock {{{ + static void +wormleaf_lock_write(struct wormleaf * const leaf, struct wormref * 
const ref) +{ + if (!rwlock_trylock_write(&(leaf->leaflock))) { + wormhole_park(ref); + rwlock_lock_write(&(leaf->leaflock)); + wormhole_resume(ref); + } +} + + static void +wormleaf_lock_read(struct wormleaf * const leaf, struct wormref * const ref) +{ + if (!rwlock_trylock_read(&(leaf->leaflock))) { + wormhole_park(ref); + rwlock_lock_read(&(leaf->leaflock)); + wormhole_resume(ref); + } +} + + static void +wormleaf_unlock_write(struct wormleaf * const leaf) +{ + rwlock_unlock_write(&(leaf->leaflock)); +} + + static void +wormleaf_unlock_read(struct wormleaf * const leaf) +{ + rwlock_unlock_read(&(leaf->leaflock)); +} + + static void +wormhmap_lock(struct wormhole * const map, struct wormref * const ref) +{ + if (!rwlock_trylock_write(&(map->metalock))) { + wormhole_park(ref); + rwlock_lock_write(&(map->metalock)); + wormhole_resume(ref); + } +} + + static inline void +wormhmap_unlock(struct wormhole * const map) +{ + rwlock_unlock_write(&(map->metalock)); +} +// }}} lock + +// hmap-version {{{ + static inline struct wormhmap * +wormhmap_switch(struct wormhole * const map, struct wormhmap * const hmap) +{ + return (hmap == map->hmap2) ? 
(hmap + 1) : (hmap - 1); +} + + static inline struct wormhmap * +wormhmap_load(struct wormhole * const map) +{ + return (struct wormhmap *)atomic_load_explicit(&(map->hmap_ptr), MO_ACQUIRE); +} + + static inline void +wormhmap_store(struct wormhole * const map, struct wormhmap * const hmap) +{ + atomic_store_explicit(&(map->hmap_ptr), (u64)hmap, MO_RELEASE); +} + + static inline u64 +wormhmap_version_load(const struct wormhmap * const hmap) +{ + // no concurrent access + return atomic_load_explicit(&(hmap->hv), MO_ACQUIRE); +} + + static inline void +wormhmap_version_store(struct wormhmap * const hmap, const u64 v) +{ + atomic_store_explicit(&(hmap->hv), v, MO_RELEASE); +} + + static inline u64 +wormleaf_version_load(struct wormleaf * const leaf) +{ + return atomic_load_explicit(&(leaf->lv), MO_CONSUME); +} + + static inline void +wormleaf_version_store(struct wormleaf * const leaf, const u64 v) +{ + atomic_store_explicit(&(leaf->lv), v, MO_RELEASE); +} +// }}} hmap-version + +// co {{{ + static inline void +wormhmap_prefetch_pmap(const struct wormhmap * const hmap, const u32 idx) +{ +#if defined(CORR) + (void)hmap; + (void)idx; +#else + cpu_prefetch0(&(hmap->pmap[idx])); +#endif +} + + static inline struct wormmeta * +wormhmap_get_meta(const struct wormhmap * const hmap, const u32 mid, const u32 i) +{ + struct wormmeta * const meta = hmap->pmap[mid].e[i]; +#if defined(CORR) + cpu_prefetch0(meta); + corr_yield(); +#endif + return meta; +} + + static inline void +wormleaf_prefetch(struct wormleaf * const leaf, const u32 hashlo) +{ + const u32 i = wormhole_pkey(hashlo) / WH_HDIV; +#if defined(CORR) + cpu_prefetch0(leaf); + cpu_prefetch0(&(leaf->hs[i-4])); + cpu_prefetch0(&(leaf->hs[i+4])); + corr_yield(); +#else + cpu_prefetch0(&(leaf->hs[i])); +#endif +} + + static inline bool +wormhole_kref_kv_match(const struct kref * const key, const struct kv * const curr) +{ +#if defined(CORR) + const u8 * const ptr = (typeof(ptr))curr; + cpu_prefetch0(ptr); + cpu_prefetch0(ptr 
+ 64); + if (key->len > 56) { + cpu_prefetch0(ptr + 128); + cpu_prefetch0(ptr + 192); + } + corr_yield(); +#endif + return kref_kv_match(key, curr); +} + + static inline void +wormhole_qsbr_update_pause(struct wormref * const ref, const u64 v) +{ + qsbr_update(&ref->qref, v); +#if defined(CORR) + corr_yield(); +#endif +} +// }}} co + +// }}} helpers + +// hmap {{{ +// hmap is the MetaTrieHT of Wormhole + static bool +wormhmap_init(struct wormhmap * const hmap, struct kv * const pbuf) +{ + const u64 wsize = sizeof(hmap->wmap[0]) * WH_HMAPINIT_SIZE; + const u64 psize = sizeof(hmap->pmap[0]) * WH_HMAPINIT_SIZE; + u64 msize = wsize + psize; + u8 * const mem = pages_alloc_best(msize, true, &msize); + if (mem == NULL) + return false; + + hmap->pmap = (typeof(hmap->pmap))mem; + hmap->wmap = (typeof(hmap->wmap))(mem + psize); + hmap->msize = msize; + hmap->mask = WH_HMAPINIT_SIZE - 1; + wormhmap_version_store(hmap, 0); + hmap->maxplen = 0; + hmap->pbuf = pbuf; + return true; +} + + static inline void +wormhmap_deinit(struct wormhmap * const hmap) +{ + if (hmap->pmap) { + pages_unmap(hmap->pmap, hmap->msize); + hmap->pmap = NULL; + hmap->wmap = NULL; + } +} + + static inline m128 +wormhmap_zero(void) +{ +#if defined(__x86_64__) + return _mm_setzero_si128(); +#elif defined(__aarch64__) + return vdupq_n_u8(0); +#endif +} + + static inline m128 +wormhmap_m128_pkey(const u16 pkey) +{ +#if defined(__x86_64__) + return _mm_set1_epi16((short)pkey); +#elif defined(__aarch64__) + return vreinterpretq_u8_u16(vdupq_n_u16(pkey)); +#endif +} + + static inline u32 +wormhmap_match_mask(const struct wormslot * const s, const m128 skey) +{ +#if defined(__x86_64__) + const m128 sv = _mm_load_si128((const void *)s); + return (u32)_mm_movemask_epi8(_mm_cmpeq_epi16(skey, sv)); +#elif defined(__aarch64__) + const uint16x8_t sv = vld1q_u16((const u16 *)s); // load 16 bytes at s + const uint16x8_t cmp = vceqq_u16(vreinterpretq_u16_u8(skey), sv); // cmpeq => 0xffff or 0x0000 + static const 
uint16x8_t mbits = {0x3, 0xc, 0x30, 0xc0, 0x300, 0xc00, 0x3000, 0xc000}; + return (u32)vaddvq_u16(vandq_u16(cmp, mbits)); +#endif +} + + static inline bool +wormhmap_match_any(const struct wormslot * const s, const m128 skey) +{ +#if defined(__x86_64__) + return wormhmap_match_mask(s, skey) != 0; +#elif defined(__aarch64__) + const uint16x8_t sv = vld1q_u16((const u16 *)s); // load 16 bytes at s + const uint16x8_t cmp = vceqq_u16(vreinterpretq_u16_u8(skey), sv); // cmpeq => 0xffff or 0x0000 + return vaddvq_u32(vreinterpretq_u32_u16(cmp)) != 0; +#endif +} + +// meta_lcp only + static inline bool +wormhmap_peek(const struct wormhmap * const hmap, const u32 hash32) +{ + const m128 sk = wormhmap_m128_pkey(wormhole_pkey(hash32)); + const u32 midx = hash32 & hmap->mask; + const u32 midy = wormhole_bswap(hash32) & hmap->mask; + return wormhmap_match_any(&(hmap->wmap[midx]), sk) + || wormhmap_match_any(&(hmap->wmap[midy]), sk); +} + + static inline struct wormmeta * +wormhmap_get_slot(const struct wormhmap * const hmap, const u32 mid, + const m128 skey, const struct kv * const key) +{ + u32 mask = wormhmap_match_mask(&(hmap->wmap[mid]), skey); + while (mask) { + const u32 i2 = (u32)__builtin_ctz(mask); + struct wormmeta * const meta = wormhmap_get_meta(hmap, mid, i2>>1); + if (likely(wormhole_key_meta_match(key, meta))) + return meta; + mask ^= (3u << i2); + } + return NULL; +} + + static struct wormmeta * +wormhmap_get(const struct wormhmap * const hmap, const struct kv * const key) +{ + const u32 hash32 = key->hashlo; + const u32 midx = hash32 & hmap->mask; + wormhmap_prefetch_pmap(hmap, midx); + const u32 midy = wormhole_bswap(hash32) & hmap->mask; + wormhmap_prefetch_pmap(hmap, midy); + const m128 skey = wormhmap_m128_pkey(wormhole_pkey(hash32)); + + struct wormmeta * const r = wormhmap_get_slot(hmap, midx, skey, key); + if (r) + return r; + return wormhmap_get_slot(hmap, midy, skey, key); +} + +// for meta_lcp only + static inline struct wormmeta * 
+wormhmap_get_kref_slot(const struct wormhmap * const hmap, const u32 mid, + const m128 skey, const struct kref * const kref) +{ + u32 mask = wormhmap_match_mask(&(hmap->wmap[mid]), skey); + while (mask) { + const u32 i2 = (u32)__builtin_ctz(mask); + struct wormmeta * const meta = wormhmap_get_meta(hmap, mid, i2>>1); + if (likely(wormhole_kref_meta_match(kref, meta))) + return meta; + + mask ^= (3u << i2); + } + return NULL; +} + +// for meta_lcp only + static inline struct wormmeta * +wormhmap_get_kref(const struct wormhmap * const hmap, const struct kref * const kref) +{ + const u32 hash32 = kref->hash32; + const u32 midx = hash32 & hmap->mask; + wormhmap_prefetch_pmap(hmap, midx); + const u32 midy = wormhole_bswap(hash32) & hmap->mask; + wormhmap_prefetch_pmap(hmap, midy); + const m128 skey = wormhmap_m128_pkey(wormhole_pkey(hash32)); + + struct wormmeta * const r = wormhmap_get_kref_slot(hmap, midx, skey, kref); + if (r) + return r; + return wormhmap_get_kref_slot(hmap, midy, skey, kref); +} + +// for meta_down only + static inline struct wormmeta * +wormhmap_get_kref1_slot(const struct wormhmap * const hmap, const u32 mid, + const m128 skey, const struct kref * const kref, const u8 cid) +{ + u32 mask = wormhmap_match_mask(&(hmap->wmap[mid]), skey); + while (mask) { + const u32 i2 = (u32)__builtin_ctz(mask); + struct wormmeta * const meta = wormhmap_get_meta(hmap, mid, i2>>1); + //cpu_prefetch0(wormmeta_rmost_load(meta)); // will access + if (likely(wormhole_kref1_meta_match(kref, meta, cid))) + return meta; + + mask ^= (3u << i2); + } + return NULL; +} + +// for meta_down only + static inline struct wormmeta * +wormhmap_get_kref1(const struct wormhmap * const hmap, + const struct kref * const kref, const u8 cid) +{ + const u32 hash32 = crc32c_u8(kref->hash32, cid); + const u32 midx = hash32 & hmap->mask; + wormhmap_prefetch_pmap(hmap, midx); + const u32 midy = wormhole_bswap(hash32) & hmap->mask; + wormhmap_prefetch_pmap(hmap, midy); + const m128 skey = 
wormhmap_m128_pkey(wormhole_pkey(hash32)); + + struct wormmeta * const r = wormhmap_get_kref1_slot(hmap, midx, skey, kref, cid); + if (r) + return r; + return wormhmap_get_kref1_slot(hmap, midy, skey, kref, cid); +} + + static inline u32 +wormhmap_slot_count(const struct wormslot * const slot) +{ + const u32 mask = wormhmap_match_mask(slot, wormhmap_zero()); + return mask ? ((u32)__builtin_ctz(mask) >> 1) : 8; +} + + static inline void +wormhmap_squeeze(const struct wormhmap * const hmap) +{ + struct wormslot * const wmap = hmap->wmap; + struct wormmbkt * const pmap = hmap->pmap; + const u32 mask = hmap->mask; + const u64 nrs64 = ((u64)(hmap->mask)) + 1; // must use u64; u32 can overflow + for (u64 si64 = 0; si64 < nrs64; si64++) { // # of buckets + const u32 si = (u32)si64; + u32 ci = wormhmap_slot_count(&(wmap[si])); + for (u32 ei = ci - 1; ei < WH_BKT_NR; ei--) { + struct wormmeta * const meta = pmap[si].e[ei]; + const u32 sj = wormmeta_hash32_load(meta) & mask; // first hash + if (sj == si) + continue; + + // move + const u32 ej = wormhmap_slot_count(&(wmap[sj])); + if (ej < WH_BKT_NR) { // has space at home location + wmap[sj].t[ej] = wmap[si].t[ei]; + pmap[sj].e[ej] = pmap[si].e[ei]; + const u32 ni = ci - 1; + if (ei < ni) { + wmap[si].t[ei] = wmap[si].t[ni]; + pmap[si].e[ei] = pmap[si].e[ni]; + } + wmap[si].t[ni] = 0; + pmap[si].e[ni] = NULL; + ci--; + } + } + } +} + + static void +wormhmap_expand(struct wormhmap * const hmap) +{ + // sync expand + const u32 mask0 = hmap->mask; + if (mask0 == UINT32_MAX) + debug_die(); + const u32 nr0 = mask0 + 1; + const u32 mask1 = mask0 + nr0; + const u64 nr1 = ((u64)nr0) << 1; // must use u64; u32 can overflow + const u64 wsize = nr1 * sizeof(hmap->wmap[0]); + const u64 psize = nr1 * sizeof(hmap->pmap[0]); + u64 msize = wsize + psize; + u8 * mem = pages_alloc_best(msize, true, &msize); + if (mem == NULL) { + // We are at a very deep call stack from wormhole_put(). 
+ // Gracefully handling the failure requires lots of changes. + // Currently we simply wait for available memory + // TODO: gracefully return with insertion failure + char ts[64]; + time_stamp(ts, 64); + fprintf(stderr, "%s %s sleep-wait for memory allocation %lukB\n", + __func__, ts, msize >> 10); + do { + sleep(1); + mem = pages_alloc_best(msize, true, &msize); + } while (mem == NULL); + time_stamp(ts, 64); + fprintf(stderr, "%s %s memory allocation done\n", __func__, ts); + } + + struct wormhmap hmap1 = *hmap; + hmap1.pmap = (typeof(hmap1.pmap))mem; + hmap1.wmap = (typeof(hmap1.wmap))(mem + psize); + hmap1.msize = msize; + hmap1.mask = mask1; + + const struct wormslot * const wmap0 = hmap->wmap; + const struct wormmbkt * const pmap0 = hmap->pmap; + + for (u32 s = 0; s < nr0; s++) { + const struct wormmbkt * const bkt = &pmap0[s]; + for (u32 i = 0; (i < WH_BKT_NR) && bkt->e[i]; i++) { + const struct wormmeta * const meta = bkt->e[i]; + const u32 hash32 = wormmeta_hash32_load(meta); + const u32 idx0 = hash32 & mask0; + const u32 idx1 = ((idx0 == s) ? 
hash32 : wormhole_bswap(hash32)) & mask1; + + const u32 n = wormhmap_slot_count(&(hmap1.wmap[idx1])); + debug_assert(n < 8); + hmap1.wmap[idx1].t[n] = wmap0[s].t[i]; + hmap1.pmap[idx1].e[n] = bkt->e[i]; + } + } + pages_unmap(hmap->pmap, hmap->msize); + hmap->pmap = hmap1.pmap; + hmap->wmap = hmap1.wmap; + hmap->msize = hmap1.msize; + hmap->mask = hmap1.mask; + wormhmap_squeeze(hmap); +} + + static bool +wormhmap_cuckoo(struct wormhmap * const hmap, const u32 mid0, + struct wormmeta * const e0, const u16 s0, const u32 depth) +{ + const u32 ii = wormhmap_slot_count(&(hmap->wmap[mid0])); + if (ii < WH_BKT_NR) { + hmap->wmap[mid0].t[ii] = s0; + hmap->pmap[mid0].e[ii] = e0; + return true; + } else if (depth == 0) { + return false; + } + + // depth > 0 + struct wormmbkt * const bkt = &(hmap->pmap[mid0]); + u16 * const sv = &(hmap->wmap[mid0].t[0]); + for (u32 i = 0; i < WH_BKT_NR; i++) { + const struct wormmeta * const meta = bkt->e[i]; + debug_assert(meta); + const u32 hash32 = wormmeta_hash32_load(meta); + + const u32 midx = hash32 & hmap->mask; + const u32 midy = wormhole_bswap(hash32) & hmap->mask; + const u32 midt = (midx != mid0) ? midx : midy; + if (midt != mid0) { // possible + // no penalty if moving someone back to its 1st hash location + const u32 depth1 = (midt == midx) ? 
depth : (depth - 1); + if (wormhmap_cuckoo(hmap, midt, bkt->e[i], sv[i], depth1)) { + bkt->e[i] = e0; + sv[i] = s0; + return true; + } + } + } + return false; +} + + static void +wormhmap_set(struct wormhmap * const hmap, struct wormmeta * const meta) +{ + const u32 hash32 = wormmeta_hash32_load(meta); + const u32 midx = hash32 & hmap->mask; + wormhmap_prefetch_pmap(hmap, midx); + const u32 midy = wormhole_bswap(hash32) & hmap->mask; + wormhmap_prefetch_pmap(hmap, midy); + const u16 pkey = wormhole_pkey(hash32); + // insert with cuckoo + if (likely(wormhmap_cuckoo(hmap, midx, meta, pkey, 1))) + return; + if (wormhmap_cuckoo(hmap, midy, meta, pkey, 1)) + return; + if (wormhmap_cuckoo(hmap, midx, meta, pkey, 2)) + return; + + // expand + wormhmap_expand(hmap); + + wormhmap_set(hmap, meta); +} + + static bool +wormhmap_del_slot(struct wormhmap * const hmap, const u32 mid, + const struct wormmeta * const meta, const m128 skey) +{ + u32 mask = wormhmap_match_mask(&(hmap->wmap[mid]), skey); + while (mask) { + const u32 i2 = (u32)__builtin_ctz(mask); + const struct wormmeta * const meta1 = hmap->pmap[mid].e[i2>>1]; + if (likely(meta == meta1)) { + const u32 i = i2 >> 1; + const u32 j = wormhmap_slot_count(&(hmap->wmap[mid])) - 1; + hmap->wmap[mid].t[i] = hmap->wmap[mid].t[j]; + hmap->pmap[mid].e[i] = hmap->pmap[mid].e[j]; + hmap->wmap[mid].t[j] = 0; + hmap->pmap[mid].e[j] = NULL; + return true; + } + mask -= (3u << i2); + } + return false; +} + + static bool +wormhmap_del(struct wormhmap * const hmap, const struct wormmeta * const meta) +{ + const u32 hash32 = wormmeta_hash32_load(meta); + const u32 midx = hash32 & hmap->mask; + const u32 midy = wormhole_bswap(hash32) & hmap->mask; + const m128 skey = wormhmap_m128_pkey(wormhole_pkey(hash32)); + return wormhmap_del_slot(hmap, midx, meta, skey) + || wormhmap_del_slot(hmap, midy, meta, skey); +} + + static bool +wormhmap_replace_slot(struct wormhmap * const hmap, const u32 mid, + const struct wormmeta * const old, const 
m128 skey, struct wormmeta * const new) +{ + u32 mask = wormhmap_match_mask(&(hmap->wmap[mid]), skey); + while (mask) { + const u32 i2 = (u32)__builtin_ctz(mask); + struct wormmeta ** const pslot = &hmap->pmap[mid].e[i2>>1]; + if (likely(old == *pslot)) { + *pslot = new; + return true; + } + mask -= (3u << i2); + } + return false; +} + + static bool +wormhmap_replace(struct wormhmap * const hmap, const struct wormmeta * const old, struct wormmeta * const new) +{ + const u32 hash32 = wormmeta_hash32_load(old); + const u32 midx = hash32 & hmap->mask; + const u32 midy = wormhole_bswap(hash32) & hmap->mask; + const m128 skey = wormhmap_m128_pkey(wormhole_pkey(hash32)); + return wormhmap_replace_slot(hmap, midx, old, skey, new) + || wormhmap_replace_slot(hmap, midy, old, skey, new); +} +// }}} hmap + +// create {{{ +// it's unsafe + static bool +wormhole_create_leaf0(struct wormhole * const map) +{ + const bool sr = wormhole_slab_reserve(map, 1); + if (unlikely(!sr)) + return false; + + // create leaf of empty key + struct kv * const anchor = wormhole_alloc_akey(0); + if (anchor == NULL) + return false; + kv_dup2(kv_null(), anchor); + + struct wormleaf * const leaf0 = wormleaf_alloc(map, NULL, NULL, anchor); + if (leaf0 == NULL) { + wormhole_free_akey(anchor); + return false; + } + + struct kv * const mkey = wormhole_alloc_mkey(0); + if (mkey == NULL) { + wormleaf_free(map->slab_leaf, leaf0); + return false; + } + + wormhole_prefix(mkey, 0); + mkey->refcnt = 0; + // create meta of empty key + for (u32 i = 0; i < 2; i++) { + if (map->hmap2[i].slab1) { + struct wormmeta * const m0 = wormmeta_alloc(&map->hmap2[i], leaf0, mkey, 0, WH_FO); + debug_assert(m0); // already reserved enough + wormhmap_set(&(map->hmap2[i]), m0); + } + } + + map->leaf0 = leaf0; + return true; +} + + static struct wormhole * +wormhole_create_internal(const struct kvmap_mm * const mm, const u32 nh) +{ + struct wormhole * const map = yalloc(sizeof(*map)); + if (map == NULL) + return NULL; + 
memset(map, 0, sizeof(*map)); + // mm + map->mm = mm ? (*mm) : kvmap_mm_dup; + + // pbuf for meta-merge + map->pbuf = yalloc(1lu << 16); // 64kB + if (map->pbuf == NULL) + goto fail; + + // hmap + for (u32 i = 0; i < nh; i++) { + struct wormhmap * const hmap = &map->hmap2[i]; + if (!wormhmap_init(hmap, map->pbuf)) + goto fail; + + hmap->slab1 = slab_create(sizeof(struct wormmeta), WH_SLABMETA_SIZE); + if (hmap->slab1 == NULL) + goto fail; + + hmap->slab2 = slab_create(sizeof(struct wormmeta) + (sizeof(u64) * WH_BMNR), WH_SLABMETA_SIZE); + if (hmap->slab2 == NULL) + goto fail; + } + + // leaf slab + map->slab_leaf = slab_create(sizeof(struct wormleaf), WH_SLABLEAF_SIZE); + if (map->slab_leaf == NULL) + goto fail; + + // qsbr + map->qsbr = qsbr_create(); + if (map->qsbr == NULL) + goto fail; + + // leaf0 + if (!wormhole_create_leaf0(map)) + goto fail; + + rwlock_init(&(map->metalock)); + wormhmap_store(map, &map->hmap2[0]); + return map; + +fail: + if (map->qsbr) + qsbr_destroy(map->qsbr); + + if (map->slab_leaf) + slab_destroy(map->slab_leaf); + + for (u32 i = 0; i < nh; i++) { + struct wormhmap * const hmap = &map->hmap2[i]; + if (hmap->slab1) + slab_destroy(hmap->slab1); + if (hmap->slab2) + slab_destroy(hmap->slab2); + wormhmap_deinit(hmap); + } + + if (map->pbuf) + free(map->pbuf); + + free(map); + return NULL; +} + + struct wormhole * +wormhole_create(const struct kvmap_mm * const mm) +{ + return wormhole_create_internal(mm, 2); +} + + struct wormhole * +whunsafe_create(const struct kvmap_mm * const mm) +{ + return wormhole_create_internal(mm, 1); +} +// }}} create + +// jump {{{ + +// lcp {{{ +// search in the hash table for the Longest Prefix Match of the search key +// The corresponding wormmeta node is returned and the LPM is recorded in kref + static struct wormmeta * +wormhole_meta_lcp(const struct wormhmap * const hmap, struct kref * const kref, const u32 klen) +{ + // invariant: lo <= lcp < (lo + gd) + // ending condition: gd == 1 + u32 gd = 
(hmap->maxplen < klen ? hmap->maxplen : klen) + 1u; + u32 lo = 0; + u32 loh = KV_CRC32C_SEED; + +#define META_LCP_GAP_1 ((7u)) + while (META_LCP_GAP_1 < gd) { + const u32 inc = gd >> 3 << 2; // x4 + const u32 hash32 = crc32c_inc_x4(kref->ptr + lo, inc, loh); + if (wormhmap_peek(hmap, hash32)) { + loh = hash32; + lo += inc; + gd -= inc; + } else { + gd = inc; + } + } + + while (1 < gd) { + const u32 inc = gd >> 1; + const u32 hash32 = crc32c_inc_123(kref->ptr + lo, inc, loh); + if (wormhmap_peek(hmap, hash32)) { + loh = hash32; + lo += inc; + gd -= inc; + } else { + gd = inc; + } + } +#undef META_LCP_GAP_1 + + kref->hash32 = loh; + kref->len = lo; + struct wormmeta * ret = wormhmap_get_kref(hmap, kref); + if (likely(ret != NULL)) + return ret; + + gd = lo; + lo = 0; + loh = KV_CRC32C_SEED; + +#define META_LCP_GAP_2 ((5u)) + while (META_LCP_GAP_2 < gd) { + const u32 inc = (gd * 3) >> 2; + wormhole_kref_inc(kref, lo, loh, inc); + struct wormmeta * const tmp = wormhmap_get_kref(hmap, kref); + if (tmp) { + loh = kref->hash32; + lo += inc; + gd -= inc; + ret = tmp; + if (wormmeta_bm_test(tmp, kref->ptr[lo])) { + loh = crc32c_u8(loh, kref->ptr[lo]); + lo++; + gd--; + ret = NULL; + } else { + gd = 1; + break; + } + } else { + gd = inc; + } + } + + while (1 < gd) { + const u32 inc = (gd * 3) >> 2; + wormhole_kref_inc_123(kref, lo, loh, inc); + struct wormmeta * const tmp = wormhmap_get_kref(hmap, kref); + if (tmp) { + loh = kref->hash32; + lo += inc; + gd -= inc; + ret = tmp; + if (wormmeta_bm_test(tmp, kref->ptr[lo])) { + loh = crc32c_u8(loh, kref->ptr[lo]); + lo++; + gd--; + ret = NULL; + } else { + break; + } + } else { + gd = inc; + } + } +#undef META_LCP_GAP_2 + + if (kref->len != lo) { + kref->hash32 = loh; + kref->len = lo; + } + if (ret == NULL) + ret = wormhmap_get_kref(hmap, kref); + debug_assert(ret); + return ret; +} +// }}} lcp + +// down {{{ + static struct wormleaf * +wormhole_meta_down(const struct wormhmap * const hmap, const struct kref * const lcp, + 
const struct wormmeta * const meta, const u32 klen) +{ + if (likely(lcp->len < klen)) { // partial match + const u32 id0 = lcp->ptr[lcp->len]; + if (wormmeta_bitmin_load(meta) > id0) { // no left, don't care about right. + return wormmeta_lpath_load(meta); + } else if (wormmeta_bitmax_load(meta) < id0) { // has left sibling but no right sibling + return wormmeta_rmost_load(meta); + } else { // has both (expensive) + return wormmeta_rmost_load(wormhmap_get_kref1(hmap, lcp, (u8)wormmeta_bm_lt(meta, id0))); + } + } else { // lcp->len == klen + return wormmeta_lpath_load(meta); + } +} +// }}} down + +// jump-rw {{{ + static struct wormleaf * +wormhole_jump_leaf(const struct wormhmap * const hmap, const struct kref * const key) +{ + struct kref kref = {.ptr = key->ptr}; + debug_assert(kv_crc32c(key->ptr, key->len) == key->hash32); + + const struct wormmeta * const meta = wormhole_meta_lcp(hmap, &kref, key->len); + return wormhole_meta_down(hmap, &kref, meta, key->len); +} + + static struct wormleaf * +wormhole_jump_leaf_read(struct wormref * const ref, const struct kref * const key) +{ + struct wormhole * const map = ref->map; +#pragma nounroll + do { + const struct wormhmap * const hmap = wormhmap_load(map); + const u64 v = wormhmap_version_load(hmap); + qsbr_update(&ref->qref, v); + struct wormleaf * const leaf = wormhole_jump_leaf(hmap, key); + wormleaf_prefetch(leaf, key->hash32); +#pragma nounroll + do { + if (rwlock_trylock_read_nr(&(leaf->leaflock), 64)) { + if (wormleaf_version_load(leaf) <= v) + return leaf; + wormleaf_unlock_read(leaf); + break; + } + // v1 is loaded before lv; if lv <= v, can update v1 without redo jump + const u64 v1 = wormhmap_version_load(wormhmap_load(map)); + if (wormleaf_version_load(leaf) > v) + break; + wormhole_qsbr_update_pause(ref, v1); + } while (true); + } while (true); +} + + static struct wormleaf * +wormhole_jump_leaf_write(struct wormref * const ref, const struct kref * const key) +{ + struct wormhole * const map = ref->map; 
+#pragma nounroll + do { + const struct wormhmap * const hmap = wormhmap_load(map); + const u64 v = wormhmap_version_load(hmap); + qsbr_update(&ref->qref, v); + struct wormleaf * const leaf = wormhole_jump_leaf(hmap, key); + wormleaf_prefetch(leaf, key->hash32); +#pragma nounroll + do { + if (rwlock_trylock_write_nr(&(leaf->leaflock), 64)) { + if (wormleaf_version_load(leaf) <= v) + return leaf; + wormleaf_unlock_write(leaf); + break; + } + // v1 is loaded before lv; if lv <= v, can update v1 without redo jump + const u64 v1 = wormhmap_version_load(wormhmap_load(map)); + if (wormleaf_version_load(leaf) > v) + break; + wormhole_qsbr_update_pause(ref, v1); + } while (true); + } while (true); +} +// }}} jump-rw + +// }}} jump + +// leaf-read {{{ + static inline struct kv * +wormleaf_kv_at_ih(const struct wormleaf * const leaf, const u32 ih) +{ + return u64_to_ptr(leaf->hs[ih].e3); +} + + static inline struct kv * +wormleaf_kv_at_is(const struct wormleaf * const leaf, const u32 is) +{ + return u64_to_ptr(leaf->hs[leaf->ss[is]].e3); +} + + static inline void +wormleaf_prefetch_ss(const struct wormleaf * const leaf) +{ + for (u32 i = 0; i < WH_KPN; i+=64) + cpu_prefetch0(&leaf->ss[i]); +} + +// leaf must have been sorted +// return the key at [i] as if k1 has been inserted into leaf; i <= leaf->nr_sorted + static const struct kv * +wormleaf_kv_at_is1(const struct wormleaf * const leaf, const u32 i, const u32 is1, const struct kv * const k1) +{ + debug_assert(leaf->nr_keys == leaf->nr_sorted); + debug_assert(is1 <= leaf->nr_sorted); + if (i < is1) + return wormleaf_kv_at_is(leaf, i); + else if (i > is1) + return wormleaf_kv_at_is(leaf, i-1); + else // i == is1 + return k1; +} + + + +// fast point-lookup +// returns WH_KPN if not found + static u32 +wormleaf_match_hs(const struct wormleaf * const leaf, const struct kref * const key) +{ + const u16 pkey = wormhole_pkey(key->hash32); + const u32 i0 = pkey / WH_HDIV; + const struct entry13 * const hs = leaf->hs; + + if 
(hs[i0].e1 == pkey) { + struct kv * const curr = u64_to_ptr(hs[i0].e3); + if (likely(wormhole_kref_kv_match(key, curr))) + return i0; + } + if (hs[i0].e1 == 0) + return WH_KPN; + + // search left + u32 i = i0 - 1; + while (i < WH_KPN) { + if (hs[i].e1 == pkey) { + struct kv * const curr = u64_to_ptr(hs[i].e3); + if (likely(wormhole_kref_kv_match(key, curr))) + return i; + } else if (hs[i].e1 < pkey) { + break; + } + i--; + } + + // search right + i = i0 + 1; + while (i < WH_KPN) { + if (hs[i].e1 == pkey) { + struct kv * const curr = u64_to_ptr(hs[i].e3); + if (likely(wormhole_kref_kv_match(key, curr))) + return i; + } else if ((hs[i].e1 > pkey) || (hs[i].e1 == 0)) { + break; + } + i++; + } + + + // not found + return WH_KPN; +} + +// search for an existing entry in hs + static u32 +wormleaf_search_ih(const struct wormleaf * const leaf, const struct entry13 e) +{ + const u16 pkey = e.e1; + const u32 i0 = pkey / WH_HDIV; + const struct entry13 * const hs = leaf->hs; + const struct entry13 e0 = hs[i0]; + + if (e0.v64 == e.v64) + return i0; + + if (e0.e1 == 0) + return WH_KPN; + + // search left + u32 i = i0 - 1; + while (i < WH_KPN) { + const struct entry13 ei = hs[i]; + if (ei.v64 == e.v64) { + return i; + } else if (ei.e1 < pkey) { + break; + } + i--; + } + + // search right + i = i0 + 1; + while (i < WH_KPN) { + const struct entry13 ei = hs[i]; + if (ei.v64 == e.v64) { + return i; + } else if ((ei.e1 > pkey) || (ei.e1 == 0)) { + break; + } + i++; + } + + // not found + return WH_KPN; +} + +// search for an existing entry in ss + static u32 +wormleaf_search_is(const struct wormleaf * const leaf, const u8 ih) +{ +#if defined(__x86_64__) + // TODO: avx512 +#if defined(__AVX2__) + const m256 i1 = _mm256_set1_epi8((char)ih); + for (u32 i = 0; i < leaf->nr_keys; i += sizeof(m256)) { + const m256 sv = _mm256_load_si256((m256 *)(leaf->ss+i)); + const u32 mask = (u32)_mm256_movemask_epi8(_mm256_cmpeq_epi8(sv, i1)); + if (mask) + return i + (u32)__builtin_ctz(mask); + } 
+#else // SSE4.2 + const m128 i1 = _mm_set1_epi8((char)ih); + for (u32 i = 0; i < leaf->nr_keys; i += sizeof(m128)) { + const m128 sv = _mm_load_si128((m128 *)(leaf->ss+i)); + const u32 mask = (u32)_mm_movemask_epi8(_mm_cmpeq_epi8(sv, i1)); + if (mask) + return i + (u32)__builtin_ctz(mask); + } +#endif // __AVX2__ +#elif defined(__aarch64__) + static const m128 vtbl = {0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15}; + static const uint16x8_t mbits = {0x0101, 0x0202, 0x0404, 0x0808, 0x1010, 0x2020, 0x4040, 0x8080}; + const m128 i1 = vdupq_n_u8(ih); + for (u32 i = 0; i < leaf->nr_keys; i += sizeof(m128)) { + const m128 cmp = vceqq_u8(vld1q_u8(leaf->ss+i), i1); // cmpeq => 0xff or 0x00 + const m128 cmp1 = vqtbl1q_u8(cmp, vtbl); // reorder + const u32 mask = (u32)vaddvq_u16(vandq_u8(vreinterpretq_u16_u8(cmp1), mbits)); + if (mask) + return i + (u32)__builtin_ctz(mask); + } +#endif // __x86_64__ + debug_die(); +} + +// assumes there in no duplicated keys +// search the first key that is >= the given key +// return 0 .. 
nr_sorted + static u32 +wormleaf_search_ss(const struct wormleaf * const leaf, const struct kref * const key) +{ + u32 lo = 0; + u32 hi = leaf->nr_sorted; + while ((lo + 2) < hi) { + const u32 i = (lo + hi) >> 1; + const struct kv * const curr = wormleaf_kv_at_is(leaf, i); + cpu_prefetch0(curr); + cpu_prefetch0(leaf->hs + leaf->ss[(lo + i) >> 1]); + cpu_prefetch0(leaf->hs + leaf->ss[(i + 1 + hi) >> 1]); + const int cmp = kref_kv_compare(key, curr); + debug_assert(cmp != 0); + if (cmp < 0) + hi = i; + else + lo = i + 1; + } + + while (lo < hi) { + const u32 i = (lo + hi) >> 1; + const struct kv * const curr = wormleaf_kv_at_is(leaf, i); + const int cmp = kref_kv_compare(key, curr); + debug_assert(cmp != 0); + if (cmp < 0) + hi = i; + else + lo = i + 1; + } + return lo; +} + + static u32 +wormleaf_seek(const struct wormleaf * const leaf, const struct kref * const key) +{ + debug_assert(leaf->nr_sorted == leaf->nr_keys); + wormleaf_prefetch_ss(leaf); // effective for both hit and miss + const u32 ih = wormleaf_match_hs(leaf, key); + if (ih < WH_KPN) { // hit + return wormleaf_search_is(leaf, (u8)ih); + } else { // miss, binary search for gt + return wormleaf_search_ss(leaf, key); + } +} + +// same to search_sorted but the target is very likely beyond the end + static u32 +wormleaf_seek_end(const struct wormleaf * const leaf, const struct kref * const key) +{ + debug_assert(leaf->nr_keys == leaf->nr_sorted); + if (leaf->nr_sorted) { + const int cmp = kref_kv_compare(key, wormleaf_kv_at_is(leaf, leaf->nr_sorted-1)); + if (cmp > 0) + return leaf->nr_sorted; + else if (cmp == 0) + return leaf->nr_sorted - 1; + else + return wormleaf_seek(leaf, key); + } else { + return 0; + } +} +// }}} leaf-read + +// leaf-write {{{ + static void +wormleaf_sort_m2(struct wormleaf * const leaf, const u32 n1, const u32 n2) +{ + if (n1 == 0 || n2 == 0) + return; // no need to sort + + u8 * const ss = leaf->ss; + u8 et[WH_KPN/2]; // min(n1,n2) < KPN/2 + if (n1 <= n2) { // merge left + 
memcpy(et, &(ss[0]), sizeof(ss[0]) * n1); + u8 * eo = ss; + u8 * e1 = et; // size == n1 + u8 * e2 = &(ss[n1]); // size == n2 + const u8 * const z1 = e1 + n1; + const u8 * const z2 = e2 + n2; + while ((e1 < z1) && (e2 < z2)) { + const int cmp = kv_compare(wormleaf_kv_at_ih(leaf, *e1), wormleaf_kv_at_ih(leaf, *e2)); + if (cmp < 0) + *(eo++) = *(e1++); + else if (cmp > 0) + *(eo++) = *(e2++); + else + debug_die(); + + if (eo == e2) + break; // finish early + } + if (eo < e2) + memcpy(eo, e1, sizeof(*eo) * (size_t)(e2 - eo)); + } else { + memcpy(et, &(ss[n1]), sizeof(ss[0]) * n2); + u8 * eo = &(ss[n1 + n2 - 1]); // merge backwards + u8 * e1 = &(ss[n1 - 1]); // size == n1 + u8 * e2 = &(et[n2 - 1]); // size == n2 + const u8 * const z1 = e1 - n1; + const u8 * const z2 = e2 - n2; + while ((e1 > z1) && (e2 > z2)) { + const int cmp = kv_compare(wormleaf_kv_at_ih(leaf, *e1), wormleaf_kv_at_ih(leaf, *e2)); + if (cmp < 0) + *(eo--) = *(e2--); + else if (cmp > 0) + *(eo--) = *(e1--); + else + debug_die(); + + if (eo == e1) + break; + } + if (eo > e1) + memcpy(e1 + 1, et, sizeof(*eo) * (size_t)(eo - e1)); + } +} + +#if defined(__linux__) + static int +wormleaf_ss_cmp(const void * const p1, const void * const p2, void * priv) +{ + const struct kv * const k1 = wormleaf_kv_at_ih(priv, *(const u8 *)p1); + const struct kv * const k2 = wormleaf_kv_at_ih(priv, *(const u8 *)p2); + return kv_compare(k1, k2); +} +#else // (FreeBSD and APPLE only) + static int +wormleaf_ss_cmp(void * priv, const void * const p1, const void * const p2) +{ + const struct kv * const k1 = wormleaf_kv_at_ih(priv, *(const u8 *)p1); + const struct kv * const k2 = wormleaf_kv_at_ih(priv, *(const u8 *)p2); + return kv_compare(k1, k2); +} +#endif // __linux__ + + static inline void +wormleaf_sort_range(struct wormleaf * const leaf, const u32 i0, const u32 nr) +{ +#if defined(__linux__) + qsort_r(&(leaf->ss[i0]), nr, sizeof(leaf->ss[0]), wormleaf_ss_cmp, leaf); +#else // (FreeBSD and APPLE only) + 
qsort_r(&(leaf->ss[i0]), nr, sizeof(leaf->ss[0]), leaf, wormleaf_ss_cmp); +#endif // __linux__ +} + +// make sure all keys are sorted in a leaf node + static void +wormleaf_sync_sorted(struct wormleaf * const leaf) +{ + const u32 s = leaf->nr_sorted; + const u32 n = leaf->nr_keys; + if (s == n) + return; + + wormleaf_sort_range(leaf, s, n - s); + // merge-sort inplace + wormleaf_sort_m2(leaf, s, n - s); + leaf->nr_sorted = n; +} + +// shift a sequence of entries on hs and update the corresponding ss values + static void +wormleaf_shift_inc(struct wormleaf * const leaf, const u32 to, const u32 from, const u32 nr) +{ + debug_assert(to == (from+1)); + struct entry13 * const hs = leaf->hs; + memmove(&(hs[to]), &(hs[from]), sizeof(hs[0]) * nr); + +#if defined(__x86_64__) + // TODO: avx512 +#if defined(__AVX2__) + const m256 ones = _mm256_set1_epi8(1); + const m256 addx = _mm256_set1_epi8((char)(u8)(INT8_MAX + 1 - from - nr)); + const m256 cmpx = _mm256_set1_epi8((char)(u8)(INT8_MAX - nr)); + for (u32 i = 0; i < leaf->nr_keys; i += sizeof(m256)) { + const m256 sv = _mm256_load_si256((m256 *)(leaf->ss+i)); + const m256 add1 = _mm256_and_si256(_mm256_cmpgt_epi8(_mm256_add_epi8(sv, addx), cmpx), ones); + _mm256_store_si256((m256 *)(leaf->ss+i), _mm256_add_epi8(sv, add1)); + } +#else // SSE4.2 + const m128 ones = _mm_set1_epi8(1); + const m128 addx = _mm_set1_epi8((char)(u8)(INT8_MAX + 1 - from - nr)); + const m128 cmpx = _mm_set1_epi8((char)(u8)(INT8_MAX - nr)); + for (u32 i = 0; i < leaf->nr_keys; i += sizeof(m128)) { + const m128 sv = _mm_load_si128((m128 *)(leaf->ss+i)); + const m128 add1 = _mm_and_si128(_mm_cmpgt_epi8(_mm_add_epi8(sv, addx), cmpx), ones); + _mm_store_si128((m128 *)(leaf->ss+i), _mm_add_epi8(sv, add1)); + } +#endif // __AVX2__ +#elif defined(__aarch64__) // __x86_64__ + // aarch64 + const m128 subx = vdupq_n_u8((u8)from); + const m128 cmpx = vdupq_n_u8((u8)nr); + for (u32 i = 0; i < leaf->nr_keys; i += sizeof(m128)) { + const m128 sv = 
vld1q_u8(leaf->ss+i); + const m128 add1 = vshrq_n_u8(vcltq_u8(vsubq_u8(sv, subx), cmpx), 7); + vst1q_u8(leaf->ss+i, vaddq_u8(sv, add1)); + } +#endif // __x86_64__ +} + + static void +wormleaf_shift_dec(struct wormleaf * const leaf, const u32 to, const u32 from, const u32 nr) +{ + debug_assert(to == (from-1)); + struct entry13 * const hs = leaf->hs; + memmove(&(hs[to]), &(hs[from]), sizeof(hs[0]) * nr); + +#if defined(__x86_64__) + // TODO: avx512 +#if defined(__AVX2__) + const m256 ones = _mm256_set1_epi8(1); + const m256 addx = _mm256_set1_epi8((char)(u8)(INT8_MAX + 1 - from - nr)); + const m256 cmpx = _mm256_set1_epi8((char)(u8)(INT8_MAX - nr)); + for (u32 i = 0; i < leaf->nr_keys; i += sizeof(m256)) { + const m256 sv = _mm256_load_si256((m256 *)(leaf->ss+i)); + const m256 add1 = _mm256_and_si256(_mm256_cmpgt_epi8(_mm256_add_epi8(sv, addx), cmpx), ones); + _mm256_store_si256((m256 *)(leaf->ss+i), _mm256_sub_epi8(sv, add1)); + } +#else // SSE4.2 + const m128 ones = _mm_set1_epi8(1); + const m128 addx = _mm_set1_epi8((char)(u8)(INT8_MAX + 1 - from - nr)); + const m128 cmpx = _mm_set1_epi8((char)(u8)(INT8_MAX - nr)); + for (u32 i = 0; i < leaf->nr_keys; i += 16) { + const m128 sv = _mm_load_si128((m128 *)(leaf->ss+i)); + const m128 add1 = _mm_and_si128(_mm_cmpgt_epi8(_mm_add_epi8(sv, addx), cmpx), ones); + _mm_store_si128((m128 *)(leaf->ss+i), _mm_sub_epi8(sv, add1)); + } +#endif // __AVX2__ +#elif defined(__aarch64__) // __x86_64__ + // aarch64 + const m128 subx = vdupq_n_u8((u8)from); + const m128 cmpx = vdupq_n_u8((u8)nr); + for (u32 i = 0; i < leaf->nr_keys; i += sizeof(m128)) { + const m128 sv = vld1q_u8(leaf->ss+i); + const m128 add1 = vshrq_n_u8(vcltq_u8(vsubq_u8(sv, subx), cmpx), 7); + vst1q_u8(leaf->ss+i, vsubq_u8(sv, add1)); + } +#endif // __x86_64__ +} + +// insert hs and also shift ss + static u32 +wormleaf_insert_hs(struct wormleaf * const leaf, const struct entry13 e) +{ + struct entry13 * const hs = leaf->hs; + const u16 pkey = e.e1; + const u32 i0 = 
pkey / WH_HDIV; + if (hs[i0].e1 == 0) { // insert + hs[i0] = e; + return i0; + } + + // find left-most insertion point + u32 i = i0; + while (i && hs[i-1].e1 && (hs[i-1].e1 >= pkey)) + i--; + while ((i < WH_KPN) && hs[i].e1 && (hs[i].e1 < pkey)) // stop at >= or empty + i++; + const u32 il = --i; // i in [0, KPN] + + // find left empty slot + if (i > (i0 - 1)) + i = i0 - 1; + while ((i < WH_KPN) && hs[i].e1) + i--; + const u32 el = i; // el < i0 or el is invalid (>= KPN) + + // find right-most insertion point. + i = il + 1; + while ((i < WH_KPN) && hs[i].e1 && (hs[i].e1 == pkey)) + i++; + const u32 ir = i; // ir >= il, in [0, KPN] + + // find right empty slot + if (i < (i0 + 1)) + i = i0 + 1; + while ((i < WH_KPN) && hs[i].e1) + i++; + const u32 er = i; // er > i0 or el is invalid (>= KPN) + + // el <= il < ir <= er (if < WH_KPN) + const u32 dl = (el < WH_KPN) ? (il - el) : WH_KPN; + const u32 dr = (er < WH_KPN) ? (er - ir) : WH_KPN; + if (dl <= dr) { // push left + debug_assert(dl < WH_KPN); + if (dl) + wormleaf_shift_dec(leaf, el, el+1, dl); + hs[il] = e; + return il; + } else { + debug_assert(dr < WH_KPN); + if (dr) + wormleaf_shift_inc(leaf, ir+1, ir, dr); + hs[ir] = e; + return ir; + } +} + + static void +wormleaf_insert_e13(struct wormleaf * const leaf, const struct entry13 e) +{ + // insert to hs and fix all existing is + const u32 ih = wormleaf_insert_hs(leaf, e); + debug_assert(ih < WH_KPN); + // append the new is + leaf->ss[leaf->nr_keys] = (u8)ih; + // fix nr + leaf->nr_keys++; +} + + static void +wormleaf_insert(struct wormleaf * const leaf, const struct kv * const new) +{ + debug_assert(new->hash == kv_crc32c_extend(kv_crc32c(new->kv, new->klen))); + debug_assert(leaf->nr_keys < WH_KPN); + + // insert + const struct entry13 e = entry13(wormhole_pkey(new->hashlo), ptr_to_u64(new)); + const u32 nr0 = leaf->nr_keys; + wormleaf_insert_e13(leaf, e); + + // optimize for seq insertion + if (nr0 == leaf->nr_sorted) { + if (nr0) { + const struct kv * const kvn 
= wormleaf_kv_at_is(leaf, nr0 - 1); + if (kv_compare(new, kvn) > 0) + leaf->nr_sorted = nr0 + 1; + } else { + leaf->nr_sorted = 1; + } + } +} + + static void +wormleaf_pull_ih(struct wormleaf * const leaf, const u32 ih) +{ + struct entry13 * const hs = leaf->hs; + // try left + u32 i = ih - 1; + while ((i < WH_KPN) && hs[i].e1 && ((hs[i].e1 / WH_HDIV) > i)) + i--; + + if ((++i) < ih) { + wormleaf_shift_inc(leaf, i+1, i, ih - i); + leaf->hs[i].v64 = 0; + return; + } + + // try right + i = ih + 1; + while ((i < WH_KPN) && hs[i].e1 && ((hs[i].e1 / WH_HDIV) < i)) + i++; + + if ((--i) > ih) { + wormleaf_shift_dec(leaf, ih, ih+1, i - ih); + hs[i].v64 = 0; + } + // hs[ih] may still be 0 +} + +// internal only + static struct kv * +wormleaf_remove(struct wormleaf * const leaf, const u32 ih, const u32 is) +{ + // ss + leaf->ss[is] = leaf->ss[leaf->nr_keys - 1]; + if (leaf->nr_sorted > is) + leaf->nr_sorted = is; + + // ret + struct kv * const victim = wormleaf_kv_at_ih(leaf, ih); + // hs + leaf->hs[ih].v64 = 0; + leaf->nr_keys--; + // use magnet + wormleaf_pull_ih(leaf, ih); + return victim; +} + +// remove key from leaf but do not call free + static struct kv * +wormleaf_remove_ih(struct wormleaf * const leaf, const u32 ih) +{ + // remove from ss + const u32 is = wormleaf_search_is(leaf, (u8)ih); + debug_assert(is < leaf->nr_keys); + return wormleaf_remove(leaf, ih, is); +} + + static struct kv * +wormleaf_remove_is(struct wormleaf * const leaf, const u32 is) +{ + return wormleaf_remove(leaf, leaf->ss[is], is); +} + +// for delr (delete-range) + static void +wormleaf_delete_range(struct wormhole * const map, struct wormleaf * const leaf, + const u32 i0, const u32 end) +{ + debug_assert(leaf->nr_keys == leaf->nr_sorted); + for (u32 i = end; i > i0; i--) { + const u32 ir = i - 1; + struct kv * const victim = wormleaf_remove_is(leaf, ir); + map->mm.free(victim, map->mm.priv); + } +} + +// return the old kv; the caller should free the old kv + static struct kv * 
+wormleaf_update(struct wormleaf * const leaf, const u32 ih, const struct kv * const new) +{ + debug_assert(new->hash == kv_crc32c_extend(kv_crc32c(new->kv, new->klen))); + // search entry in ss (is) + struct kv * const old = wormleaf_kv_at_ih(leaf, ih); + debug_assert(old); + + entry13_update_e3(&leaf->hs[ih], (u64)new); + return old; +} +// }}} leaf-write + +// leaf-split {{{ +// It only works correctly in cut_search +// quickly tell if a cut between k1 and k2 can achieve a specific anchor-key length + static bool +wormhole_split_cut_alen_check(const u32 alen, const struct kv * const k1, const struct kv * const k2) +{ + debug_assert(k2->klen >= alen); + return (k1->klen < alen) || (k1->kv[alen - 1] != k2->kv[alen - 1]); +} + +// return the number of keys that should go to leaf1 +// assert(r > 0 && r <= nr_keys) +// (1) r < is1, anchor key is ss[r-1]:ss[r] +// (2) r == is1: anchor key is ss[r-1]:new +// (3) r == is1+1: anchor key is new:ss[r-1] (ss[r-1] is the ss[r] on the logically sorted array) +// (4) r > is1+1: anchor key is ss[r-2]:ss[r-1] (ss[r-2] is the [r-1] on the logically sorted array) +// edge cases: +// (case 2) is1 == nr_keys: r = nr_keys; ss[r-1]:new +// (case 3) is1 == 0, r == 1; new:ss[0] +// return 1..WH_KPN + static u32 +wormhole_split_cut_search1(struct wormleaf * const leaf, u32 l, u32 h, const u32 is1, const struct kv * const new) +{ + debug_assert(leaf->nr_keys == leaf->nr_sorted); + debug_assert(leaf->nr_keys); + debug_assert(l < h && h <= leaf->nr_sorted); + + const struct kv * const kl0 = wormleaf_kv_at_is1(leaf, l, is1, new); + const struct kv * const kh0 = wormleaf_kv_at_is1(leaf, h, is1, new); + const u32 alen = kv_key_lcp(kl0, kh0) + 1; + if (unlikely(alen > UINT16_MAX)) + return WH_KPN2; + + const u32 target = leaf->next ? 
WH_MID : WH_KPN_MRG; + while ((l + 1) < h) { + const u32 m = (l + h + 1) >> 1; + if (m <= target) { // try right + const struct kv * const k1 = wormleaf_kv_at_is1(leaf, m, is1, new); + const struct kv * const k2 = wormleaf_kv_at_is1(leaf, h, is1, new); + if (wormhole_split_cut_alen_check(alen, k1, k2)) + l = m; + else + h = m; + } else { // try left + const struct kv * const k1 = wormleaf_kv_at_is1(leaf, l, is1, new); + const struct kv * const k2 = wormleaf_kv_at_is1(leaf, m, is1, new); + if (wormhole_split_cut_alen_check(alen, k1, k2)) + h = m; + else + l = m; + } + } + return h; +} + + static void +wormhole_split_leaf_move1(struct wormleaf * const leaf1, struct wormleaf * const leaf2, + const u32 cut, const u32 is1, const struct kv * const new) +{ + const u32 nr_keys = leaf1->nr_keys; + const struct entry13 e1 = entry13(wormhole_pkey(new->hashlo), ptr_to_u64(new)); + struct entry13 es[WH_KPN]; + + if (cut <= is1) { // e1 goes to leaf2 + // leaf2 + for (u32 i = cut; i < is1; i++) + wormleaf_insert_e13(leaf2, leaf1->hs[leaf1->ss[i]]); + + wormleaf_insert_e13(leaf2, e1); + + for (u32 i = is1; i < nr_keys; i++) + wormleaf_insert_e13(leaf2, leaf1->hs[leaf1->ss[i]]); + + // leaf1 + for (u32 i = 0; i < cut; i++) + es[i] = leaf1->hs[leaf1->ss[i]]; + + } else { // e1 goes to leaf1 + // leaf2 + for (u32 i = cut - 1; i < nr_keys; i++) + wormleaf_insert_e13(leaf2, leaf1->hs[leaf1->ss[i]]); + + // leaf1 + for (u32 i = 0; i < is1; i++) + es[i] = leaf1->hs[leaf1->ss[i]]; + + es[is1] = e1; + + for (u32 i = is1 + 1; i < cut; i++) + es[i] = leaf1->hs[leaf1->ss[i - 1]]; + } + + leaf2->nr_sorted = leaf2->nr_keys; + + memset(leaf1->hs, 0, sizeof(leaf1->hs[0]) * WH_KPN); + leaf1->nr_keys = 0; + for (u32 i = 0; i < cut; i++) + wormleaf_insert_e13(leaf1, es[i]); + leaf1->nr_sorted = cut; + debug_assert((leaf1->nr_sorted + leaf2->nr_sorted) == (nr_keys + 1)); +} + +// create an anchor for leaf-split + static struct kv * +wormhole_split_alloc_anchor(const struct kv * const key1, const 
struct kv * const key2) +{ + const u32 alen = kv_key_lcp(key1, key2) + 1; + debug_assert(alen <= key2->klen); + + struct kv * const anchor = wormhole_alloc_akey(alen); + if (anchor) + kv_refill(anchor, key2->kv, alen, NULL, 0); + return anchor; +} + +// leaf1 is locked +// split leaf1 into leaf1+leaf2; insert new into leaf1 or leaf2, return leaf2 + static struct wormleaf * +wormhole_split_leaf(struct wormhole * const map, struct wormleaf * const leaf1, struct kv * const new) +{ + wormleaf_sync_sorted(leaf1); + struct kref kref_new; + kref_ref_kv(&kref_new, new); + const u32 is1 = wormleaf_search_ss(leaf1, &kref_new); // new should be inserted at [is1] + const u32 cut = wormhole_split_cut_search1(leaf1, 0, leaf1->nr_keys, is1, new); + if (unlikely(cut == WH_KPN2)) + return NULL; + + // anchor of leaf2 + debug_assert(cut && (cut <= leaf1->nr_keys)); + const struct kv * const key1 = wormleaf_kv_at_is1(leaf1, cut - 1, is1, new); + const struct kv * const key2 = wormleaf_kv_at_is1(leaf1, cut, is1, new); + struct kv * const anchor2 = wormhole_split_alloc_anchor(key1, key2); + if (unlikely(anchor2 == NULL)) // anchor alloc failed + return NULL; + + // create leaf2 with anchor2 + struct wormleaf * const leaf2 = wormleaf_alloc(map, leaf1, leaf1->next, anchor2); + if (unlikely(leaf2 == NULL)) { + wormhole_free_akey(anchor2); + return NULL; + } + + // split_hmap will unlock the leaf nodes; must move now + wormhole_split_leaf_move1(leaf1, leaf2, cut, is1, new); + // leaf1 and leaf2 should be sorted after split + debug_assert(leaf1->nr_keys == leaf1->nr_sorted); + debug_assert(leaf2->nr_keys == leaf2->nr_sorted); + + return leaf2; +} +// }}} leaf-split + +// leaf-merge {{{ +// MERGE is the only operation that deletes a leaf node (leaf2). +// It ALWAYS merges the right node into the left node even if the left is empty. +// This requires both of their writer locks to be acquired. +// This allows iterators to safely probe the next node (but not backwards). 
+// In other words, if either the reader or the writer lock of node X has been acquired:
+// X->next (the pointer) cannot be changed by any other thread.
+// X->next cannot be deleted.
+// But the content in X->next can still be changed.
+// Move every entry of leaf2 (the right node) into leaf1 (the left node).
+// Caller holds both writer locks; the combined count must fit in one node (asserted).
+  static bool
+wormleaf_merge(struct wormleaf * const leaf1, struct wormleaf * const leaf2)
+{
+  debug_assert((leaf1->nr_keys + leaf2->nr_keys) <= WH_KPN);
+  // remember whether leaf1 was fully sorted before appending leaf2's entries
+  const bool leaf1_sorted = leaf1->nr_keys == leaf1->nr_sorted;
+
+  // insert leaf2's entries in its sorted (ss) order
+  for (u32 i = 0; i < leaf2->nr_keys; i++)
+    wormleaf_insert_e13(leaf1, leaf2->hs[leaf2->ss[i]]);
+  // leaf2 is the right neighbor, so its sorted run can extend leaf1's sorted
+  // prefix, but only if leaf1 was fully sorted to begin with
+  if (leaf1_sorted)
+    leaf1->nr_sorted += leaf2->nr_sorted;
+  // always true in the current implementation; callers still check the result
+  // (see wormleaf_split_undo / wormhole_meta_leaf_merge)
+  return true;
+}
+
+// for undoing insertion under split_meta failure; leaf2 is still local
+// remove the new key; merge keys in leaf2 into leaf1; free leaf2
+  static void
+wormleaf_split_undo(struct wormhole * const map, struct wormleaf * const leaf1,
+    struct wormleaf * const leaf2, struct kv * const new)
+{
+  if (new) {
+    // the new key landed in either leaf1 or leaf2 during the split; locate it
+    // by its hash entry and remove it before merging the two nodes back
+    const struct entry13 e = entry13(wormhole_pkey(new->hashlo), ptr_to_u64(new));
+    const u32 im1 = wormleaf_search_ih(leaf1, e);
+    if (im1 < WH_KPN) {
+      (void)wormleaf_remove_ih(leaf1, im1);
+    } else { // not found in leaf1; search leaf2
+      const u32 im2 = wormleaf_search_ih(leaf2, e);
+      debug_assert(im2 < WH_KPN);
+      (void)wormleaf_remove_ih(leaf2, im2);
+    }
+  }
+  // this merge must succeed
+  if (!wormleaf_merge(leaf1, leaf2))
+    debug_die();
+  // Keep this to avoid triggering false alarm in wormleaf_free
+  leaf2->leaflock.opaque = 0;
+  wormleaf_free(map->slab_leaf, leaf2);
+}
+// }}} leaf-merge
+
+// get/probe {{{
+// Point lookup: jump to the target leaf under its reader lock, match the key
+// by hash, and copy the value out through the mm.out callback (continues below).
+  struct kv *
+wormhole_get(struct wormref * const ref, const struct kref * const key, struct kv * const out)
+{
+  struct wormleaf * const leaf = wormhole_jump_leaf_read(ref, key);
+  const u32 i = wormleaf_match_hs(leaf, key);
+  struct kv * const tmp = (i < WH_KPN) ?
+ref->map->mm.out(wormleaf_kv_at_ih(leaf, i), out) : NULL;
+  wormleaf_unlock_read(leaf);
+  return tmp;
+}
+
+// QSBR-safe wrapper: leave the parked state, perform the get, park again.
+  struct kv *
+whsafe_get(struct wormref * const ref, const struct kref * const key, struct kv * const out)
+{
+  wormhole_resume(ref);
+  struct kv * const ret = wormhole_get(ref, key, out);
+  wormhole_park(ref);
+  return ret;
+}
+
+// Single-threaded get: no reader lock is taken.
+  struct kv *
+whunsafe_get(struct wormhole * const map, const struct kref * const key, struct kv * const out)
+{
+  struct wormleaf * const leaf = wormhole_jump_leaf(map->hmap, key);
+  const u32 i = wormleaf_match_hs(leaf, key);
+  return (i < WH_KPN) ? map->mm.out(wormleaf_kv_at_ih(leaf, i), out) : NULL;
+}
+
+// Existence check: same jump+match as get, but no value is copied out.
+  bool
+wormhole_probe(struct wormref * const ref, const struct kref * const key)
+{
+  struct wormleaf * const leaf = wormhole_jump_leaf_read(ref, key);
+  const u32 i = wormleaf_match_hs(leaf, key);
+  wormleaf_unlock_read(leaf);
+  return i < WH_KPN;
+}
+
+// QSBR-safe wrapper around wormhole_probe (resume/park).
+  bool
+whsafe_probe(struct wormref * const ref, const struct kref * const key)
+{
+  wormhole_resume(ref);
+  const bool r = wormhole_probe(ref, key);
+  wormhole_park(ref);
+  return r;
+}
+
+// Single-threaded probe: no locking.
+  bool
+whunsafe_probe(struct wormhole * const map, const struct kref * const key)
+{
+  struct wormleaf * const leaf = wormhole_jump_leaf(map->hmap, key);
+  return wormleaf_match_hs(leaf, key) < WH_KPN;
+}
+// }}} get/probe
+
+// meta-split {{{
+// duplicate from meta1; only has one bit but will soon add a new bit
+// Copy the compact (slab1) node into a full-size (slab2) node carrying a real
+// bitmap, set the single bit recorded by bitmin, and swap the two nodes in the
+// hash map; the old node is then released (continues below).
+  static struct wormmeta *
+wormmeta_expand(struct wormhmap * const hmap, struct wormmeta * const meta1)
+{
+  struct wormmeta * const meta2 = slab_alloc_unsafe(hmap->slab2);
+  if (meta2 == NULL)
+    return NULL;
+
+  memcpy(meta2, meta1, sizeof(*meta1));
+  for (u32 i = 0; i < WH_BMNR; i++)
+    meta2->bitmap[i] = 0;
+  const u32 bitmin = wormmeta_bitmin_load(meta1);
+  // a compact node holds exactly one bit, so bitmin == bitmax
+  debug_assert(bitmin == wormmeta_bitmax_load(meta1));
+  debug_assert(bitmin < WH_FO);
+  // set the only bit
+  meta2->bitmap[bitmin >> 6u] |= (1lu << (bitmin & 0x3fu));
+
+  wormhmap_replace(hmap, meta1, meta2);
+
slab_free_unsafe(hmap->slab1, meta1); + return meta2; +} + + static struct wormmeta * +wormmeta_bm_set_helper(struct wormhmap * const hmap, struct wormmeta * const meta, const u32 id) +{ + debug_assert(id < WH_FO); + const u32 bitmin = wormmeta_bitmin_load(meta); + const u32 bitmax = wormmeta_bitmax_load(meta); + if (bitmin < bitmax) { // already in full size + wormmeta_bm_set(meta, id); + return meta; + } else if (id == bitmin) { // do nothing + return meta; + } else if (bitmin == WH_FO) { // add the first bit + wormmeta_bitmin_store(meta, id); + wormmeta_bitmax_store(meta, id); + return meta; + } else { // need to expand + struct wormmeta * const meta2 = wormmeta_expand(hmap, meta); + wormmeta_bm_set(meta2, id); + return meta2; + } +} + +// return true if a new node is created + static void +wormmeta_split_touch(struct wormhmap * const hmap, struct kv * const mkey, + struct wormleaf * const leaf, const u32 alen) +{ + struct wormmeta * meta = wormhmap_get(hmap, mkey); + if (meta) { + if (mkey->klen < alen) + meta = wormmeta_bm_set_helper(hmap, meta, mkey->kv[mkey->klen]); + if (wormmeta_lmost_load(meta) == leaf->next) + wormmeta_lmost_store(meta, leaf); + else if (wormmeta_rmost_load(meta) == leaf->prev) + wormmeta_rmost_store(meta, leaf); + } else { // create new node + const u32 bit = (mkey->klen < alen) ? 
mkey->kv[mkey->klen] : WH_FO; + meta = wormmeta_alloc(hmap, leaf, mkey, alen, bit); + debug_assert(meta); + wormhmap_set(hmap, meta); + } +} + + static void +wormmeta_lpath_update(struct wormhmap * const hmap, const struct kv * const a1, const struct kv * const a2, + struct wormleaf * const lpath) +{ + struct kv * const pbuf = hmap->pbuf; + kv_dup2_key(a2, pbuf); + + // only need to update a2's own branch + u32 i = kv_key_lcp(a1, a2) + 1; + debug_assert(i <= pbuf->klen); + wormhole_prefix(pbuf, i); + while (i < a2->klen) { + debug_assert(i <= hmap->maxplen); + struct wormmeta * const meta = wormhmap_get(hmap, pbuf); + debug_assert(meta); + wormmeta_lpath_store(meta, lpath); + + i++; + wormhole_prefix_inc1(pbuf); + } +} + +// for leaf1, a leaf2 is already linked at its right side. +// this function updates the meta-map by moving leaf1 and hooking leaf2 at correct positions + static void +wormmeta_split(struct wormhmap * const hmap, struct wormleaf * const leaf, + struct kv * const mkey) +{ + // left branches + struct wormleaf * const prev = leaf->prev; + struct wormleaf * const next = leaf->next; + u32 i = next ? 
kv_key_lcp(prev->anchor, next->anchor) : 0; + const u32 alen = leaf->anchor->klen; + + // save klen + const u32 mklen = mkey->klen; + wormhole_prefix(mkey, i); + do { + wormmeta_split_touch(hmap, mkey, leaf, alen); + if (i >= alen) + break; + i++; + wormhole_prefix_inc1(mkey); + } while (true); + + // adjust maxplen; i is the plen of the last _touch() + if (i > hmap->maxplen) + hmap->maxplen = i; + debug_assert(i <= UINT16_MAX); + + // restore klen + mkey->klen = mklen; + + if (next) + wormmeta_lpath_update(hmap, leaf->anchor, next->anchor, leaf); +} + +// all locks will be released before returning + static bool +wormhole_split_meta(struct wormref * const ref, struct wormleaf * const leaf2) +{ + struct kv * const mkey = wormhole_alloc_mkey(leaf2->anchor->klen); + if (unlikely(mkey == NULL)) + return false; + kv_dup2_key(leaf2->anchor, mkey); + + struct wormhole * const map = ref->map; + // metalock + wormhmap_lock(map, ref); + + // check slab reserve + const bool sr = wormhole_slab_reserve(map, mkey->klen); + if (unlikely(!sr)) { + wormhmap_unlock(map); + wormhole_free_mkey(mkey); + return false; + } + + struct wormhmap * const hmap0 = wormhmap_load(map); + struct wormhmap * const hmap1 = wormhmap_switch(map, hmap0); + + // link + struct wormleaf * const leaf1 = leaf2->prev; + leaf1->next = leaf2; + if (leaf2->next) + leaf2->next->prev = leaf2; + + // update versions + const u64 v1 = wormhmap_version_load(hmap0) + 1; + wormleaf_version_store(leaf1, v1); + wormleaf_version_store(leaf2, v1); + wormhmap_version_store(hmap1, v1); + + wormmeta_split(hmap1, leaf2, mkey); + + qsbr_update(&ref->qref, v1); + + // switch hmap + wormhmap_store(map, hmap1); + + wormleaf_unlock_write(leaf1); + wormleaf_unlock_write(leaf2); + + qsbr_wait(map->qsbr, v1); + + wormmeta_split(hmap0, leaf2, mkey); + + wormhmap_unlock(map); + + if (mkey->refcnt == 0) // this is possible + wormhole_free_mkey(mkey); + return true; +} + +// all locks (metalock + leaflocks) will be released before 
returning +// leaf1->lock (write) is already taken + static bool +wormhole_split_insert(struct wormref * const ref, struct wormleaf * const leaf1, + struct kv * const new) +{ + struct wormleaf * const leaf2 = wormhole_split_leaf(ref->map, leaf1, new); + if (unlikely(leaf2 == NULL)) { + wormleaf_unlock_write(leaf1); + return false; + } + + rwlock_lock_write(&(leaf2->leaflock)); + const bool rsm = wormhole_split_meta(ref, leaf2); + if (unlikely(!rsm)) { + // undo insertion & merge; free leaf2 + wormleaf_split_undo(ref->map, leaf1, leaf2, new); + wormleaf_unlock_write(leaf1); + } + return rsm; +} + + static bool +whunsafe_split_meta(struct wormhole * const map, struct wormleaf * const leaf2) +{ + struct kv * const mkey = wormhole_alloc_mkey(leaf2->anchor->klen); + if (unlikely(mkey == NULL)) + return false; + kv_dup2_key(leaf2->anchor, mkey); + + const bool sr = wormhole_slab_reserve(map, mkey->klen); + if (unlikely(!sr)) { + wormhmap_unlock(map); + wormhole_free_mkey(mkey); + return false; + } + + // link + leaf2->prev->next = leaf2; + if (leaf2->next) + leaf2->next->prev = leaf2; + + for (u32 i = 0; i < 2; i++) + if (map->hmap2[i].pmap) + wormmeta_split(&(map->hmap2[i]), leaf2, mkey); + if (mkey->refcnt == 0) // this is possible + wormhole_free_mkey(mkey); + return true; +} + + static bool +whunsafe_split_insert(struct wormhole * const map, struct wormleaf * const leaf1, + struct kv * const new) +{ + struct wormleaf * const leaf2 = wormhole_split_leaf(map, leaf1, new); + if (unlikely(leaf2 == NULL)) + return false; + + const bool rsm = whunsafe_split_meta(map, leaf2); + if (unlikely(!rsm)) // undo insertion, merge, free leaf2 + wormleaf_split_undo(map, leaf1, leaf2, new); + + return rsm; +} +// }}} meta-split + +// meta-merge {{{ +// now it only contains one bit + static struct wormmeta * +wormmeta_shrink(struct wormhmap * const hmap, struct wormmeta * const meta2) +{ + debug_assert(wormmeta_bitmin_load(meta2) == wormmeta_bitmax_load(meta2)); + struct wormmeta * 
const meta1 = slab_alloc_unsafe(hmap->slab1); + if (meta1 == NULL) + return NULL; + + memcpy(meta1, meta2, sizeof(*meta1)); + + wormhmap_replace(hmap, meta2, meta1); + slab_free_unsafe(hmap->slab2, meta2); + return meta1; +} + + static void +wormmeta_bm_clear_helper(struct wormhmap * const hmap, struct wormmeta * const meta, const u32 id) +{ + if (wormmeta_bitmin_load(meta) == wormmeta_bitmax_load(meta)) { + debug_assert(wormmeta_bitmin_load(meta) < WH_FO); + wormmeta_bitmin_store(meta, WH_FO); + wormmeta_bitmax_store(meta, WH_FO); + } else { // has more than 1 bit + wormmeta_bm_clear(meta, id); + if (wormmeta_bitmin_load(meta) == wormmeta_bitmax_load(meta)) + wormmeta_shrink(hmap, meta); + } +} + +// all locks held + static void +wormmeta_merge(struct wormhmap * const hmap, struct wormleaf * const leaf) +{ + // leaf->next is the new next after merge, which can be NULL + struct wormleaf * const prev = leaf->prev; + struct wormleaf * const next = leaf->next; + struct kv * const pbuf = hmap->pbuf; + kv_dup2_key(leaf->anchor, pbuf); + u32 i = (prev && next) ? 
kv_key_lcp(prev->anchor, next->anchor) : 0; + const u32 alen = leaf->anchor->klen; + wormhole_prefix(pbuf, i); + struct wormmeta * parent = NULL; + do { + debug_assert(i <= hmap->maxplen); + struct wormmeta * meta = wormhmap_get(hmap, pbuf); + if (wormmeta_lmost_load(meta) == wormmeta_rmost_load(meta)) { // delete single-child + debug_assert(wormmeta_lmost_load(meta) == leaf); + const u32 bitmin = wormmeta_bitmin_load(meta); + wormhmap_del(hmap, meta); + wormmeta_free(hmap, meta); + if (parent) { + wormmeta_bm_clear_helper(hmap, parent, pbuf->kv[i-1]); + parent = NULL; + } + if (bitmin == WH_FO) // no child + break; + } else { // adjust lmost rmost + if (wormmeta_lmost_load(meta) == leaf) + wormmeta_lmost_store(meta, next); + else if (wormmeta_rmost_load(meta) == leaf) + wormmeta_rmost_store(meta, prev); + parent = meta; + } + + if (i >= alen) + break; + i++; + wormhole_prefix_inc1(pbuf); + } while (true); + + if (next) + wormmeta_lpath_update(hmap, leaf->anchor, next->anchor, prev); +} + +// all locks (metalock + two leaflock) will be released before returning +// merge leaf2 to leaf1, removing all metadata to leaf2 and leaf2 itself + static void +wormhole_meta_merge(struct wormref * const ref, struct wormleaf * const leaf1, + struct wormleaf * const leaf2, const bool unlock_leaf1) +{ + debug_assert(leaf1->next == leaf2); + debug_assert(leaf2->prev == leaf1); + struct wormhole * const map = ref->map; + + wormhmap_lock(map, ref); + + struct wormhmap * const hmap0 = wormhmap_load(map); + struct wormhmap * const hmap1 = wormhmap_switch(map, hmap0); + const u64 v1 = wormhmap_version_load(hmap0) + 1; + + leaf1->next = leaf2->next; + if (leaf2->next) + leaf2->next->prev = leaf1; + + wormleaf_version_store(leaf1, v1); + wormleaf_version_store(leaf2, v1); + wormhmap_version_store(hmap1, v1); + + wormmeta_merge(hmap1, leaf2); + + qsbr_update(&ref->qref, v1); + + // switch hmap + wormhmap_store(map, hmap1); + + if (unlock_leaf1) + wormleaf_unlock_write(leaf1); + 
wormleaf_unlock_write(leaf2); + + qsbr_wait(map->qsbr, v1); + + wormmeta_merge(hmap0, leaf2); + // leaf2 is now safe to be removed + wormleaf_free(map->slab_leaf, leaf2); + wormhmap_unlock(map); +} + +// caller must acquire leaf->wlock and next->wlock +// all locks will be released when this function returns + static bool +wormhole_meta_leaf_merge(struct wormref * const ref, struct wormleaf * const leaf) +{ + struct wormleaf * const next = leaf->next; + debug_assert(next); + + // double check + if ((leaf->nr_keys + next->nr_keys) <= WH_KPN) { + if (wormleaf_merge(leaf, next)) { + wormhole_meta_merge(ref, leaf, next, true); + return true; + } + } + // merge failed but it's fine + wormleaf_unlock_write(leaf); + wormleaf_unlock_write(next); + return false; +} + + static void +whunsafe_meta_leaf_merge(struct wormhole * const map, struct wormleaf * const leaf1, + struct wormleaf * const leaf2) +{ + debug_assert(leaf1->next == leaf2); + debug_assert(leaf2->prev == leaf1); + if (!wormleaf_merge(leaf1, leaf2)) + return; + + leaf1->next = leaf2->next; + if (leaf2->next) + leaf2->next->prev = leaf1; + for (u32 i = 0; i < 2; i++) + if (map->hmap2[i].pmap) + wormmeta_merge(&(map->hmap2[i]), leaf2); + wormleaf_free(map->slab_leaf, leaf2); +} +// }}} meta-merge + +// put {{{ + bool +wormhole_put(struct wormref * const ref, struct kv * const kv) +{ + // we always allocate a new item on SET + // future optimizations may perform in-place update + struct wormhole * const map = ref->map; + struct kv * const new = map->mm.in(kv, map->mm.priv); + if (unlikely(new == NULL)) + return false; + const struct kref kref = kv_kref(new); + + struct wormleaf * const leaf = wormhole_jump_leaf_write(ref, &kref); + // update + const u32 im = wormleaf_match_hs(leaf, &kref); + if (im < WH_KPN) { + struct kv * const old = wormleaf_update(leaf, im, new); + wormleaf_unlock_write(leaf); + map->mm.free(old, map->mm.priv); + return true; + } + + // insert + if (likely(leaf->nr_keys < WH_KPN)) { // just 
insert + wormleaf_insert(leaf, new); + wormleaf_unlock_write(leaf); + return true; + } + + // split_insert changes hmap + // all locks should be released in wormhole_split_insert() + const bool rsi = wormhole_split_insert(ref, leaf, new); + if (!rsi) + map->mm.free(new, map->mm.priv); + return rsi; +} + + bool +whsafe_put(struct wormref * const ref, struct kv * const kv) +{ + wormhole_resume(ref); + const bool r = wormhole_put(ref, kv); + wormhole_park(ref); + return r; +} + + bool +whunsafe_put(struct wormhole * const map, struct kv * const kv) +{ + struct kv * const new = map->mm.in(kv, map->mm.priv); + if (unlikely(new == NULL)) + return false; + const struct kref kref = kv_kref(new); + + struct wormleaf * const leaf = wormhole_jump_leaf(map->hmap, &kref); + // update + const u32 im = wormleaf_match_hs(leaf, &kref); + if (im < WH_KPN) { // overwrite + struct kv * const old = wormleaf_update(leaf, im, new); + map->mm.free(old, map->mm.priv); + return true; + } + + // insert + if (likely(leaf->nr_keys < WH_KPN)) { // just insert + wormleaf_insert(leaf, new); + return true; + } + + // split_insert changes hmap + const bool rsi = whunsafe_split_insert(map, leaf, new); + if (!rsi) + map->mm.free(new, map->mm.priv); + return rsi; +} + + bool +wormhole_merge(struct wormref * const ref, const struct kref * const kref, + kv_merge_func uf, void * const priv) +{ + struct wormhole * const map = ref->map; + struct wormleaf * const leaf = wormhole_jump_leaf_write(ref, kref); + // update + const u32 im = wormleaf_match_hs(leaf, kref); + if (im < WH_KPN) { // update + struct kv * const kv0 = wormleaf_kv_at_ih(leaf, im); + struct kv * const kv = uf(kv0, priv); + if ((kv == kv0) || (kv == NULL)) { // no replacement + wormleaf_unlock_write(leaf); + return true; + } + + struct kv * const new = map->mm.in(kv, map->mm.priv); + if (unlikely(new == NULL)) { // mm error + wormleaf_unlock_write(leaf); + return false; + } + + struct kv * const old = wormleaf_update(leaf, im, new); + 
wormleaf_unlock_write(leaf); + map->mm.free(old, map->mm.priv); + return true; + } + + struct kv * const kv = uf(NULL, priv); + if (kv == NULL) { // nothing to be inserted + wormleaf_unlock_write(leaf); + return true; + } + + struct kv * const new = map->mm.in(kv, map->mm.priv); + if (unlikely(new == NULL)) { // mm error + wormleaf_unlock_write(leaf); + return false; + } + + // insert + if (likely(leaf->nr_keys < WH_KPN)) { // just insert + wormleaf_insert(leaf, new); + wormleaf_unlock_write(leaf); + return true; + } + + // split_insert changes hmap + // all locks should be released in wormhole_split_insert() + const bool rsi = wormhole_split_insert(ref, leaf, new); + if (!rsi) + map->mm.free(new, map->mm.priv); + return rsi; +} + + bool +whsafe_merge(struct wormref * const ref, const struct kref * const kref, + kv_merge_func uf, void * const priv) +{ + wormhole_resume(ref); + const bool r = wormhole_merge(ref, kref, uf, priv); + wormhole_park(ref); + return r; +} + + bool +whunsafe_merge(struct wormhole * const map, const struct kref * const kref, + kv_merge_func uf, void * const priv) +{ + struct wormleaf * const leaf = wormhole_jump_leaf(map->hmap, kref); + // update + const u32 im = wormleaf_match_hs(leaf, kref); + if (im < WH_KPN) { // update + struct kv * const kv0 = wormleaf_kv_at_ih(leaf, im); + struct kv * const kv = uf(kv0, priv); + if ((kv == kv0) || (kv == NULL)) + return true; + + struct kv * const new = map->mm.in(kv, map->mm.priv); + if (unlikely(new == NULL)) + return false; + + struct kv * const old = wormleaf_update(leaf, im, new); + map->mm.free(old, map->mm.priv); + return true; + } + + struct kv * const kv = uf(NULL, priv); + if (kv == NULL) // nothing to be inserted + return true; + + struct kv * const new = map->mm.in(kv, map->mm.priv); + if (unlikely(new == NULL)) // mm error + return false; + + // insert + if (likely(leaf->nr_keys < WH_KPN)) { // just insert + wormleaf_insert(leaf, new); + return true; + } + + // split_insert changes hmap + 
const bool rsi = whunsafe_split_insert(map, leaf, new); + if (!rsi) + map->mm.free(new, map->mm.priv); + return rsi; +} +// }}} put + +// inplace {{{ + bool +wormhole_inpr(struct wormref * const ref, const struct kref * const key, + kv_inp_func uf, void * const priv) +{ + struct wormleaf * const leaf = wormhole_jump_leaf_read(ref, key); + const u32 im = wormleaf_match_hs(leaf, key); + if (im < WH_KPN) { + uf(wormleaf_kv_at_ih(leaf, im), priv); + wormleaf_unlock_read(leaf); + return true; + } else { + uf(NULL, priv); + wormleaf_unlock_read(leaf); + return false; + } +} + + bool +wormhole_inpw(struct wormref * const ref, const struct kref * const key, + kv_inp_func uf, void * const priv) +{ + struct wormleaf * const leaf = wormhole_jump_leaf_write(ref, key); + const u32 im = wormleaf_match_hs(leaf, key); + if (im < WH_KPN) { + uf(wormleaf_kv_at_ih(leaf, im), priv); + wormleaf_unlock_write(leaf); + return true; + } else { + uf(NULL, priv); + wormleaf_unlock_write(leaf); + return false; + } +} + + bool +whsafe_inpr(struct wormref * const ref, const struct kref * const key, + kv_inp_func uf, void * const priv) +{ + wormhole_resume(ref); + const bool r = wormhole_inpr(ref, key, uf, priv); + wormhole_park(ref); + return r; +} + + bool +whsafe_inpw(struct wormref * const ref, const struct kref * const key, + kv_inp_func uf, void * const priv) +{ + wormhole_resume(ref); + const bool r = wormhole_inpw(ref, key, uf, priv); + wormhole_park(ref); + return r; +} + + bool +whunsafe_inp(struct wormhole * const map, const struct kref * const key, + kv_inp_func uf, void * const priv) +{ + struct wormleaf * const leaf = wormhole_jump_leaf(map->hmap, key); + const u32 im = wormleaf_match_hs(leaf, key); + if (im < WH_KPN) { // overwrite + uf(wormleaf_kv_at_ih(leaf, im), priv); + return true; + } else { + uf(NULL, priv); + return false; + } +} +// }}} put + +// del {{{ + static void +wormhole_del_try_merge(struct wormref * const ref, struct wormleaf * const leaf) +{ + struct wormleaf * 
const next = leaf->next;
+  // opportunistic merge with the right neighbor: attempted when this leaf is
+  // empty or the combined size is below the merge threshold WH_KPN_MRG
+  if (next && ((leaf->nr_keys == 0) || ((leaf->nr_keys + next->nr_keys) < WH_KPN_MRG))) {
+    // try merge, it may fail if size becomes larger after locking
+    wormleaf_lock_write(next, ref);
+    (void)wormhole_meta_leaf_merge(ref, leaf);
+    // locks are already released; immediately return
+  } else {
+    wormleaf_unlock_write(leaf);
+  }
+}
+
+// Delete one key: remove its hash-slot entry under the leaf's writer lock,
+// opportunistically merge with the right neighbor, then free the kv.
+// Returns true iff the key was found and removed.
+  bool
+wormhole_del(struct wormref * const ref, const struct kref * const key)
+{
+  struct wormleaf * const leaf = wormhole_jump_leaf_write(ref, key);
+  const u32 im = wormleaf_match_hs(leaf, key);
+  if (im < WH_KPN) { // found
+    struct kv * const kv = wormleaf_remove_ih(leaf, im);
+    wormhole_del_try_merge(ref, leaf);
+    debug_assert(kv);
+    // free after releasing locks
+    struct wormhole * const map = ref->map;
+    map->mm.free(kv, map->mm.priv);
+    return true;
+  } else {
+    wormleaf_unlock_write(leaf);
+    return false;
+  }
+}
+
+// QSBR-safe wrapper around wormhole_del (resume/park).
+  bool
+whsafe_del(struct wormref * const ref, const struct kref * const key)
+{
+  wormhole_resume(ref);
+  const bool r = wormhole_del(ref, key);
+  wormhole_park(ref);
+  return r;
+}
+
+// Single-threaded post-delete merge. WH_KPN is used as a sentinel size for a
+// missing neighbor so the combined-size test on that side can never pass.
+  static void
+whunsafe_del_try_merge(struct wormhole * const map, struct wormleaf * const leaf)
+{
+  const u32 n0 = leaf->prev ? leaf->prev->nr_keys : WH_KPN;
+  const u32 n1 = leaf->nr_keys;
+  const u32 n2 = leaf->next ?
leaf->next->nr_keys : WH_KPN; + + if ((leaf->prev && (n1 == 0)) || ((n0 + n1) < WH_KPN_MRG)) { + whunsafe_meta_leaf_merge(map, leaf->prev, leaf); + } else if ((leaf->next && (n1 == 0)) || ((n1 + n2) < WH_KPN_MRG)) { + whunsafe_meta_leaf_merge(map, leaf, leaf->next); + } +} + + bool +whunsafe_del(struct wormhole * const map, const struct kref * const key) +{ + struct wormleaf * const leaf = wormhole_jump_leaf(map->hmap, key); + const u32 im = wormleaf_match_hs(leaf, key); + if (im < WH_KPN) { // found + struct kv * const kv = wormleaf_remove_ih(leaf, im); + debug_assert(kv); + + whunsafe_del_try_merge(map, leaf); + map->mm.free(kv, map->mm.priv); + return true; + } + return false; +} + + u64 +wormhole_delr(struct wormref * const ref, const struct kref * const start, + const struct kref * const end) +{ + struct wormleaf * const leafa = wormhole_jump_leaf_write(ref, start); + wormleaf_sync_sorted(leafa); + const u32 ia = wormleaf_seek(leafa, start); + const u32 iaz = end ? wormleaf_seek_end(leafa, end) : leafa->nr_keys; + if (iaz < ia) { // do nothing if end < start + wormleaf_unlock_write(leafa); + return 0; + } + u64 ndel = iaz - ia; + struct wormhole * const map = ref->map; + wormleaf_delete_range(map, leafa, ia, iaz); + if (leafa->nr_keys > ia) { // end hit; done + wormhole_del_try_merge(ref, leafa); + return ndel; + } + + while (leafa->next) { + struct wormleaf * const leafx = leafa->next; + wormleaf_lock_write(leafx, ref); + // two leaf nodes locked + wormleaf_sync_sorted(leafx); + const u32 iz = end ? 
wormleaf_seek_end(leafx, end) : leafx->nr_keys; + ndel += iz; + wormleaf_delete_range(map, leafx, 0, iz); + if (leafx->nr_keys == 0) { // removed all + // must hold leaf1's lock for the next iteration + wormhole_meta_merge(ref, leafa, leafx, false); + } else { // partially removed; done + (void)wormhole_meta_leaf_merge(ref, leafa); + return ndel; + } + } + wormleaf_unlock_write(leafa); + return ndel; +} + + u64 +whsafe_delr(struct wormref * const ref, const struct kref * const start, + const struct kref * const end) +{ + wormhole_resume(ref); + const u64 ret = wormhole_delr(ref, start, end); + wormhole_park(ref); + return ret; +} + + u64 +whunsafe_delr(struct wormhole * const map, const struct kref * const start, + const struct kref * const end) +{ + // first leaf + struct wormhmap * const hmap = map->hmap; + struct wormleaf * const leafa = wormhole_jump_leaf(hmap, start); + wormleaf_sync_sorted(leafa); + // last leaf + struct wormleaf * const leafz = end ? wormhole_jump_leaf(hmap, end) : NULL; + + // select start/end on leafa + const u32 ia = wormleaf_seek(leafa, start); + const u32 iaz = end ? 
wormleaf_seek_end(leafa, end) : leafa->nr_keys; + if (iaz < ia) + return 0; + + wormleaf_delete_range(map, leafa, ia, iaz); + u64 ndel = iaz - ia; + + if (leafa == leafz) { // one node only + whunsafe_del_try_merge(map, leafa); + return ndel; + } + + // 0 or more nodes between leafa and leafz + while (leafa->next != leafz) { + struct wormleaf * const leafx = leafa->next; + ndel += leafx->nr_keys; + for (u32 i = 0; i < leafx->nr_keys; i++) + map->mm.free(wormleaf_kv_at_is(leafx, i), map->mm.priv); + leafx->nr_keys = 0; + leafx->nr_sorted = 0; + whunsafe_meta_leaf_merge(map, leafa, leafx); + } + // delete the smaller keys in leafz + if (leafz) { + wormleaf_sync_sorted(leafz); + const u32 iz = wormleaf_seek_end(leafz, end); + wormleaf_delete_range(map, leafz, 0, iz); + ndel += iz; + whunsafe_del_try_merge(map, leafa); + } + return ndel; +} +// }}} del + +// iter {{{ +// safe iter: safe sort with read-lock acquired +// unsafe iter: allow concurrent seek/skip + static void +wormhole_iter_leaf_sync_sorted(struct wormleaf * const leaf) +{ + if (unlikely(leaf->nr_keys != leaf->nr_sorted)) { + spinlock_lock(&(leaf->sortlock)); + wormleaf_sync_sorted(leaf); + spinlock_unlock(&(leaf->sortlock)); + } +} + + struct wormhole_iter * +wormhole_iter_create(struct wormref * const ref) +{ + struct wormhole_iter * const iter = malloc(sizeof(*iter)); + if (iter == NULL) + return NULL; + iter->ref = ref; + iter->map = ref->map; + iter->leaf = NULL; + iter->is = 0; + return iter; +} + + static void +wormhole_iter_fix(struct wormhole_iter * const iter) +{ + if (!wormhole_iter_valid(iter)) + return; + + while (unlikely(iter->is >= iter->leaf->nr_sorted)) { + struct wormleaf * const next = iter->leaf->next; + if (likely(next != NULL)) { + struct wormref * const ref = iter->ref; + wormleaf_lock_read(next, ref); + wormleaf_unlock_read(iter->leaf); + + wormhole_iter_leaf_sync_sorted(next); + } else { + wormleaf_unlock_read(iter->leaf); + } + iter->leaf = next; + iter->is = 0; + if 
(!wormhole_iter_valid(iter))
+      return;
+  }
+}
+
+// Position the iterator at the first key >= key. Releases the previously held
+// leaf reader lock (if any), jumps to the target leaf under its reader lock,
+// sorts it if needed, then fixes up past-the-end positions.
+  void
+wormhole_iter_seek(struct wormhole_iter * const iter, const struct kref * const key)
+{
+  debug_assert(key);
+  if (iter->leaf)
+    wormleaf_unlock_read(iter->leaf);
+
+  struct wormleaf * const leaf = wormhole_jump_leaf_read(iter->ref, key);
+  wormhole_iter_leaf_sync_sorted(leaf);
+
+  iter->leaf = leaf;
+  iter->is = wormleaf_seek(leaf, key);
+  wormhole_iter_fix(iter);
+}
+
+// QSBR-safe seek: resume the ref first; pair with whsafe_iter_park/destroy.
+  void
+whsafe_iter_seek(struct wormhole_iter * const iter, const struct kref * const key)
+{
+  wormhole_resume(iter->ref);
+  wormhole_iter_seek(iter, key);
+}
+
+// An iterator is valid while it holds a leaf; NULL means end-of-index or parked.
+  bool
+wormhole_iter_valid(struct wormhole_iter * const iter)
+{
+  return iter->leaf != NULL;
+}
+
+// Return a pointer to the current kv inside the leaf (no copy), or NULL when
+// the iterator is invalid. The pointer is borrowed from the leaf's storage.
+  static struct kv *
+wormhole_iter_current(struct wormhole_iter * const iter)
+{
+  if (wormhole_iter_valid(iter)) {
+    debug_assert(iter->is < iter->leaf->nr_sorted);
+    struct kv * const kv = wormleaf_kv_at_is(iter->leaf, iter->is);
+    return kv;
+  }
+  return NULL;
+}
+
+// Copy the current kv out through mm.out without advancing; NULL when invalid.
+  struct kv *
+wormhole_iter_peek(struct wormhole_iter * const iter, struct kv * const out)
+{
+  struct kv * const kv = wormhole_iter_current(iter);
+  if (kv) {
+    struct kv * const ret = iter->map->mm.out(kv, out);
+    return ret;
+  }
+  return NULL;
+}
+
+// Borrow a key reference to the current kv (no copy); false when invalid.
+  bool
+wormhole_iter_kref(struct wormhole_iter * const iter, struct kref * const kref)
+{
+  struct kv * const kv = wormhole_iter_current(iter);
+  if (kv) {
+    kref_ref_kv(kref, kv);
+    return true;
+  }
+  return false;
+}
+
+// Borrow a key-value reference to the current kv (no copy); false when invalid.
+  bool
+wormhole_iter_kvref(struct wormhole_iter * const iter, struct kvref * const kvref)
+{
+  struct kv * const kv = wormhole_iter_current(iter);
+  if (kv) {
+    kvref_ref_kv(kvref, kv);
+    return true;
+  }
+  return false;
+}
+
+// Advance by one position; iter_fix walks to the next leaf when needed.
+  void
+wormhole_iter_skip1(struct wormhole_iter * const iter)
+{
+  if (wormhole_iter_valid(iter)) {
+    iter->is++;
+    wormhole_iter_fix(iter);
+  }
+}
+
+// Advance by up to nr positions, leaf by leaf (continues below).
+  void
+wormhole_iter_skip(struct wormhole_iter * const iter, const u32 nr)
+{
+  u32 todo = nr;
+  while (todo && wormhole_iter_valid(iter)) {
+    const u32 cap = iter->leaf->nr_sorted -
iter->is; + const u32 nskip = (cap < todo) ? cap : todo; + iter->is += nskip; + wormhole_iter_fix(iter); + todo -= nskip; + } +} + + struct kv * +wormhole_iter_next(struct wormhole_iter * const iter, struct kv * const out) +{ + struct kv * const ret = wormhole_iter_peek(iter, out); + wormhole_iter_skip1(iter); + return ret; +} + + bool +wormhole_iter_inp(struct wormhole_iter * const iter, kv_inp_func uf, void * const priv) +{ + struct kv * const kv = wormhole_iter_current(iter); + uf(kv, priv); // call uf even if (kv == NULL) + return kv != NULL; +} + + void +wormhole_iter_park(struct wormhole_iter * const iter) +{ + if (iter->leaf) { + wormleaf_unlock_read(iter->leaf); + iter->leaf = NULL; + } +} + + void +whsafe_iter_park(struct wormhole_iter * const iter) +{ + wormhole_iter_park(iter); + wormhole_park(iter->ref); +} + + void +wormhole_iter_destroy(struct wormhole_iter * const iter) +{ + if (iter->leaf) + wormleaf_unlock_read(iter->leaf); + free(iter); +} + + void +whsafe_iter_destroy(struct wormhole_iter * const iter) +{ + wormhole_park(iter->ref); + wormhole_iter_destroy(iter); +} +// }}} iter + +// unsafe iter {{{ + struct wormhole_iter * +whunsafe_iter_create(struct wormhole * const map) +{ + struct wormhole_iter * const iter = malloc(sizeof(*iter)); + if (iter == NULL) + return NULL; + iter->ref = NULL; + iter->map = map; + iter->leaf = NULL; + iter->is = 0; + whunsafe_iter_seek(iter, kref_null()); + return iter; +} + + static void +whunsafe_iter_fix(struct wormhole_iter * const iter) +{ + if (!wormhole_iter_valid(iter)) + return; + + while (unlikely(iter->is >= iter->leaf->nr_sorted)) { + struct wormleaf * const next = iter->leaf->next; + if (likely(next != NULL)) + wormhole_iter_leaf_sync_sorted(next); + iter->leaf = next; + iter->is = 0; + if (!wormhole_iter_valid(iter)) + return; + } +} + + void +whunsafe_iter_seek(struct wormhole_iter * const iter, const struct kref * const key) +{ + struct wormleaf * const leaf = wormhole_jump_leaf(iter->map->hmap, 
key); + wormhole_iter_leaf_sync_sorted(leaf); + + iter->leaf = leaf; + iter->is = wormleaf_seek(leaf, key); + whunsafe_iter_fix(iter); +} + + void +whunsafe_iter_skip1(struct wormhole_iter * const iter) +{ + if (wormhole_iter_valid(iter)) { + iter->is++; + whunsafe_iter_fix(iter); + } +} + + void +whunsafe_iter_skip(struct wormhole_iter * const iter, const u32 nr) +{ + u32 todo = nr; + while (todo && wormhole_iter_valid(iter)) { + const u32 cap = iter->leaf->nr_sorted - iter->is; + const u32 nskip = (cap < todo) ? cap : todo; + iter->is += nskip; + whunsafe_iter_fix(iter); + todo -= nskip; + } +} + + struct kv * +whunsafe_iter_next(struct wormhole_iter * const iter, struct kv * const out) +{ + struct kv * const ret = wormhole_iter_peek(iter, out); + whunsafe_iter_skip1(iter); + return ret; +} + + void +whunsafe_iter_destroy(struct wormhole_iter * const iter) +{ + free(iter); +} +// }}} unsafe iter + +// misc {{{ + struct wormref * +wormhole_ref(struct wormhole * const map) +{ + struct wormref * const ref = malloc(sizeof(*ref)); + if (ref == NULL) + return NULL; + ref->map = map; + if (qsbr_register(map->qsbr, &(ref->qref)) == false) { + free(ref); + return NULL; + } + return ref; +} + + struct wormref * +whsafe_ref(struct wormhole * const map) +{ + struct wormref * const ref = wormhole_ref(map); + if (ref) + wormhole_park(ref); + return ref; +} + + struct wormhole * +wormhole_unref(struct wormref * const ref) +{ + struct wormhole * const map = ref->map; + qsbr_unregister(map->qsbr, &(ref->qref)); + free(ref); + return map; +} + + inline void +wormhole_park(struct wormref * const ref) +{ + qsbr_park(&(ref->qref)); +} + + inline void +wormhole_resume(struct wormref * const ref) +{ + qsbr_resume(&(ref->qref)); +} + + inline void +wormhole_refresh_qstate(struct wormref * const ref) +{ + qsbr_update(&(ref->qref), wormhmap_version_load(wormhmap_load(ref->map))); +} + + static void +wormhole_clean_hmap(struct wormhole * const map) +{ + for (u32 x = 0; x < 2; x++) { + if 
(map->hmap2[x].pmap == NULL) + continue; + struct wormhmap * const hmap = &(map->hmap2[x]); + const u64 nr_slots = ((u64)(hmap->mask)) + 1; + struct wormmbkt * const pmap = hmap->pmap; + for (u64 s = 0; s < nr_slots; s++) { + struct wormmbkt * const slot = &(pmap[s]); + for (u32 i = 0; i < WH_BKT_NR; i++) + if (slot->e[i]) + wormmeta_keyref_release(slot->e[i]); + } + + slab_free_all(hmap->slab1); + slab_free_all(hmap->slab2); + memset(hmap->pmap, 0, hmap->msize); + hmap->maxplen = 0; + } +} + + static void +wormhole_free_leaf_keys(struct wormhole * const map, struct wormleaf * const leaf) +{ + const u32 nr = leaf->nr_keys; + for (u32 i = 0; i < nr; i++) { + void * const curr = wormleaf_kv_at_is(leaf, i); + debug_assert(curr); + map->mm.free(curr, map->mm.priv); + } + wormhole_free_akey(leaf->anchor); +} + + static void +wormhole_clean_helper(struct wormhole * const map) +{ + wormhole_clean_hmap(map); + for (struct wormleaf * leaf = map->leaf0; leaf; leaf = leaf->next) + wormhole_free_leaf_keys(map, leaf); + slab_free_all(map->slab_leaf); + map->leaf0 = NULL; +} + +// unsafe + void +wormhole_clean(struct wormhole * const map) +{ + wormhole_clean_helper(map); + wormhole_create_leaf0(map); +} + + void +wormhole_destroy(struct wormhole * const map) +{ + wormhole_clean_helper(map); + for (u32 i = 0; i < 2; i++) { + struct wormhmap * const hmap = &map->hmap2[i]; + if (hmap->slab1) + slab_destroy(hmap->slab1); + if (hmap->slab2) + slab_destroy(hmap->slab2); + wormhmap_deinit(hmap); + } + qsbr_destroy(map->qsbr); + slab_destroy(map->slab_leaf); + free(map->pbuf); + free(map); +} + + void +wormhole_fprint(struct wormhole * const map, FILE * const out) +{ + const u64 nr_slab_ul = slab_get_nalloc(map->slab_leaf); + const u64 nr_slab_um11 = slab_get_nalloc(map->hmap2[0].slab1); + const u64 nr_slab_um12 = slab_get_nalloc(map->hmap2[0].slab2); + const u64 nr_slab_um21 = map->hmap2[1].slab1 ? 
slab_get_nalloc(map->hmap2[1].slab1) : 0; + const u64 nr_slab_um22 = map->hmap2[1].slab2 ? slab_get_nalloc(map->hmap2[1].slab2) : 0; + fprintf(out, "%s L-SLAB %lu M-SLAB [0] %lu+%lu [1] %lu+%lu\n", + __func__, nr_slab_ul, nr_slab_um11, nr_slab_um12, nr_slab_um21, nr_slab_um22); +} +// }}} misc + +// api {{{ +const struct kvmap_api kvmap_api_wormhole = { + .hashkey = true, + .ordered = true, + .threadsafe = true, + .unique = true, + .refpark = true, + .put = (void *)wormhole_put, + .get = (void *)wormhole_get, + .probe = (void *)wormhole_probe, + .del = (void *)wormhole_del, + .inpr = (void *)wormhole_inpr, + .inpw = (void *)wormhole_inpw, + .merge = (void *)wormhole_merge, + .delr = (void *)wormhole_delr, + .iter_create = (void *)wormhole_iter_create, + .iter_seek = (void *)wormhole_iter_seek, + .iter_valid = (void *)wormhole_iter_valid, + .iter_peek = (void *)wormhole_iter_peek, + .iter_kref = (void *)wormhole_iter_kref, + .iter_kvref = (void *)wormhole_iter_kvref, + .iter_skip1 = (void *)wormhole_iter_skip1, + .iter_skip = (void *)wormhole_iter_skip, + .iter_next = (void *)wormhole_iter_next, + .iter_inp = (void *)wormhole_iter_inp, + .iter_park = (void *)wormhole_iter_park, + .iter_destroy = (void *)wormhole_iter_destroy, + .ref = (void *)wormhole_ref, + .unref = (void *)wormhole_unref, + .park = (void *)wormhole_park, + .resume = (void *)wormhole_resume, + .clean = (void *)wormhole_clean, + .destroy = (void *)wormhole_destroy, + .fprint = (void *)wormhole_fprint, +}; + +const struct kvmap_api kvmap_api_whsafe = { + .hashkey = true, + .ordered = true, + .threadsafe = true, + .unique = true, + .put = (void *)whsafe_put, + .get = (void *)whsafe_get, + .probe = (void *)whsafe_probe, + .del = (void *)whsafe_del, + .inpr = (void *)whsafe_inpr, + .inpw = (void *)whsafe_inpw, + .merge = (void *)whsafe_merge, + .delr = (void *)whsafe_delr, + .iter_create = (void *)wormhole_iter_create, + .iter_seek = (void *)whsafe_iter_seek, + .iter_valid = (void *)wormhole_iter_valid, 
+ .iter_peek = (void *)wormhole_iter_peek, + .iter_kref = (void *)wormhole_iter_kref, + .iter_kvref = (void *)wormhole_iter_kvref, + .iter_skip1 = (void *)wormhole_iter_skip1, + .iter_skip = (void *)wormhole_iter_skip, + .iter_next = (void *)wormhole_iter_next, + .iter_inp = (void *)wormhole_iter_inp, + .iter_park = (void *)whsafe_iter_park, + .iter_destroy = (void *)whsafe_iter_destroy, + .ref = (void *)whsafe_ref, + .unref = (void *)wormhole_unref, + .clean = (void *)wormhole_clean, + .destroy = (void *)wormhole_destroy, + .fprint = (void *)wormhole_fprint, +}; + +const struct kvmap_api kvmap_api_whunsafe = { + .hashkey = true, + .ordered = true, + .unique = true, + .put = (void *)whunsafe_put, + .get = (void *)whunsafe_get, + .probe = (void *)whunsafe_probe, + .del = (void *)whunsafe_del, + .inpr = (void *)whunsafe_inp, + .inpw = (void *)whunsafe_inp, + .merge = (void *)whunsafe_merge, + .delr = (void *)whunsafe_delr, + .iter_create = (void *)whunsafe_iter_create, + .iter_seek = (void *)whunsafe_iter_seek, + .iter_valid = (void *)wormhole_iter_valid, + .iter_peek = (void *)wormhole_iter_peek, + .iter_kref = (void *)wormhole_iter_kref, + .iter_kvref = (void *)wormhole_iter_kvref, + .iter_skip1 = (void *)whunsafe_iter_skip1, + .iter_skip = (void *)whunsafe_iter_skip, + .iter_next = (void *)whunsafe_iter_next, + .iter_inp = (void *)wormhole_iter_inp, + .iter_destroy = (void *)whunsafe_iter_destroy, + .clean = (void *)wormhole_clean, + .destroy = (void *)wormhole_destroy, + .fprint = (void *)wormhole_fprint, +}; + + static void * +wormhole_kvmap_api_create(const char * const name, const struct kvmap_mm * const mm, char ** args) +{ + (void)args; + if ((!strcmp(name, "wormhole")) || (!strcmp(name, "whsafe"))) { + return wormhole_create(mm); + } else if (!strcmp(name, "whunsafe")) { + return whunsafe_create(mm); + } else { + return NULL; + } +} + +__attribute__((constructor)) + static void +wormhole_kvmap_api_init(void) +{ + kvmap_api_register(0, "wormhole", "", 
wormhole_kvmap_api_create, &kvmap_api_wormhole); + kvmap_api_register(0, "whsafe", "", wormhole_kvmap_api_create, &kvmap_api_whsafe); + kvmap_api_register(0, "whunsafe", "", wormhole_kvmap_api_create, &kvmap_api_whunsafe); +} +// }}} api + +// wh {{{ +// Users often don't enjoy dealing with struct kv/kref and just want to use plain buffers. +// No problem! +// This example library shows you how to use Wormhole efficiently in the most intuitive way. + +// Use the worry-free api +static const struct kvmap_api * const wh_api = &kvmap_api_whsafe; + +// You can change the wh_api to kvmap_api_wormhole with a one-line replacement +// The standard Wormhole api can give you ~5% boost; read README for thread-safety tips +//static const struct kvmap_api * const wh_api = &kvmap_api_wormhole; + + struct wormhole * +wh_create(void) +{ + // kvmap_mm_ndf (kv.h) will let the caller allocate the kv when inserting + // This can avoid a memcpy if the caller does not have the data in a struct kv + return wormhole_create(&kvmap_mm_ndf); +} + + struct wormref * +wh_ref(struct wormhole * const wh) +{ + return wh_api->ref(wh); +} + + void +wh_unref(struct wormref * const ref) +{ + (void)wh_api->unref(ref); +} + + void +wh_park(struct wormref * const ref) +{ + if (wh_api->park) + wh_api->park(ref); +} + + void +wh_resume(struct wormref * const ref) +{ + if (wh_api->resume) + wh_api->resume(ref); +} + + void +wh_clean(struct wormhole * const map) +{ + wh_api->clean(map); +} + + void +wh_destroy(struct wormhole * const map) +{ + wh_api->destroy(map); +} + +// Do set/put with explicit kv buffers + bool +wh_put(struct wormref * const ref, const void * const kbuf, const u32 klen, + const void * const vbuf, const u32 vlen) +{ + struct kv * const newkv = kv_create(kbuf, klen, vbuf, vlen); + if (newkv == NULL) + return false; + // must use with kvmap_mm_ndf (see below) + // the newkv will be saved in the Wormhole and freed by Wormhole when upon deletion + return wh_api->put(ref, newkv); +} + +// 
delete a key + bool +wh_del(struct wormref * const ref, const void * const kbuf, const u32 klen) +{ + struct kref kref; + kref_ref_hash32(&kref, kbuf, klen); + return wh_api->del(ref, &kref); +} + +// test if the key exist in Wormhole + bool +wh_probe(struct wormref * const ref, const void * const kbuf, const u32 klen) +{ + struct kref kref; + kref_ref_hash32(&kref, kbuf, klen); + return wh_api->probe(ref, &kref); +} + +// for wh_get() +struct wh_inp_info { void * vbuf_out; u32 * vlen_out; u32 vbuf_size; }; + +// a kv_inp_func; use this to retrieve the KV's data without unnecesary memory copying + static void +wh_inp_copy_value(struct kv * const curr, void * const priv) +{ + if (curr) { // found + struct wh_inp_info * const info = (typeof(info))priv; + // copy the value data out + const u32 copy_size = info->vbuf_size < curr->vlen ? info->vbuf_size : curr->vlen; + memcpy(info->vbuf_out, kv_vptr_c(curr), copy_size); + // copy the vlen out + *info->vlen_out = curr->vlen; + } +} + +// returns a boolean value indicating whether the key is found. 
+// the value's data will be written to *vlen_out and vbuf_out if the key is found +// if vbuf_size < vlen, then only the first vbuf_size bytes is copied to the buffer +// a small vbuf_size can be used to reduce memcpy cost when only the first a few bytes are needed + bool +wh_get(struct wormref * const ref, const void * const kbuf, const u32 klen, + void * const vbuf_out, const u32 vbuf_size, u32 * const vlen_out) +{ + struct kref kref; + kref_ref_hash32(&kref, kbuf, klen); + struct wh_inp_info info = {vbuf_out, vlen_out, vbuf_size}; + // use the inplace read function to get the value if it exists + return wh_api->inpr(ref, &kref, wh_inp_copy_value, &info); +} + + bool +wh_inpr(struct wormref * const ref, const void * const kbuf, const u32 klen, + kv_inp_func uf, void * const priv) +{ + struct kref kref; + kref_ref_hash32(&kref, kbuf, klen); + return wh_api->inpr(ref, &kref, uf, priv); +} + +// inplace update KV's value with a user-defined hook function +// the update should only modify the data in the value; It should not change the value size + bool +wh_inpw(struct wormref * const ref, const void * const kbuf, const u32 klen, + kv_inp_func uf, void * const priv) +{ + struct kref kref; + kref_ref_hash32(&kref, kbuf, klen); + return wh_api->inpw(ref, &kref, uf, priv); +} + +// merge existing KV with updates with a user-defined hook function + bool +wh_merge(struct wormref * const ref, const void * const kbuf, const u32 klen, + kv_merge_func uf, void * const priv) +{ + struct kref kref; + kref_ref_hash32(&kref, kbuf, klen); + return wh_api->merge(ref, &kref, uf, priv); +} + +// remove a range of KVs from start (inclusive) to end (exclusive); [start, end) + u64 +wh_delr(struct wormref * const ref, const void * const kbuf_start, const u32 klen_start, + const void * const kbuf_end, const u32 klen_end) +{ + struct kref kref_start, kref_end; + kref_ref_hash32(&kref_start, kbuf_start, klen_start); + kref_ref_hash32(&kref_end, kbuf_end, klen_end); + return 
wh_api->delr(ref, &kref_start, &kref_end); +} + + struct wormhole_iter * +wh_iter_create(struct wormref * const ref) +{ + return wh_api->iter_create(ref); +} + + void +wh_iter_seek(struct wormhole_iter * const iter, const void * const kbuf, const u32 klen) +{ + struct kref kref; + kref_ref_hash32(&kref, kbuf, klen); + wh_api->iter_seek(iter, &kref); +} + + bool +wh_iter_valid(struct wormhole_iter * const iter) +{ + return wh_api->iter_valid(iter); +} + +// for wh_iter_peek() +// the out ptrs must be provided in pairs; use a pair of NULLs to ignore the key or value +struct wh_iter_inp_info { void * kbuf_out; void * vbuf_out; u32 kbuf_size; u32 vbuf_size; u32 * klen_out; u32 * vlen_out; }; + +// a kv_inp_func; use this to retrieve the KV's data without unnecesary memory copying + static void +inp_copy_kv_cb(struct kv * const curr, void * const priv) +{ + if (curr) { // found + struct wh_iter_inp_info * const info = (typeof(info))priv; + + // copy the key + if (info->kbuf_out) { // it assumes klen_out is also not NULL + // copy the key data out + const u32 clen = curr->klen < info->kbuf_size ? curr->klen : info->kbuf_size; + memcpy(info->kbuf_out, kv_kptr_c(curr), clen); + // copy the klen out + *info->klen_out = curr->klen; + } + + // copy the value + if (info->vbuf_out) { // it assumes vlen_out is also not NULL + // copy the value data out + const u32 clen = curr->vlen < info->vbuf_size ? 
curr->vlen : info->vbuf_size; + memcpy(info->vbuf_out, kv_vptr_c(curr), clen); + // copy the vlen out + *info->vlen_out = curr->vlen; + } + } +} + +// seek is similar to get + bool +wh_iter_peek(struct wormhole_iter * const iter, + void * const kbuf_out, const u32 kbuf_size, u32 * const klen_out, + void * const vbuf_out, const u32 vbuf_size, u32 * const vlen_out) +{ + struct wh_iter_inp_info info = {kbuf_out, vbuf_out, kbuf_size, vbuf_size, klen_out, vlen_out}; + return wh_api->iter_inp(iter, inp_copy_kv_cb, &info); +} + + void +wh_iter_skip1(struct wormhole_iter * const iter) +{ + wh_api->iter_skip1(iter); +} + + void +wh_iter_skip(struct wormhole_iter * const iter, const u32 nr) +{ + wh_api->iter_skip(iter, nr); +} + + bool +wh_iter_inp(struct wormhole_iter * const iter, kv_inp_func uf, void * const priv) +{ + return wh_api->iter_inp(iter, uf, priv); +} + + void +wh_iter_park(struct wormhole_iter * const iter) +{ + wh_api->iter_park(iter); +} + + void +wh_iter_destroy(struct wormhole_iter * const iter) +{ + wh_api->iter_destroy(iter); +} +// }}} wh + +// vim:fdm=marker diff --git a/run/MassTrie-beta/wormhole/wh.h b/run/MassTrie-beta/wormhole/wh.h new file mode 100644 index 00000000..bd17b38d --- /dev/null +++ b/run/MassTrie-beta/wormhole/wh.h @@ -0,0 +1,313 @@ +/* + * Copyright (c) 2016--2021 Wu, Xingbo + * + * All rights reserved. No warranty, explicit or implicit, provided. + */ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +struct wormhole; +struct wormref; + +// wormhole {{{ +// the wh created by wormhole_create() can work with all of safe/unsafe operations. + extern struct wormhole * +wormhole_create(const struct kvmap_mm * const mm); + +// the wh created by whunsafe_create() can only work with the unsafe operations. 
+ extern struct wormhole * +whunsafe_create(const struct kvmap_mm * const mm); + + extern struct kv * +wormhole_get(struct wormref * const ref, const struct kref * const key, struct kv * const out); + + extern bool +wormhole_probe(struct wormref * const ref, const struct kref * const key); + + extern bool +wormhole_put(struct wormref * const ref, struct kv * const kv); + + extern bool +wormhole_merge(struct wormref * const ref, const struct kref * const kref, + kv_merge_func uf, void * const priv); + + extern bool +wormhole_inpr(struct wormref * const ref, const struct kref * const key, + kv_inp_func uf, void * const priv); + + extern bool +wormhole_inpw(struct wormref * const ref, const struct kref * const key, + kv_inp_func uf, void * const priv); + + extern bool +wormhole_del(struct wormref * const ref, const struct kref * const key); + + extern u64 +wormhole_delr(struct wormref * const ref, const struct kref * const start, + const struct kref * const end); + + extern struct wormhole_iter * +wormhole_iter_create(struct wormref * const ref); + + extern void +wormhole_iter_seek(struct wormhole_iter * const iter, const struct kref * const key); + + extern bool +wormhole_iter_valid(struct wormhole_iter * const iter); + + extern struct kv * +wormhole_iter_peek(struct wormhole_iter * const iter, struct kv * const out); + + extern bool +wormhole_iter_kref(struct wormhole_iter * const iter, struct kref * const kref); + + extern bool +wormhole_iter_kvref(struct wormhole_iter * const iter, struct kvref * const kvref); + + extern void +wormhole_iter_skip1(struct wormhole_iter * const iter); + + extern void +wormhole_iter_skip(struct wormhole_iter * const iter, const u32 nr); + + extern struct kv * +wormhole_iter_next(struct wormhole_iter * const iter, struct kv * const out); + + extern bool +wormhole_iter_inp(struct wormhole_iter * const iter, kv_inp_func uf, void * const priv); + + extern void +wormhole_iter_park(struct wormhole_iter * const iter); + + extern void 
+wormhole_iter_destroy(struct wormhole_iter * const iter); + + extern struct wormref * +wormhole_ref(struct wormhole * const map); + + extern struct wormhole * +wormhole_unref(struct wormref * const ref); + + extern void +wormhole_park(struct wormref * const ref); + + extern void +wormhole_resume(struct wormref * const ref); + + extern void +wormhole_refresh_qstate(struct wormref * const ref); + +// clean with more threads + extern void +wormhole_clean_th(struct wormhole * const map, const u32 nr_threads); + + extern void +wormhole_clean(struct wormhole * const map); + + extern void +wormhole_destroy(struct wormhole * const map); + +// safe API (no need to refresh qstate) + + extern struct kv * +whsafe_get(struct wormref * const ref, const struct kref * const key, struct kv * const out); + + extern bool +whsafe_probe(struct wormref * const ref, const struct kref * const key); + + extern bool +whsafe_put(struct wormref * const ref, struct kv * const kv); + + extern bool +whsafe_merge(struct wormref * const ref, const struct kref * const kref, + kv_merge_func uf, void * const priv); + + extern bool +whsafe_inpr(struct wormref * const ref, const struct kref * const key, + kv_inp_func uf, void * const priv); + + extern bool +whsafe_inpw(struct wormref * const ref, const struct kref * const key, + kv_inp_func uf, void * const priv); + + extern bool +whsafe_del(struct wormref * const ref, const struct kref * const key); + + extern u64 +whsafe_delr(struct wormref * const ref, const struct kref * const start, + const struct kref * const end); + +// use wormhole_iter_create + extern void +whsafe_iter_seek(struct wormhole_iter * const iter, const struct kref * const key); + + extern struct kv * +whsafe_iter_peek(struct wormhole_iter * const iter, struct kv * const out); + +// use wormhole_iter_valid +// use wormhole_iter_peek +// use wormhole_iter_kref +// use wormhole_iter_kvref +// use wormhole_iter_skip1 +// use wormhole_iter_skip +// use wormhole_iter_next +// use 
wormhole_iter_inp + + extern void +whsafe_iter_park(struct wormhole_iter * const iter); + + extern void +whsafe_iter_destroy(struct wormhole_iter * const iter); + + extern struct wormref * +whsafe_ref(struct wormhole * const map); + +// use wormhole_unref + +// unsafe API + + extern struct kv * +whunsafe_get(struct wormhole * const map, const struct kref * const key, struct kv * const out); + + extern bool +whunsafe_probe(struct wormhole * const map, const struct kref * const key); + + extern bool +whunsafe_put(struct wormhole * const map, struct kv * const kv); + + extern bool +whunsafe_merge(struct wormhole * const map, const struct kref * const kref, + kv_merge_func uf, void * const priv); + + extern bool +whunsafe_inp(struct wormhole * const map, const struct kref * const key, + kv_inp_func uf, void * const priv); + + extern bool +whunsafe_del(struct wormhole * const map, const struct kref * const key); + + extern u64 +whunsafe_delr(struct wormhole * const map, const struct kref * const start, + const struct kref * const end); + + extern struct wormhole_iter * +whunsafe_iter_create(struct wormhole * const map); + + extern void +whunsafe_iter_seek(struct wormhole_iter * const iter, const struct kref * const key); + +// unsafe iter_valid: use wormhole_iter_valid +// unsafe iter_peek: use wormhole_iter_peek +// unsafe iter_kref: use wormhole_iter_kref + + extern void +whunsafe_iter_skip1(struct wormhole_iter * const iter); + + extern void +whunsafe_iter_skip(struct wormhole_iter * const iter, const u32 nr); + + extern struct kv * +whunsafe_iter_next(struct wormhole_iter * const iter, struct kv * const out); + +// unsafe iter_inp: use wormhole_iter_inp + + extern void +whunsafe_iter_destroy(struct wormhole_iter * const iter); + + extern void +wormhole_fprint(struct wormhole * const map, FILE * const out); + +extern const struct kvmap_api kvmap_api_wormhole; +extern const struct kvmap_api kvmap_api_whsafe; +extern const struct kvmap_api kvmap_api_whunsafe; +// }}} 
wormhole + +// wh {{{ + extern struct wormhole * +wh_create(void); + + extern struct wormref * +wh_ref(struct wormhole * const wh); + + extern void +wh_unref(struct wormref * const ref); + + extern void +wh_park(struct wormref * const ref); + + extern void +wh_resume(struct wormref * const ref); + + extern void +wh_clean(struct wormhole * const map); + + extern void +wh_destroy(struct wormhole * const map); + + extern bool +wh_put(struct wormref * const ref, const void * const kbuf, const u32 klen, + const void * const vbuf, const u32 vlen); + + extern bool +wh_del(struct wormref * const ref, const void * const kbuf, const u32 klen); + + extern bool +wh_probe(struct wormref * const ref, const void * const kbuf, const u32 klen); + + extern bool +wh_get(struct wormref * const ref, const void * const kbuf, const u32 klen, + void * const vbuf_out, const u32 vbuf_size, u32 * const vlen_out); + + extern bool +wh_inpr(struct wormref * const ref, const void * const kbuf, const u32 klen, + kv_inp_func uf, void * const priv); + + extern bool +wh_inpw(struct wormref * const ref, const void * const kbuf, const u32 klen, + kv_inp_func uf, void * const priv); + + extern bool +wh_merge(struct wormref * const ref, const void * const kbuf, const u32 klen, + kv_merge_func uf, void * const priv); + + extern u64 +wh_delr(struct wormref * const ref, const void * const kbuf_start, const u32 klen_start, + const void * const kbuf_end, const u32 klen_end); + + extern struct wormhole_iter * +wh_iter_create(struct wormref * const ref); + + extern void +wh_iter_seek(struct wormhole_iter * const iter, const void * const kbuf, const u32 klen); + + extern bool +wh_iter_valid(struct wormhole_iter * const iter); + + extern bool +wh_iter_peek(struct wormhole_iter * const iter, + void * const kbuf_out, const u32 kbuf_size, u32 * const klen_out, + void * const vbuf_out, const u32 vbuf_size, u32 * const vlen_out); + + extern void +wh_iter_skip1(struct wormhole_iter * const iter); + + extern void 
+wh_iter_skip(struct wormhole_iter * const iter, const u32 nr); + + extern bool +wh_iter_inp(struct wormhole_iter * const iter, kv_inp_func uf, void * const priv); + + extern void +wh_iter_park(struct wormhole_iter * const iter); + + extern void +wh_iter_destroy(struct wormhole_iter * const iter); +// }}} wh + +#ifdef __cplusplus +} +#endif +// vim:fdm=marker diff --git a/run/MassTrie-beta/wormhole/wh.py b/run/MassTrie-beta/wormhole/wh.py new file mode 100644 index 00000000..e744cec8 --- /dev/null +++ b/run/MassTrie-beta/wormhole/wh.py @@ -0,0 +1,192 @@ +#!/usr/bin/python3 + +# +# Copyright (c) 2016--2021 Wu, Xingbo +# +# All rights reserved. No warranty, explicit or implicit, provided. +# + +import msgpack +from ctypes import * # CDLL and c_xxx types + +# libwh {{{ +# Change this path when necessary +libwh = CDLL("./libwh.so") + +# create +libwh.wh_create.argtypes = [] +libwh.wh_create.restype = c_void_p + +# close (no return value) +libwh.wh_destroy.argtypes = [c_void_p] + +# ref +libwh.wh_ref.argtypes = [c_void_p] +libwh.wh_ref.restype = c_void_p + +# unref +libwh.wh_unref.argtypes = [c_void_p] + +# put +libwh.wh_put.argtypes = [c_void_p, c_char_p, c_uint, c_char_p, c_uint] +libwh.wh_put.restype = c_bool + +# get +libwh.wh_get.argtypes = [c_void_p, c_char_p, c_uint, c_char_p, c_uint, c_void_p] +libwh.wh_get.restype = c_bool + +# probe +libwh.wh_probe.argtypes = [c_void_p, c_char_p, c_uint] +libwh.wh_probe.restype = c_bool + +# del +libwh.wh_del.argtypes = [c_void_p, c_char_p, c_uint] +libwh.wh_del.restype = c_bool + +# iter_create +libwh.wh_iter_create.argtypes = [c_void_p] +libwh.wh_iter_create.restype = c_void_p + +# iter_seek +libwh.wh_iter_seek.argtypes = [c_void_p, c_char_p, c_uint] + +# iter_valid +libwh.wh_iter_valid.argtypes = [c_void_p] +libwh.wh_iter_valid.restype = c_bool + +# iter_skip1 +libwh.wh_iter_skip1.argtypes = [c_void_p] + +# iter_skip +libwh.wh_iter_skip.argtypes = [c_void_p, c_uint] + +# iter_peek +libwh.wh_iter_peek.argtypes = [c_void_p, 
c_char_p, c_uint, c_void_p, c_char_p, c_uint, c_void_p] +libwh.wh_iter_peek.restype = c_bool + +# iter_park +libwh.wh_iter_park.argtypes = [c_void_p] + +# iter_destroy +libwh.wh_iter_destroy.argtypes = [c_void_p] +# }}} libwh + +# class {{{ +class Wh: + def __init__(self, maxklen=256, maxvlen=8192): + self.whptr = libwh.wh_create() + self.kbufsz = maxklen + self.vbufsz = maxvlen + + # user must call explicitly + def destroy(self): + libwh.wh_destroy(self.whptr) + + def ref(self): + return WhRef(self.whptr, self.kbufsz, self.vbufsz) + +class WhRef: + def __init__(self, whptr, kbufsz, vbufsz): + self.refptr = libwh.wh_ref(whptr) + self.kbufsz = kbufsz + self.vbufsz = vbufsz + self.vbuf = create_string_buffer(self.vbufsz) + + # user must call explicitly + def unref(self): + libwh.wh_unref(self.refptr) + + def iter(self): + return WhIter(self.refptr, self.kbufsz, self.vbufsz) + + # key: python string; value: any (hierarchical) python object + def put(self, key, value): + binkey = key.encode() + binvalue = msgpack.packb(value) + return libwh.wh_put(self.refptr, binkey, c_uint(len(binkey)), binvalue, c_uint(len(binvalue))) + + # return the value as a python object + def get(self, key): + binkey = key.encode() + vlen = c_uint() + ret = libwh.wh_get(self.refptr, binkey, len(binkey), self.vbuf, self.vbufsz, byref(vlen)) + if ret and vlen.value <= self.vbufsz: + return msgpack.unpackb(self.vbuf.value) + else: + return None + + def delete(self, key): + binkey = key.encode() + return libwh.wh_del(self.refptr, binkey, c_uint(len(binkey))) + + def probe(self, key): + binkey = key.encode() + return libwh.wh_probe(self.refptr, binkey, c_uint(len(binkey))) + +class WhIter: + def __init__(self, refptr, kbufsz, vbufsz): + self.iptr = libwh.wh_iter_create(refptr) + self.kbufsz = kbufsz + self.vbufsz = vbufsz + self.kbuf = create_string_buffer(kbufsz) + self.vbuf = create_string_buffer(vbufsz) + + # user must call explicitly + def destroy(self): + libwh.wh_iter_destroy(self.iptr) + + 
def seek(self, key): + if key is None: + libwh.wh_iter_seek(self.iptr, None, c_uint(0)) + else: + binkey = key.encode() + libwh.wh_iter_seek(self.iptr, binkey, c_uint(len(binkey))) + + def valid(self): + return libwh.wh_iter_valid(self.iptr) + + def skip1(self): + libwh.wh_iter_skip1(self.iptr) + + def skip(self, nr): + libwh.wh_iter_skip(self.iptr, c_uint(nr)) + + # return (key, value) pair or None + def peek(self): + klen = c_uint() + vlen = c_uint() + ret = libwh.wh_iter_peek(self.iptr, self.kbuf, self.kbufsz, byref(klen), self.vbuf, self.vbufsz, byref(vlen)) + if ret and klen.value <= self.kbufsz and vlen.value <= self.vbufsz: + self.kbuf[klen.value] = b'\x00' + return (self.kbuf.value.decode(), klen.value, msgpack.unpackb(self.vbuf.value), vlen.value) + else: + return None + +# }}} class + +# examples +wh1 = Wh(32, 1024) +ref1 = wh1.ref() # take a ref for kv operations + +ref1.put("Hello", "pywh") +ref1.put("key1", "value1") +ref1.put("key2", "value2") +ref1.put("key3", {"xxx":"valuex", "yyy":"valuey"}) +ref1.delete("key2") + +rget = ref1.get("Hello") +print(rget) + +# don't use ref when iterating +iter1 = ref1.iter() +iter1.seek(None) +while iter1.valid(): + r = iter1.peek() + print(r) + iter1.skip1() + +iter1.destroy() # must destroy all iters before unref +ref1.unref() # must unref all refs before close() +wh1.destroy() + +# vim:fdm=marker diff --git a/run/MassTrie-beta/wormhole/wh.strip b/run/MassTrie-beta/wormhole/wh.strip new file mode 100644 index 00000000..e7b3971f --- /dev/null +++ b/run/MassTrie-beta/wormhole/wh.strip @@ -0,0 +1,161 @@ +-K key_size +-K key_size_align +-K kref_compare +-K kref_kv_compare +-K kref_kv_match +-K kref_lcp +-K kref_match +-K kref_null +-K kref_ref_hash32 +-K kref_ref_kv +-K kref_ref_kv_hash32 +-K kref_ref_raw +-K kref_update_hash32 +-K kv_compare +-K kv_compare_ptrs +-K kv_crc32c +-K kv_crc32c_extend +-K kv_create +-K kv_create_kref +-K kv_create_str +-K kv_create_str_str +-K kv_dup +-K kv_dup2 +-K kv_dup2_key +-K 
kv_dup2_key_prefix +-K kv_dup_key +-K kv_key_lcp +-K kv_kptr +-K kv_kptr_c +-K kv_kref +-K kvmap_api_whsafe +-K kvmap_api_whunsafe +-K kvmap_api_wormhole +-K kvmap_dump_keys +-K kvmap_inp_steal_kv +-K kvmap_kv_del +-K kvmap_kv_delr +-K kvmap_kv_get +-K kvmap_kv_inpr +-K kvmap_kv_inpw +-K kvmap_kv_iter_seek +-K kvmap_kv_merge +-K kvmap_kv_probe +-K kvmap_kv_put +-K kvmap_mm_dup +-K kvmap_mm_free_free +-K kvmap_mm_free_noop +-K kvmap_mm_in_dup +-K kvmap_mm_in_noop +-K kvmap_mm_ndf +-K kvmap_mm_out_dup +-K kvmap_mm_out_noop +-K kvmap_raw_del +-K kvmap_raw_get +-K kvmap_raw_inpr +-K kvmap_raw_inpw +-K kvmap_raw_iter_seek +-K kvmap_raw_probe +-K kvmap_ref +-K kvmap_unref +-K kv_match +-K kv_match_full +-K kv_null +-K kv_print +-K kv_qsort +-K kvref_dup2_key +-K kvref_dup2_kv +-K kv_refill +-K kv_refill_hex32 +-K kv_refill_hex64 +-K kv_refill_hex64_klen +-K kv_refill_kref +-K kv_refill_kref_v +-K kv_refill_str +-K kv_refill_str_str +-K kv_refill_u64 +-K kv_refill_value +-K kvref_kv_compare +-K kvref_ref_kv +-K kv_size +-K kv_size_align +-K kv_update_hash +-K kv_vptr +-K kv_vptr_c +-K wh_clean +-K wh_create +-K wh_del +-K wh_delr +-K wh_destroy +-K wh_get +-K wh_inpr +-K wh_inpw +-K wh_iter_create +-K wh_iter_destroy +-K wh_iter_inp +-K wh_iter_park +-K wh_iter_peek +-K wh_iter_seek +-K wh_iter_skip +-K wh_iter_valid +-K wh_merge +-K wh_park +-K wh_probe +-K wh_ref +-K wh_resume +-K whsafe_del +-K whsafe_delr +-K whsafe_get +-K whsafe_inpr +-K whsafe_inpw +-K whsafe_iter_destroy +-K whsafe_iter_park +-K whsafe_iter_seek +-K whsafe_merge +-K whsafe_probe +-K whsafe_ref +-K whsafe_put +-K wh_put +-K wh_unref +-K whunsafe_create +-K whunsafe_del +-K whunsafe_delr +-K whunsafe_get +-K whunsafe_inp +-K whunsafe_iter_create +-K whunsafe_iter_destroy +-K whunsafe_iter_next +-K whunsafe_iter_seek +-K whunsafe_iter_skip +-K whunsafe_merge +-K whunsafe_probe +-K whunsafe_put +-K wormhole_clean +-K wormhole_create +-K wormhole_del +-K wormhole_delr +-K wormhole_destroy +-K 
wormhole_fprint +-K wormhole_get +-K wormhole_inpr +-K wormhole_inpw +-K wormhole_iter_create +-K wormhole_iter_destroy +-K wormhole_iter_inp +-K wormhole_iter_kref +-K wormhole_iter_kvref +-K wormhole_iter_next +-K wormhole_iter_park +-K wormhole_iter_peek +-K wormhole_iter_seek +-K wormhole_iter_skip +-K wormhole_iter_valid +-K wormhole_kvmap_api_create +-K wormhole_merge +-K wormhole_park +-K wormhole_probe +-K wormhole_ref +-K wormhole_refresh_qstate +-K wormhole_resume +-K wormhole_put +-K wormhole_unref diff --git a/test/MassTrie-beta/MassTrie.hh b/test/MassTrie-beta/MassTrie.hh new file mode 100644 index 00000000..53cfd776 --- /dev/null +++ b/test/MassTrie-beta/MassTrie.hh @@ -0,0 +1,318 @@ +#include + +#include + +#include + +#include + +#include + +#include + +#include "wormhole/lib.h" + +#include "wormhole/kv.h" + +#include "wormhole/wh.h" + +#define NUM_THREADS 64 + +#define MAX_SIZE 64 + +using namespace std; + +//~~~~~~~~~CLASS MASSTRIE~~~~~~~~~~~~~~ + +class MassTrie +{ + +public: + // constructor + + MassTrie() + { + + // creating wh wormhole mapping key to internal_elem (as uintptr_t) + + wh = wh_create(); + + ref = wh_ref(this->wh); + + iter = wh_iter_create(this->ref); + + this->kbuf_out = (void *)malloc(sizeof(char) * MAX_SIZE); + + this->vbuf_out = (void *)malloc(sizeof(char) * MAX_SIZE); + + r = false; + } + + // destructor + + ~MassTrie() + { + + wh_iter_destroy(this->iter); + + wh_unref(this->ref); + + wh_clean(this->wh); + + wh_destroy(this->wh); + + free(kbuf_out); + + free(vbuf_out); + } + + //~~~~~~~~~MASSTRIE FUNCTIONS~~~~~~~~~~~~~~ + + // put function - putting a uintptr_t which is the internal_elem + + bool put(const void *key, int klen, const void *value, int vlen) + { + + return (wh_put(this->ref, key, klen, value, vlen)); + } + + // get function + + void *get(struct wormref *const ref, const void *key, int klen) + { + + // variables + + // bool r; + + u32 vlen_out = 0; + + // get action performed + + r = wh_get(ref, key, klen, 
vbuf_out, sizeof(vbuf_out), &vlen_out); + + return r ? vbuf_out : nullptr; + } + + // delete function + + bool del(const void *key, int klen) + { + + return (wh_del(this->ref, key, klen)); + } + + // probe function - returns true if key exists, false otherwise + + bool probe(const void *key, int klen) + { + + r = (wh_probe(this->ref, key, klen)); + + return r; + } + + // finds the closest pointer currently in the MassTrie + + // to a pointer passed as a parameter + + void *find_closest(const void *key) + { + + // variables + + u32 klen_out = 0; + + u32 vlen_out = 0; + + // bool r; + + int min = INT_MAX; + + int curr; + + void *res = NULL; + + // search loop + + wh_iter_seek(this->iter, NULL, 0); // seek to the head + + // printf("wh_iter_seek closest pointer to key\"\"\n"); + + while (wh_iter_valid(this->iter)) + { + + r = wh_iter_peek(this->iter, kbuf_out, MAX_SIZE, &klen_out, vbuf_out, MAX_SIZE, &vlen_out); + + if (r) + { + + // calculate disatnce + + curr = abs((long)(reinterpret_cast(kbuf_out)) - (long)(reinterpret_cast(key))); + + if (curr < min) + { + + // perform malloc + + if (!res) + + res = (void *)malloc(sizeof(char) * MAX_SIZE); + + // error handling + + if (res == NULL) + { + + printf("Error! memory not allocated."); + + exit(1); + } + + min = curr; + + // cout<<"curr = "<iter); + + memset(kbuf_out, 0, sizeof(kbuf_out)); + + memset(vbuf_out, 0, sizeof(vbuf_out)); + } + + return (res != NULL) ? 
res : nullptr; + } + + // deletes all from MassTrie + + void delete_all() + { + + // variables + + u32 klen_out = 0; + + u32 vlen_out = 0; + + // bool + + // search loop + + wh_iter_seek(this->iter, NULL, 0); // seek to the head + + // printf("wh_iter_seek closest pointer to key\"\"\n"); + + while (wh_iter_valid(this->iter)) + { + + r = wh_iter_peek(this->iter, kbuf_out, MAX_SIZE, &klen_out, vbuf_out, MAX_SIZE, &vlen_out); + + if (r) + { + + // delete key + + this->del(kbuf_out, sizeof(kbuf_out)); + } + + else + { + + printf("ERROR!\n"); + } + + wh_iter_skip1(this->iter); + + memset(kbuf_out, 0, sizeof(kbuf_out)); + + memset(vbuf_out, 0, sizeof(vbuf_out)); + } + } + + // data members + + struct wormhole *wh; + + struct wormref *ref; + + struct wormhole_iter *iter; + + void *kbuf_out; + + void *vbuf_out; + + bool r; + +}; // class MassTrie + +/** + +//override the << operation + + + +ostream& operator<<(ostream &os, MassTrie* m){ + + + +u32 klen_out = 0; + + char kbuf_out[MAX_SIZE] = {}; + + u32 vlen_out = 0; + + char vbuf_out[MAX_SIZE] = {}; + + bool r; + + + + wh_iter_seek(m->iter, NULL, 0); // seek to the head + + printf("wh_iter_seek \"\"\n"); + + while (wh_iter_valid(m->iter)) { + + r = wh_iter_peek(m->iter, kbuf_out, MAX_SIZE, &klen_out, vbuf_out, MAX_SIZE, &vlen_out); + + if (r) { + + os << "wh_iter_peek: key = "<(kbuf_out)<<" , klen = "<< klen_out<<" , "<< + + " value= "<(vbuf_out) << ", vlen= "<< vlen_out<iter); + + + + memset(kbuf_out,0,sizeof(kbuf_out)); + + memset(vbuf_out,0,sizeof(vbuf_out)); + + } + + return os; + +} + + + +**/ diff --git a/test/MassTrie-beta/wormhole/LICENSE b/test/MassTrie-beta/wormhole/LICENSE new file mode 100644 index 00000000..f288702d --- /dev/null +++ b/test/MassTrie-beta/wormhole/LICENSE @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. 
+ Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. 
+ + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. 
+ + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. 
+ + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. 
The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. 
+ + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. 
+ + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. 
This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. 
For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. 
Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. 
+ + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. 
Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. 
+ + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + Copyright (C) + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +. 
diff --git a/test/MassTrie-beta/wormhole/Makefile b/test/MassTrie-beta/wormhole/Makefile new file mode 100644 index 00000000..f00e6b59 --- /dev/null +++ b/test/MassTrie-beta/wormhole/Makefile @@ -0,0 +1,45 @@ +# Makefile +# rules (always with .out) +# SRC-X.out += abc # extra source: abc.c +# MOD-X.out += abc # extra module: abc.c abc.h +# ASM-X.out += abc # extra assembly: abc.S +# DEP-X.out += abc # extra dependency: abc +# FLG-X.out += -finline # extra flags +# LIB-X.out += abc # extra -labc options + +# X.out : xyz.h xyz.c # for extra dependences that are to be compiled/linked. + +# X => X.out +TARGETS += easydemo concbench stresstest +# X => X.c only +SOURCES += +# X => X.S only +ASSMBLY += +# X => X.c X.h +MODULES += lib kv wh +# X => X.h +HEADERS += ctypes + +FLG += +LIB += m + +UNAME_S := $(shell uname -s) +ifeq ($(UNAME_S),FreeBSD) +LIB += execinfo +endif + +# when $ make FORKER_PAPI=y +ifeq ($(strip $(FORKER_PAPI)),y) +LIB += papi +FLG += -DFORKER_PAPI +endif + +bin : libwh.so +libwh.so : Makefile Makefile.common lib.c lib.h kv.c kv.h wh.c wh.h wh.strip + $(eval ALLFLG := $(CSTD) $(EXTRA) $(FLG) -shared -fPIC) + $(eval ALLLIB := $(addprefix -l,$(LIB) $(LIB-$@))) + $(CCC) $(ALLFLG) -o $@ lib.c kv.c wh.c $(ALLLIB) + strip --strip-all --discard-all @wh.strip $@ + + +include Makefile.common diff --git a/test/MassTrie-beta/wormhole/Makefile.common b/test/MassTrie-beta/wormhole/Makefile.common new file mode 100644 index 00000000..ecd761e7 --- /dev/null +++ b/test/MassTrie-beta/wormhole/Makefile.common @@ -0,0 +1,216 @@ +#usage: include Makefile.common at the end of your Makefile + +# no builtin rules/vars (CC, CXX, etc. 
are still defined but will be empty) +MAKEFLAGS += -r -R + +HDR = $(addsuffix .h,$(MODULES) $(HEADERS)) +SRC = $(addsuffix .c,$(MODULES) $(SOURCES)) +ASM = $(addsuffix .S,$(ASSMBLY)) +OBJ = $(addsuffix .o,$(MODULES) $(SOURCES) $(ASSEMBLY)) +DEP = Makefile.common Makefile $(HDR) $(EXTERNDEP) $(EXTERNSRC) +BIN = $(addsuffix .out,$(TARGETS)) +DIS = $(addsuffix .dis,$(TARGETS)) + +# clang: +# EXTRA="-Rpass=loop-vectorize" # IDs loops that were successfully V-ed +# EXTRA="-Rpass-missed=loop-vectorize" # IDs loops that failed V +# EXTRA="-Rpass-analysis=loop-vectorize" # IDs the statements that caused V to fail +# EXTRA="-Rpass=\ *" # remarks for all passes +# other passes: https://llvm.org/docs/Passes.html + +O ?= rg + +# predefined OPT: make O={rg,r,0g,3g,p,0s,3s,cov,mc,hc,wn,stk} +ifeq ($O,rg) # make O=rg +OPT ?= -DNDEBUG -g3 -O3 -flto -fno-stack-protector +else ifeq ($O,r) # make O=r (for release) +OPT ?= -DNDEBUG -O3 -flto -fno-stack-protector +else ifeq ($O,ns) # make O=ns (no signal handlers) +OPT ?= -DNDEBUG -O3 -flto -fno-stack-protector -DNOSIGNAL +else ifeq ($O,0g) # make O=0g +OPT ?= -g3 -O0 -fno-inline +else ifeq ($O,2g) # make O=2g +OPT ?= -g3 -O2 +else ifeq ($O,3g) # make O=3g +OPT ?= -g3 -O3 -flto -fno-inline +else ifeq ($O,p) # make O=p (profiling: rg+noinline) +OPT ?= -DNDEBUG -g3 -O3 -flto -fno-stack-protector -fno-inline +else ifeq ($O,0s) # make O=0s (address sanitizer) +OPT ?= -g3 -O0 -fno-inline -fsanitize=address -fno-omit-frame-pointer -fno-optimize-sibling-calls -DHEAPCHECKING +else ifeq ($O,3s) # make O=3s (address sanitizer) +OPT ?= -g3 -O3 -fno-inline -fsanitize=address -fno-omit-frame-pointer -fno-optimize-sibling-calls -DHEAPCHECKING +else ifeq ($O,t) # make O=0t (thread sanitizer) +OPT ?= -g3 -O1 -fno-inline -fsanitize=thread -fno-stack-protector +else ifeq ($O,cov) # make O=cov (for gcov) +OPT ?= -g3 -DNDEBUG -O0 --coverage +CCC = gcc +else ifeq ($O,mc) # make O=mc (for valgrind memcheck) +OPT ?= -g3 -O1 -fno-inline -DHEAPCHECKING +ARCH 
?= broadwell +else ifeq ($O,hc) # make O=hc (for gperftools heapcheck) +OPT ?= -g3 -O1 -fno-inline +LIB += tcmalloc +else ifeq ($O,wn) # more warning +OPT ?= -g3 -O3 -Wvla -Wformat=2 -Wconversion -Wstrict-prototypes -Wmissing-prototypes +else ifeq ($O,stk) # check stack usage with gcc +OPT ?= -g3 -O3 -DNDEBUG -fstack-usage +CCC = gcc +endif + +# malloc: g:glibc, t:tcmalloc, j:jemalloc +M ?= g + +ifeq ($M,t) + LIB += tcmalloc + FLG += -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free +else ifeq ($M,j) + LIB += jemalloc +endif + +UNAME_S := $(shell uname -s) +ifeq ($(UNAME_S),Linux) + CHECK_S := -D__linux__ + LIB += rt +else ifeq ($(UNAME_S),FreeBSD) + CHECK_S := -D__FreeBSD__ + FLG += -I/usr/local/include -L/usr/local/lib + LIB += rt + LIB += execinfo + TPUT := /usr/local/bin/tput +else ifeq ($(UNAME_S),Darwin) + CHECK_S := -D__APPLE__ -D__MACH__ + # do nothing +else + $(error "Supported Platforms: Linux, FreeBSD, Darwin") +endif +TPUT ?= tput + +CCC ?= clang +CSTD = -std=gnu18 +XCC ?= clang++ +XSTD = -std=gnu++17 + +UNAME_M := $(shell uname -m) +ifeq ($(UNAME_M),aarch64) # "native" does not work for clang@aarch64 + CHECK_M := -D__aarch64__ + ARCH ?= armv8-a+crc +else ifeq ($(UNAME_M),arm64) # "native" does not work for clang@aarch64 + CHECK_M := -D__aarch64__ + ARCH ?= armv8-a+crc +else ifeq ($(UNAME_M),x86_64) + CHECK_M := -D__x86_64__ + ARCH ?= native +else ifeq ($(UNAME_M),amd64) # freebsd + CHECK_M := -D__x86_64__ + ARCH ?= native +else + $(error "Supported Platforms: aarch64, x86_64") +endif + +TUNE ?= native + +NBI += memcpy memmove memcmp + +# minimal requirement on x86_64: -march=nehalem +# minimal requirement on aarch64: -march=armv8-a+crc +FLG += -march=$(ARCH) -mtune=$(TUNE) +FLG += -pthread -Wall -Wextra -Wshadow #-Weverything +FLG += $(addprefix -fno-builtin-,$(NBI)) +FLG += $(OPT) + +ifneq ($(shell $(CCC) --version 2>/dev/null | grep clang),) +FLG += -ferror-limit=3 +CCCTYPE := clang +else ifneq ($(shell $(CCC) --version 
2>/dev/null | grep gcc),) +FLG += -fmax-errors=3 +FLG += -Wno-unknown-pragmas +CCCTYPE := gcc +else + $(error "Supported Compilers: clang, gcc") +endif + +ifeq ($(CCCTYPE),clang) + CCINST = /usr/lib/clang/$(shell $(CCC) --version 2>/dev/null | awk '/^clang/ { print $$3 }') + CCINC = $(CCINST)/include +else ifeq ($(CCCTYPE),gcc) + CCINST = /usr/lib/gcc/$(shell $(CCC) -dumpmachine)/$(shell $(CCC) -dumpversion) + CCINC = $(CCINST)/include $(CCINST)/include-fixed +endif +CCINC = /usr/include /usr/local/include + +ifneq ($(shell find $(CCINC) -name backtrace-supported.h 2>/dev/null),) + LIB += backtrace + FLG += -DBACKTRACE +endif + +ifneq ($(shell find $(CCINC) -name liburing.h 2>/dev/null),) + LIB += uring + FLG += -DLIBURING +endif + + +uniq = $(if $1,$(firstword $1) $(call uniq,$(filter-out $(firstword $1),$1))) +magentatxt := $(shell $(TPUT) setaf 5) +greentxt := $(shell $(TPUT) setaf 2) +bluetxt := $(shell $(TPUT) setaf 4) +normaltxt := $(shell $(TPUT) sgr0) + +.PHONY : bin dis def clean cleanx check tags + +bin : $(BIN) +dis : $(DIS) bin +.DEFAULT_GOAL = bin +.SECONDEXPANSION: + +ifeq ($(J),o) +# DANGER. Don't use unless it works! 
+# build from .o files but target-specific flags are missing in %.o : %.x +%.out : %.o $(OBJ) $$(addsuffix .o,$$(SRC-$$@) $$(MOD-$$@) $$(ASM-$$@)) + $(eval ALLFLG := $(CSTD) $(EXTRA) $(FLG) $(FLG-$*) $(FLG-$*.o) $(FLG-$@) -rdynamic) + $(eval ALLLIB := $(addprefix -l,$(LIB) $(LIB-$@))) + $(CCC) $(ALLFLG) -o $@ $^ $(ALLLIB) +# +else # default: all-in-one command +%.out : %.c $(SRC) $(ASM) $(DEP) $$(DEP-$$@) $$(addsuffix .c,$$(SRC-$$@) $$(MOD-$$@)) $$(addsuffix .h,$$(HDR-$$@) $$(MOD-$$@)) $$(addsuffix .S,$$(ASM-$$@)) + $(eval ALLSRC := $(SRC) $(addsuffix .c,$(SRC-$@) $(MOD-$@)) $(ASM) $(addsuffix .S,$(ASM-$@))) + $(eval UNIQSRC := $(call uniq,$(ALLSRC))) + $(eval ALLFLG := $(CSTD) $(EXTRA) $(FLG) $(FLG-$@) -rdynamic) + $(eval ALLLIB := $(addprefix -l,$(LIB) $(LIB-$@))) + @printf '$(bluetxt)$@$(magentatxt) <= $(greentxt)$< $(UNIQSRC)$(normaltxt)\n' + $(CCC) $(ALLFLG) -o $@ $< $(UNIQSRC) $(ALLLIB) +# +endif + + +%.dis : %.out + objdump -SlwtC $< 1>$@ 2>/dev/null + +%.o : %.cc $(DEP) $$(DEP-$$@) $$(addsuffix .h,$$(HDR-$$@) $$(MOD-$$@)) + $(XCC) $(XSTD) $(EXTRA) $(FLG) $(FLG-$*) $(FLG-$@) -o $@ -c $< + +%.o : %.c $(DEP) $$(DEP-$$@) $$(addsuffix .h,$$(HDR-$$@) $$(MOD-$$@)) + $(CCC) $(CSTD) $(EXTRA) $(FLG) $(FLG-$*) $(FLG-$@) -o $@ -c $< + +%.o : %.S $(DEP) $$(DEP-$$@) $$(addsuffix .h,$$(HDR-$$@) $$(MOD-$$@)) + $(CCC) $(CSTD) $(EXTRA) $(FLG) $(FLG-$*) $(FLG-$@) -o $@ -c $< + +%.s : %.c $(DEP) $$(DEP-$$@) $$(addsuffix .h,$$(HDR-$$@) $$(MOD-$$@)) + $(CCC) $(CSTD) $(EXTRA) $(FLG) $(FLG-$*) $(FLG-$*.o) -S -o $@ -c $< + +def : + $(CCC) $(FLG) -dM -E - = "h"; the iter will be placed on "hello" + r = wh_iter_valid(iter); // r == true; You should always check if iter is valid after a seek() and skip() + r = wh_iter_peek(iter, buf, 6, &len_out, NULL, 0, NULL); // only need the key: will get "hello" and 5 + r = wh_iter_peek(iter, NULL, 0, NULL, buf, 6, &len_out); // only need the value: will get "world!" 
and 6 + // (you can also get both key and value using one call with two buffers) + wh_iter_skip1(iter); // skip the current key; equivalent to wh_iter_skip(iter, 1); + r = wh_iter_valid(iter); // r == false; already passed the end of the dataset + wh_iter_park(iter); // an iter may hold locks; It's a good manner to "park" the iter before sleep. + sleep(10); // not interacting with the wormhole instance. + wh_iter_seek(iter, NULL, 0); // need to do another seek to reactivate the iter + r = wh_iter_valid(iter); // r == true; on the zero-sized key now + wh_iter_destroy(iter); // now we're done with the iter + wh_del(ref, "hello", 5); // delete a key + wh_del(ref, NULL, NULL); // delete the zero-sized key + wh_unref(ref); // the current thread is no longer interested in accessing the index + wh_destroy(wh); // fully destroy the index; all references should have been released before calling this +} +``` + +## Integer keys + +Wormhole supports binary keys, which means you don't need to print integers into text when using Wormhole to index integer keys. +Here are some quick examples for using Wormhole as an integer-key index. A little-endian CPU is assumed. + +```C +{ + // 32-bit unsigned integer keys + u32 key = __builtin_bswap32(1000); // reverse byte order of key 1000 + wh_put(ref, &key, 4, NULL, 0); + key = __builtin_bswap32(2000); // reverse byte order of key 2000 +    wh_put(ref, &key, 4, NULL, 0); + struct wormhole_iter * iter = wh_iter_create(ref); + key = __builtin_bswap32(999); + wh_iter_seek(iter, &key, 4); // seek 999 + u32 key_out, len_out; + r = wh_iter_peek(iter, &key_out, 4, &len_out, NULL, 0, NULL); // see 1000 in key_out in reversed byte order + wh_iter_skip1(iter); + r = wh_iter_peek(iter, &key_out, 4, &len_out, NULL, 0, NULL); // see 2000 in key_out in reversed byte order +} +``` + +# Advanced APIs + +If the simple and thread-safe `wh_*` interface already meets your performance requirements, You don't need to read the following sections. 
+Using the `wormhole_*` and `whunsafe_*` APIs can maximize the efficiency of your code with a roughly 5%-10% speedup. +However, inefficient use of these APIs, such as repeatedly calling malloc() to prepare the key buffer, can easily hurt the performance. + +## `struct kv` and `struct kref` + +There are a handful of helper functions (`kv_*` and `kref_*` functions) at the beginning of wh.h. +It's worth noting that the *key's hash* (`hash` of `struct kv` and `hash32` of `struct kref`) +must be up-to-date before passed to wormhole. +The `kv_refill*` helper functions internally update the hash after filling the kv contents. +In a more general case, `kv_update_hash` directly updates a `struct kv`'s hash. +Similarly, `kref_refill_hash32()` calculates the 32-bit hash for `struct kref`. +Performing the hash calculation at the client side can achieve the best efficiency on the server (the index operations). + +## The Wormhole API + +`concbench.c` and `stresstest.c` are examples of how to use a Wormhole index. +There are three sets of Wormhole API: `whsafe`, `wormhole`, and `whunsafe`. +* `whsafe`: The *worry-free* thread-safe API. If you use Wormhole in a concurrent environment and want minimal complexity in your code, you should use `whsafe`. +* `wormhole`: The standard thread-safe API. It offers better efficiency than `whsafe` but requires some extra effort for blocking prevention. +* `whunsafe`: the thread-unsafe API. It offers the best speed and efficiency but does not perform internal concurrency control. +External synchronization should be employed when accessing `whunsafe` in a concurrent environment. + +The functions of each API can be found near the end of `wh.c` (search `kvmap_api_whsafe`, `kvmap_api_wormhole`, and `kvmap_api_whunsafe`). +Note that each API contains a mix of `whsafe_*`, `wormhole_*`, and `whunsafe_*` functions. + +### The `whsafe` API +The `whsafe` API functions are listed in the `kvmap_api_whsafe` structure in `wh.c`. 
The API consists of a mix of `wormhole_*` and `whsafe_*` functions. + +The index operations (GET, SET, UPDATE, DEL, PROBE, INPLACE, MERGE, and SCAN (`wormhole_iter_*` functions)) are all *thread safe*. +A thread needs to hold a reference of the index (_wormref_) to perform safe index operations. + +An example of using point-query operations using the `whsafe` API. + +```C +{ + wh = wormhole_create(NULL); // use NULL here unless you want to change the allocator. + ref = whsafe_ref(wh); + for (...) { + whsafe_put(ref, ...); + whsafe_get(ref, ...); + whsafe_del(ref, ...); + ... // other safe operations + } + ... // other safe operations + wormhole_unref(ref); + wormhole_destroy(wh); +} +``` + +An example of range-query operations: + +```C +{ + ref = whsafe_ref(wh); + // ... assume we already have a valid ref + iter = wormhole_iter_create(ref); + for (...) { + whsafe_iter_seek(iter, key); + wormhole_iter_peek(iter, buf); + wormhole_iter_skip(iter, 1); + wormhole_iter_peek(iter, buf); + wormhole_iter_skip(iter, 3); + wormhole_iter_inp(iter, uf, priv); + // other iter operations + } + // An active iterator is likely holding a lock. + whsafe_iter_park(iter); // Release resources to avoid blocking other threads + // it's now safe to do something such as sleep() or waitpid() + // ... start using the iterator again + whsafe_iter_seek(iter, key2); + // ... other iter operations + whsafe_iter_destroy(iter); + // ... do something + // must destroy iterators before unref() + wormhole_unref(ref); +} +``` + +### The `wormhole` API +Similar to `whsafe`, `wormhole` is also thread safe. It's often faster than `whsafe` but requires extra caution when using it. + +An example of using point-query operations using the `wormhole` API. + +```C +{ + wh = wormhole_create(NULL); // use NULL here unless you want to change the allocator. + ref = wormhole_ref(wh); + for (...) { + wormhole_put(ref, ...); + wormhole_get(ref, ...); + wormhole_del(ref, ...); + ... // other safe operations + } + ... 
// other safe operations + wormhole_unref(ref); + wormhole_destroy(wh); +} +``` + +An example of range-query operations: + +```C +{ + ref = wormhole_ref(wh); + // ... assume we already have a valid ref + iter = wormhole_iter_create(ref); + for (...) { + wormhole_iter_seek(iter, key); + wormhole_iter_peek(iter, buf); + wormhole_iter_skip(iter, 1); + wormhole_iter_peek(iter, buf); + wormhole_iter_skip(iter, 3); + wormhole_iter_inp(iter, uf, priv); + // other iter operations + } + // An active iterator is likely holding a lock. + wormhole_iter_park(iter); // Release resources to avoid blocking other threads + while (condition not met) { // See below for explanation + wormhole_refresh_qstate(ref); + } + // ... start using the iterator again + wormhole_iter_seek(iter, key2); + // ... other iter operations + wormhole_iter_destroy(iter); + // ... do something + // must destroy iterators before unref() + wormhole_unref(ref); +} +``` + +### Avoid blocking writers when using the `wormhole` API +Wormhole internally uses QSBR RCU to synchronize readers/writers so every holder of a reference (`ref`) +needs to actively perform index operations. +An ref-holder, if not actively performing index operations, may block a writer thread that is performing split/merge operations. +(because of not periodically announcing its quiescent state). +If a ref-holder is about to become inactive from Wormhole's perspective (doing something else or just sleeping), +it is recommended that the holder temporarily releases the `ref` before entering the inactive status (such as calling `sleep(10)`), +and reactivate the `ref` before performing the next index operation. 
+ +```C +{ + // assume we already have an active ref + wormhole_park(ref); // this will avoid blocking any other threads + sleep(10); + wormhole_resume(ref); // this will reactivate the ref + // continue to perform index operations +} +``` + +A common scenario of dead-locking is acquiring locks with an active wormhole reference, +The following example could cause deadlock between two threads. + +```C +// Thread A has an active ref and try to lock() +{ + struct wormref * ref = wormhole_ref(wh); + lock(just_a_lock); // << block here forever +} + +// Thread B already acquired the lock and wants to insert a key to wh +{ + lock(just_a_lock); + wormhole_put(ref, kv); << block here forever +} +``` + +To avoid this scenario, thread A should either call `wormhole_park(ref)` before acquiring the lock, or keep updating the qstate of the ref: +```C +// Solution A.1: use wormhole_park() +{ + struct wormref * ref = wormhole_ref(wh); + wormhole_park(ref); + lock(just_a_lock); + wormhole_resume(ref); // can use ref afterward +} + +// Solution A.2: use try_lock and wormhole_refresh_qstate() +{ + struct wormref * ref = wormhole_ref(wh); + while (!try_lock(just_a_lock)) { + wormhole_refresh_qstate(ref); + } + // continue to use ref +} +``` + +The above issues with QSBR are specific to the `wormhole` API. `whsafe` does not have these issues. + +### The `whunsafe` API +A set of *thread-unsafe* functions are also provided. See the functions with _prefix_ `whunsafe`. +The thread-unsafe functions don't use the reference (_wormref_). +Simply feed them with the pointer to the wormhole index: + +```C +{ + wh = whunsafe_create(NULL); + for (...) { + whunsafe_put(wh, ...); + whunsafe_get(wh, ...); + whunsafe_del(wh, ...); + ... // other unsafe operations + } + ... // other unsafe operations + wormhole_destroy(wh); +} +``` + +### In-place update with user-defined function +`wormhole_inp` executes a user-defined function on an existing key-value item. 
+If the key does not exist, a NULL pointer will be passed to the user-defined function. +A simple example would be incrementing a counter stored in a key-value pair. + +```C +{ + // user-defined in-place update function + void myadd1(struct kv * kv, void * priv) { + if (kv != NULL) { + assert(kv->vlen >= sizeof(u64)); + u64 * pvalue = kv_vptr(kv); + (*pvalue)++; + } + } + + // create the counter + u64 zero = 0; + struct kv * tmp = kv_create("counter", 7, &zero, 8); // malloc-ed + wormhole_put(ref, tmp); + + // perform +1 on the stored value + struct kref kref = kv_ref(tmp); // create a kref of tmp + wormhole_inp(ref, &kref, myadd1, NULL); +} +``` + +Note that the user-defined function should ONLY change the value's content, and nothing else. +Otherwise, the index can be corrupted. +A similar mechanism is also provided for iterators (`wormhole_iter_inp`). + +The inplace function can also be used to retrieve key-value data. For example: + +```C +{ + void inplace_getu64(struct kv * kv, void * priv) { + if (kv != NULL) { + assert(kv->vlen >= sizeof(u64)); + u64 * pvalue = kv_vptr(kv); + *(u64 *)priv = *pvalue; + } else { + *(u64 *)priv = 0; + } + } + ... + struct kref kref = ... + u64 val; + wormhole_inp(ref, &kref, inplace_getu64, &val); +} +``` + +### `merge`: atomic Read-Modify-Write +The `wormhole_merge` and `whsafe_merge` functions perform atomic Read-Modify-Write (RMW) operations. +In a RMW operation, if the search key is found, the KV pair will be passed to a user-defined callback function `uf` (short for user function). +Otherwise, a NULL pointer is passed to `uf`. +`uf` could update the KV in-place if it does not require any memory reallocation. +In such a case, `uf` should return the KV's pointer back and the merge function will do nothing else. +If `uf` want to replace the KV with something new, it should return a pointer that is different than the original KV pointer. +The `uf` should not make memory allocation by itself. 
+Instead, the `merge` function will copy the returned KV and replace the existing KV with the newly created one. +`uf` should not return NULL unless the key was not found. + +### Iterator +The `wormhole_iter_{seek,peek,skip,next,inp}` functions provide range-search functionalities. +If the search key does not exist, the `seek` operation will put the cursor on the item that is greater than the search-key. +`next` will return the item under the current cursor and move the cursor forward. +`peek` is similar but does not move the cursor. For example, with keys `{1,3,5}`, `seek(2); r = next()` will see `r == 3`. + +Currently Wormhole does not provide `seek_for_less_equal()` and `prev()` for backward scanning. This feature will be added in the future. + +# Memory management + +By default, Wormhole manages all the key-value data internally and only copies to or from a user-supplied +buffer (a `struct kv` object). +This draws a clear boundary in the memory space between the index structure and its users. +After a call to any of the index operations, the caller can immediately free +the buffer holding the key-reference or the key-value data. +This also allows users to use stack-allocated variables to interact with Wormhole. + +The memory manager of the internal key-value objects can be customized when creating a new Wormhole (see `wormhole_create`). +The customization will _only_ affect the internal `struct kv` objects. +Actually, the memory manager can be configured to directly use the caller's `struct kv` object and store it in Wormhole. +This `struct kvmap_mm` structure shows an example: + +```C +{ + const struct kvmap_mm kvmap_mm_ualloc { + .in = kvmap_mm_in_noop, // in wormhole_put(), store caller's kv in wh + .out = kvmap_mm_out_dup, // but still make a copy in wormhole_get() + .free = kvmap_mm_free_free, // call free() for delete/update + }; + ... + struct wormhole * wh = wormhole_create(&kvmap_mm_ualloc); + struct wormref * ref = wormhole_ref(wh); + ... 
+ struct kv * newkv = malloc(size); + ... + wormhole_put(ref, newkv); + // Don't free newkv! it's now managed by wh +} +``` + +Each of the in/out/free functions can be freely customized. +A few `kvmap_mm_*` functions are already provided for common scenarios. +`kvmap_mm_ndf` is identical to the `kvmap_mm_ualloc` structure in the above example. + +## Hugepages +Wormhole uses hugepages when available. To reserve some hugepages in Linux (10000 * 2MB): + + # echo 10000 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages + +# Tuning + +A few macros in `wh.c` can be tuned. + +* `WH_SLABLEAF_SIZE` controls the slab size for leaf node allocation. +The default is `((1lu << 21))` (2MB slabs). If 1GB hugepages are available, `WH_SLABLEAF_SIZE` can be set to `((1lu << 30))` to utilize 1GB hugepages. +Using 1GB hugepages can improve search performance on a large dataset. + +* `WH_KPN` controls "Keys Per (leaf-)Node". The default value is 128. +Compared to the default, `WH_KPN=256` can offer 5-10%+ higher point query and update speed. +However, range queries prefer a smaller node size such as 64. + + +* `QSBR_STATES_NR` and `QSBR_SHARDS_NR` control the capacity (number of active references) of the QSBR RCU. +The product of the two values is the capacity. For efficiency, `QSBR_STATES_NR` can be set to 23, 39, or 55, and `QSBR_SHARDS_NR` must be 2^n, n<=6. +The defaults are 23 and 32, respectively. The QSBR registry can run out of space if there are a few hundred threads, which is not a problem in practice. + +# Limitations + +## Key Patterns +A **split** operation will fail when **129** (`WH_KPN + 1`) keys share a common prefix of 65535+ bytes. +In Wormhole, the maximum _anchor-key_ length is 65535 (2^16) bytes, which is shorter than the maximum key-length (2^32). + +## Memory Allocation +Insertions/updates can fail and return false when a memory allocation fails. +On memory-allocation failure, the hash-table expansion function will block and wait for available memory. 
+ +# Performance +Some benchmarking results with some real-world datasets: See [this](https://github.com/wuxb45/wormhole/issues/5) page for more information. + +![Concurrent GET](https://user-images.githubusercontent.com/564235/112712778-704d7200-8e9f-11eb-9f4d-795de46772d1.png) diff --git a/test/MassTrie-beta/wormhole/README.txt b/test/MassTrie-beta/wormhole/README.txt new file mode 100644 index 00000000..e70108ef --- /dev/null +++ b/test/MassTrie-beta/wormhole/README.txt @@ -0,0 +1,31 @@ +To set up the project: + +If you're not already in the folder 'wormhole', perform: + +1. cd wormhole + +Once you're there, set the variable LD_LIBRARY_PATH to the +current working directory using: + +2. setenv LD_LIBRARY_PATH `pwd` + +You can check (optionally) that this operation was executed properly using: + +3. echo $LD_LIBRARY_PATH + + +Then, do: + +4. cd sto + +5. ./bootstrap.sh + +6. ./configure + +To run the test file do: + +7. make unit-test_MTrie + +Then run it using: + +8. ./unit-test_MTrie diff --git a/test/MassTrie-beta/wormhole/concbench.c b/test/MassTrie-beta/wormhole/concbench.c new file mode 100644 index 00000000..f18abde9 --- /dev/null +++ b/test/MassTrie-beta/wormhole/concbench.c @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2018-2019 Wu, Xingbo + * + * All rights reserved. No warranty, explicit or implicit, provided. + */ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include "lib.h" +#include "kv.h" +#include "wh.h" + +atomic_uint_least64_t __seqno = 0; +u64 __nth = 0; +struct kv ** __samples = NULL; +u64 __nkeys = 0; +atomic_uint_least64_t __tot = 0; +u64 __endtime = 0; + + static void * +kv_load_worker(struct wormhole * const wh) +{ + srandom_u64(time_nsec() * time_nsec()); + struct wormref * const ref = wormhole_ref(wh); + const u64 seq = atomic_fetch_add(&__seqno, 1); + const u64 n0 = __nkeys / __nth * seq; + const u64 nz = (seq == (__nth - 1)) ? 
__nkeys : (__nkeys / __nth * (seq + 1)); + printf("load worker %lu %lu\n", n0, nz); + for (u64 i = n0; i < nz; i++) + wormhole_put(ref, __samples[i]); + wormhole_unref(ref); + return NULL; +} + + static void * +kv_probe_worker(struct wormhole * const wh) +{ + struct wormref * const ref = wormhole_ref(wh); + struct kv * next = __samples[random_u64() % __nkeys]; + u64 rnext = random_u64() % __nkeys; + u64 count = 0; + u64 succ = 0; +#define BATCH ((10000)) + do { + for (u64 i = 0; i < BATCH; i++) { + // reading kv samples leads to unnecessary cache misses + // use prefetch to minimize overhead on workload generation + struct kv * const key = next; + next = __samples[rnext]; + __builtin_prefetch(next, 0, 0); + __builtin_prefetch(((u8 *)next) + 64, 0, 0); + rnext = random_u64() % __nkeys; + __builtin_prefetch(&(__samples[rnext])); + + // do probe + // customize your benchmark: do a mix of wh operations with switch-cases + const struct kref kref = kv_kref(key); + if (wormhole_probe(ref, &kref)) + succ++; + } + count += BATCH; + } while (time_nsec() < __endtime); + if (count != succ) + printf("count %lu success %lu\n", count, succ); + (void)atomic_fetch_add(&__tot, count); + wormhole_unref(ref); + return NULL; +} + + int +main(int argc, char ** argv) +{ + if (argc < 3) { + printf("usage: <#keys> <#threads>\n"); + printf(" Get words.txt: wget https://github.com/dwyl/english-words/raw/master/words.txt\n"); + printf(" Example: %s words.txt 1000000 4\n", argv[0]); + printf(" Better to use only one numa node with numactl -N 0\n"); + printf(" Better to run X thread on X cores\n"); + return 0; + } + + char ** const words = malloc(sizeof(char *) * 1000000); // or `wc -l words.txt` + u64 nr_words = 0; + char * buf = malloc(8192); + size_t bufsize = 8192; + FILE * const fwords = fopen(argv[1], "r"); + if (fwords == NULL) { + printf("open words file failed\n"); + return 0; + } + + // read all words to words + while (getline(&buf, &bufsize, fwords) > 0) { + buf[strlen(buf)-1] = 
'\0'; + words[nr_words] = strdup(buf); + nr_words++; + } + fclose(fwords); + + // generate keys + const u64 nkeys = strtoull(argv[2], NULL, 10); + struct kv ** const samples = malloc(sizeof(struct kv *) * nkeys); + char * ss[6]; + for (u64 i = 0; i < nkeys; i++) { + for (u64 j = 0; j < 6; j++) + ss[j] = words[random() % nr_words]; + sprintf(buf, "%s %s %s %s %s %s!", ss[0], ss[1], ss[2], ss[3], ss[4], ss[5]); + samples[i] = kv_create_str(buf, NULL, 0); + } + // free words & buf + for (u64 i = 0; i < nr_words; i++) + free(words[i]); + free(words); + free(buf); + + // load (4) + __samples = samples; + __nkeys = nkeys; + struct wormhole * const wh = wormhole_create(NULL); + __nth = 4; + const u64 dtl = thread_fork_join(4, (void *)kv_load_worker, false, (void *)wh); + printf("load x4 %.2lf mops\n", ((double)nkeys) * 1e3 / ((double)dtl)); + + const u64 nth = strtoull(argv[3], NULL, 10); + printf("probe with %lu threads. each round takes 3 seconds\n", nth); + for (u64 i = 0; i < 3; i++) { + __tot = 0; + __endtime = time_nsec() + 3e9; // 3 sec + const u64 dt = thread_fork_join(nth, (void *)kv_probe_worker, false, (void *)wh); + const double mops = ((double)__tot) * 1e3 / ((double)dt); + printf("probe x%lu %.2lf mops\n", nth, mops); + sleep(1); + } + + // final clean up for valgrind + for (u64 i = 0; i < nkeys; i++) + free(samples[i]); + free(samples); + wormhole_destroy(wh); + return 0; +} diff --git a/test/MassTrie-beta/wormhole/concbench.out b/test/MassTrie-beta/wormhole/concbench.out new file mode 100644 index 00000000..ee87ca31 Binary files /dev/null and b/test/MassTrie-beta/wormhole/concbench.out differ diff --git a/test/MassTrie-beta/wormhole/ctypes.h b/test/MassTrie-beta/wormhole/ctypes.h new file mode 100644 index 00000000..314ca5dc --- /dev/null +++ b/test/MassTrie-beta/wormhole/ctypes.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2016--2021 Wu, Xingbo + * + * All rights reserved. No warranty, explicit or implicit, provided. 
+ */ +#pragma once + +// C types only; C++ source code don't use this + +#include +#include + +/* C11 atomic types */ +typedef atomic_bool abool; + +typedef atomic_uchar au8; +typedef atomic_ushort au16; +typedef atomic_uint au32; +typedef atomic_ulong au64; +static_assert(sizeof(au8) == 1, "sizeof(au8)"); +static_assert(sizeof(au16) == 2, "sizeof(au16)"); +static_assert(sizeof(au32) == 4, "sizeof(au32)"); +static_assert(sizeof(au64) == 8, "sizeof(au64)"); + +typedef atomic_char as8; +typedef atomic_short as16; +typedef atomic_int as32; +typedef atomic_long as64; +static_assert(sizeof(as8) == 1, "sizeof(as8)"); +static_assert(sizeof(as16) == 2, "sizeof(as16)"); +static_assert(sizeof(as32) == 4, "sizeof(as32)"); +static_assert(sizeof(as64) == 8, "sizeof(as64)"); + +// shorten long names +#define MO_RELAXED memory_order_relaxed +#define MO_CONSUME memory_order_consume +#define MO_ACQUIRE memory_order_acquire +#define MO_RELEASE memory_order_release +#define MO_ACQ_REL memory_order_acq_rel +#define MO_SEQ_CST memory_order_seq_cst diff --git a/test/MassTrie-beta/wormhole/easydemo.c b/test/MassTrie-beta/wormhole/easydemo.c new file mode 100644 index 00000000..f095a6ac --- /dev/null +++ b/test/MassTrie-beta/wormhole/easydemo.c @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2021 Wu, Xingbo + * + * All rights reserved. No warranty, explicit or implicit, provided. 
+ */ +#define _GNU_SOURCE +#include + +#include "lib.h" +#include "kv.h" +#include "wh.h" + + int +main(int argc, char ** argv) +{ + (void)argc; + (void)argv; + struct wormhole * const wh = wh_create(); + struct wormref * const ref = wh_ref(wh); + + bool r; + + r = wh_put(ref, "wormhole", 8, "easy", 4); + printf("wh_put wormhole easy %c\n", r?'T':'F'); + + r = wh_put(ref, "time_travel", 11, "impossible", 10); + printf("wh_put time_travel impossible %c\n", r?'T':'F'); + + r = wh_del(ref, "time_travel", 11); + printf("wh_del time_travel %c\n", r?'T':'F'); + + r = wh_probe(ref, "time_travel", 11); + printf("wh_probe time_travel %c\n", r?'T':'F'); + + u32 klen_out = 0; + char kbuf_out[8] = {}; + u32 vlen_out = 0; + char vbuf_out[8] = {}; + r = wh_get(ref, "wormhole", 8, vbuf_out, 8, &vlen_out); + printf("wh_get wormhole %c %u %.*s\n", r?'T':'F', vlen_out, vlen_out, vbuf_out); + + // in a concurrent environment, the kvmap_api_wormhole need park&resume when a thread is about to go idle + // don't need park&resume if you're using the default kvmap_api_whsafe in whwh.c! 
+ wh_park(ref); + usleep(10); + wh_resume(ref); + + // prepare a few keys for range ops + wh_put(ref, "00", 2, "0_value", 7); + wh_put(ref, "11", 2, "1_value", 7); + wh_put(ref, "22", 2, "2_value", 7); + + struct wormhole_iter * const iter = wh_iter_create(ref); + + wh_iter_seek(iter, NULL, 0); // seek to the head + printf("wh_iter_seek \"\"\n"); + while (wh_iter_valid(iter)) { + r = wh_iter_peek(iter, kbuf_out, 8, &klen_out, vbuf_out, 8, &vlen_out); + if (r) { + printf("wh_iter_peek klen=%u key=%.*s vlen=%u value=%.*s\n", + klen_out, klen_out, kbuf_out, vlen_out, vlen_out, vbuf_out); + } else { + printf("ERROR!\n"); + } + wh_iter_skip1(iter); + } + + // call iter_park if you will go idle but want to use the iter later + // don't need to call iter_park if you're actively using iter + wh_iter_park(iter); + usleep(10); + + wh_iter_seek(iter, "0", 1); + printf("wh_iter_seek \"0\"\n"); + // this time we don't want to copy the value + r = wh_iter_peek(iter, kbuf_out, 8, &klen_out, NULL, 0, NULL); + if (r){ + printf("wh_iter_peek klen=%u key=%.*s\n", klen_out, klen_out, kbuf_out); + } else { + printf("ERROR: iter_peek failed\n"); + } + + wh_iter_destroy(iter); + // there must be no active iter when calling unref() + wh_unref(ref); + + // unsafe operations: should have released all references + wh_clean(wh); // just for demonstration + wh_destroy(wh); // destroy also calls clean interally + return 0; +} diff --git a/test/MassTrie-beta/wormhole/easydemo.out b/test/MassTrie-beta/wormhole/easydemo.out new file mode 100644 index 00000000..32521210 Binary files /dev/null and b/test/MassTrie-beta/wormhole/easydemo.out differ diff --git a/test/MassTrie-beta/wormhole/kv.c b/test/MassTrie-beta/wormhole/kv.c new file mode 100644 index 00000000..a1720e88 --- /dev/null +++ b/test/MassTrie-beta/wormhole/kv.c @@ -0,0 +1,1131 @@ +/* + * Copyright (c) 2016--2021 Wu, Xingbo + * + * All rights reserved. No warranty, explicit or implicit, provided. 
+ */ +#define _GNU_SOURCE + +// headers {{{ +#include // static_assert +#include +#include "lib.h" +#include "ctypes.h" +#include "kv.h" +// }}} headers + +// crc32c {{{ + inline u32 +kv_crc32c(const void * const ptr, u32 len) +{ + return crc32c_inc((const u8 *)ptr, len, KV_CRC32C_SEED); +} + + inline u64 +kv_crc32c_extend(const u32 lo) +{ + const u64 hi = (u64)(~lo); + return (hi << 32) | ((u64)lo); +} +// }}} crc32c + +// kv {{{ + +// size {{{ + inline size_t +kv_size(const struct kv * const kv) +{ + return sizeof(*kv) + kv->klen + kv->vlen; +} + + inline size_t +kv_size_align(const struct kv * const kv, const u64 align) +{ + debug_assert(align && ((align & (align - 1)) == 0)); + return (sizeof(*kv) + kv->klen + kv->vlen + (align - 1)) & (~(align - 1)); +} + + inline size_t +key_size(const struct kv *const key) +{ + return sizeof(*key) + key->klen; +} + + inline size_t +key_size_align(const struct kv *const key, const u64 align) +{ + debug_assert(align && ((align & (align - 1)) == 0)); + return (sizeof(*key) + key->klen + (align - 1)) & (~(align - 1)); +} +// }}} size + +// construct {{{ + inline void +kv_update_hash(struct kv * const kv) +{ + const u32 lo = kv_crc32c((const void *)kv->kv, kv->klen); + kv->hash = kv_crc32c_extend(lo); +} + + inline void +kv_refill_value(struct kv * const kv, const void * const value, const u32 vlen) +{ + debug_assert((vlen == 0) || value); + memcpy(&(kv->kv[kv->klen]), value, vlen); + kv->vlen = vlen; +} + + inline void +kv_refill(struct kv * const kv, const void * const key, const u32 klen, + const void * const value, const u32 vlen) +{ + debug_assert(kv); + kv->klen = klen; + memcpy(&(kv->kv[0]), key, klen); + kv_refill_value(kv, value, vlen); + kv_update_hash(kv); +} + + inline void +kv_refill_str(struct kv * const kv, const char * const key, + const void * const value, const u32 vlen) +{ + kv_refill(kv, key, (u32)strlen(key), value, vlen); +} + + inline void +kv_refill_str_str(struct kv * const kv, const char * const key, + 
const char * const value) +{ + kv_refill(kv, key, (u32)strlen(key), value, (u32)strlen(value)); +} + +// the u64 key is filled in big-endian byte order for correct ordering + inline void +kv_refill_u64(struct kv * const kv, const u64 key, const void * const value, const u32 vlen) +{ + kv->klen = sizeof(u64); + *(u64 *)(kv->kv) = __builtin_bswap64(key); // bswap on little endian + kv_refill_value(kv, value, vlen); + kv_update_hash(kv); +} + + inline void +kv_refill_hex32(struct kv * const kv, const u32 hex, const void * const value, const u32 vlen) +{ + kv->klen = 8; + strhex_32(kv->kv, hex); + kv_refill_value(kv, value, vlen); + kv_update_hash(kv); +} + + inline void +kv_refill_hex64(struct kv * const kv, const u64 hex, const void * const value, const u32 vlen) +{ + kv->klen = 16; + strhex_64(kv->kv, hex); + kv_refill_value(kv, value, vlen); + kv_update_hash(kv); +} + + inline void +kv_refill_hex64_klen(struct kv * const kv, const u64 hex, + const u32 klen, const void * const value, const u32 vlen) +{ + strhex_64(kv->kv, hex); + if (klen > 16) { + kv->klen = klen; + memset(kv->kv + 16, '!', klen - 16); + } else { + kv->klen = 16; + } + kv_refill_value(kv, value, vlen); + kv_update_hash(kv); +} + + inline void +kv_refill_kref(struct kv * const kv, const struct kref * const kref) +{ + kv->klen = kref->len; + kv->vlen = 0; + kv->hash = kv_crc32c_extend(kref->hash32); + memmove(kv->kv, kref->ptr, kref->len); +} + + inline void +kv_refill_kref_v(struct kv * const kv, const struct kref * const kref, + const void * const value, const u32 vlen) +{ + kv->klen = kref->len; + kv->vlen = vlen; + kv->hash = kv_crc32c_extend(kref->hash32); + memmove(kv->kv, kref->ptr, kref->len); + memcpy(kv->kv + kv->klen, value, vlen); +} + + inline struct kref +kv_kref(const struct kv * const key) +{ + return (struct kref){.ptr = key->kv, .len = key->klen, .hash32 = key->hashlo}; +} + + inline struct kv * +kv_create(const void * const key, const u32 klen, const void * const value, const u32 
vlen) +{ + struct kv * const kv = malloc(sizeof(*kv) + klen + vlen); + if (kv) + kv_refill(kv, key, klen, value, vlen); + return kv; +} + + inline struct kv * +kv_create_str(const char * const key, const void * const value, const u32 vlen) +{ + return kv_create(key, (u32)strlen(key), value, vlen); +} + + inline struct kv * +kv_create_str_str(const char * const key, const char * const value) +{ + return kv_create(key, (u32)strlen(key), value, (u32)strlen(value)); +} + + inline struct kv * +kv_create_kref(const struct kref * const kref, const void * const value, const u32 vlen) +{ + return kv_create(kref->ptr, kref->len, value, vlen); +} + +static struct kv __kv_null = {}; + +__attribute__((constructor)) + static void +kv_null_init(void) +{ + kv_update_hash(&__kv_null); +} + + inline const struct kv * +kv_null(void) +{ + return &__kv_null; +} +// }}} construct + +// dup {{{ + inline struct kv * +kv_dup(const struct kv * const kv) +{ + if (kv == NULL) + return NULL; + + const size_t sz = kv_size(kv); + struct kv * const new = malloc(sz); + if (new) + memcpy(new, kv, sz); + return new; +} + + inline struct kv * +kv_dup_key(const struct kv * const kv) +{ + if (kv == NULL) + return NULL; + + const size_t sz = key_size(kv); + struct kv * const new = malloc(sz); + if (new) { + memcpy(new, kv, sz); + new->vlen = 0; + } + return new; +} + + inline struct kv * +kv_dup2(const struct kv * const from, struct kv * const to) +{ + if (from == NULL) + return NULL; + const size_t sz = kv_size(from); + struct kv * const new = to ? to : malloc(sz); + if (new) + memcpy(new, from, sz); + return new; +} + + inline struct kv * +kv_dup2_key(const struct kv * const from, struct kv * const to) +{ + if (from == NULL) + return NULL; + const size_t sz = key_size(from); + struct kv * const new = to ? 
to : malloc(sz); + if (new) { + memcpy(new, from, sz); + new->vlen = 0; + } + return new; +} + + inline struct kv * +kv_dup2_key_prefix(const struct kv * const from, struct kv * const to, const u32 plen) +{ + if (from == NULL) + return NULL; + debug_assert(plen <= from->klen); + const size_t sz = key_size(from) - from->klen + plen; + struct kv * const new = to ? to : malloc(sz); + if (new) { + new->klen = plen; + memcpy(new->kv, from->kv, plen); + new->vlen = 0; + kv_update_hash(new); + } + return new; +} +// }}} dup + +// compare {{{ + static inline int +klen_compare(const u32 len1, const u32 len2) +{ + if (len1 < len2) + return -1; + else if (len1 > len2) + return 1; + else + return 0; +} + +// compare whether the two keys are identical +// optimistic: do not check hash + inline bool +kv_match(const struct kv * const key1, const struct kv * const key2) +{ + //cpu_prefetch0(((u8 *)key2) + 64); + //return (key1->hash == key2->hash) + // && (key1->klen == key2->klen) + // && (!memcmp(key1->kv, key2->kv, key1->klen)); + return (key1->klen == key2->klen) && (!memcmp(key1->kv, key2->kv, key1->klen)); +} + +// compare whether the two keys are identical +// check hash first +// pessimistic: return false quickly if their hashes mismatch + inline bool +kv_match_hash(const struct kv * const key1, const struct kv * const key2) +{ + return (key1->hash == key2->hash) + && (key1->klen == key2->klen) + && (!memcmp(key1->kv, key2->kv, key1->klen)); +} + + inline bool +kv_match_full(const struct kv * const kv1, const struct kv * const kv2) +{ + return (kv1->kvlen == kv2->kvlen) + && (!memcmp(kv1, kv2, sizeof(*kv1) + kv1->klen + kv1->vlen)); +} + + bool +kv_match_kv128(const struct kv * const sk, const u8 * const kv128) +{ + debug_assert(sk); + debug_assert(kv128); + + u32 klen128 = 0; + u32 vlen128 = 0; + const u8 * const pdata = vi128_decode_u32(vi128_decode_u32(kv128, &klen128), &vlen128); + (void)vlen128; + return (sk->klen == klen128) && (!memcmp(sk->kv, pdata, klen128)); +} + 
+ inline int +kv_compare(const struct kv * const kv1, const struct kv * const kv2) +{ + const u32 len = kv1->klen < kv2->klen ? kv1->klen : kv2->klen; + const int cmp = memcmp(kv1->kv, kv2->kv, (size_t)len); + return cmp ? cmp : klen_compare(kv1->klen, kv2->klen); +} + +// for qsort and bsearch + static int +kv_compare_ptrs(const void * const p1, const void * const p2) +{ + const struct kv * const * const pp1 = (typeof(pp1))p1; + const struct kv * const * const pp2 = (typeof(pp2))p2; + return kv_compare(*pp1, *pp2); +} + + int +kv_k128_compare(const struct kv * const sk, const u8 * const k128) +{ + debug_assert(sk); + const u32 klen1 = sk->klen; + u32 klen2 = 0; + const u8 * const ptr2 = vi128_decode_u32(k128, &klen2); + debug_assert(ptr2); + const u32 len = (klen1 < klen2) ? klen1 : klen2; + const int cmp = memcmp(sk->kv, ptr2, len); + return cmp ? cmp : klen_compare(klen1, klen2); +} + + int +kv_kv128_compare(const struct kv * const sk, const u8 * const kv128) +{ + debug_assert(sk); + const u32 klen1 = sk->klen; + u32 klen2 = 0; + u32 vlen2 = 0; + const u8 * const ptr2 = vi128_decode_u32(vi128_decode_u32(kv128, &klen2), &vlen2); + const u32 len = (klen1 < klen2) ? klen1 : klen2; + const int cmp = memcmp(sk->kv, ptr2, len); + return cmp ? cmp : klen_compare(klen1, klen2); +} + + inline void +kv_qsort(struct kv ** const kvs, const size_t nr) +{ + qsort(kvs, nr, sizeof(kvs[0]), kv_compare_ptrs); +} + +// return the length of longest common prefix of the two keys + inline u32 +kv_key_lcp(const struct kv * const key1, const struct kv * const key2) +{ + const u32 max = (key1->klen < key2->klen) ? key1->klen : key2->klen; + return memlcp(key1->kv, key2->kv, max); +} + +// return the length of longest common prefix of the two keys with a known lcp0 + inline u32 +kv_key_lcp_skip(const struct kv * const key1, const struct kv * const key2, const u32 lcp0) +{ + const u32 max = (key1->klen < key2->klen) ? 
key1->klen : key2->klen; + debug_assert(max >= lcp0); + return lcp0 + memlcp(key1->kv+lcp0, key2->kv+lcp0, max-lcp0); +} +// }}} + +// psort {{{ + static inline void +kv_psort_exchange(struct kv ** const kvs, const u64 i, const u64 j) +{ + if (i != j) { + struct kv * const tmp = kvs[i]; + kvs[i] = kvs[j]; + kvs[j] = tmp; + } +} + + static u64 +kv_psort_partition(struct kv ** const kvs, const u64 lo, const u64 hi) +{ + if (lo >= hi) + return lo; + + const u64 p = (lo+hi) >> 1; + kv_psort_exchange(kvs, lo, p); + u64 i = lo; + u64 j = hi + 1; + do { + while (kv_compare(kvs[++i], kvs[lo]) < 0 && i < hi); + while (kv_compare(kvs[--j], kvs[lo]) > 0); + if (i >= j) + break; + kv_psort_exchange(kvs, i, j); + } while (true); + kv_psort_exchange(kvs, lo, j); + return j; +} + + static void +kv_psort_rec(struct kv ** const kvs, const u64 lo, const u64 hi, const u64 tlo, const u64 thi) +{ + if (lo >= hi) + return; + const u64 c = kv_psort_partition(kvs, lo, hi); + + if (c > tlo) // go left + kv_psort_rec(kvs, lo, c-1, tlo, thi); + + if (c < thi) // go right + kv_psort_rec(kvs, c+1, hi, tlo, thi); +} + + inline void +kv_psort(struct kv ** const kvs, const u64 nr, const u64 tlo, const u64 thi) +{ + debug_assert(tlo <= thi); + debug_assert(thi < nr); + kv_psort_rec(kvs, 0, nr-1, tlo, thi); +} +// }}} psort + +// ptr {{{ + inline void * +kv_vptr(struct kv * const kv) +{ + return (void *)(&(kv->kv[kv->klen])); +} + + inline void * +kv_kptr(struct kv * const kv) +{ + return (void *)(&(kv->kv[0])); +} + + inline const void * +kv_vptr_c(const struct kv * const kv) +{ + return (const void *)(&(kv->kv[kv->klen])); +} + + inline const void * +kv_kptr_c(const struct kv * const kv) +{ + return (const void *)(&(kv->kv[0])); +} +// }}} ptr + +// print {{{ +// cmd "KV" K and V can be 's': string, 'x': hex, 'd': dec, or else for not printing. 
+// n for newline after kv + void +kv_print(const struct kv * const kv, const char * const cmd, FILE * const out) +{ + debug_assert(cmd); + const u32 klen = kv->klen; + fprintf(out, "#%016lx k[%3u]", kv->hash, klen); + + switch(cmd[0]) { + case 's': fprintf(out, " %.*s", klen, kv->kv); break; + case 'x': str_print_hex(out, kv->kv, klen); break; + case 'd': str_print_dec(out, kv->kv, klen); break; + default: break; + } + + const u32 vlen = kv->vlen; + switch (cmd[1]) { + case 's': fprintf(out, " v[%4u] %.*s", vlen, vlen, kv->kv+klen); break; + case 'x': fprintf(out, " v[%4u]", vlen); str_print_hex(out, kv->kv+klen, vlen); break; + case 'd': fprintf(out, " v[%4u]", vlen); str_print_dec(out, kv->kv+klen, vlen); break; + default: break; + } + if (strchr(cmd, 'n')) + fprintf(out, "\n"); +} +// }}} print + +// mm {{{ + struct kv * +kvmap_mm_in_noop(struct kv * const kv, void * const priv) +{ + (void)priv; + return kv; +} + +// copy-out + struct kv * +kvmap_mm_out_noop(struct kv * const kv, struct kv * const out) +{ + (void)out; + return kv; +} + + void +kvmap_mm_free_noop(struct kv * const kv, void * const priv) +{ + (void)kv; + (void)priv; +} + +// copy-in + struct kv * +kvmap_mm_in_dup(struct kv * const kv, void * const priv) +{ + (void)priv; + return kv_dup(kv); +} + +// copy-out + struct kv * +kvmap_mm_out_dup(struct kv * const kv, struct kv * const out) +{ + return kv_dup2(kv, out); +} + + void +kvmap_mm_free_free(struct kv * const kv, void * const priv) +{ + (void)priv; + free(kv); +} + +const struct kvmap_mm kvmap_mm_dup = { + .in = kvmap_mm_in_dup, + .out = kvmap_mm_out_dup, + .free = kvmap_mm_free_free, + .priv = NULL, +}; + +const struct kvmap_mm kvmap_mm_ndf = { + .in = kvmap_mm_in_noop, + .out = kvmap_mm_out_dup, + .free = kvmap_mm_free_free, + .priv = NULL, +}; + +// }}} mm + +// kref {{{ + inline void +kref_ref_raw(struct kref * const kref, const u8 * const ptr, const u32 len) +{ + kref->ptr = ptr; + kref->len = len; + kref->hash32 = 0; +} + + inline void 
+kref_ref_hash32(struct kref * const kref, const u8 * const ptr, const u32 len) +{ + kref->ptr = ptr; + kref->len = len; + kref->hash32 = kv_crc32c(ptr, len); +} + + inline void +kref_update_hash32(struct kref * const kref) +{ + kref->hash32 = kv_crc32c(kref->ptr, kref->len); +} + + inline void +kref_ref_kv(struct kref * const kref, const struct kv * const kv) +{ + kref->ptr = kv->kv; + kref->len = kv->klen; + kref->hash32 = kv->hashlo; +} + + inline void +kref_ref_kv_hash32(struct kref * const kref, const struct kv * const kv) +{ + kref->ptr = kv->kv; + kref->len = kv->klen; + kref->hash32 = kv_crc32c(kv->kv, kv->klen); +} + + inline bool +kref_match(const struct kref * const k1, const struct kref * const k2) +{ + return (k1->len == k2->len) && (!memcmp(k1->ptr, k2->ptr, k1->len)); +} + +// match a kref and a key + inline bool +kref_kv_match(const struct kref * const kref, const struct kv * const k) +{ + return (kref->len == k->klen) && (!memcmp(kref->ptr, k->kv, kref->len)); +} + + inline int +kref_compare(const struct kref * const kref1, const struct kref * const kref2) +{ + const u32 len = kref1->len < kref2->len ? kref1->len : kref2->len; + const int cmp = memcmp(kref1->ptr, kref2->ptr, (size_t)len); + return cmp ? cmp : klen_compare(kref1->len, kref2->len); +} + +// compare a kref and a key + inline int +kref_kv_compare(const struct kref * const kref, const struct kv * const k) +{ + debug_assert(kref); + debug_assert(k); + const u32 len = kref->len < k->klen ? kref->len : k->klen; + const int cmp = memcmp(kref->ptr, k->kv, (size_t)len); + return cmp ? cmp : klen_compare(kref->len, k->klen); +} + + inline u32 +kref_lcp(const struct kref * const k1, const struct kref * const k2) +{ + const u32 max = (k1->len < k2->len) ? k1->len : k2->len; + return memlcp(k1->ptr, k2->ptr, max); +} + + inline u32 +kref_kv_lcp(const struct kref * const kref, const struct kv * const kv) +{ + const u32 max = (kref->len < kv->klen) ? 
kref->len : kv->klen; + return memlcp(kref->ptr, kv->kv, max); +} + +// klen, key, ... + inline int +kref_k128_compare(const struct kref * const sk, const u8 * const k128) +{ + debug_assert(sk); + const u32 klen1 = sk->len; + u32 klen2 = 0; + const u8 * const ptr2 = vi128_decode_u32(k128, &klen2); + debug_assert(ptr2); + const u32 len = (klen1 < klen2) ? klen1 : klen2; + const int cmp = memcmp(sk->ptr, ptr2, len); + return cmp ? cmp : klen_compare(klen1, klen2); +} + +// klen, vlen, key, ... + inline int +kref_kv128_compare(const struct kref * const sk, const u8 * const kv128) +{ + debug_assert(sk); + const u32 klen1 = sk->len; + u32 klen2 = 0; + u32 vlen2 = 0; + const u8 * const ptr2 = vi128_decode_u32(vi128_decode_u32(kv128, &klen2), &vlen2); + const u32 len = (klen1 < klen2) ? klen1 : klen2; + const int cmp = memcmp(sk->ptr, ptr2, len); + return cmp ? cmp : klen_compare(klen1, klen2); +} + +static struct kref __kref_null = {.hash32 = KV_CRC32C_SEED}; + + inline const struct kref * +kref_null(void) +{ + return &__kref_null; +} +// }}} kref + +// kvref {{{ + inline void +kvref_ref_kv(struct kvref * const ref, struct kv * const kv) +{ + ref->kptr = kv->kv; + ref->vptr = kv->kv + kv->klen; + ref->hdr = *kv; +} + + struct kv * +kvref_dup2_kv(struct kvref * const ref, struct kv * const to) +{ + if (ref == NULL) + return NULL; + const size_t sz = sizeof(*to) + ref->hdr.klen + ref->hdr.vlen; + struct kv * const new = to ? to : malloc(sz); + if (new == NULL) + return NULL; + + *new = ref->hdr; + memcpy(new->kv, ref->kptr, new->klen); + memcpy(new->kv + new->klen, ref->vptr, new->vlen); + return new; +} + + struct kv * +kvref_dup2_key(struct kvref * const ref, struct kv * const to) +{ + if (ref == NULL) + return NULL; + const size_t sz = sizeof(*to) + ref->hdr.klen; + struct kv * const new = to ? 
to : malloc(sz); + if (new == NULL) + return NULL; + + *new = ref->hdr; + memcpy(new->kv, ref->kptr, new->klen); + return new; +} + + int +kvref_kv_compare(const struct kvref * const ref, const struct kv * const kv) +{ + const u32 len = ref->hdr.klen < kv->klen ? ref->hdr.klen : kv->klen; + const int cmp = memcmp(ref->kptr, kv->kv, (size_t)len); + return cmp ? cmp : klen_compare(ref->hdr.klen, kv->klen); +} +// }}} kvref + +// kv128 {{{ +// estimate the encoded size + inline size_t +kv128_estimate_kv(const struct kv * const kv) +{ + return vi128_estimate_u32(kv->klen) + vi128_estimate_u32(kv->vlen) + kv->klen + kv->vlen; +} + +// create a kv128 from kv + u8 * +kv128_encode_kv(const struct kv * const kv, u8 * const out, size_t * const pesize) +{ + u8 * const ptr = out ? out : malloc(kv128_estimate_kv(kv)); + if (!ptr) + return NULL; + + u8 * const pdata = vi128_encode_u32(vi128_encode_u32(ptr, kv->klen), kv->vlen); + memcpy(pdata, kv->kv, kv->klen + kv->vlen); + + if (pesize) + *pesize = (size_t)(pdata - ptr) + kv->klen + kv->vlen; + return ptr; // return the head of the encoded kv128 +} + +// dup kv128 to a kv + struct kv * +kv128_decode_kv(const u8 * const ptr, struct kv * const out, size_t * const pesize) +{ + u32 klen, vlen; + const u8 * const pdata = vi128_decode_u32(vi128_decode_u32(ptr, &klen), &vlen); + struct kv * const ret = out ? 
out : malloc(sizeof(struct kv) + klen + vlen); + if (ret) + kv_refill(ret, pdata, klen, pdata + klen, vlen); + + if (pesize) + *pesize = (size_t)(pdata - ptr) + klen + vlen; + return ret; // return the kv +} + + inline size_t +kv128_size(const u8 * const ptr) +{ + u32 klen, vlen; + const u8 * const pdata = vi128_decode_u32(vi128_decode_u32(ptr, &klen), &vlen); + return ((size_t)(pdata - ptr)) + klen + vlen; +} +// }}} kv128 + +// }}} kv + +// kvmap {{{ + +// registry {{{ +// increase MAX if need more +#define KVMAP_API_MAX ((32)) +static struct kvmap_api_reg kvmap_api_regs[KVMAP_API_MAX]; +static u64 kvmap_api_regs_nr = 0; + + void +kvmap_api_register(const int nargs, const char * const name, const char * const args_msg, + void * (*create)(const char *, const struct kvmap_mm *, char **), const struct kvmap_api * const api) +{ + if (kvmap_api_regs_nr < KVMAP_API_MAX) { + kvmap_api_regs[kvmap_api_regs_nr].nargs = nargs; + kvmap_api_regs[kvmap_api_regs_nr].name = name; + kvmap_api_regs[kvmap_api_regs_nr].args_msg = args_msg; + kvmap_api_regs[kvmap_api_regs_nr].create = create; + kvmap_api_regs[kvmap_api_regs_nr].api = api; + kvmap_api_regs_nr++; + } else { + fprintf(stderr, "%s failed to register [%s]\n", __func__, name); + } +} + void +kvmap_api_helper_message(void) +{ + fprintf(stderr, "%s Usage: api ...\n", __func__); + for (u64 i = 0; i < kvmap_api_regs_nr; i++) { + fprintf(stderr, "%s example: api %s %s\n", __func__, + kvmap_api_regs[i].name, kvmap_api_regs[i].args_msg); + } +} + + int +kvmap_api_helper(int argc, char ** const argv, const struct kvmap_mm * const mm, + const struct kvmap_api ** const api_out, void ** const map_out) +{ + // "api" "name" "arg1", ... 
+ if (argc < 2 || strcmp(argv[0], "api") != 0) + return -1; + + for (u64 i = 0; i < kvmap_api_regs_nr; i++) { + const struct kvmap_api_reg * const reg = &kvmap_api_regs[i]; + if (0 != strcmp(argv[1], reg->name)) + continue; + + if ((argc - 2) < reg->nargs) + return -1; + + void * const map = reg->create(argv[1], mm, argv + 2); // skip "api" "name" + if (map) { + *api_out = reg->api; + *map_out = map; + return 2 + reg->nargs; + } else { + return -1; + } + } + + // no match + return -1; +} +// }}} registry + +// misc {{{ + void +kvmap_inp_steal_kv(struct kv * const kv, void * const priv) +{ + // steal the kv pointer out so we don't need a dangerous get_key_interanl() + if (priv) + *(struct kv **)priv = kv; +} + + inline void * +kvmap_ref(const struct kvmap_api * const api, void * const map) +{ + return api->ref ? api->ref(map) : map; +} + +// return the original map pointer; usually unused by caller + inline void * +kvmap_unref(const struct kvmap_api * const api, void * const ref) +{ + return api->unref ? 
api->unref(ref) : ref; +} +// }}} misc + +// kvmap_kv_op {{{ + inline struct kv * +kvmap_kv_get(const struct kvmap_api * const api, void * const ref, + const struct kv * const key, struct kv * const out) +{ + const struct kref kref = kv_kref(key); + return api->get(ref, &kref, out); +} + + inline bool +kvmap_kv_probe(const struct kvmap_api * const api, void * const ref, + const struct kv * const key) +{ + const struct kref kref = kv_kref(key); + return api->probe(ref, &kref); +} + + inline bool +kvmap_kv_put(const struct kvmap_api * const api, void * const ref, + struct kv * const kv) +{ + return api->put(ref, kv); +} + + inline bool +kvmap_kv_del(const struct kvmap_api * const api, void * const ref, + const struct kv * const key) +{ + const struct kref kref = kv_kref(key); + return api->del(ref, &kref); +} + + inline bool +kvmap_kv_inpr(const struct kvmap_api * const api, void * const ref, + const struct kv * const key, kv_inp_func uf, void * const priv) +{ + const struct kref kref = kv_kref(key); + return api->inpr(ref, &kref, uf, priv); +} + + inline bool +kvmap_kv_inpw(const struct kvmap_api * const api, void * const ref, + const struct kv * const key, kv_inp_func uf, void * const priv) +{ + const struct kref kref = kv_kref(key); + return api->inpw(ref, &kref, uf, priv); +} + + inline bool +kvmap_kv_merge(const struct kvmap_api * const api, void * const ref, + const struct kv * const key, kv_merge_func uf, void * const priv) +{ + const struct kref kref = kv_kref(key); + return api->merge(ref, &kref, uf, priv); +} + + inline u64 +kvmap_kv_delr(const struct kvmap_api * const api, void * const ref, + const struct kv * const start, const struct kv * const end) +{ + const struct kref kref0 = kv_kref(start); + if (end) { + const struct kref krefz = kv_kref(end); + return api->delr(ref, &kref0, &krefz); + } else { + return api->delr(ref, &kref0, NULL); + } +} + + inline void +kvmap_kv_iter_seek(const struct kvmap_api * const api, void * const iter, + const struct kv * 
const key) +{ + const struct kref kref = kv_kref(key); + api->iter_seek(iter, &kref); +} +// }}} kvmap_kv_op + +// kvmap_raw_op {{{ + inline struct kv * +kvmap_raw_get(const struct kvmap_api * const api, void * const ref, + const u32 len, const u8 * const ptr, struct kv * const out) +{ + const struct kref kref = {.ptr = ptr, .len = len, + .hash32 = api->hashkey ? kv_crc32c(ptr, len) : 0}; + return api->get(ref, &kref, out); +} + + inline bool +kvmap_raw_probe(const struct kvmap_api * const api, void * const ref, + const u32 len, const u8 * const ptr) +{ + const struct kref kref = {.ptr = ptr, .len = len, + .hash32 = api->hashkey ? kv_crc32c(ptr, len) : 0}; + return api->probe(ref, &kref); +} + + inline bool +kvmap_raw_del(const struct kvmap_api * const api, void * const ref, + const u32 len, const u8 * const ptr) +{ + const struct kref kref = {.ptr = ptr, .len = len, + .hash32 = api->hashkey ? kv_crc32c(ptr, len) : 0}; + return api->del(ref, &kref); +} + + inline bool +kvmap_raw_inpr(const struct kvmap_api * const api, void * const ref, + const u32 len, const u8 * const ptr, kv_inp_func uf, void * const priv) +{ + const struct kref kref = {.ptr = ptr, .len = len, + .hash32 = api->hashkey ? kv_crc32c(ptr, len) : 0}; + return api->inpr(ref, &kref, uf, priv); +} + + inline bool +kvmap_raw_inpw(const struct kvmap_api * const api, void * const ref, + const u32 len, const u8 * const ptr, kv_inp_func uf, void * const priv) +{ + const struct kref kref = {.ptr = ptr, .len = len, + .hash32 = api->hashkey ? kv_crc32c(ptr, len) : 0}; + return api->inpw(ref, &kref, uf, priv); +} + + inline void +kvmap_raw_iter_seek(const struct kvmap_api * const api, void * const iter, + const u32 len, const u8 * const ptr) +{ + const struct kref kref = {.ptr = ptr, .len = len, + .hash32 = api->hashkey ? 
kv_crc32c(ptr, len) : 0}; + api->iter_seek(iter, &kref); +} +// }}}} kvmap_raw_op + +// dump {{{ + u64 +kvmap_dump_keys(const struct kvmap_api * const api, void * const map, const int fd) +{ + void * const ref = kvmap_ref(api, map); + void * const iter = api->iter_create(ref); + api->iter_seek(iter, kref_null()); + u64 i = 0; + while (api->iter_valid(iter)) { + struct kvref kvref; + api->iter_kvref(iter, &kvref); + dprintf(fd, "%010lu [%3u] %.*s [%u]\n", i, kvref.hdr.klen, kvref.hdr.klen, kvref.kptr, kvref.hdr.vlen); + i++; + api->iter_skip1(iter); + } + api->iter_destroy(iter); + kvmap_unref(api, ref); + return i; +} +// }}} dump + +// kv64 {{{ +struct kv64 { // internal only + struct kv kv; + u64 key_be; // must be in big endian + u64 value; +}; + + inline bool +kvmap_kv64_get(const struct kvmap_api * const api, void * const ref, + const u64 key, u64 * const out) +{ + struct kv64 keybuf, kvout; + struct kref kref; + keybuf.key_be = __builtin_bswap64(key); + kref_ref_hash32(&kref, keybuf.kv.kv, sizeof(keybuf.key_be)); + struct kv * const ret = api->get(ref, &kref, &kvout.kv); + if (ret) { + *out = kvout.value; + return true; + } else { + return false; + } +} + + inline bool +kvmap_kv64_probe(const struct kvmap_api * const api, void * const ref, + const u64 key) +{ + struct kv64 keybuf; + struct kref kref; + keybuf.key_be = __builtin_bswap64(key); + kref_ref_hash32(&kref, keybuf.kv.kv, sizeof(keybuf.key_be)); + return api->probe(ref, &kref); +} + + inline bool +kvmap_kv64_put(const struct kvmap_api * const api, void * const ref, + const u64 key, const u64 value) +{ + struct kv64 kv; + kv.key_be = __builtin_bswap64(key); + kv.value = value; + kv.kv.klen = sizeof(key); + kv.kv.vlen = sizeof(value); + if (api->hashkey) + kv_update_hash(&kv.kv); + + return api->put(ref, &kv.kv); +} + + inline bool +kvmap_kv64_del(const struct kvmap_api * const api, void * const ref, + const u64 key) +{ + struct kv64 keybuf; + struct kref kref; + keybuf.key_be = __builtin_bswap64(key); 
+ kref_ref_hash32(&kref, keybuf.kv.kv, sizeof(keybuf.key_be)); + return api->del(ref, &kref); +} + + inline void +kvmap_kv64_iter_seek(const struct kvmap_api * const api, void * const iter, + const u64 key) +{ + struct kv64 keybuf; + struct kref kref; + keybuf.key_be = __builtin_bswap64(key); + kref_ref_hash32(&kref, keybuf.kv.kv, sizeof(keybuf.key_be)); + api->iter_seek(iter, &kref); +} + + inline bool +kvmap_kv64_iter_peek(const struct kvmap_api * const api, void * const iter, + u64 * const key_out, u64 * const value_out) +{ + struct kv64 kvout; + struct kv * const ret = api->iter_peek(iter, &kvout.kv); + if (key_out) + *key_out = __builtin_bswap64(kvout.key_be); // to LE + if (value_out) + *value_out = kvout.value; + return ret != NULL; +} +// }}} kv64 + +// }}} kvmap + +// vim:fdm=marker diff --git a/test/MassTrie-beta/wormhole/kv.h b/test/MassTrie-beta/wormhole/kv.h new file mode 100644 index 00000000..1e251e58 --- /dev/null +++ b/test/MassTrie-beta/wormhole/kv.h @@ -0,0 +1,554 @@ +/* + * Copyright (c) 2016--2021 Wu, Xingbo + * + * All rights reserved. No warranty, explicit or implicit, provided. 
+ */ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +// crc32c {{{ +#define KV_CRC32C_SEED ((0xDEADBEEFu)) + + extern u32 +kv_crc32c(const void * const ptr, u32 len); + + extern u64 +kv_crc32c_extend(const u32 crc32c); +// }}} crc32c + +// kv {{{ + +// struct {{{ +/* + * Some internal union names can be ignored: + * struct kv { + * u32 klen; + * u32 vlen; + * u64 hash; + * u8 kv[]; + * }; + */ +struct kv { + union { // the first u64 + u64 kvlen; + struct { + u32 klen; + union { u32 vlen; u32 refcnt; }; + }; + }; + union { + u64 hash; // hashvalue of the key + u64 priv; // can hide a value here if hash is not used + void * privptr; + struct { u32 hashlo; u32 hashhi; }; // little endian + struct { u32 privlo; u32 privhi; }; + }; + u8 kv[0]; // len(kv) == klen + vlen +} __attribute__((packed)); + +struct kref { + u32 len; + union { u32 hash32; u32 priv; }; + const u8 * ptr; +} __attribute__((packed)); + +struct kvref { + const u8 * kptr; // read-only + const u8 * vptr; // read-only + struct kv hdr; // hdr.kv[] is invalid +}; +// }}} struct + +// kv {{{ +typedef int (*kv_kv_cmp_func)(const struct kv *, const struct kv *); + + extern size_t +kv_size(const struct kv * const kv); + + extern size_t +kv_size_align(const struct kv * const kv, const u64 align); + + extern size_t +key_size(const struct kv * const key); + + extern size_t +key_size_align(const struct kv * const key, const u64 align); + + extern void +kv_update_hash(struct kv * const kv); + + extern void +kv_refill_value(struct kv * const kv, const void * const value, const u32 vlen); + + extern void +kv_refill(struct kv * const kv, const void * const key, const u32 klen, + const void * const value, const u32 vlen); + + extern void +kv_refill_str(struct kv * const kv, const char * const key, + const void * const value, const u32 vlen); + + extern void +kv_refill_str_str(struct kv * const kv, const char * const key, + const char * const value); + +// the u64 key is filled in big-endian byte order + 
extern void +kv_refill_u64(struct kv * const kv, const u64 key, const void * const value, const u32 vlen); + + extern void +kv_refill_hex32(struct kv * const kv, const u32 hex, const void * const value, const u32 vlen); + + extern void +kv_refill_hex64(struct kv * const kv, const u64 hex, const void * const value, const u32 vlen); + + extern void +kv_refill_hex64_klen(struct kv * const kv, const u64 hex, const u32 klen, + const void * const value, const u32 vlen); + + extern void +kv_refill_kref(struct kv * const kv, const struct kref * const kref); + + extern void +kv_refill_kref_v(struct kv * const kv, const struct kref * const kref, + const void * const value, const u32 vlen); + + extern struct kref +kv_kref(const struct kv * const key); + + extern struct kv * +kv_create(const void * const key, const u32 klen, const void * const value, const u32 vlen); + + extern struct kv * +kv_create_str(const char * const key, const void * const value, const u32 vlen); + + extern struct kv * +kv_create_str_str(const char * const key, const char * const value); + + extern struct kv * +kv_create_kref(const struct kref * const kref, const void * const value, const u32 vlen); + +// a static kv with klen == 0 + extern const struct kv * +kv_null(void); + + extern struct kv * +kv_dup(const struct kv * const kv); + + extern struct kv * +kv_dup_key(const struct kv * const kv); + + extern struct kv * +kv_dup2(const struct kv * const from, struct kv * const to); + + extern struct kv * +kv_dup2_key(const struct kv * const from, struct kv * const to); + + extern struct kv * +kv_dup2_key_prefix(const struct kv * const from, struct kv * const to, const u32 plen); + + extern bool +kv_match(const struct kv * const key1, const struct kv * const key2); + + extern bool +kv_match_hash(const struct kv * const key1, const struct kv * const key2); + + extern bool +kv_match_full(const struct kv * const kv1, const struct kv * const kv2); + + extern bool +kv_match_kv128(const struct kv * const sk, 
const u8 * const kv128); + + extern int +kv_compare(const struct kv * const kv1, const struct kv * const kv2); + + extern int +kv_k128_compare(const struct kv * const sk, const u8 * const k128); + + extern int +kv_kv128_compare(const struct kv * const sk, const u8 * const kv128); + + extern void +kv_qsort(struct kv ** const kvs, const size_t nr); + + extern u32 +kv_key_lcp(const struct kv * const key1, const struct kv * const key2); + + extern u32 +kv_key_lcp_skip(const struct kv * const key1, const struct kv * const key2, const u32 lcp0); + + extern void +kv_psort(struct kv ** const kvs, const u64 nr, const u64 tlo, const u64 thi); + + extern void * +kv_vptr(struct kv * const kv); + + extern void * +kv_kptr(struct kv * const kv); + + extern const void * +kv_vptr_c(const struct kv * const kv); + + extern const void * +kv_kptr_c(const struct kv * const kv); + + extern void +kv_print(const struct kv * const kv, const char * const cmd, FILE * const out); +// }}} kv + +// mm {{{ +typedef struct kv * (* kvmap_mm_in_func)(struct kv * kv, void * priv); +typedef struct kv * (* kvmap_mm_out_func)(struct kv * kv, struct kv * out); +typedef void (* kvmap_mm_free_func)(struct kv * kv, void * priv); + +// manage internal kv data of kvmap +struct kvmap_mm { + // to create a private copy of "kv" + // see put() functions + kvmap_mm_in_func in; + // to duplicate a private copy of "kv" to "out" + // see get() and iter_peek() functions + kvmap_mm_out_func out; + // to free a kv + // see del() and put() functions + kvmap_mm_free_func free; + void * priv; +}; + + extern struct kv * +kvmap_mm_in_noop(struct kv * const kv, void * const priv); + + extern struct kv * +kvmap_mm_out_noop(struct kv * const kv, struct kv * const out); + + extern void +kvmap_mm_free_noop(struct kv * const kv, void * const priv); + + extern struct kv * +kvmap_mm_in_dup(struct kv * const kv, void * const priv); + + extern struct kv * +kvmap_mm_out_dup(struct kv * const kv, struct kv * const out); + + extern void 
+kvmap_mm_free_free(struct kv * const kv, void * const priv); + +// the default mm +extern const struct kvmap_mm kvmap_mm_dup; // in:Dup, out:Dup, free:Free +extern const struct kvmap_mm kvmap_mm_ndf; // in:Noop, out:Dup, free:Free +// }}} mm + +// ref {{{ +typedef int (*kref_kv_cmp_func)(const struct kref *, const struct kv *); + +// ptr and len only + extern void +kref_ref_raw(struct kref * const kref, const u8 * const ptr, const u32 len); + +// this calculates hash32 + extern void +kref_ref_hash32(struct kref * const kref, const u8 * const ptr, const u32 len); + + extern void +kref_update_hash32(struct kref * const kref); + + extern void +kref_ref_kv(struct kref * const kref, const struct kv * const kv); + + extern void +kref_ref_kv_hash32(struct kref * const kref, const struct kv * const kv); + + extern bool +kref_match(const struct kref * const k1, const struct kref * const k2); + + extern bool +kref_kv_match(const struct kref * const kref, const struct kv * const k); + + extern int +kref_compare(const struct kref * const kref1, const struct kref * const kref2); + + extern int +kref_kv_compare(const struct kref * const kref, const struct kv * const k); + + extern u32 +kref_lcp(const struct kref * const k1, const struct kref * const k2); + + extern u32 +kref_kv_lcp(const struct kref * const kref, const struct kv * const kv); + + extern int +kref_k128_compare(const struct kref * const sk, const u8 * const k128); + + extern int +kref_kv128_compare(const struct kref * const sk, const u8 * const kv128); + + extern const struct kref * +kref_null(void); + + extern void +kvref_ref_kv(struct kvref * const ref, struct kv * const kv); + + extern struct kv * +kvref_dup2_kv(struct kvref * const ref, struct kv * const to); + + extern struct kv * +kvref_dup2_key(struct kvref * const ref, struct kv * const to); + + extern int +kvref_kv_compare(const struct kvref * const ref, const struct kv * const kv); +// }}} ref + +// kv128 {{{ + extern size_t +kv128_estimate_kv(const 
struct kv * const kv); + + extern u8 * +kv128_encode_kv(const struct kv * const kv, u8 * const out, size_t * const pesize); + + extern struct kv * +kv128_decode_kv(const u8 * const ptr, struct kv * const out, size_t * const pesize); + + extern size_t +kv128_size(const u8 * const ptr); +// }}} kv128 + +// }}} kv + +// kvmap {{{ + +// kvmap_api {{{ +typedef void (* kv_inp_func)(struct kv * const curr, void * const priv); + +// the merge function should: +// 1: return NULL if the origin kv is not changed at all +// 2: return kv0 if updates has been applied in-place +// 3: return a different kv if the original kv must be replaced +// In an in-memory kvmap, 2==1 and no further action is needed +// In a persistent kv store with a memtable, 2 will need an insertion if kv0 is not from the memtable +typedef struct kv * (* kv_merge_func)(struct kv * const kv0, void * const priv); + +struct kvmap_api { + // feature bits + bool hashkey; // true: caller needs to provide correct hash in kv/kref + bool ordered; // true: has iter_seek + bool threadsafe; // true: support thread_safe access + bool readonly; // true: no put() and del() + bool irefsafe; // true: iter's kref/kvref can be safely accessed after iter_seek/iter_skip/iter_park + bool unique; // provide unique keys, especially for iterators + bool refpark; // ref has park() and resume() + bool async; // XXX for testing KVell + + // put (aka put/upsert): return true on success; false on error + // mm.in() controls how things move into the kvmap; the default mm make a copy with malloc() + // mm.free() controls how old kv get disposed when replaced + bool (* put) (void * const ref, struct kv * const kv); + // get: search and return a kv if found, or NULL if not + // with the default mm: malloc() if out == NULL; otherwise, use out as buffer + // with custom kvmap_mm: mm.out() controls buffer; use with caution + // caller should use the returned ptr even if out is provided + struct kv * (* get) (void * const ref, const struct 
kref * const key, struct kv * const out); + // probe: return true on found, false on not found + bool (* probe) (void * const ref, const struct kref * const key); + // del: return true on something deleted, false on not found + // mm.free() controls how old kv get disposed when replaced + bool (* del) (void * const ref, const struct kref * const key); + // inp: inplace operation if key exists; otherwise return false; uf() is always executed even with NULL key + // inpr/inpw acquires r/w locks respectively. + // Note that in inpw() you can only change the value. + bool (* inpr) (void * const ref, const struct kref * const key, kv_inp_func uf, void * const priv); + bool (* inpw) (void * const ref, const struct kref * const key, kv_inp_func uf, void * const priv); + // merge: put+callback on old/new keys; another name: read-modify-write + // return true if successfull; return false on error + bool (* merge) (void * const ref, const struct kref * const key, kv_merge_func uf, void * const priv); + // delete-range: delete all keys from start (inclusive) to end (exclusive) + u64 (* delr) (void * const ref, const struct kref * const start, const struct kref * const end); + // make everything persist; for persistent maps only + void (* sync) (void * const ref); + + // general guidelines for thread-safe iters: + // - it is assumed that the key under the cursor is locked/freezed/immutable + // - once created one must call iter_seek to make it valid + // - the ownership of ref is given to the iter so ref should not be used until iter_destroy + // - creating and use more than one iter based on a ref can cause deadlocks + void * (* iter_create) (void * const ref); + // move the cursor to the first key >= search-key; + void (* iter_seek) (void * const iter, const struct kref * const key); + // check if the cursor points to a valid key + bool (* iter_valid) (void * const iter); + // return the current key; copy to out if (out != NULL) + // mm.out() controls copy-out + struct kv * 
(* iter_peek) (void * const iter, struct kv * const out); + // similar to peek but does not copy; return false if iter is invalid + bool (* iter_kref) (void * const iter, struct kref * const kref); + // similar to iter_kref but also provide the value + bool (* iter_kvref) (void * const iter, struct kvref * const kvref); + // iter_retain makes kref or kvref of the current iter remain valid until released + // the returned opaque pointer should be provided when releasing the hold + u64 (* iter_retain) (void * const iter); + void (* iter_release) (void * const iter, const u64 opaque); + // skip one element + void (* iter_skip1) (void * const iter); + // skip nr elements + void (* iter_skip) (void * const iter, const u32 nr); + // iter_next == iter_peek + iter_skip1 + struct kv * (* iter_next) (void * const iter, struct kv * const out); + // perform inplace opeation if the current key is valid; return false if no current key + // the uf() is always executed even with NULL key + bool (* iter_inp) (void * const iter, kv_inp_func uf, void * const priv); + // invalidate the iter to release any resources or locks + // afterward, must call seek() again before accessing data + void (* iter_park) (void * const iter); + // destroy iter + void (* iter_destroy) (void * const iter); + + // misc: + // create refs for maps if required; always use use kvmap_ref() and kvmap_unref() + // if there are ref/unref functions, ref-ptr should be used as map for all kv operations + void * (* ref) (void * map); + // return the original map + void * (* unref) (void * ref); + // pause access without unref; must call resume later before access index again + void (* park) (void * ref); + // resume access of ref; must be paired with a park() + void (* resume) (void * ref); + + // UNSAFE functions: + // empty the map + void (* clean) (void * map); + // erase everything + void (* destroy) (void * map); + // for debugging + void (* fprint) (void * map, FILE * const out); +}; + +// registry +struct 
kvmap_api_reg { + int nargs; // number of arguments after name + const char * name; + const char * args_msg; // see ...helper_message + // multiple apis may share one create function + // arguments: name (e.g., "rdb"), mm (usually NULL), the remaining args + void * (*create)(const char *, const struct kvmap_mm *, char **); + const struct kvmap_api * api; +}; + +// call this function to register a kvmap_api + extern void +kvmap_api_register(const int nargs, const char * const name, const char * const args_msg, + void * (*create)(const char *, const struct kvmap_mm *, char **), const struct kvmap_api * const api); + + extern void +kvmap_api_helper_message(void); + + extern int +kvmap_api_helper(int argc, char ** const argv, const struct kvmap_mm * const mm, + const struct kvmap_api ** const api_out, void ** const map_out); +// }}} kvmap_api + +// helpers {{{ + extern void +kvmap_inp_steal_kv(struct kv * const kv, void * const priv); + + extern void * +kvmap_ref(const struct kvmap_api * const api, void * const map); + + extern void * +kvmap_unref(const struct kvmap_api * const api, void * const ref); + + extern struct kv * +kvmap_kv_get(const struct kvmap_api * const api, void * const ref, + const struct kv * const key, struct kv * const out); + + extern bool +kvmap_kv_probe(const struct kvmap_api * const api, void * const ref, + const struct kv * const key); + + extern bool +kvmap_kv_put(const struct kvmap_api * const api, void * const ref, + struct kv * const kv); + + extern bool +kvmap_kv_del(const struct kvmap_api * const api, void * const ref, + const struct kv * const key); + + extern bool +kvmap_kv_inpr(const struct kvmap_api * const api, void * const ref, + const struct kv * const key, kv_inp_func uf, void * const priv); + + extern bool +kvmap_kv_inpw(const struct kvmap_api * const api, void * const ref, + const struct kv * const key, kv_inp_func uf, void * const priv); + + extern bool +kvmap_kv_merge(const struct kvmap_api * const api, void * const ref, + 
const struct kv * const key, kv_merge_func uf, void * const priv); + + extern u64 +kvmap_kv_delr(const struct kvmap_api * const api, void * const ref, + const struct kv * const start, const struct kv * const end); + + extern void +kvmap_kv_iter_seek(const struct kvmap_api * const api, void * const iter, + const struct kv * const key); + + extern struct kv * +kvmap_raw_get(const struct kvmap_api * const api, void * const ref, + const u32 len, const u8 * const ptr, struct kv * const out); + + extern bool +kvmap_raw_probe(const struct kvmap_api * const api, void * const ref, + const u32 len, const u8 * const ptr); + + extern bool +kvmap_raw_del(const struct kvmap_api * const api, void * const ref, + const u32 len, const u8 * const ptr); + + extern bool +kvmap_raw_inpr(const struct kvmap_api * const api, void * const ref, + const u32 len, const u8 * const ptr, kv_inp_func uf, void * const priv); + + extern bool +kvmap_raw_inpw(const struct kvmap_api * const api, void * const ref, + const u32 len, const u8 * const ptr, kv_inp_func uf, void * const priv); + + extern void +kvmap_raw_iter_seek(const struct kvmap_api * const api, void * const iter, + const u32 len, const u8 * const ptr); + + extern u64 +kvmap_dump_keys(const struct kvmap_api * const api, void * const map, const int fd); + + extern bool +kvmap_kv64_get(const struct kvmap_api * const api, void * const ref, + const u64 key, u64 * const out); + + extern bool +kvmap_kv64_probe(const struct kvmap_api * const api, void * const ref, + const u64 key); + + extern bool +kvmap_kv64_put(const struct kvmap_api * const api, void * const ref, + const u64 key, const u64 value); + + extern bool +kvmap_kv64_del(const struct kvmap_api * const api, void * const ref, + const u64 key); + + extern void +kvmap_kv64_iter_seek(const struct kvmap_api * const api, void * const iter, + const u64 key); + + extern bool +kvmap_kv64_iter_peek(const struct kvmap_api * const api, void * const iter, + u64 * const key_out, u64 * const 
value_out); +// }}} helpers + +// }}} kvmap + +#ifdef __cplusplus +} +#endif +// vim:fdm=marker diff --git a/test/MassTrie-beta/wormhole/lib.c b/test/MassTrie-beta/wormhole/lib.c new file mode 100644 index 00000000..06d45f6d --- /dev/null +++ b/test/MassTrie-beta/wormhole/lib.c @@ -0,0 +1,3026 @@ +/* + * Copyright (c) 2016--2021 Wu, Xingbo + * + * All rights reserved. No warranty, explicit or implicit, provided. + */ +#define _GNU_SOURCE + +// headers {{{ +#include "lib.h" +#include "ctypes.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include // va_start + +#if defined(__linux__) +#include +#include // malloc_usable_size +#elif defined(__APPLE__) && defined(__MACH__) +#include +#include +#elif defined(__FreeBSD__) +#include +#include +#endif // OS + +#if defined(__FreeBSD__) +#include +#endif +// }}} headers + +// math {{{ + inline u64 +mhash64(const u64 v) +{ + return v * 11400714819323198485lu; +} + + inline u32 +mhash32(const u32 v) +{ + return v * 2654435761u; +} + +// From Daniel Lemire's blog (2013, lemire.me) + u64 +gcd64(u64 a, u64 b) +{ + if (a == 0) + return b; + if (b == 0) + return a; + + const u32 shift = (u32)__builtin_ctzl(a | b); + a >>= __builtin_ctzl(a); + do { + b >>= __builtin_ctzl(b); + if (a > b) { + const u64 t = b; + b = a; + a = t; + } + b = b - a; + } while (b); + return a << shift; +} +// }}} math + +// random {{{ +// Lehmer's generator is 2x faster than xorshift +/** + * D. H. Lehmer, Mathematical methods in large-scale computing units. + * Proceedings of a Second Symposium on Large Scale Digital Calculating + * Machinery; + * Annals of the Computation Laboratory, Harvard Univ. 26 (1951), pp. 141-146. + * + * P L'Ecuyer, Tables of linear congruential generators of different sizes and + * good lattice structure. Mathematics of Computation of the American + * Mathematical + * Society 68.225 (1999): 249-260. 
+ */ +struct lehmer_u64 { + union { + u128 v128; + u64 v64[2]; + }; +}; + +static __thread struct lehmer_u64 rseed_u128 = {.v64 = {4294967291, 1549556881}}; + + static inline u64 +lehmer_u64_next(struct lehmer_u64 * const s) +{ + const u64 r = s->v64[1]; + s->v128 *= 0xda942042e4dd58b5lu; + return r; +} + + static inline void +lehmer_u64_seed(struct lehmer_u64 * const s, const u64 seed) +{ + s->v128 = (((u128)(~seed)) << 64) | (seed | 1); + (void)lehmer_u64_next(s); +} + + inline u64 +random_u64(void) +{ + return lehmer_u64_next(&rseed_u128); +} + + inline void +srandom_u64(const u64 seed) +{ + lehmer_u64_seed(&rseed_u128, seed); +} + + inline double +random_double(void) +{ + // random between [0.0 - 1.0] + const u64 r = random_u64(); + return ((double)r) * (1.0 / ((double)(~0lu))); +} +// }}} random + +// timing {{{ + inline u64 +time_nsec(void) +{ + struct timespec ts; + // MONO_RAW is 5x to 10x slower than MONO + clock_gettime(CLOCK_MONOTONIC, &ts); + return ((u64)ts.tv_sec) * 1000000000lu + ((u64)ts.tv_nsec); +} + + inline double +time_sec(void) +{ + const u64 nsec = time_nsec(); + return ((double)nsec) * 1.0e-9; +} + + inline u64 +time_diff_nsec(const u64 last) +{ + return time_nsec() - last; +} + + inline double +time_diff_sec(const double last) +{ + return time_sec() - last; +} + +// need char str[64] + void +time_stamp(char * str, const size_t size) +{ + time_t now; + struct tm nowtm; + time(&now); + localtime_r(&now, &nowtm); + strftime(str, size, "%F %T %z", &nowtm); +} + + void +time_stamp2(char * str, const size_t size) +{ + time_t now; + struct tm nowtm; + time(&now); + localtime_r(&now, &nowtm); + strftime(str, size, "%F-%H-%M-%S%z", &nowtm); +} +// }}} timing + +// cpucache {{{ + inline void +cpu_pause(void) +{ +#if defined(__x86_64__) + _mm_pause(); +#elif defined(__aarch64__) + // nop +#endif +} + + inline void +cpu_mfence(void) +{ + atomic_thread_fence(MO_SEQ_CST); +} + +// compiler fence + inline void +cpu_cfence(void) +{ + 
atomic_thread_fence(MO_ACQ_REL); +} + + inline void +cpu_prefetch0(const void * const ptr) +{ + __builtin_prefetch(ptr, 0, 0); +} + + inline void +cpu_prefetch1(const void * const ptr) +{ + __builtin_prefetch(ptr, 0, 1); +} + + inline void +cpu_prefetch2(const void * const ptr) +{ + __builtin_prefetch(ptr, 0, 2); +} + + inline void +cpu_prefetch3(const void * const ptr) +{ + __builtin_prefetch(ptr, 0, 3); +} + + inline void +cpu_prefetchw(const void * const ptr) +{ + __builtin_prefetch(ptr, 1, 0); +} +// }}} cpucache + +// crc32c {{{ + inline u32 +crc32c_u8(const u32 crc, const u8 v) +{ +#if defined(__x86_64__) + return _mm_crc32_u8(crc, v); +#elif defined(__aarch64__) + return __crc32cb(crc, v); +#endif +} + + inline u32 +crc32c_u16(const u32 crc, const u16 v) +{ +#if defined(__x86_64__) + return _mm_crc32_u16(crc, v); +#elif defined(__aarch64__) + return __crc32ch(crc, v); +#endif +} + + inline u32 +crc32c_u32(const u32 crc, const u32 v) +{ +#if defined(__x86_64__) + return _mm_crc32_u32(crc, v); +#elif defined(__aarch64__) + return __crc32cw(crc, v); +#endif +} + + inline u32 +crc32c_u64(const u32 crc, const u64 v) +{ +#if defined(__x86_64__) + return (u32)_mm_crc32_u64(crc, v); +#elif defined(__aarch64__) + return (u32)__crc32cd(crc, v); +#endif +} + + inline u32 +crc32c_inc_123(const u8 * buf, u32 nr, u32 crc) +{ + if (nr == 1) + return crc32c_u8(crc, buf[0]); + + crc = crc32c_u16(crc, *(u16 *)buf); + return (nr == 2) ? crc : crc32c_u8(crc, buf[2]); +} + + inline u32 +crc32c_inc_x4(const u8 * buf, u32 nr, u32 crc) +{ + //debug_assert((nr & 3) == 0); + const u32 nr8 = nr >> 3; +#pragma nounroll + for (u32 i = 0; i < nr8; i++) + crc = crc32c_u64(crc, ((u64*)buf)[i]); + + if (nr & 4u) + crc = crc32c_u32(crc, ((u32*)buf)[nr8<<1]); + return crc; +} + + u32 +crc32c_inc(const u8 * buf, u32 nr, u32 crc) +{ + crc = crc32c_inc_x4(buf, nr, crc); + const u32 nr123 = nr & 3u; + return nr123 ? 
crc32c_inc_123(buf + nr - nr123, nr123, crc) : crc; +} +// }}} crc32c + +// debug {{{ + void +debug_break(void) +{ + usleep(100); +} + +static u64 * debug_watch_u64 = NULL; + + static void +watch_u64_handler(const int sig) +{ + (void)sig; + const u64 v = debug_watch_u64 ? (*debug_watch_u64) : 0; + fprintf(stderr, "[USR1] %lu (0x%lx)\n", v, v); +} + + void +watch_u64_usr1(u64 * const ptr) +{ + debug_watch_u64 = ptr; + struct sigaction sa = {}; + sa.sa_handler = watch_u64_handler; + sigemptyset(&(sa.sa_mask)); + sa.sa_flags = SA_RESTART; + if (sigaction(SIGUSR1, &sa, NULL) == -1) { + fprintf(stderr, "Failed to set signal handler for SIGUSR1\n"); + } else { + fprintf(stderr, "to watch> kill -s SIGUSR1 %d\n", getpid()); + } +} + +static void * debug_bt_state = NULL; +#if defined(BACKTRACE) && defined(__linux__) +// TODO: get exec path on MacOS and FreeBSD + +#include +static char debug_filepath[1024] = {}; + + static void +debug_bt_error_cb(void * const data, const char * const msg, const int errnum) +{ + (void)data; + if (msg) + dprintf(2, "libbacktrace: %s %s\n", msg, strerror(errnum)); +} + + static int +debug_bt_print_cb(void * const data, const uintptr_t pc, + const char * const file, const int lineno, const char * const func) +{ + u32 * const plevel = (typeof(plevel))data; + if (file || func || lineno) { + dprintf(2, "[%u]0x%012lx " TERMCLR(35) "%s" TERMCLR(31) ":" TERMCLR(34) "%d" TERMCLR(0)" %s\n", + *plevel, pc, file ? file : "???", lineno, func ? 
func : "???"); + } else if (pc) { + dprintf(2, "[%u]0x%012lx ??\n", *plevel, pc); + } + (*plevel)++; + return 0; +} + +__attribute__((constructor)) + static void +debug_backtrace_init(void) +{ + const ssize_t len = readlink("/proc/self/exe", debug_filepath, 1023); + // disable backtrace + if (len < 0 || len >= 1023) + return; + + debug_filepath[len] = '\0'; + debug_bt_state = backtrace_create_state(debug_filepath, 1, debug_bt_error_cb, NULL); +} +#endif // BACKTRACE + + static void +debug_wait_gdb(void * const bt_state) +{ + if (bt_state) { +#if defined(BACKTRACE) + dprintf(2, "Backtrace :\n"); + u32 level = 0; + backtrace_full(debug_bt_state, 1, debug_bt_print_cb, debug_bt_error_cb, &level); +#endif // BACKTRACE + } else { // fallback to execinfo if no backtrace or initialization failed + void *array[64]; + const int size = backtrace(array, 64); + dprintf(2, "Backtrace (%d):\n", size - 1); + backtrace_symbols_fd(array + 1, size - 1, 2); + } + + abool v = true; + char timestamp[32]; + time_stamp(timestamp, 32); + char threadname[32] = {}; + thread_get_name(pthread_self(), threadname, 32); + strcat(threadname, "(!!)"); + thread_set_name(pthread_self(), threadname); + char hostname[32]; + gethostname(hostname, 32); + + const char * const pattern = "[Waiting GDB] %1$s %2$s @ %3$s\n" + " Attach me: " TERMCLR(31) "sudo -Hi gdb -p %4$d" TERMCLR(0) "\n"; + char buf[256]; + sprintf(buf, pattern, timestamp, threadname, hostname, getpid()); + write(2, buf, strlen(buf)); + + // to continue: gdb> set var v = 0 + // to kill from shell: $ kill %pid; kill -CONT %pid + + // uncomment this line to surrender the shell on error + // kill(getpid(), SIGSTOP); // stop burning cpu, once + + static au32 nr_waiting = 0; + const u32 seq = atomic_fetch_add_explicit(&nr_waiting, 1, MO_RELAXED); + if (seq == 0) { + sprintf(buf, "/run/user/%u/.debug_wait_gdb_pid", getuid()); + const int pidfd = open(buf, O_CREAT|O_TRUNC|O_WRONLY, 00644); + if (pidfd >= 0) { + dprintf(pidfd, "%u", getpid()); + 
close(pidfd); + } + } + +#pragma nounroll + while (atomic_load_explicit(&v, MO_CONSUME)) + sleep(1); +} + +#ifndef NDEBUG + void +debug_assert(const bool v) +{ + if (!v) + debug_wait_gdb(debug_bt_state); +} +#endif + +__attribute__((noreturn)) + void +debug_die(void) +{ + debug_wait_gdb(debug_bt_state); + exit(0); +} + +__attribute__((noreturn)) + void +debug_die_perror(void) +{ + perror(NULL); + debug_die(); +} + +#if !defined(NOSIGNAL) +// signal handler for wait_gdb on fatal errors + static void +wait_gdb_handler(const int sig, siginfo_t * const info, void * const context) +{ + (void)info; + (void)context; + char buf[64] = "[SIGNAL] "; + strcat(buf, strsignal(sig)); + write(2, buf, strlen(buf)); + debug_wait_gdb(NULL); +} + +// setup hooks for catching fatal errors +__attribute__((constructor)) + static void +debug_init(void) +{ + void * stack = pages_alloc_4kb(16); + //fprintf(stderr, "altstack %p\n", stack); + stack_t ss = {.ss_sp = stack, .ss_flags = 0, .ss_size = PGSZ*16}; + if (sigaltstack(&ss, NULL)) + fprintf(stderr, "sigaltstack failed\n"); + + struct sigaction sa = {.sa_sigaction = wait_gdb_handler, .sa_flags = SA_SIGINFO | SA_ONSTACK}; + sigemptyset(&(sa.sa_mask)); + const int fatals[] = {SIGSEGV, SIGFPE, SIGILL, SIGBUS, 0}; + for (int i = 0; fatals[i]; i++) { + if (sigaction(fatals[i], &sa, NULL) == -1) { + fprintf(stderr, "Failed to set signal handler for %s\n", strsignal(fatals[i])); + fflush(stderr); + } + } +} + +__attribute__((destructor)) + static void +debug_exit(void) +{ + // to get rid of valgrind warnings + stack_t ss = {.ss_flags = SS_DISABLE}; + stack_t oss = {}; + sigaltstack(&ss, &oss); + if (oss.ss_sp) + pages_unmap(oss.ss_sp, PGSZ * 16); +} +#endif // !defined(NOSIGNAL) + + void +debug_dump_maps(FILE * const out) +{ + FILE * const in = fopen("/proc/self/smaps", "r"); + char * line0 = yalloc(1024); + size_t size0 = 1024; + while (!feof(in)) { + const ssize_t r1 = getline(&line0, &size0, in); + if (r1 < 0) break; + fprintf(out, "%s", 
line0); + } + free(line0); + fflush(out); + fclose(in); +} + +static pid_t perf_pid = 0; + +#if defined(__linux__) +__attribute__((constructor)) + static void +debug_perf_init(void) +{ + const pid_t ppid = getppid(); + char tmp[256] = {}; + sprintf(tmp, "/proc/%d/cmdline", ppid); + FILE * const fc = fopen(tmp, "r"); + const size_t nr = fread(tmp, 1, sizeof(tmp) - 1, fc); + fclose(fc); + // look for "perf record" + if (nr < 12) + return; + tmp[nr] = '\0'; + for (u64 i = 0; i < nr; i++) + if (tmp[i] == 0) + tmp[i] = ' '; + + char * const perf = strstr(tmp, "perf record"); + if (perf) { + fprintf(stderr, "%s: perf detected\n", __func__); + perf_pid = ppid; + } +} +#endif // __linux__ + + bool +debug_perf_switch(void) +{ + if (perf_pid > 0) { + kill(perf_pid, SIGUSR2); + return true; + } else { + return false; + } +} +// }}} debug + +// mm {{{ +#ifdef ALLOCFAIL + bool +alloc_fail(void) +{ +#define ALLOCFAIL_RECP ((64lu)) +#define ALLOCFAIL_MAGIC ((ALLOCFAIL_RECP / 3lu)) + return ((random_u64() % ALLOCFAIL_RECP) == ALLOCFAIL_MAGIC); +} + +#ifdef MALLOCFAIL +extern void * __libc_malloc(size_t size); + void * +malloc(size_t size) +{ + if (alloc_fail()) + return NULL; + return __libc_malloc(size); +} + +extern void * __libc_calloc(size_t nmemb, size_t size); + void * +calloc(size_t nmemb, size_t size) +{ + if (alloc_fail()) + return NULL; + return __libc_calloc(nmemb, size); +} + +extern void *__libc_realloc(void *ptr, size_t size); + + void * +realloc(void *ptr, size_t size) +{ + if (alloc_fail()) + return NULL; + return __libc_realloc(ptr, size); +} +#endif // MALLOC_FAIL +#endif // ALLOC_FAIL + + void * +xalloc(const size_t align, const size_t size) +{ +#ifdef ALLOCFAIL + if (alloc_fail()) + return NULL; +#endif + void * p; + return (posix_memalign(&p, align, size) == 0) ? 
p : NULL; +} + +// alloc cache-line aligned address + void * +yalloc(const size_t size) +{ +#ifdef ALLOCFAIL + if (alloc_fail()) + return NULL; +#endif + void * p; + return (posix_memalign(&p, 64, size) == 0) ? p : NULL; +} + + void ** +malloc_2d(const size_t nr, const size_t size) +{ + const size_t size1 = nr * sizeof(void *); + const size_t size2 = nr * size; + void ** const mem = malloc(size1 + size2); + u8 * const mem2 = ((u8 *)mem) + size1; + for (size_t i = 0; i < nr; i++) + mem[i] = mem2 + (i * size); + return mem; +} + + inline void ** +calloc_2d(const size_t nr, const size_t size) +{ + void ** const ret = malloc_2d(nr, size); + memset(ret[0], 0, nr * size); + return ret; +} + + inline void +pages_unmap(void * const ptr, const size_t size) +{ +#ifndef HEAPCHECKING + munmap(ptr, size); +#else + (void)size; + free(ptr); +#endif +} + + void +pages_lock(void * const ptr, const size_t size) +{ + static bool use_mlock = true; + if (use_mlock) { + const int ret = mlock(ptr, size); + if (ret != 0) { + use_mlock = false; + fprintf(stderr, "%s: mlock disabled\n", __func__); + } + } +} + +#ifndef HEAPCHECKING + static void * +pages_do_alloc(const size_t size, const int flags) +{ + // vi /etc/security/limits.conf + // * - memlock unlimited + void * const p = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0); + if (p == MAP_FAILED) + return NULL; + + pages_lock(p, size); + return p; +} + +#if defined(__linux__) && defined(MAP_HUGETLB) + +#if defined(MAP_HUGE_SHIFT) +#define PAGES_FLAGS_1G ((MAP_HUGETLB | (30 << MAP_HUGE_SHIFT))) +#define PAGES_FLAGS_2M ((MAP_HUGETLB | (21 << MAP_HUGE_SHIFT))) +#else // MAP_HUGE_SHIFT +#define PAGES_FLAGS_1G ((MAP_HUGETLB)) +#define PAGES_FLAGS_2M ((MAP_HUGETLB)) +#endif // MAP_HUGE_SHIFT + +#else +#define PAGES_FLAGS_1G ((0)) +#define PAGES_FLAGS_2M ((0)) +#endif // __linux__ + +#endif // HEAPCHECKING + + inline void * +pages_alloc_1gb(const size_t nr_1gb) +{ + const u64 sz = nr_1gb << 30; +#ifndef HEAPCHECKING + return 
pages_do_alloc(sz, MAP_PRIVATE | MAP_ANONYMOUS | PAGES_FLAGS_1G); +#else + void * const p = xalloc(1lu << 21, sz); // Warning: valgrind fails with 30 + if (p) + memset(p, 0, sz); + return p; +#endif +} + + inline void * +pages_alloc_2mb(const size_t nr_2mb) +{ + const u64 sz = nr_2mb << 21; +#ifndef HEAPCHECKING + return pages_do_alloc(sz, MAP_PRIVATE | MAP_ANONYMOUS | PAGES_FLAGS_2M); +#else + void * const p = xalloc(1lu << 21, sz); + if (p) + memset(p, 0, sz); + return p; +#endif +} + + inline void * +pages_alloc_4kb(const size_t nr_4kb) +{ + const size_t sz = nr_4kb << 12; +#ifndef HEAPCHECKING + return pages_do_alloc(sz, MAP_PRIVATE | MAP_ANONYMOUS); +#else + void * const p = xalloc(1lu << 12, sz); + if (p) + memset(p, 0, sz); + return p; +#endif +} + + void * +pages_alloc_best(const size_t size, const bool try_1gb, u64 * const size_out) +{ +#ifdef ALLOCFAIL + if (alloc_fail()) + return NULL; +#endif + // 1gb huge page: at least 0.25GB + if (try_1gb) { + if (size >= (1lu << 28)) { + const size_t nr_1gb = bits_round_up(size, 30) >> 30; + void * const p1 = pages_alloc_1gb(nr_1gb); + if (p1) { + *size_out = nr_1gb << 30; + return p1; + } + } + } + + // 2mb huge page: at least 0.5MB + if (size >= (1lu << 19)) { + const size_t nr_2mb = bits_round_up(size, 21) >> 21; + void * const p2 = pages_alloc_2mb(nr_2mb); + if (p2) { + *size_out = nr_2mb << 21; + return p2; + } + } + + const size_t nr_4kb = bits_round_up(size, 12) >> 12; + void * const p3 = pages_alloc_4kb(nr_4kb); + if (p3) + *size_out = nr_4kb << 12; + return p3; +} +// }}} mm + +// process/thread {{{ +static u32 process_ncpu; +#if defined(__FreeBSD__) +typedef cpuset_t cpu_set_t; +#elif defined(__APPLE__) && defined(__MACH__) +typedef u64 cpu_set_t; +#define CPU_SETSIZE ((64)) +#define CPU_COUNT(__cpu_ptr__) (__builtin_popcountl(*__cpu_ptr__)) +#define CPU_ISSET(__cpu_idx__, __cpu_ptr__) (((*__cpu_ptr__) >> __cpu_idx__) & 1lu) +#define CPU_ZERO(__cpu_ptr__) ((*__cpu_ptr__) = 0) +#define CPU_SET(__cpu_idx__, 
__cpu_ptr__) ((*__cpu_ptr__) |= (1lu << __cpu_idx__)) +#define CPU_CLR(__cpu_idx__, __cpu_ptr__) ((*__cpu_ptr__) &= ~(1lu << __cpu_idx__)) +#define pthread_attr_setaffinity_np(...) ((void)0) +#endif + +__attribute__((constructor)) + static void +process_init(void) +{ + // Linux's default is 1024 cpus + process_ncpu = (u32)sysconf(_SC_NPROCESSORS_CONF); + if (process_ncpu > CPU_SETSIZE) { + fprintf(stderr, "%s: can use only %zu cores\n", + __func__, (size_t)CPU_SETSIZE); + process_ncpu = CPU_SETSIZE; + } + thread_set_name(pthread_self(), "main"); +} + + static inline int +thread_getaffinity_set(cpu_set_t * const cpuset) +{ +#if defined(__linux__) + return sched_getaffinity(0, sizeof(*cpuset), cpuset); +#elif defined(__FreeBSD__) + return cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, sizeof(*cpuset), cpuset); +#elif defined(__APPLE__) && defined(__MACH__) + *cpuset = (1lu << process_ncpu) - 1; + return (int)process_ncpu; // TODO +#endif // OS +} + + static inline int +thread_setaffinity_set(const cpu_set_t * const cpuset) +{ +#if defined(__linux__) + return sched_setaffinity(0, sizeof(*cpuset), cpuset); +#elif defined(__FreeBSD__) + return cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, sizeof(*cpuset), cpuset); +#elif defined(__APPLE__) && defined(__MACH__) + (void)cpuset; // TODO + return 0; +#endif // OS +} + + void +thread_get_name(const pthread_t pt, char * const name, const size_t len) +{ +#if defined(__linux__) + pthread_getname_np(pt, name, len); +#elif defined(__FreeBSD__) + pthread_get_name_np(pt, name, len); +#elif defined(__APPLE__) && defined(__MACH__) + (void)pt; + (void)len; + strcpy(name, "unknown"); // TODO +#endif // OS +} + + void +thread_set_name(const pthread_t pt, const char * const name) +{ +#if defined(__linux__) + pthread_setname_np(pt, name); +#elif defined(__FreeBSD__) + pthread_set_name_np(pt, name); +#elif defined(__APPLE__) && defined(__MACH__) + (void)pt; + (void)name; // TODO +#endif // OS +} + +// kB + long 
+process_get_rss(void) +{ + struct rusage rs; + getrusage(RUSAGE_SELF, &rs); + return rs.ru_maxrss; +} + + u32 +process_affinity_count(void) +{ + cpu_set_t set; + if (thread_getaffinity_set(&set) != 0) + return process_ncpu; + + const u32 nr = (u32)CPU_COUNT(&set); + return nr ? nr : process_ncpu; +} + + u32 +process_getaffinity_list(const u32 max, u32 * const cores) +{ + memset(cores, 0, max * sizeof(cores[0])); + cpu_set_t set; + if (thread_getaffinity_set(&set) != 0) + return 0; + + const u32 nr_affinity = (u32)CPU_COUNT(&set); + const u32 nr = nr_affinity < max ? nr_affinity : max; + u32 j = 0; + for (u32 i = 0; i < process_ncpu; i++) { + if (CPU_ISSET(i, &set)) + cores[j++] = i; + + if (j >= nr) + break; + } + return j; +} + + void +thread_setaffinity_list(const u32 nr, const u32 * const list) +{ + cpu_set_t set; + CPU_ZERO(&set); + for (u32 i = 0; i < nr; i++) + if (list[i] < process_ncpu) + CPU_SET(list[i], &set); + thread_setaffinity_set(&set); +} + + void +thread_pin(const u32 cpu) +{ + cpu_set_t set; + CPU_ZERO(&set); + CPU_SET(cpu % process_ncpu, &set); + thread_setaffinity_set(&set); +} + + u64 +process_cpu_time_usec(void) +{ + struct rusage rs; + getrusage(RUSAGE_SELF, &rs); + const u64 usr = (((u64)rs.ru_utime.tv_sec) * 1000000lu) + ((u64)rs.ru_utime.tv_usec); + const u64 sys = (((u64)rs.ru_stime.tv_sec) * 1000000lu) + ((u64)rs.ru_stime.tv_usec); + return usr + sys; +} + +struct fork_join_info { + u32 total; + u32 ncores; + u32 * cores; + void *(*func)(void *); + bool args; + union { + void * arg1; + void ** argn; + }; + union { + struct { au32 ferr, jerr; }; + au64 xerr; + }; +}; + +// DON'T CHANGE! 
+#define FORK_JOIN_RANK_BITS ((16)) // 16 +#define FORK_JOIN_MAX ((1u << FORK_JOIN_RANK_BITS)) + +/* + * fj(6): T0 + * / \ + * T0 T4 + * / \ / + * T0 T2 T4 + * / \ / \ / \ + * t0 t1 t2 t3 t4 t5 + */ + +// recursive tree fork-join + static void * +thread_do_fork_join_worker(void * const ptr) +{ + struct entry13 fjp = {.ptr = ptr}; + // GCC: Without explicitly casting from fjp.fji (a 45-bit u64 value), + // the high bits will get truncated, which is always CORRECT in gcc. + // Don't use gcc. + struct fork_join_info * const fji = u64_to_ptr(fjp.e3); + const u32 rank = (u32)fjp.e1; + + const u32 nchild = (u32)__builtin_ctz(rank ? rank : bits_p2_up_u32(fji->total)); + debug_assert(nchild <= FORK_JOIN_RANK_BITS); + pthread_t tids[FORK_JOIN_RANK_BITS]; + if (nchild) { + cpu_set_t set; + CPU_ZERO(&set); + pthread_attr_t attr; + pthread_attr_init(&attr); + //pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); // Joinable by default + // fork top-down + for (u32 i = nchild - 1; i < nchild; i--) { + const u32 cr = rank + (1u << i); // child's rank + if (cr >= fji->total) + continue; // should not break + const u32 core = fji->cores[(cr < fji->ncores) ? cr : (cr % fji->ncores)]; + CPU_SET(core, &set); + pthread_attr_setaffinity_np(&attr, sizeof(set), &set); + fjp.e1 = (u16)cr; + const int r = pthread_create(&tids[i], &attr, thread_do_fork_join_worker, fjp.ptr); + CPU_CLR(core, &set); + if (unlikely(r)) { // fork failed + memset(&tids[0], 0, sizeof(tids[0]) * (i+1)); + u32 nmiss = (1u << (i + 1)) - 1; + if ((rank + nmiss) >= fji->total) + nmiss = fji->total - 1 - rank; + (void)atomic_fetch_add_explicit(&fji->ferr, nmiss, MO_RELAXED); + break; + } + } + pthread_attr_destroy(&attr); + } + + char thname0[16]; + char thname1[16]; + thread_get_name(pthread_self(), thname0, 16); + snprintf(thname1, 16, "%.8s_%u", thname0, rank); + thread_set_name(pthread_self(), thname1); + + void * const ret = fji->func(fji->args ? 
fji->argn[rank] : fji->arg1); + + thread_set_name(pthread_self(), thname0); + // join bottom-up + for (u32 i = 0; i < nchild; i++) { + const u32 cr = rank + (1u << i); // child rank + if (cr >= fji->total) + break; // safe to break + if (tids[i]) { + const int r = pthread_join(tids[i], NULL); + if (unlikely(r)) { // error + //fprintf(stderr, "pthread_join %u..%u = %d: %s\n", rank, cr, r, strerror(r)); + (void)atomic_fetch_add_explicit(&fji->jerr, 1, MO_RELAXED); + } + } + } + return ret; +} + + u64 +thread_fork_join(u32 nr, void *(*func) (void *), const bool args, void * const argx) +{ + if (unlikely(nr > FORK_JOIN_MAX)) { + fprintf(stderr, "%s reduce nr to %u\n", __func__, FORK_JOIN_MAX); + nr = FORK_JOIN_MAX; + } + + u32 cores[CPU_SETSIZE]; + u32 ncores = process_getaffinity_list(process_ncpu, cores); + if (unlikely(ncores == 0)) { // force to use all cores + ncores = process_ncpu; + for (u32 i = 0; i < process_ncpu; i++) + cores[i] = i; + } + if (unlikely(nr == 0)) + nr = ncores; + + // the compiler does not know fji can change since we cast &fji into fjp + struct fork_join_info fji = {.total = nr, .cores = cores, .ncores = ncores, + .func = func, .args = args, .arg1 = argx}; + const struct entry13 fjp = entry13(0, (u64)(&fji)); + + // save current affinity + cpu_set_t set0; + thread_getaffinity_set(&set0); + + // master thread shares thread0's core + cpu_set_t set; + CPU_ZERO(&set); + CPU_SET(fji.cores[0], &set); + thread_setaffinity_set(&set); + + const u64 t0 = time_nsec(); + (void)thread_do_fork_join_worker(fjp.ptr); + const u64 dt = time_diff_nsec(t0); + + // restore original affinity + thread_setaffinity_set(&set0); + + // check and report errors (unlikely) + if (atomic_load_explicit(&fji.xerr, MO_CONSUME)) + fprintf(stderr, "%s errors: fork %u join %u\n", __func__, fji.ferr, fji.jerr); + return dt; +} + + int +thread_create_at(const u32 cpu, pthread_t * const thread, + void *(*start_routine) (void *), void * const arg) +{ + const u32 cpu_id = (cpu < 
process_ncpu) ? cpu : (cpu % process_ncpu); + pthread_attr_t attr; + pthread_attr_init(&attr); + //pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); + cpu_set_t set; + + CPU_ZERO(&set); + CPU_SET(cpu_id, &set); + pthread_attr_setaffinity_np(&attr, sizeof(set), &set); + const int r = pthread_create(thread, &attr, start_routine, arg); + pthread_attr_destroy(&attr); + return r; +} +// }}} process/thread + +// locking {{{ + +// spinlock {{{ +#if defined(__linux__) +#define SPINLOCK_PTHREAD +#endif // __linux__ + +#if defined(SPINLOCK_PTHREAD) +static_assert(sizeof(pthread_spinlock_t) <= sizeof(spinlock), "spinlock size"); +#else // SPINLOCK_PTHREAD +static_assert(sizeof(au32) <= sizeof(spinlock), "spinlock size"); +#endif // SPINLOCK_PTHREAD + + void +spinlock_init(spinlock * const lock) +{ +#if defined(SPINLOCK_PTHREAD) + pthread_spinlock_t * const p = (typeof(p))lock; + pthread_spin_init(p, PTHREAD_PROCESS_PRIVATE); +#else // SPINLOCK_PTHREAD + au32 * const p = (typeof(p))lock; + atomic_store_explicit(p, 0, MO_RELEASE); +#endif // SPINLOCK_PTHREAD +} + + inline void +spinlock_lock(spinlock * const lock) +{ +#if defined(CORR) +#pragma nounroll + while (!spinlock_trylock(lock)) + corr_yield(); +#else // CORR +#if defined(SPINLOCK_PTHREAD) + pthread_spinlock_t * const p = (typeof(p))lock; + pthread_spin_lock(p); // return value ignored +#else // SPINLOCK_PTHREAD + au32 * const p = (typeof(p))lock; +#pragma nounroll + do { + if (atomic_fetch_sub_explicit(p, 1, MO_ACQUIRE) == 0) + return; +#pragma nounroll + do { + cpu_pause(); + } while (atomic_load_explicit(p, MO_CONSUME)); + } while (true); +#endif // SPINLOCK_PTHREAD +#endif // CORR +} + + inline bool +spinlock_trylock(spinlock * const lock) +{ +#if defined(SPINLOCK_PTHREAD) + pthread_spinlock_t * const p = (typeof(p))lock; + return !pthread_spin_trylock(p); +#else // SPINLOCK_PTHREAD + au32 * const p = (typeof(p))lock; + return atomic_fetch_sub_explicit(p, 1, MO_ACQUIRE) == 0; +#endif // SPINLOCK_PTHREAD 
+} + + inline void +spinlock_unlock(spinlock * const lock) +{ +#if defined(SPINLOCK_PTHREAD) + pthread_spinlock_t * const p = (typeof(p))lock; + pthread_spin_unlock(p); // return value ignored +#else // SPINLOCK_PTHREAD + au32 * const p = (typeof(p))lock; + atomic_store_explicit(p, 0, MO_RELEASE); +#endif // SPINLOCK_PTHREAD +} +// }}} spinlock + +// pthread mutex {{{ +static_assert(sizeof(pthread_mutex_t) <= sizeof(mutex), "mutexlock size"); + inline void +mutex_init(mutex * const lock) +{ + pthread_mutex_t * const p = (typeof(p))lock; + pthread_mutex_init(p, NULL); +} + + inline void +mutex_lock(mutex * const lock) +{ +#if defined(CORR) +#pragma nounroll + while (!mutex_trylock(lock)) + corr_yield(); +#else + pthread_mutex_t * const p = (typeof(p))lock; + pthread_mutex_lock(p); // return value ignored +#endif +} + + inline bool +mutex_trylock(mutex * const lock) +{ + pthread_mutex_t * const p = (typeof(p))lock; + return !pthread_mutex_trylock(p); // return value ignored +} + + inline void +mutex_unlock(mutex * const lock) +{ + pthread_mutex_t * const p = (typeof(p))lock; + pthread_mutex_unlock(p); // return value ignored +} + + inline void +mutex_deinit(mutex * const lock) +{ + pthread_mutex_t * const p = (typeof(p))lock; + pthread_mutex_destroy(p); +} +// }}} pthread mutex + +// rwdep {{{ +// poor man's lockdep for rwlock +// per-thread lock list +// it calls debug_die() when local double-(un)locking is detected +// cyclic dependencies can be manually identified by looking at the two lists below in gdb +#ifdef RWDEP +#define RWDEP_NR ((16)) +__thread const rwlock * rwdep_readers[RWDEP_NR] = {}; +__thread const rwlock * rwdep_writers[RWDEP_NR] = {}; + + static void +rwdep_check(const rwlock * const lock) +{ + debug_assert(lock); + for (u64 i = 0; i < RWDEP_NR; i++) { + if (rwdep_readers[i] == lock) + debug_die(); + if (rwdep_writers[i] == lock) + debug_die(); + } +} +#endif // RWDEP + + static void +rwdep_lock_read(const rwlock * const lock) +{ +#ifdef RWDEP + 
rwdep_check(lock); + for (u64 i = 0; i < RWDEP_NR; i++) { + if (rwdep_readers[i] == NULL) { + rwdep_readers[i] = lock; + return; + } + } +#else + (void)lock; +#endif // RWDEP +} + + static void +rwdep_unlock_read(const rwlock * const lock) +{ +#ifdef RWDEP + for (u64 i = 0; i < RWDEP_NR; i++) { + if (rwdep_readers[i] == lock) { + rwdep_readers[i] = NULL; + return; + } + } + debug_die(); +#else + (void)lock; +#endif // RWDEP +} + + static void +rwdep_lock_write(const rwlock * const lock) +{ +#ifdef RWDEP + rwdep_check(lock); + for (u64 i = 0; i < RWDEP_NR; i++) { + if (rwdep_writers[i] == NULL) { + rwdep_writers[i] = lock; + return; + } + } +#else + (void)lock; +#endif // RWDEP +} + + static void +rwdep_unlock_write(const rwlock * const lock) +{ +#ifdef RWDEP + for (u64 i = 0; i < RWDEP_NR; i++) { + if (rwdep_writers[i] == lock) { + rwdep_writers[i] = NULL; + return; + } + } + debug_die(); +#else + (void)lock; +#endif // RWDEP +} +// }}} rwlockdep + +// rwlock {{{ +typedef au32 lock_t; +typedef u32 lock_v; +static_assert(sizeof(lock_t) == sizeof(lock_v), "lock size"); +static_assert(sizeof(lock_t) <= sizeof(rwlock), "lock size"); + +#define RWLOCK_WSHIFT ((sizeof(lock_t) * 8 - 1)) +#define RWLOCK_WBIT ((((lock_v)1) << RWLOCK_WSHIFT)) + + inline void +rwlock_init(rwlock * const lock) +{ + lock_t * const pvar = (typeof(pvar))lock; + atomic_store_explicit(pvar, 0, MO_RELEASE); +} + + inline bool +rwlock_trylock_read(rwlock * const lock) +{ + lock_t * const pvar = (typeof(pvar))lock; + if ((atomic_fetch_add_explicit(pvar, 1, MO_ACQUIRE) >> RWLOCK_WSHIFT) == 0) { + rwdep_lock_read(lock); + return true; + } else { + atomic_fetch_sub_explicit(pvar, 1, MO_RELAXED); + return false; + } +} + + inline bool +rwlock_trylock_read_lp(rwlock * const lock) +{ + lock_t * const pvar = (typeof(pvar))lock; + if (atomic_load_explicit(pvar, MO_CONSUME) >> RWLOCK_WSHIFT) { + cpu_pause(); + return false; + } + return rwlock_trylock_read(lock); +} + +// actually nr + 1 + inline bool 
+rwlock_trylock_read_nr(rwlock * const lock, u16 nr) +{ + lock_t * const pvar = (typeof(pvar))lock; + if ((atomic_fetch_add_explicit(pvar, 1, MO_ACQUIRE) >> RWLOCK_WSHIFT) == 0) { + rwdep_lock_read(lock); + return true; + } + +#pragma nounroll + do { // someone already locked; wait for a little while + cpu_pause(); + if ((atomic_load_explicit(pvar, MO_CONSUME) >> RWLOCK_WSHIFT) == 0) { + rwdep_lock_read(lock); + return true; + } + } while (nr--); + + atomic_fetch_sub_explicit(pvar, 1, MO_RELAXED); + return false; +} + + inline void +rwlock_lock_read(rwlock * const lock) +{ + lock_t * const pvar = (typeof(pvar))lock; +#pragma nounroll + do { + if (rwlock_trylock_read(lock)) + return; +#pragma nounroll + do { +#if defined(CORR) + corr_yield(); +#else + cpu_pause(); +#endif + } while (atomic_load_explicit(pvar, MO_CONSUME) >> RWLOCK_WSHIFT); + } while (true); +} + + inline void +rwlock_unlock_read(rwlock * const lock) +{ + rwdep_unlock_read(lock); + lock_t * const pvar = (typeof(pvar))lock; + atomic_fetch_sub_explicit(pvar, 1, MO_RELEASE); +} + + inline bool +rwlock_trylock_write(rwlock * const lock) +{ + lock_t * const pvar = (typeof(pvar))lock; + lock_v v0 = atomic_load_explicit(pvar, MO_CONSUME); + if ((v0 == 0) && atomic_compare_exchange_weak_explicit(pvar, &v0, RWLOCK_WBIT, MO_ACQUIRE, MO_RELAXED)) { + rwdep_lock_write(lock); + return true; + } else { + return false; + } +} + +// actually nr + 1 + inline bool +rwlock_trylock_write_nr(rwlock * const lock, u16 nr) +{ +#pragma nounroll + do { + if (rwlock_trylock_write(lock)) + return true; + cpu_pause(); + } while (nr--); + return false; +} + + inline void +rwlock_lock_write(rwlock * const lock) +{ + lock_t * const pvar = (typeof(pvar))lock; +#pragma nounroll + do { + if (rwlock_trylock_write(lock)) + return; +#pragma nounroll + do { +#if defined(CORR) + corr_yield(); +#else + cpu_pause(); +#endif + } while (atomic_load_explicit(pvar, MO_CONSUME)); + } while (true); +} + + inline bool 
+rwlock_trylock_write_hp(rwlock * const lock) +{ + lock_t * const pvar = (typeof(pvar))lock; + lock_v v0 = atomic_load_explicit(pvar, MO_CONSUME); + if (v0 >> RWLOCK_WSHIFT) + return false; + + if (atomic_compare_exchange_weak_explicit(pvar, &v0, v0|RWLOCK_WBIT, MO_ACQUIRE, MO_RELAXED)) { + rwdep_lock_write(lock); + // WBIT successfully marked; must wait for readers to leave + if (v0) { // saw active readers +#pragma nounroll + while (atomic_load_explicit(pvar, MO_CONSUME) != RWLOCK_WBIT) { +#if defined(CORR) + corr_yield(); +#else + cpu_pause(); +#endif + } + } + return true; + } else { + return false; + } +} + + inline bool +rwlock_trylock_write_hp_nr(rwlock * const lock, u16 nr) +{ +#pragma nounroll + do { + if (rwlock_trylock_write_hp(lock)) + return true; + cpu_pause(); + } while (nr--); + return false; +} + + inline void +rwlock_lock_write_hp(rwlock * const lock) +{ +#pragma nounroll + while (!rwlock_trylock_write_hp(lock)) { +#if defined(CORR) + corr_yield(); +#else + cpu_pause(); +#endif + } +} + + inline void +rwlock_unlock_write(rwlock * const lock) +{ + rwdep_unlock_write(lock); + lock_t * const pvar = (typeof(pvar))lock; + atomic_fetch_sub_explicit(pvar, RWLOCK_WBIT, MO_RELEASE); +} + + inline void +rwlock_write_to_read(rwlock * const lock) +{ + rwdep_unlock_write(lock); + rwdep_lock_read(lock); + lock_t * const pvar = (typeof(pvar))lock; + // +R -W + atomic_fetch_add_explicit(pvar, ((lock_v)1) - RWLOCK_WBIT, MO_ACQ_REL); +} + +#undef RWLOCK_WSHIFT +#undef RWLOCK_WBIT +// }}} rwlock + +// }}} locking + +// coroutine {{{ + +// asm {{{ +#if defined(__x86_64__) +// number pushes in co_switch_stack +#define CO_CONTEXT_SIZE ((6)) + +// for switch/exit: pass a return value to the target +asm ( + ".align 16;" +#if defined(__linux__) || defined(__FreeBSD__) + ".global co_switch_stack;" + ".type co_switch_stack, @function;" + "co_switch_stack:" +#elif defined(__APPLE__) && defined(__MACH__) + ".global _co_switch_stack;" + "_co_switch_stack:" +#else +#error 
Supported platforms: Linux/FreeBSD/Apple +#endif // OS + "push %rbp; push %rbx; push %r12;" + "push %r13; push %r14; push %r15;" + "mov %rsp, (%rdi);" + "mov %rsi, %rsp;" + "pop %r15; pop %r14; pop %r13;" + "pop %r12; pop %rbx; pop %rbp;" + "mov %rdx, %rax;" + "retq;" + ); + +#elif defined(__aarch64__) +// number pushes in co_switch_stack +#define CO_CONTEXT_SIZE ((20)) +asm ( + ".align 16;" +#if defined(__linux__) || defined(__FreeBSD__) + ".global co_switch_stack;" + ".type co_switch_stack, @function;" + "co_switch_stack:" +#elif defined(__APPLE__) && defined(__MACH__) + ".global _co_switch_stack;" + "_co_switch_stack:" +#else +#error supported platforms: Linux/FreeBSD/Apple +#endif // OS + "sub x8, sp, 160;" + "str x8, [x0];" + "stp x30, x19, [x8]; ldp x30, x19, [x1];" + "stp x20, x21, [x8, 16]; ldp x20, x21, [x1, 16];" + "stp x22, x23, [x8, 32]; ldp x22, x23, [x1, 32];" + "stp x24, x25, [x8, 48]; ldp x24, x25, [x1, 48];" + "stp x26, x27, [x8, 64]; ldp x26, x27, [x1, 64];" + "stp x28, x29, [x8, 80]; ldp x28, x29, [x1, 80];" + "stp d8, d9, [x8, 96]; ldp d8, d9, [x1, 96];" + "stp d10, d11, [x8, 112]; ldp d10, d11, [x1, 112];" + "stp d12, d13, [x8, 128]; ldp d12, d13, [x1, 128];" + "stp d14, d15, [x8, 144]; ldp d14, d15, [x1, 144];" + "add sp, x1, 160;" + "mov x0, x2;" + "br x30;" + ); + +extern void co_entry_aarch64(void); +asm ( + ".align 16;" +#if defined(__linux__) || defined(__FreeBSD__) + ".global co_entry_aarch64;" + ".type co_entry_aarch64, @function;" + "co_entry_aarch64:" +#elif defined(__APPLE__) && defined(__MACH__) + ".global _co_entry_aarch64;" + "_co_entry_aarch64:" +#else +#error supported platforms: Linux/FreeBSD/Apple +#endif // OS + "ldr x8, [sp, 0];" + "blr x8;" + "ldr x8, [sp, 8];" + "blr x8;" + "ldr x8, [sp, 16];" + "blr x8;" + ); +#else +#error supported CPUs: x86_64 or AArch64 +#endif // co_switch_stack x86_64 and aarch64 +// }}} asm + +// co {{{ +struct co { + u64 rsp; + void * priv; + u64 * host; // set host to NULL to exit + size_t stksz; 
+}; + +// not atomic: no concurrent access +// volatile: avoid caching of co_curr +static __thread struct co * volatile co_curr = NULL; // NULL in host + +// the stack sits under the struct co + static void +co_init(struct co * const co, void * func, void * priv, u64 * const host, + const u64 stksz, void * func_exit) +{ + debug_assert((stksz & 0x3f) == 0); // a multiple of 64 bytes + u64 * rsp = ((u64 *)co) - 4; + rsp[0] = (u64)func; + rsp[1] = (u64)func_exit; + rsp[2] = (u64)debug_die; + rsp[3] = 0; + + rsp -= CO_CONTEXT_SIZE; + +#if defined(__aarch64__) + rsp[0] = (u64)co_entry_aarch64; +#endif + + co->rsp = (u64)rsp; + co->priv = priv; + co->host = host; + co->stksz = stksz; +} + + static void +co_exit0(void) +{ + co_exit(0); +} + + struct co * +co_create(const u64 stacksize, void * func, void * priv, u64 * const host) +{ + const u64 stksz = bits_round_up(stacksize, 6); + const size_t alloc_size = stksz + sizeof(struct co); + u8 * const mem = yalloc(alloc_size); + if (mem == NULL) + return NULL; + +#ifdef CO_STACK_CHECK + memset(mem, 0x5c, stksz); +#endif // CO_STACK_CHECK + + struct co * const co = (typeof(co))(mem + stksz); + co_init(co, func, priv, host, stksz, co_exit0); + return co; +} + + inline void +co_reuse(struct co * const co, void * func, void * priv, u64 * const host) +{ + co_init(co, func, priv, host, co->stksz, co_exit0); +} + + inline struct co * +co_fork(void * func, void * priv) +{ + return co_curr ? co_create(co_curr->stksz, func, priv, co_curr->host) : NULL; +} + + inline void * +co_priv(void) +{ + return co_curr ? co_curr->priv : NULL; +} + +// the host calls this to enter a coroutine. 
  inline u64
co_enter(struct co * const to, const u64 retval)
{
  // The host thread enters coroutine `to`; retval is delivered to the coroutine.
  // Returns the value the coroutine later passes to co_back()/co_exit().
  debug_assert(co_curr == NULL); // must enter from the host, never from a coroutine
  debug_assert(to && to->host);
  u64 * const save = to->host; // the host rsp is saved at *to->host by co_switch_stack
  co_curr = to;
  const u64 ret = co_switch_stack(save, to->rsp, retval);
  co_curr = NULL; // back on the host stack
  return ret;
}

// switch from a coroutine to another coroutine
// co_curr must be valid
// the target will resume and receive the retval
  inline u64
co_switch_to(struct co * const to, const u64 retval)
{
  debug_assert(co_curr);
  debug_assert(co_curr != to);
  debug_assert(to && to->host);
  struct co * const save = co_curr;
  co_curr = to;
  // save our rsp into save->rsp, resume `to` at its saved rsp
  return co_switch_stack(&(save->rsp), to->rsp, retval);
}

// switch from a coroutine to the host routine
// co_yield is now a c++ keyword...
  inline u64
co_back(const u64 retval)
{
  debug_assert(co_curr);
  struct co * const save = co_curr;
  co_curr = NULL;
  // *save->host holds the host's saved rsp (written by co_enter/co_switch_stack)
  return co_switch_stack(&(save->rsp), *(save->host), retval);
}

#ifdef CO_STACK_CHECK
// Scan the 0x5c fill pattern from the bottom of the stack to estimate and
// report peak stack usage; stops at the first overwritten word.
  static void
co_stack_check(const u8 * const mem, const u64 stksz)
{
  const u64 * const mem64 = (typeof(mem64))mem;
  const u64 size64 = stksz / sizeof(u64);
  for (u64 i = 0; i < size64; i++) {
    if (mem64[i] != 0x5c5c5c5c5c5c5c5clu) {
      fprintf(stderr, "%s co stack usage: %lu/%lu\n", __func__, stksz - (i * sizeof(u64)), stksz);
      break;
    }
  }
}
#endif // CO_STACK_CHECK

// return to host and set host to NULL
// Terminates the current coroutine: marks it invalid (host = NULL) and switches
// back to the host for the last time; control must never return here.
__attribute__((noreturn))
  void
co_exit(const u64 retval)
{
  debug_assert(co_curr);
#ifdef CO_STACK_CHECK
  const u64 stksz = co_curr->stksz;
  u8 * const mem = ((u8 *)co_curr) - stksz; // stack sits directly below struct co
  co_stack_check(mem, stksz);
#endif // CO_STACK_CHECK
  const u64 hostrsp = *(co_curr->host); // read the host rsp before invalidating
  co_curr->host = NULL; // co_valid() now reports false
  struct co * const save = co_curr;
  co_curr = NULL;
  (void)co_switch_stack(&(save->rsp), hostrsp, retval);
  // return to co_enter
  debug_die(); // unreachable: nobody may re-enter an exited coroutine
}

// host is set to NULL on exit
  inline bool
co_valid(struct co * const co)
{
  return co->host != NULL;
}
+// return NULL on host + inline struct co * +co_self(void) +{ + return co_curr; +} + + inline void +co_destroy(struct co * const co) +{ + u8 * const mem = ((u8 *)co) - co->stksz; + free(mem); +} +// }}} co + +// corr {{{ +struct corr { + struct co co; + struct corr * next; + struct corr * prev; +}; + +// initial and link guest to the run-queue + struct corr * +corr_create(const u64 stacksize, void * func, void * priv, u64 * const host) +{ + const u64 stksz = bits_round_up(stacksize, 6); + const size_t alloc_size = stksz + sizeof(struct corr); + u8 * const mem = yalloc(alloc_size); + if (mem == NULL) + return NULL; + +#ifdef CO_STACK_CHECK + memset(mem, 0x5c, stksz); +#endif // CO_STACK_CHECK + + struct corr * const co = (typeof(co))(mem + stksz); + co_init(&(co->co), func, priv, host, stksz, corr_exit); + co->next = co; + co->prev = co; + return co; +} + + struct corr * +corr_link(const u64 stacksize, void * func, void * priv, struct corr * const prev) +{ + const u64 stksz = bits_round_up(stacksize, 6); + const size_t alloc_size = stksz + sizeof(struct corr); + u8 * const mem = yalloc(alloc_size); + if (mem == NULL) + return NULL; + +#ifdef CO_STACK_CHECK + memset(mem, 0x5c, stksz); +#endif // CO_STACK_CHECK + + struct corr * const co = (typeof(co))(mem + stksz); + co_init(&(co->co), func, priv, prev->co.host, stksz, corr_exit); + co->next = prev->next; + co->prev = prev; + co->prev->next = co; + co->next->prev = co; + return co; +} + + inline void +corr_reuse(struct corr * const co, void * func, void * priv, u64 * const host) +{ + co_init(&(co->co), func, priv, host, co->co.stksz, corr_exit); + co->next = co; + co->prev = co; +} + + inline void +corr_relink(struct corr * const co, void * func, void * priv, struct corr * const prev) +{ + co_init(&(co->co), func, priv, prev->co.host, co->co.stksz, corr_exit); + co->next = prev->next; + co->prev = prev; + co->prev->next = co; + co->next->prev = co; +} + + inline void +corr_enter(struct corr * const co) +{ + 
(void)co_enter(&(co->co), 0); +} + + inline void +corr_yield(void) +{ + struct corr * const curr = (typeof(curr))co_curr; + if (curr && (curr->next != curr)) + (void)co_switch_to(&(curr->next->co), 0); +} + +__attribute__((noreturn)) + inline void +corr_exit(void) +{ + debug_assert(co_curr); +#ifdef CO_STACK_CHECK + const u64 stksz = co_curr->stksz; + const u8 * const mem = ((u8 *)(co_curr)) - stksz; + co_stack_check(mem, stksz); +#endif // CO_STACK_CHECK + + struct corr * const curr = (typeof(curr))co_curr; + if (curr->next != curr) { // have more corr + struct corr * const next = curr->next; + struct corr * const prev = curr->prev; + next->prev = prev; + prev->next = next; + curr->next = NULL; + curr->prev = NULL; + curr->co.host = NULL; // invalidate + (void)co_switch_to(&(next->co), 0); + } else { // the last corr + co_exit0(); + } + debug_die(); +} + + inline void +corr_destroy(struct corr * const co) +{ + co_destroy(&(co->co)); +} +// }}} corr + +// }}} co + +// bits {{{ + inline u32 +bits_reverse_u32(const u32 v) +{ + const u32 v2 = __builtin_bswap32(v); + const u32 v3 = ((v2 & 0xf0f0f0f0u) >> 4) | ((v2 & 0x0f0f0f0fu) << 4); + const u32 v4 = ((v3 & 0xccccccccu) >> 2) | ((v3 & 0x33333333u) << 2); + const u32 v5 = ((v4 & 0xaaaaaaaau) >> 1) | ((v4 & 0x55555555u) << 1); + return v5; +} + + inline u64 +bits_reverse_u64(const u64 v) +{ + const u64 v2 = __builtin_bswap64(v); + const u64 v3 = ((v2 & 0xf0f0f0f0f0f0f0f0lu) >> 4) | ((v2 & 0x0f0f0f0f0f0f0f0flu) << 4); + const u64 v4 = ((v3 & 0xcccccccccccccccclu) >> 2) | ((v3 & 0x3333333333333333lu) << 2); + const u64 v5 = ((v4 & 0xaaaaaaaaaaaaaaaalu) >> 1) | ((v4 & 0x5555555555555555lu) << 1); + return v5; +} + + inline u64 +bits_rotl_u64(const u64 v, const u8 n) +{ + const u8 sh = n & 0x3f; + return (v << sh) | (v >> (64 - sh)); +} + + inline u64 +bits_rotr_u64(const u64 v, const u8 n) +{ + const u8 sh = n & 0x3f; + return (v >> sh) | (v << (64 - sh)); +} + + inline u32 +bits_rotl_u32(const u32 v, const u8 n) +{ + 
const u8 sh = n & 0x1f; + return (v << sh) | (v >> (32 - sh)); +} + + inline u32 +bits_rotr_u32(const u32 v, const u8 n) +{ + const u8 sh = n & 0x1f; + return (v >> sh) | (v << (32 - sh)); +} + + inline u64 +bits_p2_up_u64(const u64 v) +{ + // clz(0) is undefined + return (v > 1) ? (1lu << (64 - __builtin_clzl(v - 1lu))) : v; +} + + inline u32 +bits_p2_up_u32(const u32 v) +{ + // clz(0) is undefined + return (v > 1) ? (1u << (32 - __builtin_clz(v - 1u))) : v; +} + + inline u64 +bits_p2_down_u64(const u64 v) +{ + return v ? (1lu << (63 - __builtin_clzl(v))) : v; +} + + inline u32 +bits_p2_down_u32(const u32 v) +{ + return v ? (1u << (31 - __builtin_clz(v))) : v; +} + + inline u64 +bits_round_up(const u64 v, const u8 power) +{ + return (v + (1lu << power) - 1lu) >> power << power; +} + + inline u64 +bits_round_up_a(const u64 v, const u64 a) +{ + return (v + a - 1) / a * a; +} + + inline u64 +bits_round_down(const u64 v, const u8 power) +{ + return v >> power << power; +} + + inline u64 +bits_round_down_a(const u64 v, const u64 a) +{ + return v / a * a; +} +// }}} bits + +// vi128 {{{ +#if defined(__GNUC__) && __GNUC__ >= 7 +#define FALLTHROUGH __attribute__ ((fallthrough)) +#else +#define FALLTHROUGH ((void)0) +#endif /* __GNUC__ >= 7 */ + + inline u32 +vi128_estimate_u32(const u32 v) +{ + static const u8 t[] = {5,5,5,5, + 4,4,4,4,4,4,4, 3,3,3,3,3,3,3, + 2,2,2,2,2,2,2, 1,1,1,1,1,1,1}; + return v ? 
t[__builtin_clz(v)] : 2; + // 0 -> [0x80 0x00] the first byte is non-zero + + // nz bit range -> enc length offset in t[] + // 0 -> 2 special case + // 1 to 7 -> 1 31 to 25 + // 8 to 14 -> 2 24 to 18 + // 15 to 21 -> 3 17 to 11 + // 22 to 28 -> 4 10 to 4 + // 29 to 32 -> 5 3 to 0 +} + + u8 * +vi128_encode_u32(u8 * dst, u32 v) +{ + switch (vi128_estimate_u32(v)) { + case 5: + *(dst++) = (u8)(v | 0x80); v >>= 7; FALLTHROUGH; + case 4: + *(dst++) = (u8)(v | 0x80); v >>= 7; FALLTHROUGH; + case 3: + *(dst++) = (u8)(v | 0x80); v >>= 7; FALLTHROUGH; + case 2: + *(dst++) = (u8)(v | 0x80); v >>= 7; FALLTHROUGH; + case 1: + *(dst++) = (u8)v; + break; + default: + debug_die(); + break; + } + return dst; +} + + const u8 * +vi128_decode_u32(const u8 * src, u32 * const out) +{ + debug_assert(*src); + u32 r = 0; + for (u32 shift = 0; shift < 32; shift += 7) { + const u8 byte = *(src++); + r |= (((u32)(byte & 0x7f)) << shift); + if ((byte & 0x80) == 0) { // No more bytes to consume + *out = r; + return src; + } + } + *out = 0; + return NULL; // invalid +} + + inline u32 +vi128_estimate_u64(const u64 v) +{ + static const u8 t[] = {10, + 9,9,9,9,9,9,9, 8,8,8,8,8,8,8, 7,7,7,7,7,7,7, + 6,6,6,6,6,6,6, 5,5,5,5,5,5,5, 4,4,4,4,4,4,4, + 3,3,3,3,3,3,3, 2,2,2,2,2,2,2, 1,1,1,1,1,1,1}; + return v ? 
t[__builtin_clzl(v)] : 2; +} + +// return ptr after the generated bytes + u8 * +vi128_encode_u64(u8 * dst, u64 v) +{ + switch (vi128_estimate_u64(v)) { + case 10: + *(dst++) = (u8)(v | 0x80); v >>= 7; FALLTHROUGH; + case 9: + *(dst++) = (u8)(v | 0x80); v >>= 7; FALLTHROUGH; + case 8: + *(dst++) = (u8)(v | 0x80); v >>= 7; FALLTHROUGH; + case 7: + *(dst++) = (u8)(v | 0x80); v >>= 7; FALLTHROUGH; + case 6: + *(dst++) = (u8)(v | 0x80); v >>= 7; FALLTHROUGH; + case 5: + *(dst++) = (u8)(v | 0x80); v >>= 7; FALLTHROUGH; + case 4: + *(dst++) = (u8)(v | 0x80); v >>= 7; FALLTHROUGH; + case 3: + *(dst++) = (u8)(v | 0x80); v >>= 7; FALLTHROUGH; + case 2: + *(dst++) = (u8)(v | 0x80); v >>= 7; FALLTHROUGH; + case 1: + *(dst++) = (u8)v; + break; + default: + debug_die(); + break; + } + return dst; +} + +// return ptr after the consumed bytes + const u8 * +vi128_decode_u64(const u8 * src, u64 * const out) +{ + u64 r = 0; + for (u32 shift = 0; shift < 64; shift += 7) { + const u8 byte = *(src++); + r |= (((u64)(byte & 0x7f)) << shift); + if ((byte & 0x80) == 0) { // No more bytes to consume + *out = r; + return src; + } + } + *out = 0; + return NULL; // invalid +} + +#undef FALLTHROUGH +// }}} vi128 + +// misc {{{ + inline struct entry13 +entry13(const u16 e1, const u64 e3) +{ + debug_assert((e3 >> 48) == 0); + return (struct entry13){.v64 = (e3 << 16) | e1}; +} + + inline void +entry13_update_e3(struct entry13 * const e, const u64 e3) +{ + debug_assert((e3 >> 48) == 0); + *e = entry13(e->e1, e3); +} + + inline void * +u64_to_ptr(const u64 v) +{ + return (void *)v; +} + + inline u64 +ptr_to_u64(const void * const ptr) +{ + return (u64)ptr; +} + +// portable malloc_usable_size + inline size_t +m_usable_size(void * const ptr) +{ +#if defined(__linux__) || defined(__FreeBSD__) + const size_t sz = malloc_usable_size(ptr); +#elif defined(__APPLE__) && defined(__MACH__) + const size_t sz = malloc_size(ptr); +#endif // OS + +#ifndef HEAPCHECKING + // valgrind and asan may return unaligned 
usable size + debug_assert((sz & 0x7lu) == 0); +#endif // HEAPCHECKING + + return sz; +} + + inline size_t +fdsize(const int fd) +{ + struct stat st; + st.st_size = 0; + if (fstat(fd, &st) != 0) + return 0; + + if (S_ISBLK(st.st_mode)) { +#if defined(__linux__) + ioctl(fd, BLKGETSIZE64, &st.st_size); +#elif defined(__APPLE__) && defined(__MACH__) + u64 blksz = 0; + u64 nblks = 0; + ioctl(fd, DKIOCGETBLOCKSIZE, &blksz); + ioctl(fd, DKIOCGETBLOCKCOUNT, &nblks); + st.st_size = (ssize_t)(blksz * nblks); +#elif defined(__FreeBSD__) + ioctl(fd, DIOCGMEDIASIZE, &st.st_size); +#endif // OS + } + + return (size_t)st.st_size; +} + + u32 +memlcp(const u8 * const p1, const u8 * const p2, const u32 max) +{ + const u32 max64 = max & (~7u); + u32 clen = 0; + while (clen < max64) { + const u64 v1 = *(const u64 *)(p1+clen); + const u64 v2 = *(const u64 *)(p2+clen); + const u64 x = v1 ^ v2; + if (x) + return clen + (u32)(__builtin_ctzl(x) >> 3); + + clen += sizeof(u64); + } + + if ((clen + sizeof(u32)) <= max) { + const u32 v1 = *(const u32 *)(p1+clen); + const u32 v2 = *(const u32 *)(p2+clen); + const u32 x = v1 ^ v2; + if (x) + return clen + (u32)(__builtin_ctz(x) >> 3); + + clen += sizeof(u32); + } + + while ((clen < max) && (p1[clen] == p2[clen])) + clen++; + return clen; +} + +static double logger_t0 = 0.0; + +__attribute__((constructor)) + static void +logger_init(void) +{ + logger_t0 = time_sec(); +} + +__attribute__ ((format (printf, 2, 3))) + void +logger_printf(const int fd, const char * const fmt, ...) 
+{ + char buf[4096]; + va_list ap; + va_start(ap, fmt); + vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + dprintf(fd, "%010.3lf %08x %s", time_diff_sec(logger_t0), crc32c_u64(0x12345678, (u64)pthread_self()), buf); +} +// }}} misc + +// astk {{{ +// atomic stack +struct acell { struct acell * next; }; + +// extract ptr from m value + static inline struct acell * +astk_ptr(const u64 m) +{ + return (struct acell *)(m >> 16); +} + +// calculate the new magic + static inline u64 +astk_m1(const u64 m0, struct acell * const ptr) +{ + return ((m0 + 1) & 0xfffflu) | (((u64)ptr) << 16); +} + +// calculate the new magic + static inline u64 +astk_m1_unsafe(struct acell * const ptr) +{ + return ((u64)ptr) << 16; +} + + static bool +astk_try_push(au64 * const pmagic, struct acell * const first, struct acell * const last) +{ + u64 m0 = atomic_load_explicit(pmagic, MO_CONSUME); + last->next = astk_ptr(m0); + const u64 m1 = astk_m1(m0, first); + return atomic_compare_exchange_weak_explicit(pmagic, &m0, m1, MO_RELEASE, MO_RELAXED); +} + + static void +astk_push_safe(au64 * const pmagic, struct acell * const first, struct acell * const last) +{ + while (!astk_try_push(pmagic, first, last)); +} + + static void +astk_push_unsafe(au64 * const pmagic, struct acell * const first, + struct acell * const last) +{ + const u64 m0 = atomic_load_explicit(pmagic, MO_CONSUME); + last->next = astk_ptr(m0); + const u64 m1 = astk_m1_unsafe(first); + atomic_store_explicit(pmagic, m1, MO_RELAXED); +} + +//// can fail for two reasons: (1) NULL: no available object; (2) ~0lu: contention +// static void * +//astk_try_pop(au64 * const pmagic) +//{ +// u64 m0 = atomic_load_explicit(pmagic, MO_CONSUME); +// struct acell * const ret = astk_ptr(m0); +// if (ret == NULL) +// return NULL; +// +// const u64 m1 = astk_m1(m0, ret->next); +// if (atomic_compare_exchange_weak_explicit(pmagic, &m0, m1, MO_ACQUIRE, MO_RELAXED)) +// return ret; +// else +// return (void *)(~0lu); +//} + + static void * 
astk_pop_safe(au64 * const pmagic)
{
  // Lock-free pop: retry the CAS until it succeeds or the stack is empty.
  // The 16-bit sequence number in the magic (bumped on every push, see
  // astk_m1) protects the CAS against ABA.
  do {
    u64 m0 = atomic_load_explicit(pmagic, MO_CONSUME);
    struct acell * const ret = astk_ptr(m0);
    if (ret == NULL)
      return NULL; // empty stack

    const u64 m1 = astk_m1(m0, ret->next);
    if (atomic_compare_exchange_weak_explicit(pmagic, &m0, m1, MO_ACQUIRE, MO_RELAXED))
      return ret;
  } while (true);
}

// Single-threaded pop: plain load/store, no CAS and no sequence bump.
  static void *
astk_pop_unsafe(au64 * const pmagic)
{
  const u64 m0 = atomic_load_explicit(pmagic, MO_CONSUME);
  struct acell * const ret = astk_ptr(m0);
  if (ret == NULL)
    return NULL;

  const u64 m1 = astk_m1_unsafe(ret->next);
  atomic_store_explicit(pmagic, m1, MO_RELAXED);
  return (void *)ret;
}

// Return the current top of the stack without removing it (single-threaded).
  static void *
astk_peek_unsafe(au64 * const pmagic)
{
  const u64 m0 = atomic_load_explicit(pmagic, MO_CONSUME);
  return astk_ptr(m0);
}
// }}} astk

// slab {{{
// Slab allocator: fixed-size objects carved out of large blocks, with a
// lock-free atomic stack (magic) of free objects. Cache-line layout is
// enforced by the static_assert below the struct.
#define SLAB_OBJ0_OFFSET ((64))
struct slab {
  au64 magic; // hi 48: ptr, lo 16: seq
  u64 padding1[7];

  // 2nd line
  struct acell * head_active; // list of blocks in use or in magic
  struct acell * head_backup; // list of unused full blocks
  u64 nr_ready; // UNSAFE only!
number of objects under magic + u64 padding2[5]; + + // 3rd line const + u64 obj_size; // const: aligned size of each object + u64 blk_size; // const: size of each memory block + u64 objs_per_slab; // const: number of objects in a slab + u64 obj0_offset; // const: offset of the first object in a block + u64 padding3[4]; + + // 4th line + union { + mutex lock; + u64 padding4[8]; + }; +}; +static_assert(sizeof(struct slab) == 256, "sizeof(struct slab) != 256"); + + static void +slab_add(struct slab * const slab, struct acell * const blk, const bool is_safe) +{ + // insert into head_active + blk->next = slab->head_active; + slab->head_active = blk; + + u8 * const base = ((u8 *)blk) + slab->obj0_offset; + struct acell * iter = (typeof(iter))base; // [0] + for (u64 i = 1; i < slab->objs_per_slab; i++) { + struct acell * const next = (typeof(next))(base + (i * slab->obj_size)); + iter->next = next; + iter = next; + } + + // base points to the first block; iter points to the last block + if (is_safe) { // other threads can poll magic + astk_push_safe(&slab->magic, (struct acell *)base, iter); + } else { // unsafe + astk_push_unsafe(&slab->magic, (struct acell *)base, iter); + slab->nr_ready += slab->objs_per_slab; + } +} + +// critical section; call with lock + static bool +slab_expand(struct slab * const slab, const bool is_safe) +{ + struct acell * const old = slab->head_backup; + if (old) { // pop old from backup and add + slab->head_backup = old->next; + slab_add(slab, old, is_safe); + } else { // more core + size_t blk_size; + struct acell * const new = pages_alloc_best(slab->blk_size, true, &blk_size); + (void)blk_size; + if (new == NULL) + return false; + + slab_add(slab, new, is_safe); + } + return true; +} + +// return 0 on failure; otherwise, obj0_offset + static u64 +slab_check_sizes(const u64 obj_size, const u64 blk_size) +{ + // obj must be non-zero and 8-byte aligned + // blk must be at least of page size and power of 2 + if ((!obj_size) || (obj_size % 8lu) 
|| (blk_size < 4096lu) || (blk_size & (blk_size - 1))) + return 0; + + // each slab should have at least one object + const u64 obj0_offset = (obj_size & (obj_size - 1)) ? SLAB_OBJ0_OFFSET : obj_size; + if (obj0_offset >= blk_size || (blk_size - obj0_offset) < obj_size) + return 0; + + return obj0_offset; +} + + static void +slab_init_internal(struct slab * const slab, const u64 obj_size, const u64 blk_size, const u64 obj0_offset) +{ + memset(slab, 0, sizeof(*slab)); + slab->obj_size = obj_size; + slab->blk_size = blk_size; + slab->objs_per_slab = (blk_size - obj0_offset) / obj_size; + debug_assert(slab->objs_per_slab); // >= 1 + slab->obj0_offset = obj0_offset; + mutex_init(&(slab->lock)); +} + + struct slab * +slab_create(const u64 obj_size, const u64 blk_size) +{ + const u64 obj0_offset = slab_check_sizes(obj_size, blk_size); + if (!obj0_offset) + return NULL; + + struct slab * const slab = yalloc(sizeof(*slab)); + if (slab == NULL) + return NULL; + + slab_init_internal(slab, obj_size, blk_size, obj0_offset); + return slab; +} + +// unsafe + bool +slab_reserve_unsafe(struct slab * const slab, const u64 nr) +{ + while (slab->nr_ready < nr) + if (!slab_expand(slab, false)) + return false; + return true; +} + + void * +slab_alloc_unsafe(struct slab * const slab) +{ + void * ret = astk_pop_unsafe(&slab->magic); + if (ret == NULL) { + if (!slab_expand(slab, false)) + return NULL; + ret = astk_pop_unsafe(&slab->magic); + } + debug_assert(ret); + slab->nr_ready--; + return ret; +} + + void * +slab_alloc_safe(struct slab * const slab) +{ + void * ret = astk_pop_safe(&slab->magic); + if (ret) + return ret; + + mutex_lock(&slab->lock); + do { + ret = astk_pop_safe(&slab->magic); // may already have new objs + if (ret) + break; + if (!slab_expand(slab, true)) + break; + } while (true); + mutex_unlock(&slab->lock); + return ret; +} + + void +slab_free_unsafe(struct slab * const slab, void * const ptr) +{ + debug_assert(ptr); + astk_push_unsafe(&slab->magic, ptr, ptr); + 
slab->nr_ready++; +} + + void +slab_free_safe(struct slab * const slab, void * const ptr) +{ + astk_push_safe(&slab->magic, ptr, ptr); +} + +// UNSAFE + void +slab_free_all(struct slab * const slab) +{ + slab->magic = 0; + slab->nr_ready = 0; // backup does not count + + if (slab->head_active) { + struct acell * iter = slab->head_active; + while (iter->next) + iter = iter->next; + // now iter points to the last blk + iter->next = slab->head_backup; // active..backup + slab->head_backup = slab->head_active; // backup gets all + slab->head_active = NULL; // empty active + } +} + +// unsafe + u64 +slab_get_nalloc(struct slab * const slab) +{ + struct acell * iter = slab->head_active; + u64 n = 0; + while (iter) { + n++; + iter = iter->next; + } + n *= slab->objs_per_slab; + + iter = astk_peek_unsafe(&slab->magic); + while (iter) { + n--; + iter = iter->next; + } + return n; +} + + static void +slab_deinit(struct slab * const slab) +{ + debug_assert(slab); + struct acell * iter = slab->head_active; + while (iter) { + struct acell * const next = iter->next; + pages_unmap(iter, slab->blk_size); + iter = next; + } + iter = slab->head_backup; + while (iter) { + struct acell * const next = iter->next; + pages_unmap(iter, slab->blk_size); + iter = next; + } +} + + void +slab_destroy(struct slab * const slab) +{ + slab_deinit(slab); + free(slab); +} +// }}} slab + +// string {{{ +static union { u16 v16; u8 v8[2]; } strdec_table[100]; + +__attribute__((constructor)) + static void +strdec_init(void) +{ + for (u8 i = 0; i < 100; i++) { + const u8 hi = (typeof(hi))('0' + (i / 10)); + const u8 lo = (typeof(lo))('0' + (i % 10)); + strdec_table[i].v8[0] = hi; + strdec_table[i].v8[1] = lo; + } +} + +// output 10 bytes + void +strdec_32(void * const out, const u32 v) +{ + u32 vv = v; + u16 * const ptr = (typeof(ptr))out; + for (u64 i = 4; i <= 4; i--) { // x5 + ptr[i] = strdec_table[vv % 100].v16; + vv /= 100u; + } +} + +// output 20 bytes + void +strdec_64(void * const out, const u64 
v) +{ + u64 vv = v; + u16 * const ptr = (typeof(ptr))out; + for (u64 i = 9; i <= 9; i--) { // x10 + ptr[i] = strdec_table[vv % 100].v16; + vv /= 100; + } +} + +static const u8 strhex_table_16[16] = {'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'}; + +#if defined(__x86_64__) + static inline m128 +strhex_helper(const u64 v) +{ + static const u8 mask1[16] = {15,7,14,6,13,5,12,4,11,3,10,2,9,1,8,0}; + + const m128 tmp = _mm_set_epi64x((s64)(v>>4), (s64)v); // mm want s64 + const m128 hilo = _mm_and_si128(tmp, _mm_set1_epi8(0xf)); + const m128 bin = _mm_shuffle_epi8(hilo, _mm_load_si128((void *)mask1)); + const m128 str = _mm_shuffle_epi8(_mm_load_si128((const void *)strhex_table_16), bin); + return str; +} +#elif defined(__aarch64__) + static inline m128 +strhex_helper(const u64 v) +{ + static const u8 mask1[16] = {15,7,14,6,13,5,12,4,11,3,10,2,9,1,8,0}; + u64 v2[2] = {v, v>>4}; + const m128 tmp = vld1q_u8((u8 *)v2); + const m128 hilo = vandq_u8(tmp, vdupq_n_u8(0xf)); + const m128 bin = vqtbl1q_u8(hilo, vld1q_u8(mask1)); + const m128 str = vqtbl1q_u8(vld1q_u8(strhex_table_16), bin); + return str; +} +#else +static u16 strhex_table_256[256]; + +__attribute__((constructor)) + static void +strhex_init(void) +{ + for (u64 i = 0; i < 256; i++) + strhex_table_256[i] = (((u16)strhex_table_16[i & 0xf]) << 8) | (strhex_table_16[i>>4]); +} +#endif // __x86_64__ + +// output 8 bytes + void +strhex_32(void * const out, u32 v) +{ +#if defined(__x86_64__) + const m128 str = strhex_helper((u64)v); + _mm_storel_epi64(out, _mm_srli_si128(str, 8)); +#elif defined(__aarch64__) + const m128 str = strhex_helper((u64)v); + vst1q_lane_u64(out, vreinterpretq_u64_u8(str), 1); +#else + u16 * const ptr = (typeof(ptr))out; + for (u64 i = 0; i < 4; i++) { + ptr[3-i] = strhex_table_256[v & 0xff]; + v >>= 8; + } +#endif +} + +// output 16 bytes // buffer must be aligned by 16B + void +strhex_64(void * const out, u64 v) +{ +#if defined(__x86_64__) + const m128 str = strhex_helper(v); 
+ _mm_storeu_si128(out, str); +#elif defined(__aarch64__) + const m128 str = strhex_helper(v); + vst1q_u8(out, str); +#else + u16 * const ptr = (typeof(ptr))out; + for (u64 i = 0; i < 8; i++) { + ptr[7-i] = strhex_table_256[v & 0xff]; + v >>= 8; + } +#endif +} + +// string to u64 + inline u64 +a2u64(const void * const str) +{ + return strtoull(str, NULL, 10); +} + + inline u32 +a2u32(const void * const str) +{ + return (u32)strtoull(str, NULL, 10); +} + + inline s64 +a2s64(const void * const str) +{ + return strtoll(str, NULL, 10); +} + + inline s32 +a2s32(const void * const str) +{ + return (s32)strtoll(str, NULL, 10); +} + + void +str_print_hex(FILE * const out, const void * const data, const u32 len) +{ + const u8 * const ptr = data; + const u32 strsz = len * 3; + u8 * const buf = malloc(strsz); + for (u32 i = 0; i < len; i++) { + buf[i*3] = ' '; + buf[i*3+1] = strhex_table_16[ptr[i]>>4]; + buf[i*3+2] = strhex_table_16[ptr[i] & 0xf]; + } + fwrite(buf, strsz, 1, out); + free(buf); +} + + void +str_print_dec(FILE * const out, const void * const data, const u32 len) +{ + const u8 * const ptr = data; + const u32 strsz = len * 4; + u8 * const buf = malloc(strsz); + for (u32 i = 0; i < len; i++) { + const u8 v = ptr[i]; + buf[i*4] = ' '; + const u8 v1 = v / 100u; + const u8 v23 = v % 100u; + buf[i*4+1] = (u8)'0' + v1; + buf[i*4+2] = (u8)'0' + (v23 / 10u); + buf[i*4+3] = (u8)'0' + (v23 % 10u); + } + fwrite(buf, strsz, 1, out); + free(buf); +} + +// returns a NULL-terminated list of string tokens. +// After use you only need to free the returned pointer (char **). 
+ char ** +strtoks(const char * const str, const char * const delim) +{ + if (str == NULL) + return NULL; + size_t nptr_alloc = 32; + char ** tokens = malloc(sizeof(tokens[0]) * nptr_alloc); + if (tokens == NULL) + return NULL; + const size_t bufsize = strlen(str) + 1; + char * const buf = malloc(bufsize); + if (buf == NULL) + goto fail_buf; + + memcpy(buf, str, bufsize); + char * saveptr = NULL; + char * tok = strtok_r(buf, delim, &saveptr); + size_t ntoks = 0; + while (tok) { + if (ntoks >= nptr_alloc) { + nptr_alloc += 32; + char ** const r = realloc(tokens, sizeof(tokens[0]) * nptr_alloc); + if (r == NULL) + goto fail_realloc; + + tokens = r; + } + tokens[ntoks] = tok; + ntoks++; + tok = strtok_r(NULL, delim, &saveptr); + } + tokens[ntoks] = NULL; + const size_t nptr = ntoks + 1; // append a NULL + const size_t rsize = (sizeof(tokens[0]) * nptr) + bufsize; + char ** const r = realloc(tokens, rsize); + if (r == NULL) + goto fail_realloc; + + tokens = r; + char * const dest = (char *)(&(tokens[nptr])); + memcpy(dest, buf, bufsize); + for (u64 i = 0; i < ntoks; i++) + tokens[i] += (dest - buf); + + free(buf); + return tokens; + +fail_realloc: + free(buf); +fail_buf: + free(tokens); + return NULL; +} + + u32 +strtoks_count(const char * const * const toks) +{ + if (!toks) + return 0; + u32 n = 0; + while (toks[n++]); + return n; +} +// }}} string + +// qsbr {{{ +#define QSBR_STATES_NR ((23)) // shard capacity; valid values are 3*8-1 == 23; 5*8-1 == 39; 7*8-1 == 55 +#define QSBR_SHARD_BITS ((5)) // 2^n shards +#define QSBR_SHARD_NR (((1u) << QSBR_SHARD_BITS)) +#define QSBR_SHARD_MASK ((QSBR_SHARD_NR - 1)) + +struct qsbr_ref_real { +#ifdef QSBR_DEBUG + pthread_t ptid; // 8 + u32 status; // 4 + u32 nbt; // 4 (number of backtrace frames) +#define QSBR_DEBUG_BTNR ((14)) + void * backtrace[QSBR_DEBUG_BTNR]; +#endif + au64 qstate; // user updates it + au64 * pptr; // internal only + struct qsbr_ref_real * park; +}; + +static_assert(sizeof(struct qsbr_ref) == sizeof(struct 
qsbr_ref_real), "sizeof qsbr_ref"); + +// Quiescent-State-Based Reclamation RCU +struct qsbr { + struct qsbr_ref_real target; + u64 padding0[5]; + struct qshard { + au64 bitmap; + au64 ptrs[QSBR_STATES_NR]; + } shards[QSBR_SHARD_NR]; +}; + + struct qsbr * +qsbr_create(void) +{ + struct qsbr * const q = yalloc(sizeof(*q)); + memset(q, 0, sizeof(*q)); + return q; +} + + static inline struct qshard * +qsbr_shard(struct qsbr * const q, void * const ptr) +{ + const u32 sid = crc32c_u64(0, (u64)ptr) & QSBR_SHARD_MASK; + debug_assert(sid < QSBR_SHARD_NR); + return &(q->shards[sid]); +} + + static inline void +qsbr_write_qstate(struct qsbr_ref_real * const ref, const u64 v) +{ + atomic_store_explicit(&ref->qstate, v, MO_RELAXED); +} + + bool +qsbr_register(struct qsbr * const q, struct qsbr_ref * const qref) +{ + struct qsbr_ref_real * const ref = (typeof(ref))qref; + struct qshard * const shard = qsbr_shard(q, ref); + qsbr_write_qstate(ref, 0); + + do { + u64 bits = atomic_load_explicit(&shard->bitmap, MO_CONSUME); + const u32 pos = (u32)__builtin_ctzl(~bits); + if (unlikely(pos >= QSBR_STATES_NR)) + return false; + + const u64 bits1 = bits | (1lu << pos); + if (atomic_compare_exchange_weak_explicit(&shard->bitmap, &bits, bits1, MO_ACQUIRE, MO_RELAXED)) { + atomic_store_explicit(&shard->ptrs[pos], (u64)ref, MO_RELAXED); + //shard->ptrs[pos] = ref; + + ref->pptr = &(shard->ptrs[pos]); + ref->park = &q->target; +#ifdef QSBR_DEBUG + ref->ptid = (u64)pthread_self(); + ref->tid = 0; + ref->status = 1; + ref->nbt = backtrace(ref->backtrace, QSBR_DEBUG_BTNR); +#endif + return true; + } + } while (true); +} + + void +qsbr_unregister(struct qsbr * const q, struct qsbr_ref * const qref) +{ + struct qsbr_ref_real * const ref = (typeof(ref))qref; + struct qshard * const shard = qsbr_shard(q, ref); + const u32 pos = (u32)(ref->pptr - shard->ptrs); + debug_assert(pos < QSBR_STATES_NR); + debug_assert(shard->bitmap & (1lu << pos)); + + atomic_store_explicit(&shard->ptrs[pos], 
(u64)(&q->target), MO_RELAXED); + //shard->ptrs[pos] = &q->target; + (void)atomic_fetch_and_explicit(&shard->bitmap, ~(1lu << pos), MO_RELEASE); +#ifdef QSBR_DEBUG + ref->tid = 0; + ref->ptid = 0; + ref->status = 0xffff; // unregistered + ref->nbt = 0; +#endif + ref->pptr = NULL; + // wait for qsbr_wait to leave if it's working on the shard + while (atomic_load_explicit(&shard->bitmap, MO_CONSUME) >> 63) + cpu_pause(); +} + + inline void +qsbr_update(struct qsbr_ref * const qref, const u64 v) +{ + struct qsbr_ref_real * const ref = (typeof(ref))qref; + debug_assert((*ref->pptr) == (u64)ref); // must be unparked + // rcu update does not require release or acquire order + qsbr_write_qstate(ref, v); +} + + inline void +qsbr_park(struct qsbr_ref * const qref) +{ + cpu_cfence(); + struct qsbr_ref_real * const ref = (typeof(ref))qref; + atomic_store_explicit(ref->pptr, (u64)ref->park, MO_RELAXED); +#ifdef QSBR_DEBUG + ref->status = 0xfff; // parked +#endif +} + + inline void +qsbr_resume(struct qsbr_ref * const qref) +{ + struct qsbr_ref_real * const ref = (typeof(ref))qref; + atomic_store_explicit(ref->pptr, (u64)ref, MO_RELAXED); +#ifdef QSBR_DEBUG + ref->status = 0xf; // resumed +#endif + cpu_cfence(); +} + +// waiters needs external synchronization + void +qsbr_wait(struct qsbr * const q, const u64 target) +{ + cpu_cfence(); + qsbr_write_qstate(&q->target, target); + u64 cbits = 0; // check-bits; each bit corresponds to a shard + u64 bms[QSBR_SHARD_NR]; // copy of all bitmap + // take an unsafe snapshot of active users + for (u32 i = 0; i < QSBR_SHARD_NR; i++) { + bms[i] = atomic_load_explicit(&q->shards[i].bitmap, MO_CONSUME); + if (bms[i]) + cbits |= (1lu << i); // set to 1 if [i] has ptrs + } + + while (cbits) { + for (u64 ctmp = cbits; ctmp; ctmp &= (ctmp - 1)) { + // shard id + const u32 i = (u32)__builtin_ctzl(ctmp); + struct qshard * const shard = &(q->shards[i]); + const u64 bits1 = atomic_fetch_or_explicit(&(shard->bitmap), 1lu << 63, MO_ACQUIRE); + for (u64 
bits = bms[i]; bits; bits &= (bits - 1)) { + const u64 bit = bits & -bits; // extract lowest bit + if ((bits1 & bit) == 0) { + bms[i] &= ~bit; + } else { + au64 * pptr = &(shard->ptrs[__builtin_ctzl(bit)]); + struct qsbr_ref_real * const ptr = (typeof(ptr))atomic_load_explicit(pptr, MO_RELAXED); + if (atomic_load_explicit(&(ptr->qstate), MO_CONSUME) == target) + bms[i] &= ~bit; + } + } + (void)atomic_fetch_and_explicit(&(shard->bitmap), ~(1lu << 63), MO_RELEASE); + if (bms[i] == 0) + cbits &= ~(1lu << i); + } +#if defined(CORR) + corr_yield(); +#endif + } + debug_assert(cbits == 0); + cpu_cfence(); +} + + void +qsbr_destroy(struct qsbr * const q) +{ + if (q) + free(q); +} +#undef QSBR_STATES_NR +#undef QSBR_BITMAP_NR +// }}} qsbr + +// vim:fdm=marker diff --git a/test/MassTrie-beta/wormhole/lib.h b/test/MassTrie-beta/wormhole/lib.h new file mode 100644 index 00000000..40a2f40d --- /dev/null +++ b/test/MassTrie-beta/wormhole/lib.h @@ -0,0 +1,688 @@ +/* + * Copyright (c) 2016--2021 Wu, Xingbo + * + * All rights reserved. No warranty, explicit or implicit, provided. 
+ */ +#pragma once + +// includes {{{ +// C headers +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// POSIX headers +#include +#include +#include + +// Linux headers +#include +#include +#include +#include + +// SIMD +#if defined(__x86_64__) +#include +#elif defined(__aarch64__) +#include +#include +#endif +// }}} includes + +#ifdef __cplusplus +extern "C" { +#endif + +// types {{{ +#ifndef typeof +#define typeof __typeof__ +#endif +#ifndef asm +#define asm __asm__ +#endif +typedef char s8; +typedef short s16; +typedef int s32; +typedef long s64; +typedef __int128_t s128; +static_assert(sizeof(s8) == 1, "sizeof(s8)"); +static_assert(sizeof(s16) == 2, "sizeof(s16)"); +static_assert(sizeof(s32) == 4, "sizeof(s32)"); +static_assert(sizeof(s64) == 8, "sizeof(s64)"); +static_assert(sizeof(s128) == 16, "sizeof(s128)"); + +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; +typedef unsigned long u64; +typedef __uint128_t u128; +static_assert(sizeof(u8) == 1, "sizeof(u8)"); +static_assert(sizeof(u16) == 2, "sizeof(u16)"); +static_assert(sizeof(u32) == 4, "sizeof(u32)"); +static_assert(sizeof(u64) == 8, "sizeof(u64)"); +static_assert(sizeof(u128) == 16, "sizeof(u128)"); + +#if defined(__x86_64__) +typedef __m128i m128; +#if defined(__AVX2__) +typedef __m256i m256; +#endif // __AVX2__ +#if defined(__AVX512F__) +typedef __m512i m512; +#endif // __AVX512F__ +#elif defined(__aarch64__) +typedef uint8x16_t m128; +#else +#error Need x86_64 or AArch64. 
+#endif +// }}} types + +// defs {{{ +#define likely(____x____) __builtin_expect(____x____, 1) +#define unlikely(____x____) __builtin_expect(____x____, 0) + +// ansi colors +// 3X:fg; 4X:bg; 9X:light fg; 10X:light bg; +// X can be one of the following colors: +// 0:black; 1:red; 2:green; 3:yellow; +// 4:blue; 5:magenta; 6:cyan; 7:white; +#define TERMCLR(____code____) "\x1b[" #____code____ "m" +// }}} defs + +// const {{{ +#define PGBITS ((12)) +#define PGSZ ((1lu << PGBITS)) +// }}} const + +// math {{{ + extern u64 +mhash64(const u64 v); + + extern u32 +mhash32(const u32 v); + + extern u64 +gcd64(u64 a, u64 b); +// }}} math + +// random {{{ + extern u64 +random_u64(void); + + extern void +srandom_u64(const u64 seed); + + extern double +random_double(void); +// }}} random + +// timing {{{ + extern u64 +time_nsec(void); + + extern double +time_sec(void); + + extern u64 +time_diff_nsec(const u64 last); + + extern double +time_diff_sec(const double last); + + extern void +time_stamp(char * str, const size_t size); + + extern void +time_stamp2(char * str, const size_t size); +// }}} timing + +// cpucache {{{ + extern void +cpu_pause(void); + + extern void +cpu_mfence(void); + + extern void +cpu_cfence(void); + + extern void +cpu_prefetch0(const void * const ptr); + + extern void +cpu_prefetch1(const void * const ptr); + + extern void +cpu_prefetch2(const void * const ptr); + + extern void +cpu_prefetch3(const void * const ptr); + + extern void +cpu_prefetchw(const void * const ptr); +// }}} cpucache + +// crc32c {{{ + extern u32 +crc32c_u8(const u32 crc, const u8 v); + + extern u32 +crc32c_u16(const u32 crc, const u16 v); + + extern u32 +crc32c_u32(const u32 crc, const u32 v); + + extern u32 +crc32c_u64(const u32 crc, const u64 v); + +// 1 <= nr <= 3 + extern u32 +crc32c_inc_123(const u8 * buf, u32 nr, u32 crc); + +// nr % 4 == 0 + extern u32 +crc32c_inc_x4(const u8 * buf, u32 nr, u32 crc); + + extern u32 +crc32c_inc(const u8 * buf, u32 nr, u32 crc); +// }}} crc32c + 
+// debug {{{ + extern void +debug_break(void); + + extern void +debug_backtrace(void); + + extern void +watch_u64_usr1(u64 * const ptr); + +#ifndef NDEBUG + extern void +debug_assert(const bool v); +#else +#define debug_assert(expr) ((void)0) +#endif + +__attribute__((noreturn)) + extern void +debug_die(void); + +__attribute__((noreturn)) + extern void +debug_die_perror(void); + + extern void +debug_dump_maps(FILE * const out); + + extern bool +debug_perf_switch(void); +// }}} debug + +// mm {{{ +#ifdef ALLOCFAIL + extern bool +alloc_fail(void); +#endif + + extern void * +xalloc(const size_t align, const size_t size); + + extern void * +yalloc(const size_t size); + + extern void ** +malloc_2d(const size_t nr, const size_t size); + + extern void ** +calloc_2d(const size_t nr, const size_t size); + + extern void +pages_unmap(void * const ptr, const size_t size); + + extern void +pages_lock(void * const ptr, const size_t size); + +/* hugepages */ +// force posix allocators: -DVALGRIND_MEMCHECK + extern void * +pages_alloc_4kb(const size_t nr_4kb); + + extern void * +pages_alloc_2mb(const size_t nr_2mb); + + extern void * +pages_alloc_1gb(const size_t nr_1gb); + + extern void * +pages_alloc_best(const size_t size, const bool try_1gb, u64 * const size_out); +// }}} mm + +// process/thread {{{ + extern void +thread_get_name(const pthread_t pt, char * const name, const size_t len); + + extern void +thread_set_name(const pthread_t pt, const char * const name); + + extern long +process_get_rss(void); + + extern u32 +process_affinity_count(void); + + extern u32 +process_getaffinity_list(const u32 max, u32 * const cores); + + extern void +thread_setaffinity_list(const u32 nr, const u32 * const list); + + extern void +thread_pin(const u32 cpu); + + extern u64 +process_cpu_time_usec(void); + +// if args == true, argx is void ** +// if args == false, argx is void * + extern u64 +thread_fork_join(u32 nr, void *(*func) (void *), const bool args, void * const argx); + + extern int 
+thread_create_at(const u32 cpu, pthread_t * const thread, void *(*start_routine) (void *), void * const arg); +// }}} process/thread + +// locking {{{ +typedef union { + u32 opaque; +} spinlock; + + extern void +spinlock_init(spinlock * const lock); + + extern void +spinlock_lock(spinlock * const lock); + + extern bool +spinlock_trylock(spinlock * const lock); + + extern void +spinlock_unlock(spinlock * const lock); + +typedef union { + u32 opaque; +} rwlock; + + extern void +rwlock_init(rwlock * const lock); + + extern bool +rwlock_trylock_read(rwlock * const lock); + +// low-priority reader-lock; use with trylock_write_hp + extern bool +rwlock_trylock_read_lp(rwlock * const lock); + + extern bool +rwlock_trylock_read_nr(rwlock * const lock, u16 nr); + + extern void +rwlock_lock_read(rwlock * const lock); + + extern void +rwlock_unlock_read(rwlock * const lock); + + extern bool +rwlock_trylock_write(rwlock * const lock); + + extern bool +rwlock_trylock_write_nr(rwlock * const lock, u16 nr); + + extern void +rwlock_lock_write(rwlock * const lock); + +// writer has higher priority; new readers are blocked + extern bool +rwlock_trylock_write_hp(rwlock * const lock); + + extern bool +rwlock_trylock_write_hp_nr(rwlock * const lock, u16 nr); + + extern void +rwlock_lock_write_hp(rwlock * const lock); + + extern void +rwlock_unlock_write(rwlock * const lock); + + extern void +rwlock_write_to_read(rwlock * const lock); + +typedef union { + u64 opqaue[8]; +} mutex; + + extern void +mutex_init(mutex * const lock); + + extern void +mutex_lock(mutex * const lock); + + extern bool +mutex_trylock(mutex * const lock); + + extern void +mutex_unlock(mutex * const lock); + + extern void +mutex_deinit(mutex * const lock); +// }}} locking + +// coroutine {{{ +extern u64 co_switch_stack(u64 * const saversp, const u64 newrsp, const u64 retval); + +struct co; + + extern struct co * +co_create(const u64 stacksize, void * func, void * priv, u64 * const host); + + extern void 
+co_reuse(struct co * const co, void * func, void * priv, u64 * const host); + + extern struct co * +co_fork(void * func, void * priv); + + extern void * +co_priv(void); + + extern u64 +co_enter(struct co * const to, const u64 retval); + + extern u64 +co_switch_to(struct co * const to, const u64 retval); + + extern u64 +co_back(const u64 retval); + + extern void +co_exit(const u64 retval); + + extern bool +co_valid(struct co * const co); + + extern struct co * +co_self(void); + + extern void +co_destroy(struct co * const co); + +struct corr; + + extern struct corr * +corr_create(const u64 stacksize, void * func, void * priv, u64 * const host); + + extern struct corr * +corr_link(const u64 stacksize, void * func, void * priv, struct corr * const prev); + + extern void +corr_reuse(struct corr * const co, void * func, void * priv, u64 * const host); + + extern void +corr_relink(struct corr * const co, void * func, void * priv, struct corr * const prev); + + extern void +corr_enter(struct corr * const co); + + extern void +corr_yield(void); + + extern void +corr_exit(void); + + extern void +corr_destroy(struct corr * const co); +// }}} coroutine + +// bits {{{ + extern u32 +bits_reverse_u32(const u32 v); + + extern u64 +bits_reverse_u64(const u64 v); + + extern u64 +bits_rotl_u64(const u64 v, const u8 n); + + extern u64 +bits_rotr_u64(const u64 v, const u8 n); + + extern u32 +bits_rotl_u32(const u32 v, const u8 n); + + extern u32 +bits_rotr_u32(const u32 v, const u8 n); + + extern u64 +bits_p2_up_u64(const u64 v); + + extern u32 +bits_p2_up_u32(const u32 v); + + extern u64 +bits_p2_down_u64(const u64 v); + + extern u32 +bits_p2_down_u32(const u32 v); + + extern u64 +bits_round_up(const u64 v, const u8 power); + + extern u64 +bits_round_up_a(const u64 v, const u64 a); + + extern u64 +bits_round_down(const u64 v, const u8 power); + + extern u64 +bits_round_down_a(const u64 v, const u64 a); +// }}} bits + +// vi128 {{{ + extern u32 +vi128_estimate_u32(const u32 v); + + 
extern u8 * +vi128_encode_u32(u8 * dst, u32 v); + + extern const u8 * +vi128_decode_u32(const u8 * src, u32 * const out); + + extern u32 +vi128_estimate_u64(const u64 v); + + extern u8 * +vi128_encode_u64(u8 * dst, u64 v); + + extern const u8 * +vi128_decode_u64(const u8 * src, u64 * const out); +// }}} vi128 + +// misc {{{ +// TODO: only works on little endian? +struct entry13 { // what a beautiful name + union { + u16 e1; + struct { // easy for debugging + u64 e1_64:16; + u64 e3:48; + }; + u64 v64; + void * ptr; + }; +}; + +static_assert(sizeof(struct entry13) == 8, "sizeof(entry13) != 8"); + +// directly access read .e1 and .e3 +// directly write .e1 +// use entry13_update() to update the entire entry + + extern struct entry13 +entry13(const u16 e1, const u64 e3); + + extern void +entry13_update_e3(struct entry13 * const e, const u64 e3); + + extern void * +u64_to_ptr(const u64 v); + + extern u64 +ptr_to_u64(const void * const ptr); + + extern size_t +m_usable_size(void * const ptr); + + extern size_t +fdsize(const int fd); + + extern u32 +memlcp(const u8 * const p1, const u8 * const p2, const u32 max); + +__attribute__ ((format (printf, 2, 3))) + extern void +logger_printf(const int fd, const char * const fmt, ...); +// }}} misc + +// slab {{{ +struct slab; + + extern struct slab * +slab_create(const u64 obj_size, const u64 blk_size); + + extern bool +slab_reserve_unsafe(struct slab * const slab, const u64 nr); + + extern void * +slab_alloc_unsafe(struct slab * const slab); + + extern void * +slab_alloc_safe(struct slab * const slab); + + extern void +slab_free_unsafe(struct slab * const slab, void * const ptr); + + extern void +slab_free_safe(struct slab * const slab, void * const ptr); + + extern void +slab_free_all(struct slab * const slab); + + extern u64 +slab_get_nalloc(struct slab * const slab); + + extern void +slab_destroy(struct slab * const slab); +// }}} slab + +// string {{{ +// XXX strdec_ and strhex_ functions does not append the trailing '\0' to 
the output string +// size of out should be >= 10 + extern void +strdec_32(void * const out, const u32 v); + +// size of out should be >= 20 + extern void +strdec_64(void * const out, const u64 v); + +// size of out should be >= 8 + extern void +strhex_32(void * const out, const u32 v); + +// size of out should be >= 16 + extern void +strhex_64(void * const out, const u64 v); + + extern u64 +a2u64(const void * const str); + + extern u32 +a2u32(const void * const str); + + extern s64 +a2s64(const void * const str); + + extern s32 +a2s32(const void * const str); + + extern void +str_print_hex(FILE * const out, const void * const data, const u32 len); + + extern void +str_print_dec(FILE * const out, const void * const data, const u32 len); + +// user should free returned ptr (and nothing else) after use + extern char ** +strtoks(const char * const str, const char * const delim); + + extern u32 +strtoks_count(const char * const * const toks); +// }}} string + +// qsbr {{{ +// QSBR vs EBR (Quiescent-State vs Epoch Based Reclaimation) +// QSBR: readers just use qsbr_update -> qsbr_update -> ... repeatedly +// EBR: readers use qsbr_update -> qsbr_park -> qsbr_resume -> qsbr_update -> ... 
+// The advantage of EBR is qsbr_park can happen much earlier than the next qsbr_update +// The disadvantage is the extra cost, a pair of park/resume is used in every iteration +struct qsbr; +struct qsbr_ref { +#ifdef QSBR_DEBUG + u64 debug[16]; +#endif + u64 opaque[3]; +}; + + extern struct qsbr * +qsbr_create(void); + +// every READER accessing the shared data must first register itself with the qsbr + extern bool +qsbr_register(struct qsbr * const q, struct qsbr_ref * const qref); + + extern void +qsbr_unregister(struct qsbr * const q, struct qsbr_ref * const qref); + +// For READER: mark the beginning of critical section; like rcu_read_lock() + extern void +qsbr_update(struct qsbr_ref * const qref, const u64 v); + +// temporarily stop access the shared data to avoid blocking writers +// READER can use qsbr_park (like rcu_read_unlock()) in conjunction with qsbr_update +// qsbr_park is roughly equivalent to qsbr_unregister, but faster + extern void +qsbr_park(struct qsbr_ref * const qref); + +// undo the effect of qsbr_park; must use it between qsbr_park and qsbr_update +// qsbr_resume is roughly equivalent to qsbr_register, but faster + extern void +qsbr_resume(struct qsbr_ref * const qref); + +// WRITER: wait until all the readers have announced v=target with qsbr_update + extern void +qsbr_wait(struct qsbr * const q, const u64 target); + + extern void +qsbr_destroy(struct qsbr * const q); +// }}} qsbr + +#ifdef __cplusplus +} +#endif +// vim:fdm=marker diff --git a/test/MassTrie-beta/wormhole/libwh.so b/test/MassTrie-beta/wormhole/libwh.so new file mode 100644 index 00000000..2ecd7e7e Binary files /dev/null and b/test/MassTrie-beta/wormhole/libwh.so differ diff --git a/test/MassTrie-beta/wormhole/stresstest.c b/test/MassTrie-beta/wormhole/stresstest.c new file mode 100644 index 00000000..93fb6f05 --- /dev/null +++ b/test/MassTrie-beta/wormhole/stresstest.c @@ -0,0 +1,354 @@ +/* + * Copyright (c) 2016-2020 Wu, Xingbo + * + * All rights reserved. 
No warranty, explicit or implicit, provided. + */ +#define _GNU_SOURCE + +#include "lib.h" +#include "kv.h" +#include "wh.h" +#include "ctypes.h" + +struct stress_info { + u64 nkeys; + u32 nloader; + u32 nunldr; + u32 nth; + u32 cpt; + bool has_iter; + + au64 seqno; + struct kv ** keys; + + const struct kvmap_api * api; + void * map; + au64 tot; + au64 wfail; + u64 endtime; +}; + + static void * +stress_load_worker(void * ptr) +{ + struct stress_info * const si = (typeof(si))ptr; + srandom_u64(time_nsec() * time_nsec() / time_nsec()); + void * const ref = kvmap_ref(si->api, si->map); + const u64 seq = atomic_fetch_add(&si->seqno, 1); + const u64 n0 = si->nkeys / si->nloader * seq; + const u64 nz = (seq == (si->nloader - 1)) ? si->nkeys : (si->nkeys / si->nloader * (seq + 1)); + //printf("load worker %lu %lu\n", n0, nz-1); + + char * buf = malloc(128); + debug_assert(buf); + u64 * buf64 = (typeof(buf64))buf; + for (u64 i = n0; i < nz; i++) { + const u32 klen = (u32)(random_u64() & 0x3flu) + 8; + const u32 klen8 = (klen + 7) >> 3; + /* + buf64[0] = bswap_64(i); // little endian + for (u64 j = 1; j < klen8; j++) + buf64[j] = random_u64(); + */ + const u64 rkey = random_u64(); + for (u32 j = 0; j < klen8; j++) + buf64[j] = (rkey >> j) & 0x0101010101010101lu; + + si->keys[i] = kv_create(buf, klen, buf, 8); + if (si->keys[i] == NULL) + exit(0); + kvmap_kv_put(si->api, ref, si->keys[i]); + } + free(buf); + kvmap_unref(si->api, ref); + return NULL; +} + + static void * +stress_unload_worker(void * ptr) +{ + struct stress_info * const si = (typeof(si))ptr; + const u64 seq = atomic_fetch_add(&si->seqno, 1); + const u64 n0 = si->nkeys / si->nunldr * seq; + const u64 nz = (seq == (si->nunldr - 1)) ? 
si->nkeys : (si->nkeys / si->nunldr * (seq + 1)); + + void * const ref = kvmap_ref(si->api, si->map); + for (u64 i = n0; i < nz; i++) { + kvmap_kv_del(si->api, ref, si->keys[i]); + free(si->keys[i]); + } + kvmap_unref(si->api, ref); + return NULL; +} + + static void +stress_inp_plus1(struct kv * const kv0, void * const priv) +{ + (void)priv; + if (kv0) { // can be NULL + u64 * ptr = kv_vptr(kv0); + ++(*ptr); + } +} + + static struct kv * +stress_merge_plus1(struct kv * const kv0, void * const priv) +{ + (void)priv; + if (kv0) { // can be NULL + u64 * ptr = kv_vptr(kv0); + ++(*ptr); + return kv0; + } else { + u64 * ptr = kv_vptr((struct kv *)priv); + *ptr = 0; + return priv; + } +} + + static void +stress_func(struct stress_info * const si) +{ + srandom_u64(time_nsec() * time_nsec() / time_nsec()); + const struct kvmap_api * const api = si->api; + void * ref = kvmap_ref(api, si->map); + struct kv * next = si->keys[random_u64() % si->nkeys]; + u64 rnext = random_u64() % si->nkeys; + struct kv * const tmp = malloc(128); + struct kref tmpkref; + struct kvref tmpkvref; + debug_assert(tmp); + void * iter = NULL; + if (api->iter_park) { + iter = api->iter_create(ref); + api->iter_park(iter); + } + u64 wfail1 = 0; + u64 nops = 0; +#define BATCHSIZE ((4096)) + do { + for (u64 i = 0; i < BATCHSIZE; i++) { + // reading kv keys leads to unnecessary cache misses + // use prefetch to minimize overhead on workload generation + struct kv * const key = next; + next = si->keys[rnext]; + cpu_prefetch0(next); + cpu_prefetch0(((u8 *)next) + 64); + rnext = random_u64() % si->nkeys; + cpu_prefetch0(&(si->keys[rnext])); + + // do probe + // customize your benchmark: do a mix of wh operations with switch-cases + const u64 r = random_u64() % 16; + switch (r) { + case 0: + kvmap_kv_probe(api, ref, key); + break; + case 1: + kvmap_kv_get(api, ref, key, tmp); + break; + case 2: + if (si->has_iter) { + if (api->iter_park == NULL) + iter = api->iter_create(ref); + debug_assert(iter); + 
kvmap_kv_iter_seek(api, iter, key); + api->iter_next(iter, tmp); + api->iter_peek(iter, tmp); + api->iter_skip(iter, 2); + // this is unsafe; only reader's lock is acquired + if (api->iter_inp) + api->iter_inp(iter, stress_inp_plus1, NULL); + // kref + if (api->iter_kref) + api->iter_kref(iter, &tmpkref); + // kvref + if (api->iter_kvref) + api->iter_kvref(iter, &tmpkvref); + // done + if (api->iter_park) + api->iter_park(iter); + else + api->iter_destroy(iter); + } + break; + case 3: + if (api->refpark) { + api->park(ref); + api->resume(ref); + } + break; + case 4: + if (api->iter_park) + api->iter_destroy(iter); + (void)kvmap_unref(api, ref); + ref = kvmap_ref(api, si->map); + if (api->iter_park) + iter = api->iter_create(ref); + break; + case 5: + if (api->merge) { + kv_dup2_key(key, tmp); + tmp->vlen = 8; + kvmap_kv_merge(api, ref, key, stress_merge_plus1, tmp); + } + break; + case 6: + if ((random_u64() & 0x7fffu) == 0x22 && api->delr) + (void)kvmap_kv_delr(api, ref, si->keys[rnext], (rnext + 10) < si->nkeys ? 
si->keys[rnext + 10] : NULL); + else + kvmap_kv_probe(api, ref, key); + break; + case 7: case 8: case 9: + (void)kvmap_kv_del(api, ref, key); + break; + case 10: case 11: + if (api->inpw) + kvmap_kv_inpw(api, ref, key, stress_inp_plus1, NULL); + break; + case 12: case 13: case 14: case 15: + if (!kvmap_kv_put(api, ref, key)) + wfail1++; + break; + default: + break; + } + } + nops += BATCHSIZE; + } while (time_nsec() < si->endtime); + si->wfail += wfail1; + if (api->iter_park) + api->iter_destroy(iter); + kvmap_unref(api, ref); + free(tmp); + si->tot += nops; +} + + static void +stress_co_worker(void) +{ + struct stress_info * const si = (typeof(si))co_priv(); + debug_assert(si); + stress_func(si); +} + + static void * +stress_thread_worker(void * ptr) +{ + struct stress_info * const si = (typeof(si))ptr; + if (si->cpt) { + u64 hostrsp = 0; + struct corr * crs[32]; + do { // to work smoothly with ALLOCFAIL + crs[0] = corr_create(16*PGSZ, stress_co_worker, si, &hostrsp); + } while (crs[0] == NULL); + for (u32 j = 1; j < si->cpt; j++) { + do { // to work smoothly with ALLOCFAIL + crs[j] = corr_link(16*PGSZ, stress_co_worker, si, crs[j-1]); + } while (crs[j] == NULL); + } + + corr_enter(crs[0]); + for (u32 j = 0; j < si->cpt; j++) + corr_destroy(crs[j]); + } else { + stress_func(si); + } + return NULL; +} + + int +main(int argc, char ** argv) +{ + struct stress_info si = {.nkeys = 10000, .nloader = 1, .nunldr = 1, .nth = 1, .cpt = 0}; + argc--; + argv++; + int n = -1; + if ((n = kvmap_api_helper(argc, argv, NULL, &si.api, &si.map)) < 0) { + fprintf(stderr, "usage: api ... 
[<#keys>=10000 [<#load-threads>=1 [<#unload-threads>=1 [<#threads>=1 [<#co-per-thread>=0 (disabled) [=1 [=1]]]]]]]\n"); + kvmap_api_helper_message(); + exit(0); + } + argc -= n; + argv += n; + + const bool has_point = si.api->get && si.api->probe && si.api->del && si.api->put; + if (!has_point) { + fprintf(stderr, "api not supported\n"); + exit(0); + } + if (!si.api->inpw) + fprintf(stderr, "api->inpw function not found: ignored\n"); + if (!si.api->merge) + fprintf(stderr, "api->merge function not found: ignored\n"); + if (!si.api->delr) + fprintf(stderr, "api->delr function not found: ignored\n"); + + si.has_iter = si.api->iter_create && si.api->iter_seek && si.api->iter_peek && + si.api->iter_skip && si.api->iter_next && si.api->iter_destroy; + if (!si.has_iter) + fprintf(stderr, "iter functions not complete: ignored\n"); + + // generate keys + if (argc >= 1) + si.nkeys = a2u64(argv[0]); + si.keys = malloc(sizeof(struct kv *) * si.nkeys); + debug_assert(si.keys); + if (argc >= 2) + si.nloader = a2u32(argv[1]); + if (argc >= 3) + si.nunldr = a2u32(argv[2]); + if (argc >= 4) + si.nth = a2u32(argv[3]); + if (argc >= 5) + si.cpt = a2u32(argv[4]); + if (si.cpt > 32) + si.cpt = 32; +#if !defined(CORR) + if (si.cpt > 1) + fprintf(stderr, TERMCLR(35) "CORR not enabled. Compile with -DCORR to enable it.\n" TERMCLR(0)); +#endif // CORR + const u64 nr = (argc >= 6) ? a2u64(argv[5]) : 1; // default 1 + const u64 ne = (argc >= 7) ? 
a2u64(argv[6]) : 1; // default 1 + printf("stresstest: nkeys %lu ldr %u uldr %u th %u cpt %u r %lu e %lu\n", + si.nkeys, si.nloader, si.nunldr, si.nth, si.cpt, nr, ne); + + for (u64 e = 0; e < ne; e++) { + si.seqno = 0; + const u64 dtl = thread_fork_join(si.nloader, (void *)stress_load_worker, false, &si); + printf("load th %u mops %.2lf\n", si.nloader, ((double)si.nkeys) * 1e3 / ((double)dtl)); + if (si.api->fprint) + si.api->fprint(si.map, stdout); + + debug_perf_switch(); + for (u64 r = 0; r < nr; r++) { + si.tot = 0; + si.wfail = 0; + si.endtime = time_nsec() + 2000000000lu; + const u64 dt = thread_fork_join(si.nth, (void *)stress_thread_worker, false, &si); + const double mops = ((double)si.tot) * 1e3 / ((double)dt); + char ts[64]; + time_stamp(ts, 64); + const long rss = process_get_rss(); + printf("%s e %lu r %lu th %u cpt %u tot %lu mops %.2lf rss %ldkB wfail %lu\n", + ts, e, r, si.nth, si.cpt, si.tot, mops, rss, si.wfail); + debug_perf_switch(); + } + si.seqno = 0; + if (si.nunldr == 0) { // use clean + const u64 t0 = time_nsec(); + si.api->clean(si.map); + const u64 dtu = time_diff_nsec(t0); + for (u64 i = 0; i < si.nkeys; i++) + free(si.keys[i]); + printf("clean mops %.2lf\n", ((double)si.nkeys) *1e3 / ((double)dtu)); + } else { + const u64 dtu = thread_fork_join(si.nunldr, (void *)stress_unload_worker, false, &si); + printf("unload th %u mops %.2lf\n", si.nunldr, ((double)si.nkeys) *1e3 / ((double)dtu)); + } + } + + free(si.keys); + si.api->destroy(si.map); + return 0; +} diff --git a/test/MassTrie-beta/wormhole/stresstest.out b/test/MassTrie-beta/wormhole/stresstest.out new file mode 100644 index 00000000..874d359c Binary files /dev/null and b/test/MassTrie-beta/wormhole/stresstest.out differ diff --git a/test/MassTrie-beta/wormhole/wh.c b/test/MassTrie-beta/wormhole/wh.c new file mode 100644 index 00000000..1d31e231 --- /dev/null +++ b/test/MassTrie-beta/wormhole/wh.c @@ -0,0 +1,3876 @@ +/* + * Copyright (c) 2016--2021 Wu, Xingbo + * + * All rights 
reserved. No warranty, explicit or implicit, provided. + */ +#define _GNU_SOURCE + +// headers {{{ +#include // static_assert +#include "lib.h" +#include "ctypes.h" +#include "kv.h" +#include "wh.h" +// }}} headers + +// def {{{ +#define WH_HMAPINIT_SIZE ((1u << 12)) // 10: 16KB/64KB 12: 64KB/256KB 14: 256KB/1MB +#define WH_SLABMETA_SIZE ((1lu << 21)) // 2MB + +#ifndef HEAPCHECKING +#define WH_SLABLEAF_SIZE ((1lu << 21)) // 2MB is ok +#else +#define WH_SLABLEAF_SIZE ((1lu << 21)) // 2MB for valgrind +#endif + +#define WH_KPN ((128u)) // keys per node; power of 2 +#define WH_HDIV (((1u << 16)) / WH_KPN) +#define WH_MID ((WH_KPN >> 1)) // ideal cut point for split, the closer the better +#define WH_BKT_NR ((8)) +#define WH_KPN2 ((WH_KPN + WH_KPN)) + +#define WH_KPN_MRG (((WH_KPN + WH_MID) >> 1 )) // 3/4 + +// FO is fixed at 256. Don't change it +#define WH_FO ((256u)) // index fan-out +// number of bits in a bitmap +#define WH_BMNR ((WH_FO >> 6)) // number of u64 +// }}} def + +// struct {{{ +struct wormmeta { + struct entry13 k13; // kref+klen + struct entry13 l13; // lmost+bitmin+bitmax + struct entry13 r13; // rmost+hash32_lo + struct entry13 p13; // lpath+hash32_hi + u64 bitmap[0]; // 4 if bitmin != bitmax +}; +static_assert(sizeof(struct wormmeta) == 32, "sizeof(wormmeta) != 32"); + +struct wormkv64 { u64 key; void * ptr; }; // u64 keys (whu64) + +struct wormleaf { + // first line + rwlock leaflock; + spinlock sortlock; // to protect the seemingly "read-only" iter_seek + au64 lv; // version (dont use the first u64) + struct wormleaf * prev; // prev leaf + struct wormleaf * next; // next leaf + struct kv * anchor; + + u32 nr_sorted; + u32 nr_keys; + u64 reserved[2]; + + struct entry13 hs[WH_KPN]; // sorted by hashes + u8 ss[WH_KPN]; // sorted by keys +}; + +struct wormslot { u16 t[WH_BKT_NR]; }; +static_assert(sizeof(struct wormslot) == 16, "sizeof(wormslot) != 16"); + +struct wormmbkt { struct wormmeta * e[WH_BKT_NR]; }; +static_assert(sizeof(struct wormmbkt) == 
64, "sizeof(wormmbkt) != 64"); + +struct wormhmap { + au64 hv; + struct wormslot * wmap; + struct wormmbkt * pmap; + u32 mask; + u32 maxplen; + u64 msize; + + struct slab * slab1; + struct slab * slab2; + struct kv * pbuf; +}; +static_assert(sizeof(struct wormhmap) == 64, "sizeof(wormhmap) != 64"); + +struct wormhole { + // 1 line + union { + au64 hmap_ptr; // safe + struct wormhmap * hmap; // unsafe + }; + u64 padding0[6]; + struct wormleaf * leaf0; // usually not used + // 1 line + struct kvmap_mm mm; + struct qsbr * qsbr; + struct slab * slab_leaf; + struct kv * pbuf; + u32 leaftype; + u32 padding1; + // 2 lines + struct wormhmap hmap2[2]; + // fifth line + rwlock metalock; + u32 padding2[15]; +}; + +struct wormhole_iter { + struct wormref * ref; // safe-iter only + struct wormhole * map; + struct wormleaf * leaf; + u32 is; +}; + +struct wormref { + struct wormhole * map; + struct qsbr_ref qref; +}; +// }}} struct + +// helpers {{{ + +// meta {{{ + static inline struct kv * +wormmeta_keyref_load(const struct wormmeta * const meta) +{ + return u64_to_ptr(meta->k13.e3); +} + + static inline u16 +wormmeta_klen_load(const struct wormmeta * const meta) +{ + return meta->k13.e1; +} + + static inline struct wormleaf * +wormmeta_lmost_load(const struct wormmeta * const meta) +{ + return u64_to_ptr(meta->l13.e3 & (~0x3flu)); +} + + static inline u32 +wormmeta_bitmin_load(const struct wormmeta * const meta) +{ + return (u32)(meta->l13.v64 & 0x1fflu); +} + + static inline u32 +wormmeta_bitmax_load(const struct wormmeta * const meta) +{ + return (u32)((meta->l13.v64 >> 9) & 0x1fflu); +} + + static inline u32 +wormmeta_hash32_load(const struct wormmeta * const meta) +{ + return ((u32)meta->r13.e1) | (((u32)meta->p13.e1) << 16); +} + + static inline struct wormleaf * +wormmeta_rmost_load(const struct wormmeta * const meta) +{ + return u64_to_ptr(meta->r13.e3); +} + + static inline struct wormleaf * +wormmeta_lpath_load(const struct wormmeta * const meta) +{ + return 
u64_to_ptr(meta->p13.e3); +} + +// internal + static inline void +wormmeta_lpath_store(struct wormmeta * const meta, struct wormleaf * const leaf) +{ + entry13_update_e3(&meta->p13, ptr_to_u64(leaf)); +} + +// also updates leaf_klen_eq and + static inline void +wormmeta_lmost_store(struct wormmeta * const meta, struct wormleaf * const leaf) +{ + const u64 minmax = meta->l13.v64 & 0x3fffflu; + meta->l13.v64 = (((u64)leaf) << 16) | minmax; + + const bool leaf_klen_eq = leaf->anchor->klen == wormmeta_klen_load(meta); + wormmeta_lpath_store(meta, leaf_klen_eq ? leaf : leaf->prev); +} + + static inline void +wormmeta_bitmin_store(struct wormmeta * const meta, const u32 bitmin) +{ + meta->l13.v64 = (meta->l13.v64 & (~0x1fflu)) | bitmin; +} + + static inline void +wormmeta_bitmax_store(struct wormmeta * const meta, const u32 bitmax) +{ + meta->l13.v64 = (meta->l13.v64 & (~0x3fe00lu)) | (bitmax << 9); +} + + static inline void +wormmeta_rmost_store(struct wormmeta * const meta, struct wormleaf * const leaf) +{ + entry13_update_e3(&meta->r13, ptr_to_u64(leaf)); +} + +// for wormmeta_alloc + static void +wormmeta_init(struct wormmeta * const meta, struct wormleaf * const lrmost, + struct kv * const keyref, const u32 alen, const u32 bit) +{ + keyref->refcnt++; // shared + + const u32 plen = keyref->klen; + debug_assert(plen <= UINT16_MAX); + meta->k13 = entry13((u16)plen, ptr_to_u64(keyref)); + meta->l13.v64 = (ptr_to_u64(lrmost) << 16) | (bit << 9) | bit; + + const u32 hash32 = keyref->hashlo; + meta->r13 = entry13((u16)hash32, ptr_to_u64(lrmost)); + + const bool leaf_klen_eq = alen == plen; + meta->p13 = entry13((u16)(hash32 >> 16), ptr_to_u64(leaf_klen_eq ? 
lrmost : lrmost->prev)); +} +// }}} meta + +// meta-bitmap {{{ + static inline bool +wormmeta_bm_test(const struct wormmeta * const meta, const u32 id) +{ + debug_assert(id < WH_FO); + const u32 bitmin = wormmeta_bitmin_load(meta); + const u32 bitmax = wormmeta_bitmax_load(meta); + if (bitmin == bitmax) { // half node + return bitmin == id; + } else { // full node + return (bool)((meta->bitmap[id >> 6u] >> (id & 0x3fu)) & 1lu); + } +} + +// meta must be a full node + static void +wormmeta_bm_set(struct wormmeta * const meta, const u32 id) +{ + // need to replace meta + u64 * const ptr = &(meta->bitmap[id >> 6u]); + const u64 bit = 1lu << (id & 0x3fu); + if ((*ptr) & bit) + return; + + (*ptr) |= bit; + + // min + if (id < wormmeta_bitmin_load(meta)) + wormmeta_bitmin_store(meta, id); + + // max + const u32 oldmax = wormmeta_bitmax_load(meta); + if (oldmax == WH_FO || id > oldmax) + wormmeta_bitmax_store(meta, id); +} + +// find the lowest bit > id0 +// return WH_FO if not found + static inline u32 +wormmeta_bm_gt(const struct wormmeta * const meta, const u32 id0) +{ + u32 ix = id0 >> 6; + u64 bits = meta->bitmap[ix] & ~((1lu << (id0 & 0x3fu)) - 1lu); + if (bits) + return (ix << 6) + (u32)__builtin_ctzl(bits); + + while (++ix < WH_BMNR) { + bits = meta->bitmap[ix]; + if (bits) + return (ix << 6) + (u32)__builtin_ctzl(bits); + } + + return WH_FO; +} + +// find the highest bit that is lower than the id0 +// return WH_FO if not found + static inline u32 +wormmeta_bm_lt(const struct wormmeta * const meta, const u32 id0) +{ + u32 ix = id0 >> 6; + u64 bits = meta->bitmap[ix] & ((1lu << (id0 & 0x3fu)) - 1lu); + if (bits) + return (ix << 6) + 63u - (u32)__builtin_clzl(bits); + + while (ix--) { + bits = meta->bitmap[ix]; + if (bits) + return (ix << 6) + 63u - (u32)__builtin_clzl(bits); + } + + return WH_FO; +} + +// meta must be a full node + static inline void +wormmeta_bm_clear(struct wormmeta * const meta, const u32 id) +{ + debug_assert(wormmeta_bitmin_load(meta) < 
wormmeta_bitmax_load(meta)); + meta->bitmap[id >> 6u] &= (~(1lu << (id & 0x3fu))); + + // min + if (id == wormmeta_bitmin_load(meta)) + wormmeta_bitmin_store(meta, wormmeta_bm_gt(meta, id)); + + // max + if (id == wormmeta_bitmax_load(meta)) + wormmeta_bitmax_store(meta, wormmeta_bm_lt(meta, id)); +} +// }}} meta-bitmap + +// key/prefix {{{ + static inline u16 +wormhole_pkey(const u32 hash32) +{ + const u16 pkey0 = ((u16)hash32) ^ ((u16)(hash32 >> 16)); + return pkey0 ? pkey0 : 1; +} + + static inline u32 +wormhole_bswap(const u32 hashlo) +{ + return __builtin_bswap32(hashlo); +} + + static inline bool +wormhole_key_meta_match(const struct kv * const key, const struct wormmeta * const meta) +{ + return (key->klen == wormmeta_klen_load(meta)) + && (!memcmp(key->kv, wormmeta_keyref_load(meta)->kv, key->klen)); +} + +// called by get_kref_slot + static inline bool +wormhole_kref_meta_match(const struct kref * const kref, + const struct wormmeta * const meta) +{ + return (kref->len == wormmeta_klen_load(meta)) + && (!memcmp(kref->ptr, wormmeta_keyref_load(meta)->kv, kref->len)); +} + +// called from meta_down ... 
get_kref1_slot +// will access rmost, prefetching is effective here + static inline bool +wormhole_kref1_meta_match(const struct kref * const kref, + const struct wormmeta * const meta, const u8 cid) +{ + const u8 * const keybuf = wormmeta_keyref_load(meta)->kv; + const u32 plen = kref->len; + return ((plen + 1) == wormmeta_klen_load(meta)) + && (!memcmp(kref->ptr, keybuf, plen)) + && (keybuf[plen] == cid); +} + +// warning: be careful with buffer overflow + static inline void +wormhole_prefix(struct kv * const pfx, const u32 klen) +{ + pfx->klen = klen; + kv_update_hash(pfx); +} + +// for split + static inline void +wormhole_prefix_inc1(struct kv * const pfx) +{ + pfx->hashlo = crc32c_u8(pfx->hashlo, pfx->kv[pfx->klen]); + pfx->klen++; +} + +// meta_lcp only + static inline void +wormhole_kref_inc(struct kref * const kref, const u32 len0, + const u32 crc, const u32 inc) +{ + kref->hash32 = crc32c_inc(kref->ptr + len0, inc, crc); + kref->len = len0 + inc; +} + +// meta_lcp only + static inline void +wormhole_kref_inc_123(struct kref * const kref, const u32 len0, + const u32 crc, const u32 inc) +{ + kref->hash32 = crc32c_inc_123(kref->ptr + len0, inc, crc); + kref->len = len0 + inc; +} +// }}} key/prefix + +// alloc {{{ + static inline struct kv * +wormhole_alloc_akey(const size_t klen) +{ +#ifdef ALLOCFAIL + if (alloc_fail()) + return NULL; +#endif + return malloc(sizeof(struct kv) + klen); +} + + static inline void +wormhole_free_akey(struct kv * const akey) +{ + free(akey); +} + + static inline struct kv * +wormhole_alloc_mkey(const size_t klen) +{ +#ifdef ALLOCFAIL + if (alloc_fail()) + return NULL; +#endif + return malloc(sizeof(struct kv) + klen); +} + + static inline void +wormhole_free_mkey(struct kv * const mkey) +{ + free(mkey); +} + + static struct wormleaf * +wormleaf_alloc(struct wormhole * const map, struct wormleaf * const prev, + struct wormleaf * const next, struct kv * const anchor) +{ + struct wormleaf * const leaf = 
slab_alloc_safe(map->slab_leaf); + if (leaf == NULL) + return NULL; + + rwlock_init(&(leaf->leaflock)); + spinlock_init(&(leaf->sortlock)); + + // keep the old version; new version will be assigned by split functions + //leaf->lv = 0; + + leaf->prev = prev; + leaf->next = next; + leaf->anchor = anchor; + + leaf->nr_keys = 0; + leaf->nr_sorted = 0; + + // hs requires zero init. + memset(leaf->hs, 0, sizeof(leaf->hs[0]) * WH_KPN); + return leaf; +} + + static void +wormleaf_free(struct slab * const slab, struct wormleaf * const leaf) +{ + debug_assert(leaf->leaflock.opaque == 0); + wormhole_free_akey(leaf->anchor); + slab_free_safe(slab, leaf); +} + + static struct wormmeta * +wormmeta_alloc(struct wormhmap * const hmap, struct wormleaf * const lrmost, + struct kv * const keyref, const u32 alen, const u32 bit) +{ + debug_assert(alen <= UINT16_MAX); + debug_assert(lrmost && keyref); + + struct wormmeta * const meta = slab_alloc_unsafe(hmap->slab1); + if (meta == NULL) + return NULL; + + wormmeta_init(meta, lrmost, keyref, alen, bit); + return meta; +} + + static inline bool +wormhole_slab_reserve(struct wormhole * const map, const u32 nr) +{ +#ifdef ALLOCFAIL + if (alloc_fail()) + return false; +#endif + for (u32 i = 0; i < 2; i++) { + if (!(map->hmap2[i].slab1 && map->hmap2[i].slab2)) + continue; + if (!slab_reserve_unsafe(map->hmap2[i].slab1, nr)) + return false; + if (!slab_reserve_unsafe(map->hmap2[i].slab2, nr)) + return false; + } + return true; +} + + static void +wormmeta_keyref_release(struct wormmeta * const meta) +{ + struct kv * const keyref = wormmeta_keyref_load(meta); + debug_assert(keyref->refcnt); + keyref->refcnt--; + if (keyref->refcnt == 0) + wormhole_free_mkey(keyref); +} + + static void +wormmeta_free(struct wormhmap * const hmap, struct wormmeta * const meta) +{ + wormmeta_keyref_release(meta); + slab_free_unsafe(hmap->slab1, meta); +} +// }}} alloc + +// lock {{{ + static void +wormleaf_lock_write(struct wormleaf * const leaf, struct wormref * 
const ref) +{ + if (!rwlock_trylock_write(&(leaf->leaflock))) { + wormhole_park(ref); + rwlock_lock_write(&(leaf->leaflock)); + wormhole_resume(ref); + } +} + + static void +wormleaf_lock_read(struct wormleaf * const leaf, struct wormref * const ref) +{ + if (!rwlock_trylock_read(&(leaf->leaflock))) { + wormhole_park(ref); + rwlock_lock_read(&(leaf->leaflock)); + wormhole_resume(ref); + } +} + + static void +wormleaf_unlock_write(struct wormleaf * const leaf) +{ + rwlock_unlock_write(&(leaf->leaflock)); +} + + static void +wormleaf_unlock_read(struct wormleaf * const leaf) +{ + rwlock_unlock_read(&(leaf->leaflock)); +} + + static void +wormhmap_lock(struct wormhole * const map, struct wormref * const ref) +{ + if (!rwlock_trylock_write(&(map->metalock))) { + wormhole_park(ref); + rwlock_lock_write(&(map->metalock)); + wormhole_resume(ref); + } +} + + static inline void +wormhmap_unlock(struct wormhole * const map) +{ + rwlock_unlock_write(&(map->metalock)); +} +// }}} lock + +// hmap-version {{{ + static inline struct wormhmap * +wormhmap_switch(struct wormhole * const map, struct wormhmap * const hmap) +{ + return (hmap == map->hmap2) ? 
(hmap + 1) : (hmap - 1); +} + + static inline struct wormhmap * +wormhmap_load(struct wormhole * const map) +{ + return (struct wormhmap *)atomic_load_explicit(&(map->hmap_ptr), MO_ACQUIRE); +} + + static inline void +wormhmap_store(struct wormhole * const map, struct wormhmap * const hmap) +{ + atomic_store_explicit(&(map->hmap_ptr), (u64)hmap, MO_RELEASE); +} + + static inline u64 +wormhmap_version_load(const struct wormhmap * const hmap) +{ + // no concurrent access + return atomic_load_explicit(&(hmap->hv), MO_ACQUIRE); +} + + static inline void +wormhmap_version_store(struct wormhmap * const hmap, const u64 v) +{ + atomic_store_explicit(&(hmap->hv), v, MO_RELEASE); +} + + static inline u64 +wormleaf_version_load(struct wormleaf * const leaf) +{ + return atomic_load_explicit(&(leaf->lv), MO_CONSUME); +} + + static inline void +wormleaf_version_store(struct wormleaf * const leaf, const u64 v) +{ + atomic_store_explicit(&(leaf->lv), v, MO_RELEASE); +} +// }}} hmap-version + +// co {{{ + static inline void +wormhmap_prefetch_pmap(const struct wormhmap * const hmap, const u32 idx) +{ +#if defined(CORR) + (void)hmap; + (void)idx; +#else + cpu_prefetch0(&(hmap->pmap[idx])); +#endif +} + + static inline struct wormmeta * +wormhmap_get_meta(const struct wormhmap * const hmap, const u32 mid, const u32 i) +{ + struct wormmeta * const meta = hmap->pmap[mid].e[i]; +#if defined(CORR) + cpu_prefetch0(meta); + corr_yield(); +#endif + return meta; +} + + static inline void +wormleaf_prefetch(struct wormleaf * const leaf, const u32 hashlo) +{ + const u32 i = wormhole_pkey(hashlo) / WH_HDIV; +#if defined(CORR) + cpu_prefetch0(leaf); + cpu_prefetch0(&(leaf->hs[i-4])); + cpu_prefetch0(&(leaf->hs[i+4])); + corr_yield(); +#else + cpu_prefetch0(&(leaf->hs[i])); +#endif +} + + static inline bool +wormhole_kref_kv_match(const struct kref * const key, const struct kv * const curr) +{ +#if defined(CORR) + const u8 * const ptr = (typeof(ptr))curr; + cpu_prefetch0(ptr); + cpu_prefetch0(ptr 
+ 64); + if (key->len > 56) { + cpu_prefetch0(ptr + 128); + cpu_prefetch0(ptr + 192); + } + corr_yield(); +#endif + return kref_kv_match(key, curr); +} + + static inline void +wormhole_qsbr_update_pause(struct wormref * const ref, const u64 v) +{ + qsbr_update(&ref->qref, v); +#if defined(CORR) + corr_yield(); +#endif +} +// }}} co + +// }}} helpers + +// hmap {{{ +// hmap is the MetaTrieHT of Wormhole + static bool +wormhmap_init(struct wormhmap * const hmap, struct kv * const pbuf) +{ + const u64 wsize = sizeof(hmap->wmap[0]) * WH_HMAPINIT_SIZE; + const u64 psize = sizeof(hmap->pmap[0]) * WH_HMAPINIT_SIZE; + u64 msize = wsize + psize; + u8 * const mem = pages_alloc_best(msize, true, &msize); + if (mem == NULL) + return false; + + hmap->pmap = (typeof(hmap->pmap))mem; + hmap->wmap = (typeof(hmap->wmap))(mem + psize); + hmap->msize = msize; + hmap->mask = WH_HMAPINIT_SIZE - 1; + wormhmap_version_store(hmap, 0); + hmap->maxplen = 0; + hmap->pbuf = pbuf; + return true; +} + + static inline void +wormhmap_deinit(struct wormhmap * const hmap) +{ + if (hmap->pmap) { + pages_unmap(hmap->pmap, hmap->msize); + hmap->pmap = NULL; + hmap->wmap = NULL; + } +} + + static inline m128 +wormhmap_zero(void) +{ +#if defined(__x86_64__) + return _mm_setzero_si128(); +#elif defined(__aarch64__) + return vdupq_n_u8(0); +#endif +} + + static inline m128 +wormhmap_m128_pkey(const u16 pkey) +{ +#if defined(__x86_64__) + return _mm_set1_epi16((short)pkey); +#elif defined(__aarch64__) + return vreinterpretq_u8_u16(vdupq_n_u16(pkey)); +#endif +} + + static inline u32 +wormhmap_match_mask(const struct wormslot * const s, const m128 skey) +{ +#if defined(__x86_64__) + const m128 sv = _mm_load_si128((const void *)s); + return (u32)_mm_movemask_epi8(_mm_cmpeq_epi16(skey, sv)); +#elif defined(__aarch64__) + const uint16x8_t sv = vld1q_u16((const u16 *)s); // load 16 bytes at s + const uint16x8_t cmp = vceqq_u16(vreinterpretq_u16_u8(skey), sv); // cmpeq => 0xffff or 0x0000 + static const 
uint16x8_t mbits = {0x3, 0xc, 0x30, 0xc0, 0x300, 0xc00, 0x3000, 0xc000}; + return (u32)vaddvq_u16(vandq_u16(cmp, mbits)); +#endif +} + + static inline bool +wormhmap_match_any(const struct wormslot * const s, const m128 skey) +{ +#if defined(__x86_64__) + return wormhmap_match_mask(s, skey) != 0; +#elif defined(__aarch64__) + const uint16x8_t sv = vld1q_u16((const u16 *)s); // load 16 bytes at s + const uint16x8_t cmp = vceqq_u16(vreinterpretq_u16_u8(skey), sv); // cmpeq => 0xffff or 0x0000 + return vaddvq_u32(vreinterpretq_u32_u16(cmp)) != 0; +#endif +} + +// meta_lcp only + static inline bool +wormhmap_peek(const struct wormhmap * const hmap, const u32 hash32) +{ + const m128 sk = wormhmap_m128_pkey(wormhole_pkey(hash32)); + const u32 midx = hash32 & hmap->mask; + const u32 midy = wormhole_bswap(hash32) & hmap->mask; + return wormhmap_match_any(&(hmap->wmap[midx]), sk) + || wormhmap_match_any(&(hmap->wmap[midy]), sk); +} + + static inline struct wormmeta * +wormhmap_get_slot(const struct wormhmap * const hmap, const u32 mid, + const m128 skey, const struct kv * const key) +{ + u32 mask = wormhmap_match_mask(&(hmap->wmap[mid]), skey); + while (mask) { + const u32 i2 = (u32)__builtin_ctz(mask); + struct wormmeta * const meta = wormhmap_get_meta(hmap, mid, i2>>1); + if (likely(wormhole_key_meta_match(key, meta))) + return meta; + mask ^= (3u << i2); + } + return NULL; +} + + static struct wormmeta * +wormhmap_get(const struct wormhmap * const hmap, const struct kv * const key) +{ + const u32 hash32 = key->hashlo; + const u32 midx = hash32 & hmap->mask; + wormhmap_prefetch_pmap(hmap, midx); + const u32 midy = wormhole_bswap(hash32) & hmap->mask; + wormhmap_prefetch_pmap(hmap, midy); + const m128 skey = wormhmap_m128_pkey(wormhole_pkey(hash32)); + + struct wormmeta * const r = wormhmap_get_slot(hmap, midx, skey, key); + if (r) + return r; + return wormhmap_get_slot(hmap, midy, skey, key); +} + +// for meta_lcp only + static inline struct wormmeta * 
+wormhmap_get_kref_slot(const struct wormhmap * const hmap, const u32 mid, + const m128 skey, const struct kref * const kref) +{ + u32 mask = wormhmap_match_mask(&(hmap->wmap[mid]), skey); + while (mask) { + const u32 i2 = (u32)__builtin_ctz(mask); + struct wormmeta * const meta = wormhmap_get_meta(hmap, mid, i2>>1); + if (likely(wormhole_kref_meta_match(kref, meta))) + return meta; + + mask ^= (3u << i2); + } + return NULL; +} + +// for meta_lcp only + static inline struct wormmeta * +wormhmap_get_kref(const struct wormhmap * const hmap, const struct kref * const kref) +{ + const u32 hash32 = kref->hash32; + const u32 midx = hash32 & hmap->mask; + wormhmap_prefetch_pmap(hmap, midx); + const u32 midy = wormhole_bswap(hash32) & hmap->mask; + wormhmap_prefetch_pmap(hmap, midy); + const m128 skey = wormhmap_m128_pkey(wormhole_pkey(hash32)); + + struct wormmeta * const r = wormhmap_get_kref_slot(hmap, midx, skey, kref); + if (r) + return r; + return wormhmap_get_kref_slot(hmap, midy, skey, kref); +} + +// for meta_down only + static inline struct wormmeta * +wormhmap_get_kref1_slot(const struct wormhmap * const hmap, const u32 mid, + const m128 skey, const struct kref * const kref, const u8 cid) +{ + u32 mask = wormhmap_match_mask(&(hmap->wmap[mid]), skey); + while (mask) { + const u32 i2 = (u32)__builtin_ctz(mask); + struct wormmeta * const meta = wormhmap_get_meta(hmap, mid, i2>>1); + //cpu_prefetch0(wormmeta_rmost_load(meta)); // will access + if (likely(wormhole_kref1_meta_match(kref, meta, cid))) + return meta; + + mask ^= (3u << i2); + } + return NULL; +} + +// for meta_down only + static inline struct wormmeta * +wormhmap_get_kref1(const struct wormhmap * const hmap, + const struct kref * const kref, const u8 cid) +{ + const u32 hash32 = crc32c_u8(kref->hash32, cid); + const u32 midx = hash32 & hmap->mask; + wormhmap_prefetch_pmap(hmap, midx); + const u32 midy = wormhole_bswap(hash32) & hmap->mask; + wormhmap_prefetch_pmap(hmap, midy); + const m128 skey = 
wormhmap_m128_pkey(wormhole_pkey(hash32)); + + struct wormmeta * const r = wormhmap_get_kref1_slot(hmap, midx, skey, kref, cid); + if (r) + return r; + return wormhmap_get_kref1_slot(hmap, midy, skey, kref, cid); +} + + static inline u32 +wormhmap_slot_count(const struct wormslot * const slot) +{ + const u32 mask = wormhmap_match_mask(slot, wormhmap_zero()); + return mask ? ((u32)__builtin_ctz(mask) >> 1) : 8; +} + + static inline void +wormhmap_squeeze(const struct wormhmap * const hmap) +{ + struct wormslot * const wmap = hmap->wmap; + struct wormmbkt * const pmap = hmap->pmap; + const u32 mask = hmap->mask; + const u64 nrs64 = ((u64)(hmap->mask)) + 1; // must use u64; u32 can overflow + for (u64 si64 = 0; si64 < nrs64; si64++) { // # of buckets + const u32 si = (u32)si64; + u32 ci = wormhmap_slot_count(&(wmap[si])); + for (u32 ei = ci - 1; ei < WH_BKT_NR; ei--) { + struct wormmeta * const meta = pmap[si].e[ei]; + const u32 sj = wormmeta_hash32_load(meta) & mask; // first hash + if (sj == si) + continue; + + // move + const u32 ej = wormhmap_slot_count(&(wmap[sj])); + if (ej < WH_BKT_NR) { // has space at home location + wmap[sj].t[ej] = wmap[si].t[ei]; + pmap[sj].e[ej] = pmap[si].e[ei]; + const u32 ni = ci - 1; + if (ei < ni) { + wmap[si].t[ei] = wmap[si].t[ni]; + pmap[si].e[ei] = pmap[si].e[ni]; + } + wmap[si].t[ni] = 0; + pmap[si].e[ni] = NULL; + ci--; + } + } + } +} + + static void +wormhmap_expand(struct wormhmap * const hmap) +{ + // sync expand + const u32 mask0 = hmap->mask; + if (mask0 == UINT32_MAX) + debug_die(); + const u32 nr0 = mask0 + 1; + const u32 mask1 = mask0 + nr0; + const u64 nr1 = ((u64)nr0) << 1; // must use u64; u32 can overflow + const u64 wsize = nr1 * sizeof(hmap->wmap[0]); + const u64 psize = nr1 * sizeof(hmap->pmap[0]); + u64 msize = wsize + psize; + u8 * mem = pages_alloc_best(msize, true, &msize); + if (mem == NULL) { + // We are at a very deep call stack from wormhole_put(). 
+ // Gracefully handling the failure requires lots of changes. + // Currently we simply wait for available memory + // TODO: gracefully return with insertion failure + char ts[64]; + time_stamp(ts, 64); + fprintf(stderr, "%s %s sleep-wait for memory allocation %lukB\n", + __func__, ts, msize >> 10); + do { + sleep(1); + mem = pages_alloc_best(msize, true, &msize); + } while (mem == NULL); + time_stamp(ts, 64); + fprintf(stderr, "%s %s memory allocation done\n", __func__, ts); + } + + struct wormhmap hmap1 = *hmap; + hmap1.pmap = (typeof(hmap1.pmap))mem; + hmap1.wmap = (typeof(hmap1.wmap))(mem + psize); + hmap1.msize = msize; + hmap1.mask = mask1; + + const struct wormslot * const wmap0 = hmap->wmap; + const struct wormmbkt * const pmap0 = hmap->pmap; + + for (u32 s = 0; s < nr0; s++) { + const struct wormmbkt * const bkt = &pmap0[s]; + for (u32 i = 0; (i < WH_BKT_NR) && bkt->e[i]; i++) { + const struct wormmeta * const meta = bkt->e[i]; + const u32 hash32 = wormmeta_hash32_load(meta); + const u32 idx0 = hash32 & mask0; + const u32 idx1 = ((idx0 == s) ? 
hash32 : wormhole_bswap(hash32)) & mask1; + + const u32 n = wormhmap_slot_count(&(hmap1.wmap[idx1])); + debug_assert(n < 8); + hmap1.wmap[idx1].t[n] = wmap0[s].t[i]; + hmap1.pmap[idx1].e[n] = bkt->e[i]; + } + } + pages_unmap(hmap->pmap, hmap->msize); + hmap->pmap = hmap1.pmap; + hmap->wmap = hmap1.wmap; + hmap->msize = hmap1.msize; + hmap->mask = hmap1.mask; + wormhmap_squeeze(hmap); +} + + static bool +wormhmap_cuckoo(struct wormhmap * const hmap, const u32 mid0, + struct wormmeta * const e0, const u16 s0, const u32 depth) +{ + const u32 ii = wormhmap_slot_count(&(hmap->wmap[mid0])); + if (ii < WH_BKT_NR) { + hmap->wmap[mid0].t[ii] = s0; + hmap->pmap[mid0].e[ii] = e0; + return true; + } else if (depth == 0) { + return false; + } + + // depth > 0 + struct wormmbkt * const bkt = &(hmap->pmap[mid0]); + u16 * const sv = &(hmap->wmap[mid0].t[0]); + for (u32 i = 0; i < WH_BKT_NR; i++) { + const struct wormmeta * const meta = bkt->e[i]; + debug_assert(meta); + const u32 hash32 = wormmeta_hash32_load(meta); + + const u32 midx = hash32 & hmap->mask; + const u32 midy = wormhole_bswap(hash32) & hmap->mask; + const u32 midt = (midx != mid0) ? midx : midy; + if (midt != mid0) { // possible + // no penalty if moving someone back to its 1st hash location + const u32 depth1 = (midt == midx) ? 
depth : (depth - 1); + if (wormhmap_cuckoo(hmap, midt, bkt->e[i], sv[i], depth1)) { + bkt->e[i] = e0; + sv[i] = s0; + return true; + } + } + } + return false; +} + + static void +wormhmap_set(struct wormhmap * const hmap, struct wormmeta * const meta) +{ + const u32 hash32 = wormmeta_hash32_load(meta); + const u32 midx = hash32 & hmap->mask; + wormhmap_prefetch_pmap(hmap, midx); + const u32 midy = wormhole_bswap(hash32) & hmap->mask; + wormhmap_prefetch_pmap(hmap, midy); + const u16 pkey = wormhole_pkey(hash32); + // insert with cuckoo + if (likely(wormhmap_cuckoo(hmap, midx, meta, pkey, 1))) + return; + if (wormhmap_cuckoo(hmap, midy, meta, pkey, 1)) + return; + if (wormhmap_cuckoo(hmap, midx, meta, pkey, 2)) + return; + + // expand + wormhmap_expand(hmap); + + wormhmap_set(hmap, meta); +} + + static bool +wormhmap_del_slot(struct wormhmap * const hmap, const u32 mid, + const struct wormmeta * const meta, const m128 skey) +{ + u32 mask = wormhmap_match_mask(&(hmap->wmap[mid]), skey); + while (mask) { + const u32 i2 = (u32)__builtin_ctz(mask); + const struct wormmeta * const meta1 = hmap->pmap[mid].e[i2>>1]; + if (likely(meta == meta1)) { + const u32 i = i2 >> 1; + const u32 j = wormhmap_slot_count(&(hmap->wmap[mid])) - 1; + hmap->wmap[mid].t[i] = hmap->wmap[mid].t[j]; + hmap->pmap[mid].e[i] = hmap->pmap[mid].e[j]; + hmap->wmap[mid].t[j] = 0; + hmap->pmap[mid].e[j] = NULL; + return true; + } + mask -= (3u << i2); + } + return false; +} + + static bool +wormhmap_del(struct wormhmap * const hmap, const struct wormmeta * const meta) +{ + const u32 hash32 = wormmeta_hash32_load(meta); + const u32 midx = hash32 & hmap->mask; + const u32 midy = wormhole_bswap(hash32) & hmap->mask; + const m128 skey = wormhmap_m128_pkey(wormhole_pkey(hash32)); + return wormhmap_del_slot(hmap, midx, meta, skey) + || wormhmap_del_slot(hmap, midy, meta, skey); +} + + static bool +wormhmap_replace_slot(struct wormhmap * const hmap, const u32 mid, + const struct wormmeta * const old, const 
m128 skey, struct wormmeta * const new) +{ + u32 mask = wormhmap_match_mask(&(hmap->wmap[mid]), skey); + while (mask) { + const u32 i2 = (u32)__builtin_ctz(mask); + struct wormmeta ** const pslot = &hmap->pmap[mid].e[i2>>1]; + if (likely(old == *pslot)) { + *pslot = new; + return true; + } + mask -= (3u << i2); + } + return false; +} + + static bool +wormhmap_replace(struct wormhmap * const hmap, const struct wormmeta * const old, struct wormmeta * const new) +{ + const u32 hash32 = wormmeta_hash32_load(old); + const u32 midx = hash32 & hmap->mask; + const u32 midy = wormhole_bswap(hash32) & hmap->mask; + const m128 skey = wormhmap_m128_pkey(wormhole_pkey(hash32)); + return wormhmap_replace_slot(hmap, midx, old, skey, new) + || wormhmap_replace_slot(hmap, midy, old, skey, new); +} +// }}} hmap + +// create {{{ +// it's unsafe + static bool +wormhole_create_leaf0(struct wormhole * const map) +{ + const bool sr = wormhole_slab_reserve(map, 1); + if (unlikely(!sr)) + return false; + + // create leaf of empty key + struct kv * const anchor = wormhole_alloc_akey(0); + if (anchor == NULL) + return false; + kv_dup2(kv_null(), anchor); + + struct wormleaf * const leaf0 = wormleaf_alloc(map, NULL, NULL, anchor); + if (leaf0 == NULL) { + wormhole_free_akey(anchor); + return false; + } + + struct kv * const mkey = wormhole_alloc_mkey(0); + if (mkey == NULL) { + wormleaf_free(map->slab_leaf, leaf0); + return false; + } + + wormhole_prefix(mkey, 0); + mkey->refcnt = 0; + // create meta of empty key + for (u32 i = 0; i < 2; i++) { + if (map->hmap2[i].slab1) { + struct wormmeta * const m0 = wormmeta_alloc(&map->hmap2[i], leaf0, mkey, 0, WH_FO); + debug_assert(m0); // already reserved enough + wormhmap_set(&(map->hmap2[i]), m0); + } + } + + map->leaf0 = leaf0; + return true; +} + + static struct wormhole * +wormhole_create_internal(const struct kvmap_mm * const mm, const u32 nh) +{ + struct wormhole * const map = yalloc(sizeof(*map)); + if (map == NULL) + return NULL; + 
memset(map, 0, sizeof(*map)); + // mm + map->mm = mm ? (*mm) : kvmap_mm_dup; + + // pbuf for meta-merge + map->pbuf = yalloc(1lu << 16); // 64kB + if (map->pbuf == NULL) + goto fail; + + // hmap + for (u32 i = 0; i < nh; i++) { + struct wormhmap * const hmap = &map->hmap2[i]; + if (!wormhmap_init(hmap, map->pbuf)) + goto fail; + + hmap->slab1 = slab_create(sizeof(struct wormmeta), WH_SLABMETA_SIZE); + if (hmap->slab1 == NULL) + goto fail; + + hmap->slab2 = slab_create(sizeof(struct wormmeta) + (sizeof(u64) * WH_BMNR), WH_SLABMETA_SIZE); + if (hmap->slab2 == NULL) + goto fail; + } + + // leaf slab + map->slab_leaf = slab_create(sizeof(struct wormleaf), WH_SLABLEAF_SIZE); + if (map->slab_leaf == NULL) + goto fail; + + // qsbr + map->qsbr = qsbr_create(); + if (map->qsbr == NULL) + goto fail; + + // leaf0 + if (!wormhole_create_leaf0(map)) + goto fail; + + rwlock_init(&(map->metalock)); + wormhmap_store(map, &map->hmap2[0]); + return map; + +fail: + if (map->qsbr) + qsbr_destroy(map->qsbr); + + if (map->slab_leaf) + slab_destroy(map->slab_leaf); + + for (u32 i = 0; i < nh; i++) { + struct wormhmap * const hmap = &map->hmap2[i]; + if (hmap->slab1) + slab_destroy(hmap->slab1); + if (hmap->slab2) + slab_destroy(hmap->slab2); + wormhmap_deinit(hmap); + } + + if (map->pbuf) + free(map->pbuf); + + free(map); + return NULL; +} + + struct wormhole * +wormhole_create(const struct kvmap_mm * const mm) +{ + return wormhole_create_internal(mm, 2); +} + + struct wormhole * +whunsafe_create(const struct kvmap_mm * const mm) +{ + return wormhole_create_internal(mm, 1); +} +// }}} create + +// jump {{{ + +// lcp {{{ +// search in the hash table for the Longest Prefix Match of the search key +// The corresponding wormmeta node is returned and the LPM is recorded in kref + static struct wormmeta * +wormhole_meta_lcp(const struct wormhmap * const hmap, struct kref * const kref, const u32 klen) +{ + // invariant: lo <= lcp < (lo + gd) + // ending condition: gd == 1 + u32 gd = 
(hmap->maxplen < klen ? hmap->maxplen : klen) + 1u; + u32 lo = 0; + u32 loh = KV_CRC32C_SEED; + +#define META_LCP_GAP_1 ((7u)) + while (META_LCP_GAP_1 < gd) { + const u32 inc = gd >> 3 << 2; // x4 + const u32 hash32 = crc32c_inc_x4(kref->ptr + lo, inc, loh); + if (wormhmap_peek(hmap, hash32)) { + loh = hash32; + lo += inc; + gd -= inc; + } else { + gd = inc; + } + } + + while (1 < gd) { + const u32 inc = gd >> 1; + const u32 hash32 = crc32c_inc_123(kref->ptr + lo, inc, loh); + if (wormhmap_peek(hmap, hash32)) { + loh = hash32; + lo += inc; + gd -= inc; + } else { + gd = inc; + } + } +#undef META_LCP_GAP_1 + + kref->hash32 = loh; + kref->len = lo; + struct wormmeta * ret = wormhmap_get_kref(hmap, kref); + if (likely(ret != NULL)) + return ret; + + gd = lo; + lo = 0; + loh = KV_CRC32C_SEED; + +#define META_LCP_GAP_2 ((5u)) + while (META_LCP_GAP_2 < gd) { + const u32 inc = (gd * 3) >> 2; + wormhole_kref_inc(kref, lo, loh, inc); + struct wormmeta * const tmp = wormhmap_get_kref(hmap, kref); + if (tmp) { + loh = kref->hash32; + lo += inc; + gd -= inc; + ret = tmp; + if (wormmeta_bm_test(tmp, kref->ptr[lo])) { + loh = crc32c_u8(loh, kref->ptr[lo]); + lo++; + gd--; + ret = NULL; + } else { + gd = 1; + break; + } + } else { + gd = inc; + } + } + + while (1 < gd) { + const u32 inc = (gd * 3) >> 2; + wormhole_kref_inc_123(kref, lo, loh, inc); + struct wormmeta * const tmp = wormhmap_get_kref(hmap, kref); + if (tmp) { + loh = kref->hash32; + lo += inc; + gd -= inc; + ret = tmp; + if (wormmeta_bm_test(tmp, kref->ptr[lo])) { + loh = crc32c_u8(loh, kref->ptr[lo]); + lo++; + gd--; + ret = NULL; + } else { + break; + } + } else { + gd = inc; + } + } +#undef META_LCP_GAP_2 + + if (kref->len != lo) { + kref->hash32 = loh; + kref->len = lo; + } + if (ret == NULL) + ret = wormhmap_get_kref(hmap, kref); + debug_assert(ret); + return ret; +} +// }}} lcp + +// down {{{ + static struct wormleaf * +wormhole_meta_down(const struct wormhmap * const hmap, const struct kref * const lcp, + 
const struct wormmeta * const meta, const u32 klen) +{ + if (likely(lcp->len < klen)) { // partial match + const u32 id0 = lcp->ptr[lcp->len]; + if (wormmeta_bitmin_load(meta) > id0) { // no left, don't care about right. + return wormmeta_lpath_load(meta); + } else if (wormmeta_bitmax_load(meta) < id0) { // has left sibling but no right sibling + return wormmeta_rmost_load(meta); + } else { // has both (expensive) + return wormmeta_rmost_load(wormhmap_get_kref1(hmap, lcp, (u8)wormmeta_bm_lt(meta, id0))); + } + } else { // lcp->len == klen + return wormmeta_lpath_load(meta); + } +} +// }}} down + +// jump-rw {{{ + static struct wormleaf * +wormhole_jump_leaf(const struct wormhmap * const hmap, const struct kref * const key) +{ + struct kref kref = {.ptr = key->ptr}; + debug_assert(kv_crc32c(key->ptr, key->len) == key->hash32); + + const struct wormmeta * const meta = wormhole_meta_lcp(hmap, &kref, key->len); + return wormhole_meta_down(hmap, &kref, meta, key->len); +} + + static struct wormleaf * +wormhole_jump_leaf_read(struct wormref * const ref, const struct kref * const key) +{ + struct wormhole * const map = ref->map; +#pragma nounroll + do { + const struct wormhmap * const hmap = wormhmap_load(map); + const u64 v = wormhmap_version_load(hmap); + qsbr_update(&ref->qref, v); + struct wormleaf * const leaf = wormhole_jump_leaf(hmap, key); + wormleaf_prefetch(leaf, key->hash32); +#pragma nounroll + do { + if (rwlock_trylock_read_nr(&(leaf->leaflock), 64)) { + if (wormleaf_version_load(leaf) <= v) + return leaf; + wormleaf_unlock_read(leaf); + break; + } + // v1 is loaded before lv; if lv <= v, can update v1 without redo jump + const u64 v1 = wormhmap_version_load(wormhmap_load(map)); + if (wormleaf_version_load(leaf) > v) + break; + wormhole_qsbr_update_pause(ref, v1); + } while (true); + } while (true); +} + + static struct wormleaf * +wormhole_jump_leaf_write(struct wormref * const ref, const struct kref * const key) +{ + struct wormhole * const map = ref->map; 
+#pragma nounroll + do { + const struct wormhmap * const hmap = wormhmap_load(map); + const u64 v = wormhmap_version_load(hmap); + qsbr_update(&ref->qref, v); + struct wormleaf * const leaf = wormhole_jump_leaf(hmap, key); + wormleaf_prefetch(leaf, key->hash32); +#pragma nounroll + do { + if (rwlock_trylock_write_nr(&(leaf->leaflock), 64)) { + if (wormleaf_version_load(leaf) <= v) + return leaf; + wormleaf_unlock_write(leaf); + break; + } + // v1 is loaded before lv; if lv <= v, can update v1 without redo jump + const u64 v1 = wormhmap_version_load(wormhmap_load(map)); + if (wormleaf_version_load(leaf) > v) + break; + wormhole_qsbr_update_pause(ref, v1); + } while (true); + } while (true); +} +// }}} jump-rw + +// }}} jump + +// leaf-read {{{ + static inline struct kv * +wormleaf_kv_at_ih(const struct wormleaf * const leaf, const u32 ih) +{ + return u64_to_ptr(leaf->hs[ih].e3); +} + + static inline struct kv * +wormleaf_kv_at_is(const struct wormleaf * const leaf, const u32 is) +{ + return u64_to_ptr(leaf->hs[leaf->ss[is]].e3); +} + + static inline void +wormleaf_prefetch_ss(const struct wormleaf * const leaf) +{ + for (u32 i = 0; i < WH_KPN; i+=64) + cpu_prefetch0(&leaf->ss[i]); +} + +// leaf must have been sorted +// return the key at [i] as if k1 has been inserted into leaf; i <= leaf->nr_sorted + static const struct kv * +wormleaf_kv_at_is1(const struct wormleaf * const leaf, const u32 i, const u32 is1, const struct kv * const k1) +{ + debug_assert(leaf->nr_keys == leaf->nr_sorted); + debug_assert(is1 <= leaf->nr_sorted); + if (i < is1) + return wormleaf_kv_at_is(leaf, i); + else if (i > is1) + return wormleaf_kv_at_is(leaf, i-1); + else // i == is1 + return k1; +} + + + +// fast point-lookup +// returns WH_KPN if not found + static u32 +wormleaf_match_hs(const struct wormleaf * const leaf, const struct kref * const key) +{ + const u16 pkey = wormhole_pkey(key->hash32); + const u32 i0 = pkey / WH_HDIV; + const struct entry13 * const hs = leaf->hs; + + if 
(hs[i0].e1 == pkey) { + struct kv * const curr = u64_to_ptr(hs[i0].e3); + if (likely(wormhole_kref_kv_match(key, curr))) + return i0; + } + if (hs[i0].e1 == 0) + return WH_KPN; + + // search left + u32 i = i0 - 1; + while (i < WH_KPN) { + if (hs[i].e1 == pkey) { + struct kv * const curr = u64_to_ptr(hs[i].e3); + if (likely(wormhole_kref_kv_match(key, curr))) + return i; + } else if (hs[i].e1 < pkey) { + break; + } + i--; + } + + // search right + i = i0 + 1; + while (i < WH_KPN) { + if (hs[i].e1 == pkey) { + struct kv * const curr = u64_to_ptr(hs[i].e3); + if (likely(wormhole_kref_kv_match(key, curr))) + return i; + } else if ((hs[i].e1 > pkey) || (hs[i].e1 == 0)) { + break; + } + i++; + } + + + // not found + return WH_KPN; +} + +// search for an existing entry in hs + static u32 +wormleaf_search_ih(const struct wormleaf * const leaf, const struct entry13 e) +{ + const u16 pkey = e.e1; + const u32 i0 = pkey / WH_HDIV; + const struct entry13 * const hs = leaf->hs; + const struct entry13 e0 = hs[i0]; + + if (e0.v64 == e.v64) + return i0; + + if (e0.e1 == 0) + return WH_KPN; + + // search left + u32 i = i0 - 1; + while (i < WH_KPN) { + const struct entry13 ei = hs[i]; + if (ei.v64 == e.v64) { + return i; + } else if (ei.e1 < pkey) { + break; + } + i--; + } + + // search right + i = i0 + 1; + while (i < WH_KPN) { + const struct entry13 ei = hs[i]; + if (ei.v64 == e.v64) { + return i; + } else if ((ei.e1 > pkey) || (ei.e1 == 0)) { + break; + } + i++; + } + + // not found + return WH_KPN; +} + +// search for an existing entry in ss + static u32 +wormleaf_search_is(const struct wormleaf * const leaf, const u8 ih) +{ +#if defined(__x86_64__) + // TODO: avx512 +#if defined(__AVX2__) + const m256 i1 = _mm256_set1_epi8((char)ih); + for (u32 i = 0; i < leaf->nr_keys; i += sizeof(m256)) { + const m256 sv = _mm256_load_si256((m256 *)(leaf->ss+i)); + const u32 mask = (u32)_mm256_movemask_epi8(_mm256_cmpeq_epi8(sv, i1)); + if (mask) + return i + (u32)__builtin_ctz(mask); + } 
+#else // SSE4.2 + const m128 i1 = _mm_set1_epi8((char)ih); + for (u32 i = 0; i < leaf->nr_keys; i += sizeof(m128)) { + const m128 sv = _mm_load_si128((m128 *)(leaf->ss+i)); + const u32 mask = (u32)_mm_movemask_epi8(_mm_cmpeq_epi8(sv, i1)); + if (mask) + return i + (u32)__builtin_ctz(mask); + } +#endif // __AVX2__ +#elif defined(__aarch64__) + static const m128 vtbl = {0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15}; + static const uint16x8_t mbits = {0x0101, 0x0202, 0x0404, 0x0808, 0x1010, 0x2020, 0x4040, 0x8080}; + const m128 i1 = vdupq_n_u8(ih); + for (u32 i = 0; i < leaf->nr_keys; i += sizeof(m128)) { + const m128 cmp = vceqq_u8(vld1q_u8(leaf->ss+i), i1); // cmpeq => 0xff or 0x00 + const m128 cmp1 = vqtbl1q_u8(cmp, vtbl); // reorder + const u32 mask = (u32)vaddvq_u16(vandq_u8(vreinterpretq_u16_u8(cmp1), mbits)); + if (mask) + return i + (u32)__builtin_ctz(mask); + } +#endif // __x86_64__ + debug_die(); +} + +// assumes there are no duplicated keys +// search the first key that is >= the given key +// return 0 ..
nr_sorted + static u32 +wormleaf_search_ss(const struct wormleaf * const leaf, const struct kref * const key) +{ + u32 lo = 0; + u32 hi = leaf->nr_sorted; + while ((lo + 2) < hi) { + const u32 i = (lo + hi) >> 1; + const struct kv * const curr = wormleaf_kv_at_is(leaf, i); + cpu_prefetch0(curr); + cpu_prefetch0(leaf->hs + leaf->ss[(lo + i) >> 1]); + cpu_prefetch0(leaf->hs + leaf->ss[(i + 1 + hi) >> 1]); + const int cmp = kref_kv_compare(key, curr); + debug_assert(cmp != 0); + if (cmp < 0) + hi = i; + else + lo = i + 1; + } + + while (lo < hi) { + const u32 i = (lo + hi) >> 1; + const struct kv * const curr = wormleaf_kv_at_is(leaf, i); + const int cmp = kref_kv_compare(key, curr); + debug_assert(cmp != 0); + if (cmp < 0) + hi = i; + else + lo = i + 1; + } + return lo; +} + + static u32 +wormleaf_seek(const struct wormleaf * const leaf, const struct kref * const key) +{ + debug_assert(leaf->nr_sorted == leaf->nr_keys); + wormleaf_prefetch_ss(leaf); // effective for both hit and miss + const u32 ih = wormleaf_match_hs(leaf, key); + if (ih < WH_KPN) { // hit + return wormleaf_search_is(leaf, (u8)ih); + } else { // miss, binary search for gt + return wormleaf_search_ss(leaf, key); + } +} + +// same as search_sorted but the target is very likely beyond the end + static u32 +wormleaf_seek_end(const struct wormleaf * const leaf, const struct kref * const key) +{ + debug_assert(leaf->nr_keys == leaf->nr_sorted); + if (leaf->nr_sorted) { + const int cmp = kref_kv_compare(key, wormleaf_kv_at_is(leaf, leaf->nr_sorted-1)); + if (cmp > 0) + return leaf->nr_sorted; + else if (cmp == 0) + return leaf->nr_sorted - 1; + else + return wormleaf_seek(leaf, key); + } else { + return 0; + } +} +// }}} leaf-read + +// leaf-write {{{ + static void +wormleaf_sort_m2(struct wormleaf * const leaf, const u32 n1, const u32 n2) +{ + if (n1 == 0 || n2 == 0) + return; // no need to sort + + u8 * const ss = leaf->ss; + u8 et[WH_KPN/2]; // min(n1,n2) < KPN/2 + if (n1 <= n2) { // merge left + 
memcpy(et, &(ss[0]), sizeof(ss[0]) * n1); + u8 * eo = ss; + u8 * e1 = et; // size == n1 + u8 * e2 = &(ss[n1]); // size == n2 + const u8 * const z1 = e1 + n1; + const u8 * const z2 = e2 + n2; + while ((e1 < z1) && (e2 < z2)) { + const int cmp = kv_compare(wormleaf_kv_at_ih(leaf, *e1), wormleaf_kv_at_ih(leaf, *e2)); + if (cmp < 0) + *(eo++) = *(e1++); + else if (cmp > 0) + *(eo++) = *(e2++); + else + debug_die(); + + if (eo == e2) + break; // finish early + } + if (eo < e2) + memcpy(eo, e1, sizeof(*eo) * (size_t)(e2 - eo)); + } else { + memcpy(et, &(ss[n1]), sizeof(ss[0]) * n2); + u8 * eo = &(ss[n1 + n2 - 1]); // merge backwards + u8 * e1 = &(ss[n1 - 1]); // size == n1 + u8 * e2 = &(et[n2 - 1]); // size == n2 + const u8 * const z1 = e1 - n1; + const u8 * const z2 = e2 - n2; + while ((e1 > z1) && (e2 > z2)) { + const int cmp = kv_compare(wormleaf_kv_at_ih(leaf, *e1), wormleaf_kv_at_ih(leaf, *e2)); + if (cmp < 0) + *(eo--) = *(e2--); + else if (cmp > 0) + *(eo--) = *(e1--); + else + debug_die(); + + if (eo == e1) + break; + } + if (eo > e1) + memcpy(e1 + 1, et, sizeof(*eo) * (size_t)(eo - e1)); + } +} + +#if defined(__linux__) + static int +wormleaf_ss_cmp(const void * const p1, const void * const p2, void * priv) +{ + const struct kv * const k1 = wormleaf_kv_at_ih(priv, *(const u8 *)p1); + const struct kv * const k2 = wormleaf_kv_at_ih(priv, *(const u8 *)p2); + return kv_compare(k1, k2); +} +#else // (FreeBSD and APPLE only) + static int +wormleaf_ss_cmp(void * priv, const void * const p1, const void * const p2) +{ + const struct kv * const k1 = wormleaf_kv_at_ih(priv, *(const u8 *)p1); + const struct kv * const k2 = wormleaf_kv_at_ih(priv, *(const u8 *)p2); + return kv_compare(k1, k2); +} +#endif // __linux__ + + static inline void +wormleaf_sort_range(struct wormleaf * const leaf, const u32 i0, const u32 nr) +{ +#if defined(__linux__) + qsort_r(&(leaf->ss[i0]), nr, sizeof(leaf->ss[0]), wormleaf_ss_cmp, leaf); +#else // (FreeBSD and APPLE only) + 
qsort_r(&(leaf->ss[i0]), nr, sizeof(leaf->ss[0]), leaf, wormleaf_ss_cmp); +#endif // __linux__ +} + +// make sure all keys are sorted in a leaf node + static void +wormleaf_sync_sorted(struct wormleaf * const leaf) +{ + const u32 s = leaf->nr_sorted; + const u32 n = leaf->nr_keys; + if (s == n) + return; + + wormleaf_sort_range(leaf, s, n - s); + // merge-sort inplace + wormleaf_sort_m2(leaf, s, n - s); + leaf->nr_sorted = n; +} + +// shift a sequence of entries on hs and update the corresponding ss values + static void +wormleaf_shift_inc(struct wormleaf * const leaf, const u32 to, const u32 from, const u32 nr) +{ + debug_assert(to == (from+1)); + struct entry13 * const hs = leaf->hs; + memmove(&(hs[to]), &(hs[from]), sizeof(hs[0]) * nr); + +#if defined(__x86_64__) + // TODO: avx512 +#if defined(__AVX2__) + const m256 ones = _mm256_set1_epi8(1); + const m256 addx = _mm256_set1_epi8((char)(u8)(INT8_MAX + 1 - from - nr)); + const m256 cmpx = _mm256_set1_epi8((char)(u8)(INT8_MAX - nr)); + for (u32 i = 0; i < leaf->nr_keys; i += sizeof(m256)) { + const m256 sv = _mm256_load_si256((m256 *)(leaf->ss+i)); + const m256 add1 = _mm256_and_si256(_mm256_cmpgt_epi8(_mm256_add_epi8(sv, addx), cmpx), ones); + _mm256_store_si256((m256 *)(leaf->ss+i), _mm256_add_epi8(sv, add1)); + } +#else // SSE4.2 + const m128 ones = _mm_set1_epi8(1); + const m128 addx = _mm_set1_epi8((char)(u8)(INT8_MAX + 1 - from - nr)); + const m128 cmpx = _mm_set1_epi8((char)(u8)(INT8_MAX - nr)); + for (u32 i = 0; i < leaf->nr_keys; i += sizeof(m128)) { + const m128 sv = _mm_load_si128((m128 *)(leaf->ss+i)); + const m128 add1 = _mm_and_si128(_mm_cmpgt_epi8(_mm_add_epi8(sv, addx), cmpx), ones); + _mm_store_si128((m128 *)(leaf->ss+i), _mm_add_epi8(sv, add1)); + } +#endif // __AVX2__ +#elif defined(__aarch64__) // __x86_64__ + // aarch64 + const m128 subx = vdupq_n_u8((u8)from); + const m128 cmpx = vdupq_n_u8((u8)nr); + for (u32 i = 0; i < leaf->nr_keys; i += sizeof(m128)) { + const m128 sv = 
vld1q_u8(leaf->ss+i); + const m128 add1 = vshrq_n_u8(vcltq_u8(vsubq_u8(sv, subx), cmpx), 7); + vst1q_u8(leaf->ss+i, vaddq_u8(sv, add1)); + } +#endif // __x86_64__ +} + + static void +wormleaf_shift_dec(struct wormleaf * const leaf, const u32 to, const u32 from, const u32 nr) +{ + debug_assert(to == (from-1)); + struct entry13 * const hs = leaf->hs; + memmove(&(hs[to]), &(hs[from]), sizeof(hs[0]) * nr); + +#if defined(__x86_64__) + // TODO: avx512 +#if defined(__AVX2__) + const m256 ones = _mm256_set1_epi8(1); + const m256 addx = _mm256_set1_epi8((char)(u8)(INT8_MAX + 1 - from - nr)); + const m256 cmpx = _mm256_set1_epi8((char)(u8)(INT8_MAX - nr)); + for (u32 i = 0; i < leaf->nr_keys; i += sizeof(m256)) { + const m256 sv = _mm256_load_si256((m256 *)(leaf->ss+i)); + const m256 add1 = _mm256_and_si256(_mm256_cmpgt_epi8(_mm256_add_epi8(sv, addx), cmpx), ones); + _mm256_store_si256((m256 *)(leaf->ss+i), _mm256_sub_epi8(sv, add1)); + } +#else // SSE4.2 + const m128 ones = _mm_set1_epi8(1); + const m128 addx = _mm_set1_epi8((char)(u8)(INT8_MAX + 1 - from - nr)); + const m128 cmpx = _mm_set1_epi8((char)(u8)(INT8_MAX - nr)); + for (u32 i = 0; i < leaf->nr_keys; i += 16) { + const m128 sv = _mm_load_si128((m128 *)(leaf->ss+i)); + const m128 add1 = _mm_and_si128(_mm_cmpgt_epi8(_mm_add_epi8(sv, addx), cmpx), ones); + _mm_store_si128((m128 *)(leaf->ss+i), _mm_sub_epi8(sv, add1)); + } +#endif // __AVX2__ +#elif defined(__aarch64__) // __x86_64__ + // aarch64 + const m128 subx = vdupq_n_u8((u8)from); + const m128 cmpx = vdupq_n_u8((u8)nr); + for (u32 i = 0; i < leaf->nr_keys; i += sizeof(m128)) { + const m128 sv = vld1q_u8(leaf->ss+i); + const m128 add1 = vshrq_n_u8(vcltq_u8(vsubq_u8(sv, subx), cmpx), 7); + vst1q_u8(leaf->ss+i, vsubq_u8(sv, add1)); + } +#endif // __x86_64__ +} + +// insert hs and also shift ss + static u32 +wormleaf_insert_hs(struct wormleaf * const leaf, const struct entry13 e) +{ + struct entry13 * const hs = leaf->hs; + const u16 pkey = e.e1; + const u32 i0 = 
pkey / WH_HDIV; + if (hs[i0].e1 == 0) { // insert + hs[i0] = e; + return i0; + } + + // find left-most insertion point + u32 i = i0; + while (i && hs[i-1].e1 && (hs[i-1].e1 >= pkey)) + i--; + while ((i < WH_KPN) && hs[i].e1 && (hs[i].e1 < pkey)) // stop at >= or empty + i++; + const u32 il = --i; // i in [0, KPN] + + // find left empty slot + if (i > (i0 - 1)) + i = i0 - 1; + while ((i < WH_KPN) && hs[i].e1) + i--; + const u32 el = i; // el < i0 or el is invalid (>= KPN) + + // find right-most insertion point. + i = il + 1; + while ((i < WH_KPN) && hs[i].e1 && (hs[i].e1 == pkey)) + i++; + const u32 ir = i; // ir >= il, in [0, KPN] + + // find right empty slot + if (i < (i0 + 1)) + i = i0 + 1; + while ((i < WH_KPN) && hs[i].e1) + i++; + const u32 er = i; // er > i0 or el is invalid (>= KPN) + + // el <= il < ir <= er (if < WH_KPN) + const u32 dl = (el < WH_KPN) ? (il - el) : WH_KPN; + const u32 dr = (er < WH_KPN) ? (er - ir) : WH_KPN; + if (dl <= dr) { // push left + debug_assert(dl < WH_KPN); + if (dl) + wormleaf_shift_dec(leaf, el, el+1, dl); + hs[il] = e; + return il; + } else { + debug_assert(dr < WH_KPN); + if (dr) + wormleaf_shift_inc(leaf, ir+1, ir, dr); + hs[ir] = e; + return ir; + } +} + + static void +wormleaf_insert_e13(struct wormleaf * const leaf, const struct entry13 e) +{ + // insert to hs and fix all existing is + const u32 ih = wormleaf_insert_hs(leaf, e); + debug_assert(ih < WH_KPN); + // append the new is + leaf->ss[leaf->nr_keys] = (u8)ih; + // fix nr + leaf->nr_keys++; +} + + static void +wormleaf_insert(struct wormleaf * const leaf, const struct kv * const new) +{ + debug_assert(new->hash == kv_crc32c_extend(kv_crc32c(new->kv, new->klen))); + debug_assert(leaf->nr_keys < WH_KPN); + + // insert + const struct entry13 e = entry13(wormhole_pkey(new->hashlo), ptr_to_u64(new)); + const u32 nr0 = leaf->nr_keys; + wormleaf_insert_e13(leaf, e); + + // optimize for seq insertion + if (nr0 == leaf->nr_sorted) { + if (nr0) { + const struct kv * const kvn 
= wormleaf_kv_at_is(leaf, nr0 - 1); + if (kv_compare(new, kvn) > 0) + leaf->nr_sorted = nr0 + 1; + } else { + leaf->nr_sorted = 1; + } + } +} + + static void +wormleaf_pull_ih(struct wormleaf * const leaf, const u32 ih) +{ + struct entry13 * const hs = leaf->hs; + // try left + u32 i = ih - 1; + while ((i < WH_KPN) && hs[i].e1 && ((hs[i].e1 / WH_HDIV) > i)) + i--; + + if ((++i) < ih) { + wormleaf_shift_inc(leaf, i+1, i, ih - i); + leaf->hs[i].v64 = 0; + return; + } + + // try right + i = ih + 1; + while ((i < WH_KPN) && hs[i].e1 && ((hs[i].e1 / WH_HDIV) < i)) + i++; + + if ((--i) > ih) { + wormleaf_shift_dec(leaf, ih, ih+1, i - ih); + hs[i].v64 = 0; + } + // hs[ih] may still be 0 +} + +// internal only + static struct kv * +wormleaf_remove(struct wormleaf * const leaf, const u32 ih, const u32 is) +{ + // ss + leaf->ss[is] = leaf->ss[leaf->nr_keys - 1]; + if (leaf->nr_sorted > is) + leaf->nr_sorted = is; + + // ret + struct kv * const victim = wormleaf_kv_at_ih(leaf, ih); + // hs + leaf->hs[ih].v64 = 0; + leaf->nr_keys--; + // use magnet + wormleaf_pull_ih(leaf, ih); + return victim; +} + +// remove key from leaf but do not call free + static struct kv * +wormleaf_remove_ih(struct wormleaf * const leaf, const u32 ih) +{ + // remove from ss + const u32 is = wormleaf_search_is(leaf, (u8)ih); + debug_assert(is < leaf->nr_keys); + return wormleaf_remove(leaf, ih, is); +} + + static struct kv * +wormleaf_remove_is(struct wormleaf * const leaf, const u32 is) +{ + return wormleaf_remove(leaf, leaf->ss[is], is); +} + +// for delr (delete-range) + static void +wormleaf_delete_range(struct wormhole * const map, struct wormleaf * const leaf, + const u32 i0, const u32 end) +{ + debug_assert(leaf->nr_keys == leaf->nr_sorted); + for (u32 i = end; i > i0; i--) { + const u32 ir = i - 1; + struct kv * const victim = wormleaf_remove_is(leaf, ir); + map->mm.free(victim, map->mm.priv); + } +} + +// return the old kv; the caller should free the old kv + static struct kv * 
+wormleaf_update(struct wormleaf * const leaf, const u32 ih, const struct kv * const new) +{ + debug_assert(new->hash == kv_crc32c_extend(kv_crc32c(new->kv, new->klen))); + // search entry in ss (is) + struct kv * const old = wormleaf_kv_at_ih(leaf, ih); + debug_assert(old); + + entry13_update_e3(&leaf->hs[ih], (u64)new); + return old; +} +// }}} leaf-write + +// leaf-split {{{ +// It only works correctly in cut_search +// quickly tell if a cut between k1 and k2 can achieve a specific anchor-key length + static bool +wormhole_split_cut_alen_check(const u32 alen, const struct kv * const k1, const struct kv * const k2) +{ + debug_assert(k2->klen >= alen); + return (k1->klen < alen) || (k1->kv[alen - 1] != k2->kv[alen - 1]); +} + +// return the number of keys that should go to leaf1 +// assert(r > 0 && r <= nr_keys) +// (1) r < is1, anchor key is ss[r-1]:ss[r] +// (2) r == is1: anchor key is ss[r-1]:new +// (3) r == is1+1: anchor key is new:ss[r-1] (ss[r-1] is the ss[r] on the logically sorted array) +// (4) r > is1+1: anchor key is ss[r-2]:ss[r-1] (ss[r-2] is the [r-1] on the logically sorted array) +// edge cases: +// (case 2) is1 == nr_keys: r = nr_keys; ss[r-1]:new +// (case 3) is1 == 0, r == 1; new:ss[0] +// return 1..WH_KPN + static u32 +wormhole_split_cut_search1(struct wormleaf * const leaf, u32 l, u32 h, const u32 is1, const struct kv * const new) +{ + debug_assert(leaf->nr_keys == leaf->nr_sorted); + debug_assert(leaf->nr_keys); + debug_assert(l < h && h <= leaf->nr_sorted); + + const struct kv * const kl0 = wormleaf_kv_at_is1(leaf, l, is1, new); + const struct kv * const kh0 = wormleaf_kv_at_is1(leaf, h, is1, new); + const u32 alen = kv_key_lcp(kl0, kh0) + 1; + if (unlikely(alen > UINT16_MAX)) + return WH_KPN2; + + const u32 target = leaf->next ? 
WH_MID : WH_KPN_MRG; + while ((l + 1) < h) { + const u32 m = (l + h + 1) >> 1; + if (m <= target) { // try right + const struct kv * const k1 = wormleaf_kv_at_is1(leaf, m, is1, new); + const struct kv * const k2 = wormleaf_kv_at_is1(leaf, h, is1, new); + if (wormhole_split_cut_alen_check(alen, k1, k2)) + l = m; + else + h = m; + } else { // try left + const struct kv * const k1 = wormleaf_kv_at_is1(leaf, l, is1, new); + const struct kv * const k2 = wormleaf_kv_at_is1(leaf, m, is1, new); + if (wormhole_split_cut_alen_check(alen, k1, k2)) + h = m; + else + l = m; + } + } + return h; +} + + static void +wormhole_split_leaf_move1(struct wormleaf * const leaf1, struct wormleaf * const leaf2, + const u32 cut, const u32 is1, const struct kv * const new) +{ + const u32 nr_keys = leaf1->nr_keys; + const struct entry13 e1 = entry13(wormhole_pkey(new->hashlo), ptr_to_u64(new)); + struct entry13 es[WH_KPN]; + + if (cut <= is1) { // e1 goes to leaf2 + // leaf2 + for (u32 i = cut; i < is1; i++) + wormleaf_insert_e13(leaf2, leaf1->hs[leaf1->ss[i]]); + + wormleaf_insert_e13(leaf2, e1); + + for (u32 i = is1; i < nr_keys; i++) + wormleaf_insert_e13(leaf2, leaf1->hs[leaf1->ss[i]]); + + // leaf1 + for (u32 i = 0; i < cut; i++) + es[i] = leaf1->hs[leaf1->ss[i]]; + + } else { // e1 goes to leaf1 + // leaf2 + for (u32 i = cut - 1; i < nr_keys; i++) + wormleaf_insert_e13(leaf2, leaf1->hs[leaf1->ss[i]]); + + // leaf1 + for (u32 i = 0; i < is1; i++) + es[i] = leaf1->hs[leaf1->ss[i]]; + + es[is1] = e1; + + for (u32 i = is1 + 1; i < cut; i++) + es[i] = leaf1->hs[leaf1->ss[i - 1]]; + } + + leaf2->nr_sorted = leaf2->nr_keys; + + memset(leaf1->hs, 0, sizeof(leaf1->hs[0]) * WH_KPN); + leaf1->nr_keys = 0; + for (u32 i = 0; i < cut; i++) + wormleaf_insert_e13(leaf1, es[i]); + leaf1->nr_sorted = cut; + debug_assert((leaf1->nr_sorted + leaf2->nr_sorted) == (nr_keys + 1)); +} + +// create an anchor for leaf-split + static struct kv * +wormhole_split_alloc_anchor(const struct kv * const key1, const 
struct kv * const key2) +{ + const u32 alen = kv_key_lcp(key1, key2) + 1; + debug_assert(alen <= key2->klen); + + struct kv * const anchor = wormhole_alloc_akey(alen); + if (anchor) + kv_refill(anchor, key2->kv, alen, NULL, 0); + return anchor; +} + +// leaf1 is locked +// split leaf1 into leaf1+leaf2; insert new into leaf1 or leaf2, return leaf2 + static struct wormleaf * +wormhole_split_leaf(struct wormhole * const map, struct wormleaf * const leaf1, struct kv * const new) +{ + wormleaf_sync_sorted(leaf1); + struct kref kref_new; + kref_ref_kv(&kref_new, new); + const u32 is1 = wormleaf_search_ss(leaf1, &kref_new); // new should be inserted at [is1] + const u32 cut = wormhole_split_cut_search1(leaf1, 0, leaf1->nr_keys, is1, new); + if (unlikely(cut == WH_KPN2)) + return NULL; + + // anchor of leaf2 + debug_assert(cut && (cut <= leaf1->nr_keys)); + const struct kv * const key1 = wormleaf_kv_at_is1(leaf1, cut - 1, is1, new); + const struct kv * const key2 = wormleaf_kv_at_is1(leaf1, cut, is1, new); + struct kv * const anchor2 = wormhole_split_alloc_anchor(key1, key2); + if (unlikely(anchor2 == NULL)) // anchor alloc failed + return NULL; + + // create leaf2 with anchor2 + struct wormleaf * const leaf2 = wormleaf_alloc(map, leaf1, leaf1->next, anchor2); + if (unlikely(leaf2 == NULL)) { + wormhole_free_akey(anchor2); + return NULL; + } + + // split_hmap will unlock the leaf nodes; must move now + wormhole_split_leaf_move1(leaf1, leaf2, cut, is1, new); + // leaf1 and leaf2 should be sorted after split + debug_assert(leaf1->nr_keys == leaf1->nr_sorted); + debug_assert(leaf2->nr_keys == leaf2->nr_sorted); + + return leaf2; +} +// }}} leaf-split + +// leaf-merge {{{ +// MERGE is the only operation that deletes a leaf node (leaf2). +// It ALWAYS merges the right node into the left node even if the left is empty. +// This requires both of their writer locks to be acquired. +// This allows iterators to safely probe the next node (but not backwards). 
+// In other words, if either the reader or the writer lock of node X has been acquired: +// X->next (the pointer) cannot be changed by any other thread. +// X->next cannot be deleted. +// But the content in X->next can still be changed. + static bool +wormleaf_merge(struct wormleaf * const leaf1, struct wormleaf * const leaf2) +{ + debug_assert((leaf1->nr_keys + leaf2->nr_keys) <= WH_KPN); + const bool leaf1_sorted = leaf1->nr_keys == leaf1->nr_sorted; + + for (u32 i = 0; i < leaf2->nr_keys; i++) + wormleaf_insert_e13(leaf1, leaf2->hs[leaf2->ss[i]]); + if (leaf1_sorted) + leaf1->nr_sorted += leaf2->nr_sorted; + return true; +} + +// for undoing insertion under split_meta failure; leaf2 is still local +// remove the new key; merge keys in leaf2 into leaf1; free leaf2 + static void +wormleaf_split_undo(struct wormhole * const map, struct wormleaf * const leaf1, + struct wormleaf * const leaf2, struct kv * const new) +{ + if (new) { + const struct entry13 e = entry13(wormhole_pkey(new->hashlo), ptr_to_u64(new)); + const u32 im1 = wormleaf_search_ih(leaf1, e); + if (im1 < WH_KPN) { + (void)wormleaf_remove_ih(leaf1, im1); + } else { // not found in leaf1; search leaf2 + const u32 im2 = wormleaf_search_ih(leaf2, e); + debug_assert(im2 < WH_KPN); + (void)wormleaf_remove_ih(leaf2, im2); + } + } + // this merge must succeed + if (!wormleaf_merge(leaf1, leaf2)) + debug_die(); + // Keep this to avoid triggering false alarm in wormleaf_free + leaf2->leaflock.opaque = 0; + wormleaf_free(map->slab_leaf, leaf2); +} +// }}} leaf-merge + +// get/probe {{{ + struct kv * +wormhole_get(struct wormref * const ref, const struct kref * const key, struct kv * const out) +{ + struct wormleaf * const leaf = wormhole_jump_leaf_read(ref, key); + const u32 i = wormleaf_match_hs(leaf, key); + struct kv * const tmp = (i < WH_KPN) ? 
ref->map->mm.out(wormleaf_kv_at_ih(leaf, i), out) : NULL; + wormleaf_unlock_read(leaf); + return tmp; +} + + struct kv * +whsafe_get(struct wormref * const ref, const struct kref * const key, struct kv * const out) +{ + wormhole_resume(ref); + struct kv * const ret = wormhole_get(ref, key, out); + wormhole_park(ref); + return ret; +} + + struct kv * +whunsafe_get(struct wormhole * const map, const struct kref * const key, struct kv * const out) +{ + struct wormleaf * const leaf = wormhole_jump_leaf(map->hmap, key); + const u32 i = wormleaf_match_hs(leaf, key); + return (i < WH_KPN) ? map->mm.out(wormleaf_kv_at_ih(leaf, i), out) : NULL; +} + + bool +wormhole_probe(struct wormref * const ref, const struct kref * const key) +{ + struct wormleaf * const leaf = wormhole_jump_leaf_read(ref, key); + const u32 i = wormleaf_match_hs(leaf, key); + wormleaf_unlock_read(leaf); + return i < WH_KPN; +} + + bool +whsafe_probe(struct wormref * const ref, const struct kref * const key) +{ + wormhole_resume(ref); + const bool r = wormhole_probe(ref, key); + wormhole_park(ref); + return r; +} + + bool +whunsafe_probe(struct wormhole * const map, const struct kref * const key) +{ + struct wormleaf * const leaf = wormhole_jump_leaf(map->hmap, key); + return wormleaf_match_hs(leaf, key) < WH_KPN; +} +// }}} get/probe + +// meta-split {{{ +// duplicate from meta1; only has one bit but will soon add a new bit + static struct wormmeta * +wormmeta_expand(struct wormhmap * const hmap, struct wormmeta * const meta1) +{ + struct wormmeta * const meta2 = slab_alloc_unsafe(hmap->slab2); + if (meta2 == NULL) + return NULL; + + memcpy(meta2, meta1, sizeof(*meta1)); + for (u32 i = 0; i < WH_BMNR; i++) + meta2->bitmap[i] = 0; + const u32 bitmin = wormmeta_bitmin_load(meta1); + debug_assert(bitmin == wormmeta_bitmax_load(meta1)); + debug_assert(bitmin < WH_FO); + // set the only bit + meta2->bitmap[bitmin >> 6u] |= (1lu << (bitmin & 0x3fu)); + + wormhmap_replace(hmap, meta1, meta2); + 
slab_free_unsafe(hmap->slab1, meta1); + return meta2; +} + + static struct wormmeta * +wormmeta_bm_set_helper(struct wormhmap * const hmap, struct wormmeta * const meta, const u32 id) +{ + debug_assert(id < WH_FO); + const u32 bitmin = wormmeta_bitmin_load(meta); + const u32 bitmax = wormmeta_bitmax_load(meta); + if (bitmin < bitmax) { // already in full size + wormmeta_bm_set(meta, id); + return meta; + } else if (id == bitmin) { // do nothing + return meta; + } else if (bitmin == WH_FO) { // add the first bit + wormmeta_bitmin_store(meta, id); + wormmeta_bitmax_store(meta, id); + return meta; + } else { // need to expand + struct wormmeta * const meta2 = wormmeta_expand(hmap, meta); + wormmeta_bm_set(meta2, id); + return meta2; + } +} + +// return true if a new node is created + static void +wormmeta_split_touch(struct wormhmap * const hmap, struct kv * const mkey, + struct wormleaf * const leaf, const u32 alen) +{ + struct wormmeta * meta = wormhmap_get(hmap, mkey); + if (meta) { + if (mkey->klen < alen) + meta = wormmeta_bm_set_helper(hmap, meta, mkey->kv[mkey->klen]); + if (wormmeta_lmost_load(meta) == leaf->next) + wormmeta_lmost_store(meta, leaf); + else if (wormmeta_rmost_load(meta) == leaf->prev) + wormmeta_rmost_store(meta, leaf); + } else { // create new node + const u32 bit = (mkey->klen < alen) ? 
mkey->kv[mkey->klen] : WH_FO; + meta = wormmeta_alloc(hmap, leaf, mkey, alen, bit); + debug_assert(meta); + wormhmap_set(hmap, meta); + } +} + + static void +wormmeta_lpath_update(struct wormhmap * const hmap, const struct kv * const a1, const struct kv * const a2, + struct wormleaf * const lpath) +{ + struct kv * const pbuf = hmap->pbuf; + kv_dup2_key(a2, pbuf); + + // only need to update a2's own branch + u32 i = kv_key_lcp(a1, a2) + 1; + debug_assert(i <= pbuf->klen); + wormhole_prefix(pbuf, i); + while (i < a2->klen) { + debug_assert(i <= hmap->maxplen); + struct wormmeta * const meta = wormhmap_get(hmap, pbuf); + debug_assert(meta); + wormmeta_lpath_store(meta, lpath); + + i++; + wormhole_prefix_inc1(pbuf); + } +} + +// for leaf1, a leaf2 is already linked at its right side. +// this function updates the meta-map by moving leaf1 and hooking leaf2 at correct positions + static void +wormmeta_split(struct wormhmap * const hmap, struct wormleaf * const leaf, + struct kv * const mkey) +{ + // left branches + struct wormleaf * const prev = leaf->prev; + struct wormleaf * const next = leaf->next; + u32 i = next ? 
kv_key_lcp(prev->anchor, next->anchor) : 0; + const u32 alen = leaf->anchor->klen; + + // save klen + const u32 mklen = mkey->klen; + wormhole_prefix(mkey, i); + do { + wormmeta_split_touch(hmap, mkey, leaf, alen); + if (i >= alen) + break; + i++; + wormhole_prefix_inc1(mkey); + } while (true); + + // adjust maxplen; i is the plen of the last _touch() + if (i > hmap->maxplen) + hmap->maxplen = i; + debug_assert(i <= UINT16_MAX); + + // restore klen + mkey->klen = mklen; + + if (next) + wormmeta_lpath_update(hmap, leaf->anchor, next->anchor, leaf); +} + +// all locks will be released before returning + static bool +wormhole_split_meta(struct wormref * const ref, struct wormleaf * const leaf2) +{ + struct kv * const mkey = wormhole_alloc_mkey(leaf2->anchor->klen); + if (unlikely(mkey == NULL)) + return false; + kv_dup2_key(leaf2->anchor, mkey); + + struct wormhole * const map = ref->map; + // metalock + wormhmap_lock(map, ref); + + // check slab reserve + const bool sr = wormhole_slab_reserve(map, mkey->klen); + if (unlikely(!sr)) { + wormhmap_unlock(map); + wormhole_free_mkey(mkey); + return false; + } + + struct wormhmap * const hmap0 = wormhmap_load(map); + struct wormhmap * const hmap1 = wormhmap_switch(map, hmap0); + + // link + struct wormleaf * const leaf1 = leaf2->prev; + leaf1->next = leaf2; + if (leaf2->next) + leaf2->next->prev = leaf2; + + // update versions + const u64 v1 = wormhmap_version_load(hmap0) + 1; + wormleaf_version_store(leaf1, v1); + wormleaf_version_store(leaf2, v1); + wormhmap_version_store(hmap1, v1); + + wormmeta_split(hmap1, leaf2, mkey); + + qsbr_update(&ref->qref, v1); + + // switch hmap + wormhmap_store(map, hmap1); + + wormleaf_unlock_write(leaf1); + wormleaf_unlock_write(leaf2); + + qsbr_wait(map->qsbr, v1); + + wormmeta_split(hmap0, leaf2, mkey); + + wormhmap_unlock(map); + + if (mkey->refcnt == 0) // this is possible + wormhole_free_mkey(mkey); + return true; +} + +// all locks (metalock + leaflocks) will be released before 
returning +// leaf1->lock (write) is already taken + static bool +wormhole_split_insert(struct wormref * const ref, struct wormleaf * const leaf1, + struct kv * const new) +{ + struct wormleaf * const leaf2 = wormhole_split_leaf(ref->map, leaf1, new); + if (unlikely(leaf2 == NULL)) { + wormleaf_unlock_write(leaf1); + return false; + } + + rwlock_lock_write(&(leaf2->leaflock)); + const bool rsm = wormhole_split_meta(ref, leaf2); + if (unlikely(!rsm)) { + // undo insertion & merge; free leaf2 + wormleaf_split_undo(ref->map, leaf1, leaf2, new); + wormleaf_unlock_write(leaf1); + } + return rsm; +} + + static bool +whunsafe_split_meta(struct wormhole * const map, struct wormleaf * const leaf2) +{ + struct kv * const mkey = wormhole_alloc_mkey(leaf2->anchor->klen); + if (unlikely(mkey == NULL)) + return false; + kv_dup2_key(leaf2->anchor, mkey); + + const bool sr = wormhole_slab_reserve(map, mkey->klen); + if (unlikely(!sr)) { + wormhmap_unlock(map); + wormhole_free_mkey(mkey); + return false; + } + + // link + leaf2->prev->next = leaf2; + if (leaf2->next) + leaf2->next->prev = leaf2; + + for (u32 i = 0; i < 2; i++) + if (map->hmap2[i].pmap) + wormmeta_split(&(map->hmap2[i]), leaf2, mkey); + if (mkey->refcnt == 0) // this is possible + wormhole_free_mkey(mkey); + return true; +} + + static bool +whunsafe_split_insert(struct wormhole * const map, struct wormleaf * const leaf1, + struct kv * const new) +{ + struct wormleaf * const leaf2 = wormhole_split_leaf(map, leaf1, new); + if (unlikely(leaf2 == NULL)) + return false; + + const bool rsm = whunsafe_split_meta(map, leaf2); + if (unlikely(!rsm)) // undo insertion, merge, free leaf2 + wormleaf_split_undo(map, leaf1, leaf2, new); + + return rsm; +} +// }}} meta-split + +// meta-merge {{{ +// now it only contains one bit + static struct wormmeta * +wormmeta_shrink(struct wormhmap * const hmap, struct wormmeta * const meta2) +{ + debug_assert(wormmeta_bitmin_load(meta2) == wormmeta_bitmax_load(meta2)); + struct wormmeta * 
const meta1 = slab_alloc_unsafe(hmap->slab1); + if (meta1 == NULL) + return NULL; + + memcpy(meta1, meta2, sizeof(*meta1)); + + wormhmap_replace(hmap, meta2, meta1); + slab_free_unsafe(hmap->slab2, meta2); + return meta1; +} + + static void +wormmeta_bm_clear_helper(struct wormhmap * const hmap, struct wormmeta * const meta, const u32 id) +{ + if (wormmeta_bitmin_load(meta) == wormmeta_bitmax_load(meta)) { + debug_assert(wormmeta_bitmin_load(meta) < WH_FO); + wormmeta_bitmin_store(meta, WH_FO); + wormmeta_bitmax_store(meta, WH_FO); + } else { // has more than 1 bit + wormmeta_bm_clear(meta, id); + if (wormmeta_bitmin_load(meta) == wormmeta_bitmax_load(meta)) + wormmeta_shrink(hmap, meta); + } +} + +// all locks held + static void +wormmeta_merge(struct wormhmap * const hmap, struct wormleaf * const leaf) +{ + // leaf->next is the new next after merge, which can be NULL + struct wormleaf * const prev = leaf->prev; + struct wormleaf * const next = leaf->next; + struct kv * const pbuf = hmap->pbuf; + kv_dup2_key(leaf->anchor, pbuf); + u32 i = (prev && next) ? 
kv_key_lcp(prev->anchor, next->anchor) : 0; + const u32 alen = leaf->anchor->klen; + wormhole_prefix(pbuf, i); + struct wormmeta * parent = NULL; + do { + debug_assert(i <= hmap->maxplen); + struct wormmeta * meta = wormhmap_get(hmap, pbuf); + if (wormmeta_lmost_load(meta) == wormmeta_rmost_load(meta)) { // delete single-child + debug_assert(wormmeta_lmost_load(meta) == leaf); + const u32 bitmin = wormmeta_bitmin_load(meta); + wormhmap_del(hmap, meta); + wormmeta_free(hmap, meta); + if (parent) { + wormmeta_bm_clear_helper(hmap, parent, pbuf->kv[i-1]); + parent = NULL; + } + if (bitmin == WH_FO) // no child + break; + } else { // adjust lmost rmost + if (wormmeta_lmost_load(meta) == leaf) + wormmeta_lmost_store(meta, next); + else if (wormmeta_rmost_load(meta) == leaf) + wormmeta_rmost_store(meta, prev); + parent = meta; + } + + if (i >= alen) + break; + i++; + wormhole_prefix_inc1(pbuf); + } while (true); + + if (next) + wormmeta_lpath_update(hmap, leaf->anchor, next->anchor, prev); +} + +// all locks (metalock + two leaflock) will be released before returning +// merge leaf2 to leaf1, removing all metadata to leaf2 and leaf2 itself + static void +wormhole_meta_merge(struct wormref * const ref, struct wormleaf * const leaf1, + struct wormleaf * const leaf2, const bool unlock_leaf1) +{ + debug_assert(leaf1->next == leaf2); + debug_assert(leaf2->prev == leaf1); + struct wormhole * const map = ref->map; + + wormhmap_lock(map, ref); + + struct wormhmap * const hmap0 = wormhmap_load(map); + struct wormhmap * const hmap1 = wormhmap_switch(map, hmap0); + const u64 v1 = wormhmap_version_load(hmap0) + 1; + + leaf1->next = leaf2->next; + if (leaf2->next) + leaf2->next->prev = leaf1; + + wormleaf_version_store(leaf1, v1); + wormleaf_version_store(leaf2, v1); + wormhmap_version_store(hmap1, v1); + + wormmeta_merge(hmap1, leaf2); + + qsbr_update(&ref->qref, v1); + + // switch hmap + wormhmap_store(map, hmap1); + + if (unlock_leaf1) + wormleaf_unlock_write(leaf1); + 
wormleaf_unlock_write(leaf2); + + qsbr_wait(map->qsbr, v1); + + wormmeta_merge(hmap0, leaf2); + // leaf2 is now safe to be removed + wormleaf_free(map->slab_leaf, leaf2); + wormhmap_unlock(map); +} + +// caller must acquire leaf->wlock and next->wlock +// all locks will be released when this function returns + static bool +wormhole_meta_leaf_merge(struct wormref * const ref, struct wormleaf * const leaf) +{ + struct wormleaf * const next = leaf->next; + debug_assert(next); + + // double check + if ((leaf->nr_keys + next->nr_keys) <= WH_KPN) { + if (wormleaf_merge(leaf, next)) { + wormhole_meta_merge(ref, leaf, next, true); + return true; + } + } + // merge failed but it's fine + wormleaf_unlock_write(leaf); + wormleaf_unlock_write(next); + return false; +} + + static void +whunsafe_meta_leaf_merge(struct wormhole * const map, struct wormleaf * const leaf1, + struct wormleaf * const leaf2) +{ + debug_assert(leaf1->next == leaf2); + debug_assert(leaf2->prev == leaf1); + if (!wormleaf_merge(leaf1, leaf2)) + return; + + leaf1->next = leaf2->next; + if (leaf2->next) + leaf2->next->prev = leaf1; + for (u32 i = 0; i < 2; i++) + if (map->hmap2[i].pmap) + wormmeta_merge(&(map->hmap2[i]), leaf2); + wormleaf_free(map->slab_leaf, leaf2); +} +// }}} meta-merge + +// put {{{ + bool +wormhole_put(struct wormref * const ref, struct kv * const kv) +{ + // we always allocate a new item on SET + // future optimizations may perform in-place update + struct wormhole * const map = ref->map; + struct kv * const new = map->mm.in(kv, map->mm.priv); + if (unlikely(new == NULL)) + return false; + const struct kref kref = kv_kref(new); + + struct wormleaf * const leaf = wormhole_jump_leaf_write(ref, &kref); + // update + const u32 im = wormleaf_match_hs(leaf, &kref); + if (im < WH_KPN) { + struct kv * const old = wormleaf_update(leaf, im, new); + wormleaf_unlock_write(leaf); + map->mm.free(old, map->mm.priv); + return true; + } + + // insert + if (likely(leaf->nr_keys < WH_KPN)) { // just 
insert + wormleaf_insert(leaf, new); + wormleaf_unlock_write(leaf); + return true; + } + + // split_insert changes hmap + // all locks should be released in wormhole_split_insert() + const bool rsi = wormhole_split_insert(ref, leaf, new); + if (!rsi) + map->mm.free(new, map->mm.priv); + return rsi; +} + + bool +whsafe_put(struct wormref * const ref, struct kv * const kv) +{ + wormhole_resume(ref); + const bool r = wormhole_put(ref, kv); + wormhole_park(ref); + return r; +} + + bool +whunsafe_put(struct wormhole * const map, struct kv * const kv) +{ + struct kv * const new = map->mm.in(kv, map->mm.priv); + if (unlikely(new == NULL)) + return false; + const struct kref kref = kv_kref(new); + + struct wormleaf * const leaf = wormhole_jump_leaf(map->hmap, &kref); + // update + const u32 im = wormleaf_match_hs(leaf, &kref); + if (im < WH_KPN) { // overwrite + struct kv * const old = wormleaf_update(leaf, im, new); + map->mm.free(old, map->mm.priv); + return true; + } + + // insert + if (likely(leaf->nr_keys < WH_KPN)) { // just insert + wormleaf_insert(leaf, new); + return true; + } + + // split_insert changes hmap + const bool rsi = whunsafe_split_insert(map, leaf, new); + if (!rsi) + map->mm.free(new, map->mm.priv); + return rsi; +} + + bool +wormhole_merge(struct wormref * const ref, const struct kref * const kref, + kv_merge_func uf, void * const priv) +{ + struct wormhole * const map = ref->map; + struct wormleaf * const leaf = wormhole_jump_leaf_write(ref, kref); + // update + const u32 im = wormleaf_match_hs(leaf, kref); + if (im < WH_KPN) { // update + struct kv * const kv0 = wormleaf_kv_at_ih(leaf, im); + struct kv * const kv = uf(kv0, priv); + if ((kv == kv0) || (kv == NULL)) { // no replacement + wormleaf_unlock_write(leaf); + return true; + } + + struct kv * const new = map->mm.in(kv, map->mm.priv); + if (unlikely(new == NULL)) { // mm error + wormleaf_unlock_write(leaf); + return false; + } + + struct kv * const old = wormleaf_update(leaf, im, new); + 
wormleaf_unlock_write(leaf); + map->mm.free(old, map->mm.priv); + return true; + } + + struct kv * const kv = uf(NULL, priv); + if (kv == NULL) { // nothing to be inserted + wormleaf_unlock_write(leaf); + return true; + } + + struct kv * const new = map->mm.in(kv, map->mm.priv); + if (unlikely(new == NULL)) { // mm error + wormleaf_unlock_write(leaf); + return false; + } + + // insert + if (likely(leaf->nr_keys < WH_KPN)) { // just insert + wormleaf_insert(leaf, new); + wormleaf_unlock_write(leaf); + return true; + } + + // split_insert changes hmap + // all locks should be released in wormhole_split_insert() + const bool rsi = wormhole_split_insert(ref, leaf, new); + if (!rsi) + map->mm.free(new, map->mm.priv); + return rsi; +} + + bool +whsafe_merge(struct wormref * const ref, const struct kref * const kref, + kv_merge_func uf, void * const priv) +{ + wormhole_resume(ref); + const bool r = wormhole_merge(ref, kref, uf, priv); + wormhole_park(ref); + return r; +} + + bool +whunsafe_merge(struct wormhole * const map, const struct kref * const kref, + kv_merge_func uf, void * const priv) +{ + struct wormleaf * const leaf = wormhole_jump_leaf(map->hmap, kref); + // update + const u32 im = wormleaf_match_hs(leaf, kref); + if (im < WH_KPN) { // update + struct kv * const kv0 = wormleaf_kv_at_ih(leaf, im); + struct kv * const kv = uf(kv0, priv); + if ((kv == kv0) || (kv == NULL)) + return true; + + struct kv * const new = map->mm.in(kv, map->mm.priv); + if (unlikely(new == NULL)) + return false; + + struct kv * const old = wormleaf_update(leaf, im, new); + map->mm.free(old, map->mm.priv); + return true; + } + + struct kv * const kv = uf(NULL, priv); + if (kv == NULL) // nothing to be inserted + return true; + + struct kv * const new = map->mm.in(kv, map->mm.priv); + if (unlikely(new == NULL)) // mm error + return false; + + // insert + if (likely(leaf->nr_keys < WH_KPN)) { // just insert + wormleaf_insert(leaf, new); + return true; + } + + // split_insert changes hmap + 
const bool rsi = whunsafe_split_insert(map, leaf, new); + if (!rsi) + map->mm.free(new, map->mm.priv); + return rsi; +} +// }}} put + +// inplace {{{ + bool +wormhole_inpr(struct wormref * const ref, const struct kref * const key, + kv_inp_func uf, void * const priv) +{ + struct wormleaf * const leaf = wormhole_jump_leaf_read(ref, key); + const u32 im = wormleaf_match_hs(leaf, key); + if (im < WH_KPN) { + uf(wormleaf_kv_at_ih(leaf, im), priv); + wormleaf_unlock_read(leaf); + return true; + } else { + uf(NULL, priv); + wormleaf_unlock_read(leaf); + return false; + } +} + + bool +wormhole_inpw(struct wormref * const ref, const struct kref * const key, + kv_inp_func uf, void * const priv) +{ + struct wormleaf * const leaf = wormhole_jump_leaf_write(ref, key); + const u32 im = wormleaf_match_hs(leaf, key); + if (im < WH_KPN) { + uf(wormleaf_kv_at_ih(leaf, im), priv); + wormleaf_unlock_write(leaf); + return true; + } else { + uf(NULL, priv); + wormleaf_unlock_write(leaf); + return false; + } +} + + bool +whsafe_inpr(struct wormref * const ref, const struct kref * const key, + kv_inp_func uf, void * const priv) +{ + wormhole_resume(ref); + const bool r = wormhole_inpr(ref, key, uf, priv); + wormhole_park(ref); + return r; +} + + bool +whsafe_inpw(struct wormref * const ref, const struct kref * const key, + kv_inp_func uf, void * const priv) +{ + wormhole_resume(ref); + const bool r = wormhole_inpw(ref, key, uf, priv); + wormhole_park(ref); + return r; +} + + bool +whunsafe_inp(struct wormhole * const map, const struct kref * const key, + kv_inp_func uf, void * const priv) +{ + struct wormleaf * const leaf = wormhole_jump_leaf(map->hmap, key); + const u32 im = wormleaf_match_hs(leaf, key); + if (im < WH_KPN) { // overwrite + uf(wormleaf_kv_at_ih(leaf, im), priv); + return true; + } else { + uf(NULL, priv); + return false; + } +} +// }}} put + +// del {{{ + static void +wormhole_del_try_merge(struct wormref * const ref, struct wormleaf * const leaf) +{ + struct wormleaf * 
const next = leaf->next; + if (next && ((leaf->nr_keys == 0) || ((leaf->nr_keys + next->nr_keys) < WH_KPN_MRG))) { + // try merge, it may fail if size becomes larger after locking + wormleaf_lock_write(next, ref); + (void)wormhole_meta_leaf_merge(ref, leaf); + // locks are already released; immediately return + } else { + wormleaf_unlock_write(leaf); + } +} + + bool +wormhole_del(struct wormref * const ref, const struct kref * const key) +{ + struct wormleaf * const leaf = wormhole_jump_leaf_write(ref, key); + const u32 im = wormleaf_match_hs(leaf, key); + if (im < WH_KPN) { // found + struct kv * const kv = wormleaf_remove_ih(leaf, im); + wormhole_del_try_merge(ref, leaf); + debug_assert(kv); + // free after releasing locks + struct wormhole * const map = ref->map; + map->mm.free(kv, map->mm.priv); + return true; + } else { + wormleaf_unlock_write(leaf); + return false; + } +} + + bool +whsafe_del(struct wormref * const ref, const struct kref * const key) +{ + wormhole_resume(ref); + const bool r = wormhole_del(ref, key); + wormhole_park(ref); + return r; +} + + static void +whunsafe_del_try_merge(struct wormhole * const map, struct wormleaf * const leaf) +{ + const u32 n0 = leaf->prev ? leaf->prev->nr_keys : WH_KPN; + const u32 n1 = leaf->nr_keys; + const u32 n2 = leaf->next ? 
leaf->next->nr_keys : WH_KPN; + + if ((leaf->prev && (n1 == 0)) || ((n0 + n1) < WH_KPN_MRG)) { + whunsafe_meta_leaf_merge(map, leaf->prev, leaf); + } else if ((leaf->next && (n1 == 0)) || ((n1 + n2) < WH_KPN_MRG)) { + whunsafe_meta_leaf_merge(map, leaf, leaf->next); + } +} + + bool +whunsafe_del(struct wormhole * const map, const struct kref * const key) +{ + struct wormleaf * const leaf = wormhole_jump_leaf(map->hmap, key); + const u32 im = wormleaf_match_hs(leaf, key); + if (im < WH_KPN) { // found + struct kv * const kv = wormleaf_remove_ih(leaf, im); + debug_assert(kv); + + whunsafe_del_try_merge(map, leaf); + map->mm.free(kv, map->mm.priv); + return true; + } + return false; +} + + u64 +wormhole_delr(struct wormref * const ref, const struct kref * const start, + const struct kref * const end) +{ + struct wormleaf * const leafa = wormhole_jump_leaf_write(ref, start); + wormleaf_sync_sorted(leafa); + const u32 ia = wormleaf_seek(leafa, start); + const u32 iaz = end ? wormleaf_seek_end(leafa, end) : leafa->nr_keys; + if (iaz < ia) { // do nothing if end < start + wormleaf_unlock_write(leafa); + return 0; + } + u64 ndel = iaz - ia; + struct wormhole * const map = ref->map; + wormleaf_delete_range(map, leafa, ia, iaz); + if (leafa->nr_keys > ia) { // end hit; done + wormhole_del_try_merge(ref, leafa); + return ndel; + } + + while (leafa->next) { + struct wormleaf * const leafx = leafa->next; + wormleaf_lock_write(leafx, ref); + // two leaf nodes locked + wormleaf_sync_sorted(leafx); + const u32 iz = end ? 
wormleaf_seek_end(leafx, end) : leafx->nr_keys; + ndel += iz; + wormleaf_delete_range(map, leafx, 0, iz); + if (leafx->nr_keys == 0) { // removed all + // must hold leaf1's lock for the next iteration + wormhole_meta_merge(ref, leafa, leafx, false); + } else { // partially removed; done + (void)wormhole_meta_leaf_merge(ref, leafa); + return ndel; + } + } + wormleaf_unlock_write(leafa); + return ndel; +} + + u64 +whsafe_delr(struct wormref * const ref, const struct kref * const start, + const struct kref * const end) +{ + wormhole_resume(ref); + const u64 ret = wormhole_delr(ref, start, end); + wormhole_park(ref); + return ret; +} + + u64 +whunsafe_delr(struct wormhole * const map, const struct kref * const start, + const struct kref * const end) +{ + // first leaf + struct wormhmap * const hmap = map->hmap; + struct wormleaf * const leafa = wormhole_jump_leaf(hmap, start); + wormleaf_sync_sorted(leafa); + // last leaf + struct wormleaf * const leafz = end ? wormhole_jump_leaf(hmap, end) : NULL; + + // select start/end on leafa + const u32 ia = wormleaf_seek(leafa, start); + const u32 iaz = end ? 
wormleaf_seek_end(leafa, end) : leafa->nr_keys; + if (iaz < ia) + return 0; + + wormleaf_delete_range(map, leafa, ia, iaz); + u64 ndel = iaz - ia; + + if (leafa == leafz) { // one node only + whunsafe_del_try_merge(map, leafa); + return ndel; + } + + // 0 or more nodes between leafa and leafz + while (leafa->next != leafz) { + struct wormleaf * const leafx = leafa->next; + ndel += leafx->nr_keys; + for (u32 i = 0; i < leafx->nr_keys; i++) + map->mm.free(wormleaf_kv_at_is(leafx, i), map->mm.priv); + leafx->nr_keys = 0; + leafx->nr_sorted = 0; + whunsafe_meta_leaf_merge(map, leafa, leafx); + } + // delete the smaller keys in leafz + if (leafz) { + wormleaf_sync_sorted(leafz); + const u32 iz = wormleaf_seek_end(leafz, end); + wormleaf_delete_range(map, leafz, 0, iz); + ndel += iz; + whunsafe_del_try_merge(map, leafa); + } + return ndel; +} +// }}} del + +// iter {{{ +// safe iter: safe sort with read-lock acquired +// unsafe iter: allow concurrent seek/skip + static void +wormhole_iter_leaf_sync_sorted(struct wormleaf * const leaf) +{ + if (unlikely(leaf->nr_keys != leaf->nr_sorted)) { + spinlock_lock(&(leaf->sortlock)); + wormleaf_sync_sorted(leaf); + spinlock_unlock(&(leaf->sortlock)); + } +} + + struct wormhole_iter * +wormhole_iter_create(struct wormref * const ref) +{ + struct wormhole_iter * const iter = malloc(sizeof(*iter)); + if (iter == NULL) + return NULL; + iter->ref = ref; + iter->map = ref->map; + iter->leaf = NULL; + iter->is = 0; + return iter; +} + + static void +wormhole_iter_fix(struct wormhole_iter * const iter) +{ + if (!wormhole_iter_valid(iter)) + return; + + while (unlikely(iter->is >= iter->leaf->nr_sorted)) { + struct wormleaf * const next = iter->leaf->next; + if (likely(next != NULL)) { + struct wormref * const ref = iter->ref; + wormleaf_lock_read(next, ref); + wormleaf_unlock_read(iter->leaf); + + wormhole_iter_leaf_sync_sorted(next); + } else { + wormleaf_unlock_read(iter->leaf); + } + iter->leaf = next; + iter->is = 0; + if 
(!wormhole_iter_valid(iter)) + return; + } +} + + void +wormhole_iter_seek(struct wormhole_iter * const iter, const struct kref * const key) +{ + debug_assert(key); + if (iter->leaf) + wormleaf_unlock_read(iter->leaf); + + struct wormleaf * const leaf = wormhole_jump_leaf_read(iter->ref, key); + wormhole_iter_leaf_sync_sorted(leaf); + + iter->leaf = leaf; + iter->is = wormleaf_seek(leaf, key); + wormhole_iter_fix(iter); +} + + void +whsafe_iter_seek(struct wormhole_iter * const iter, const struct kref * const key) +{ + wormhole_resume(iter->ref); + wormhole_iter_seek(iter, key); +} + + bool +wormhole_iter_valid(struct wormhole_iter * const iter) +{ + return iter->leaf != NULL; +} + + static struct kv * +wormhole_iter_current(struct wormhole_iter * const iter) +{ + if (wormhole_iter_valid(iter)) { + debug_assert(iter->is < iter->leaf->nr_sorted); + struct kv * const kv = wormleaf_kv_at_is(iter->leaf, iter->is); + return kv; + } + return NULL; +} + + struct kv * +wormhole_iter_peek(struct wormhole_iter * const iter, struct kv * const out) +{ + struct kv * const kv = wormhole_iter_current(iter); + if (kv) { + struct kv * const ret = iter->map->mm.out(kv, out); + return ret; + } + return NULL; +} + + bool +wormhole_iter_kref(struct wormhole_iter * const iter, struct kref * const kref) +{ + struct kv * const kv = wormhole_iter_current(iter); + if (kv) { + kref_ref_kv(kref, kv); + return true; + } + return false; +} + + bool +wormhole_iter_kvref(struct wormhole_iter * const iter, struct kvref * const kvref) +{ + struct kv * const kv = wormhole_iter_current(iter); + if (kv) { + kvref_ref_kv(kvref, kv); + return true; + } + return false; +} + + void +wormhole_iter_skip1(struct wormhole_iter * const iter) +{ + if (wormhole_iter_valid(iter)) { + iter->is++; + wormhole_iter_fix(iter); + } +} + + void +wormhole_iter_skip(struct wormhole_iter * const iter, const u32 nr) +{ + u32 todo = nr; + while (todo && wormhole_iter_valid(iter)) { + const u32 cap = iter->leaf->nr_sorted - 
iter->is; + const u32 nskip = (cap < todo) ? cap : todo; + iter->is += nskip; + wormhole_iter_fix(iter); + todo -= nskip; + } +} + + struct kv * +wormhole_iter_next(struct wormhole_iter * const iter, struct kv * const out) +{ + struct kv * const ret = wormhole_iter_peek(iter, out); + wormhole_iter_skip1(iter); + return ret; +} + + bool +wormhole_iter_inp(struct wormhole_iter * const iter, kv_inp_func uf, void * const priv) +{ + struct kv * const kv = wormhole_iter_current(iter); + uf(kv, priv); // call uf even if (kv == NULL) + return kv != NULL; +} + + void +wormhole_iter_park(struct wormhole_iter * const iter) +{ + if (iter->leaf) { + wormleaf_unlock_read(iter->leaf); + iter->leaf = NULL; + } +} + + void +whsafe_iter_park(struct wormhole_iter * const iter) +{ + wormhole_iter_park(iter); + wormhole_park(iter->ref); +} + + void +wormhole_iter_destroy(struct wormhole_iter * const iter) +{ + if (iter->leaf) + wormleaf_unlock_read(iter->leaf); + free(iter); +} + + void +whsafe_iter_destroy(struct wormhole_iter * const iter) +{ + wormhole_park(iter->ref); + wormhole_iter_destroy(iter); +} +// }}} iter + +// unsafe iter {{{ + struct wormhole_iter * +whunsafe_iter_create(struct wormhole * const map) +{ + struct wormhole_iter * const iter = malloc(sizeof(*iter)); + if (iter == NULL) + return NULL; + iter->ref = NULL; + iter->map = map; + iter->leaf = NULL; + iter->is = 0; + whunsafe_iter_seek(iter, kref_null()); + return iter; +} + + static void +whunsafe_iter_fix(struct wormhole_iter * const iter) +{ + if (!wormhole_iter_valid(iter)) + return; + + while (unlikely(iter->is >= iter->leaf->nr_sorted)) { + struct wormleaf * const next = iter->leaf->next; + if (likely(next != NULL)) + wormhole_iter_leaf_sync_sorted(next); + iter->leaf = next; + iter->is = 0; + if (!wormhole_iter_valid(iter)) + return; + } +} + + void +whunsafe_iter_seek(struct wormhole_iter * const iter, const struct kref * const key) +{ + struct wormleaf * const leaf = wormhole_jump_leaf(iter->map->hmap, 
key); + wormhole_iter_leaf_sync_sorted(leaf); + + iter->leaf = leaf; + iter->is = wormleaf_seek(leaf, key); + whunsafe_iter_fix(iter); +} + + void +whunsafe_iter_skip1(struct wormhole_iter * const iter) +{ + if (wormhole_iter_valid(iter)) { + iter->is++; + whunsafe_iter_fix(iter); + } +} + + void +whunsafe_iter_skip(struct wormhole_iter * const iter, const u32 nr) +{ + u32 todo = nr; + while (todo && wormhole_iter_valid(iter)) { + const u32 cap = iter->leaf->nr_sorted - iter->is; + const u32 nskip = (cap < todo) ? cap : todo; + iter->is += nskip; + whunsafe_iter_fix(iter); + todo -= nskip; + } +} + + struct kv * +whunsafe_iter_next(struct wormhole_iter * const iter, struct kv * const out) +{ + struct kv * const ret = wormhole_iter_peek(iter, out); + whunsafe_iter_skip1(iter); + return ret; +} + + void +whunsafe_iter_destroy(struct wormhole_iter * const iter) +{ + free(iter); +} +// }}} unsafe iter + +// misc {{{ + struct wormref * +wormhole_ref(struct wormhole * const map) +{ + struct wormref * const ref = malloc(sizeof(*ref)); + if (ref == NULL) + return NULL; + ref->map = map; + if (qsbr_register(map->qsbr, &(ref->qref)) == false) { + free(ref); + return NULL; + } + return ref; +} + + struct wormref * +whsafe_ref(struct wormhole * const map) +{ + struct wormref * const ref = wormhole_ref(map); + if (ref) + wormhole_park(ref); + return ref; +} + + struct wormhole * +wormhole_unref(struct wormref * const ref) +{ + struct wormhole * const map = ref->map; + qsbr_unregister(map->qsbr, &(ref->qref)); + free(ref); + return map; +} + + inline void +wormhole_park(struct wormref * const ref) +{ + qsbr_park(&(ref->qref)); +} + + inline void +wormhole_resume(struct wormref * const ref) +{ + qsbr_resume(&(ref->qref)); +} + + inline void +wormhole_refresh_qstate(struct wormref * const ref) +{ + qsbr_update(&(ref->qref), wormhmap_version_load(wormhmap_load(ref->map))); +} + + static void +wormhole_clean_hmap(struct wormhole * const map) +{ + for (u32 x = 0; x < 2; x++) { + if 
(map->hmap2[x].pmap == NULL) + continue; + struct wormhmap * const hmap = &(map->hmap2[x]); + const u64 nr_slots = ((u64)(hmap->mask)) + 1; + struct wormmbkt * const pmap = hmap->pmap; + for (u64 s = 0; s < nr_slots; s++) { + struct wormmbkt * const slot = &(pmap[s]); + for (u32 i = 0; i < WH_BKT_NR; i++) + if (slot->e[i]) + wormmeta_keyref_release(slot->e[i]); + } + + slab_free_all(hmap->slab1); + slab_free_all(hmap->slab2); + memset(hmap->pmap, 0, hmap->msize); + hmap->maxplen = 0; + } +} + + static void +wormhole_free_leaf_keys(struct wormhole * const map, struct wormleaf * const leaf) +{ + const u32 nr = leaf->nr_keys; + for (u32 i = 0; i < nr; i++) { + void * const curr = wormleaf_kv_at_is(leaf, i); + debug_assert(curr); + map->mm.free(curr, map->mm.priv); + } + wormhole_free_akey(leaf->anchor); +} + + static void +wormhole_clean_helper(struct wormhole * const map) +{ + wormhole_clean_hmap(map); + for (struct wormleaf * leaf = map->leaf0; leaf; leaf = leaf->next) + wormhole_free_leaf_keys(map, leaf); + slab_free_all(map->slab_leaf); + map->leaf0 = NULL; +} + +// unsafe + void +wormhole_clean(struct wormhole * const map) +{ + wormhole_clean_helper(map); + wormhole_create_leaf0(map); +} + + void +wormhole_destroy(struct wormhole * const map) +{ + wormhole_clean_helper(map); + for (u32 i = 0; i < 2; i++) { + struct wormhmap * const hmap = &map->hmap2[i]; + if (hmap->slab1) + slab_destroy(hmap->slab1); + if (hmap->slab2) + slab_destroy(hmap->slab2); + wormhmap_deinit(hmap); + } + qsbr_destroy(map->qsbr); + slab_destroy(map->slab_leaf); + free(map->pbuf); + free(map); +} + + void +wormhole_fprint(struct wormhole * const map, FILE * const out) +{ + const u64 nr_slab_ul = slab_get_nalloc(map->slab_leaf); + const u64 nr_slab_um11 = slab_get_nalloc(map->hmap2[0].slab1); + const u64 nr_slab_um12 = slab_get_nalloc(map->hmap2[0].slab2); + const u64 nr_slab_um21 = map->hmap2[1].slab1 ? 
slab_get_nalloc(map->hmap2[1].slab1) : 0; + const u64 nr_slab_um22 = map->hmap2[1].slab2 ? slab_get_nalloc(map->hmap2[1].slab2) : 0; + fprintf(out, "%s L-SLAB %lu M-SLAB [0] %lu+%lu [1] %lu+%lu\n", + __func__, nr_slab_ul, nr_slab_um11, nr_slab_um12, nr_slab_um21, nr_slab_um22); +} +// }}} misc + +// api {{{ +const struct kvmap_api kvmap_api_wormhole = { + .hashkey = true, + .ordered = true, + .threadsafe = true, + .unique = true, + .refpark = true, + .put = (void *)wormhole_put, + .get = (void *)wormhole_get, + .probe = (void *)wormhole_probe, + .del = (void *)wormhole_del, + .inpr = (void *)wormhole_inpr, + .inpw = (void *)wormhole_inpw, + .merge = (void *)wormhole_merge, + .delr = (void *)wormhole_delr, + .iter_create = (void *)wormhole_iter_create, + .iter_seek = (void *)wormhole_iter_seek, + .iter_valid = (void *)wormhole_iter_valid, + .iter_peek = (void *)wormhole_iter_peek, + .iter_kref = (void *)wormhole_iter_kref, + .iter_kvref = (void *)wormhole_iter_kvref, + .iter_skip1 = (void *)wormhole_iter_skip1, + .iter_skip = (void *)wormhole_iter_skip, + .iter_next = (void *)wormhole_iter_next, + .iter_inp = (void *)wormhole_iter_inp, + .iter_park = (void *)wormhole_iter_park, + .iter_destroy = (void *)wormhole_iter_destroy, + .ref = (void *)wormhole_ref, + .unref = (void *)wormhole_unref, + .park = (void *)wormhole_park, + .resume = (void *)wormhole_resume, + .clean = (void *)wormhole_clean, + .destroy = (void *)wormhole_destroy, + .fprint = (void *)wormhole_fprint, +}; + +const struct kvmap_api kvmap_api_whsafe = { + .hashkey = true, + .ordered = true, + .threadsafe = true, + .unique = true, + .put = (void *)whsafe_put, + .get = (void *)whsafe_get, + .probe = (void *)whsafe_probe, + .del = (void *)whsafe_del, + .inpr = (void *)whsafe_inpr, + .inpw = (void *)whsafe_inpw, + .merge = (void *)whsafe_merge, + .delr = (void *)whsafe_delr, + .iter_create = (void *)wormhole_iter_create, + .iter_seek = (void *)whsafe_iter_seek, + .iter_valid = (void *)wormhole_iter_valid, 
+ .iter_peek = (void *)wormhole_iter_peek, + .iter_kref = (void *)wormhole_iter_kref, + .iter_kvref = (void *)wormhole_iter_kvref, + .iter_skip1 = (void *)wormhole_iter_skip1, + .iter_skip = (void *)wormhole_iter_skip, + .iter_next = (void *)wormhole_iter_next, + .iter_inp = (void *)wormhole_iter_inp, + .iter_park = (void *)whsafe_iter_park, + .iter_destroy = (void *)whsafe_iter_destroy, + .ref = (void *)whsafe_ref, + .unref = (void *)wormhole_unref, + .clean = (void *)wormhole_clean, + .destroy = (void *)wormhole_destroy, + .fprint = (void *)wormhole_fprint, +}; + +const struct kvmap_api kvmap_api_whunsafe = { + .hashkey = true, + .ordered = true, + .unique = true, + .put = (void *)whunsafe_put, + .get = (void *)whunsafe_get, + .probe = (void *)whunsafe_probe, + .del = (void *)whunsafe_del, + .inpr = (void *)whunsafe_inp, + .inpw = (void *)whunsafe_inp, + .merge = (void *)whunsafe_merge, + .delr = (void *)whunsafe_delr, + .iter_create = (void *)whunsafe_iter_create, + .iter_seek = (void *)whunsafe_iter_seek, + .iter_valid = (void *)wormhole_iter_valid, + .iter_peek = (void *)wormhole_iter_peek, + .iter_kref = (void *)wormhole_iter_kref, + .iter_kvref = (void *)wormhole_iter_kvref, + .iter_skip1 = (void *)whunsafe_iter_skip1, + .iter_skip = (void *)whunsafe_iter_skip, + .iter_next = (void *)whunsafe_iter_next, + .iter_inp = (void *)wormhole_iter_inp, + .iter_destroy = (void *)whunsafe_iter_destroy, + .clean = (void *)wormhole_clean, + .destroy = (void *)wormhole_destroy, + .fprint = (void *)wormhole_fprint, +}; + + static void * +wormhole_kvmap_api_create(const char * const name, const struct kvmap_mm * const mm, char ** args) +{ + (void)args; + if ((!strcmp(name, "wormhole")) || (!strcmp(name, "whsafe"))) { + return wormhole_create(mm); + } else if (!strcmp(name, "whunsafe")) { + return whunsafe_create(mm); + } else { + return NULL; + } +} + +__attribute__((constructor)) + static void +wormhole_kvmap_api_init(void) +{ + kvmap_api_register(0, "wormhole", "", 
wormhole_kvmap_api_create, &kvmap_api_wormhole); + kvmap_api_register(0, "whsafe", "", wormhole_kvmap_api_create, &kvmap_api_whsafe); + kvmap_api_register(0, "whunsafe", "", wormhole_kvmap_api_create, &kvmap_api_whunsafe); +} +// }}} api + +// wh {{{ +// Users often don't enjoy dealing with struct kv/kref and just want to use plain buffers. +// No problem! +// This example library shows you how to use Wormhole efficiently in the most intuitive way. + +// Use the worry-free api +static const struct kvmap_api * const wh_api = &kvmap_api_whsafe; + +// You can change the wh_api to kvmap_api_wormhole with a one-line replacement +// The standard Wormhole api can give you ~5% boost; read README for thread-safety tips +//static const struct kvmap_api * const wh_api = &kvmap_api_wormhole; + + struct wormhole * +wh_create(void) +{ + // kvmap_mm_ndf (kv.h) will let the caller allocate the kv when inserting + // This can avoid a memcpy if the caller does not have the data in a struct kv + return wormhole_create(&kvmap_mm_ndf); +} + + struct wormref * +wh_ref(struct wormhole * const wh) +{ + return wh_api->ref(wh); +} + + void +wh_unref(struct wormref * const ref) +{ + (void)wh_api->unref(ref); +} + + void +wh_park(struct wormref * const ref) +{ + if (wh_api->park) + wh_api->park(ref); +} + + void +wh_resume(struct wormref * const ref) +{ + if (wh_api->resume) + wh_api->resume(ref); +} + + void +wh_clean(struct wormhole * const map) +{ + wh_api->clean(map); +} + + void +wh_destroy(struct wormhole * const map) +{ + wh_api->destroy(map); +} + +// Do set/put with explicit kv buffers + bool +wh_put(struct wormref * const ref, const void * const kbuf, const u32 klen, + const void * const vbuf, const u32 vlen) +{ + struct kv * const newkv = kv_create(kbuf, klen, vbuf, vlen); + if (newkv == NULL) + return false; + // must use with kvmap_mm_ndf (see below) + // the newkv will be saved in the Wormhole and freed by Wormhole when upon deletion + return wh_api->put(ref, newkv); +} + +// 
delete a key + bool +wh_del(struct wormref * const ref, const void * const kbuf, const u32 klen) +{ + struct kref kref; + kref_ref_hash32(&kref, kbuf, klen); + return wh_api->del(ref, &kref); +} + +// test if the key exist in Wormhole + bool +wh_probe(struct wormref * const ref, const void * const kbuf, const u32 klen) +{ + struct kref kref; + kref_ref_hash32(&kref, kbuf, klen); + return wh_api->probe(ref, &kref); +} + +// for wh_get() +struct wh_inp_info { void * vbuf_out; u32 * vlen_out; u32 vbuf_size; }; + +// a kv_inp_func; use this to retrieve the KV's data without unnecesary memory copying + static void +wh_inp_copy_value(struct kv * const curr, void * const priv) +{ + if (curr) { // found + struct wh_inp_info * const info = (typeof(info))priv; + // copy the value data out + const u32 copy_size = info->vbuf_size < curr->vlen ? info->vbuf_size : curr->vlen; + memcpy(info->vbuf_out, kv_vptr_c(curr), copy_size); + // copy the vlen out + *info->vlen_out = curr->vlen; + } +} + +// returns a boolean value indicating whether the key is found. 
+// the value's data will be written to *vlen_out and vbuf_out if the key is found +// if vbuf_size < vlen, then only the first vbuf_size bytes is copied to the buffer +// a small vbuf_size can be used to reduce memcpy cost when only the first a few bytes are needed + bool +wh_get(struct wormref * const ref, const void * const kbuf, const u32 klen, + void * const vbuf_out, const u32 vbuf_size, u32 * const vlen_out) +{ + struct kref kref; + kref_ref_hash32(&kref, kbuf, klen); + struct wh_inp_info info = {vbuf_out, vlen_out, vbuf_size}; + // use the inplace read function to get the value if it exists + return wh_api->inpr(ref, &kref, wh_inp_copy_value, &info); +} + + bool +wh_inpr(struct wormref * const ref, const void * const kbuf, const u32 klen, + kv_inp_func uf, void * const priv) +{ + struct kref kref; + kref_ref_hash32(&kref, kbuf, klen); + return wh_api->inpr(ref, &kref, uf, priv); +} + +// inplace update KV's value with a user-defined hook function +// the update should only modify the data in the value; It should not change the value size + bool +wh_inpw(struct wormref * const ref, const void * const kbuf, const u32 klen, + kv_inp_func uf, void * const priv) +{ + struct kref kref; + kref_ref_hash32(&kref, kbuf, klen); + return wh_api->inpw(ref, &kref, uf, priv); +} + +// merge existing KV with updates with a user-defined hook function + bool +wh_merge(struct wormref * const ref, const void * const kbuf, const u32 klen, + kv_merge_func uf, void * const priv) +{ + struct kref kref; + kref_ref_hash32(&kref, kbuf, klen); + return wh_api->merge(ref, &kref, uf, priv); +} + +// remove a range of KVs from start (inclusive) to end (exclusive); [start, end) + u64 +wh_delr(struct wormref * const ref, const void * const kbuf_start, const u32 klen_start, + const void * const kbuf_end, const u32 klen_end) +{ + struct kref kref_start, kref_end; + kref_ref_hash32(&kref_start, kbuf_start, klen_start); + kref_ref_hash32(&kref_end, kbuf_end, klen_end); + return 
wh_api->delr(ref, &kref_start, &kref_end); +} + + struct wormhole_iter * +wh_iter_create(struct wormref * const ref) +{ + return wh_api->iter_create(ref); +} + + void +wh_iter_seek(struct wormhole_iter * const iter, const void * const kbuf, const u32 klen) +{ + struct kref kref; + kref_ref_hash32(&kref, kbuf, klen); + wh_api->iter_seek(iter, &kref); +} + + bool +wh_iter_valid(struct wormhole_iter * const iter) +{ + return wh_api->iter_valid(iter); +} + +// for wh_iter_peek() +// the out ptrs must be provided in pairs; use a pair of NULLs to ignore the key or value +struct wh_iter_inp_info { void * kbuf_out; void * vbuf_out; u32 kbuf_size; u32 vbuf_size; u32 * klen_out; u32 * vlen_out; }; + +// a kv_inp_func; use this to retrieve the KV's data without unnecesary memory copying + static void +inp_copy_kv_cb(struct kv * const curr, void * const priv) +{ + if (curr) { // found + struct wh_iter_inp_info * const info = (typeof(info))priv; + + // copy the key + if (info->kbuf_out) { // it assumes klen_out is also not NULL + // copy the key data out + const u32 clen = curr->klen < info->kbuf_size ? curr->klen : info->kbuf_size; + memcpy(info->kbuf_out, kv_kptr_c(curr), clen); + // copy the klen out + *info->klen_out = curr->klen; + } + + // copy the value + if (info->vbuf_out) { // it assumes vlen_out is also not NULL + // copy the value data out + const u32 clen = curr->vlen < info->vbuf_size ? 
curr->vlen : info->vbuf_size; + memcpy(info->vbuf_out, kv_vptr_c(curr), clen); + // copy the vlen out + *info->vlen_out = curr->vlen; + } + } +} + +// seek is similar to get + bool +wh_iter_peek(struct wormhole_iter * const iter, + void * const kbuf_out, const u32 kbuf_size, u32 * const klen_out, + void * const vbuf_out, const u32 vbuf_size, u32 * const vlen_out) +{ + struct wh_iter_inp_info info = {kbuf_out, vbuf_out, kbuf_size, vbuf_size, klen_out, vlen_out}; + return wh_api->iter_inp(iter, inp_copy_kv_cb, &info); +} + + void +wh_iter_skip1(struct wormhole_iter * const iter) +{ + wh_api->iter_skip1(iter); +} + + void +wh_iter_skip(struct wormhole_iter * const iter, const u32 nr) +{ + wh_api->iter_skip(iter, nr); +} + + bool +wh_iter_inp(struct wormhole_iter * const iter, kv_inp_func uf, void * const priv) +{ + return wh_api->iter_inp(iter, uf, priv); +} + + void +wh_iter_park(struct wormhole_iter * const iter) +{ + wh_api->iter_park(iter); +} + + void +wh_iter_destroy(struct wormhole_iter * const iter) +{ + wh_api->iter_destroy(iter); +} +// }}} wh + +// vim:fdm=marker diff --git a/test/MassTrie-beta/wormhole/wh.h b/test/MassTrie-beta/wormhole/wh.h new file mode 100644 index 00000000..bd17b38d --- /dev/null +++ b/test/MassTrie-beta/wormhole/wh.h @@ -0,0 +1,313 @@ +/* + * Copyright (c) 2016--2021 Wu, Xingbo + * + * All rights reserved. No warranty, explicit or implicit, provided. + */ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +struct wormhole; +struct wormref; + +// wormhole {{{ +// the wh created by wormhole_create() can work with all of safe/unsafe operations. + extern struct wormhole * +wormhole_create(const struct kvmap_mm * const mm); + +// the wh created by whunsafe_create() can only work with the unsafe operations. 
+ extern struct wormhole * +whunsafe_create(const struct kvmap_mm * const mm); + + extern struct kv * +wormhole_get(struct wormref * const ref, const struct kref * const key, struct kv * const out); + + extern bool +wormhole_probe(struct wormref * const ref, const struct kref * const key); + + extern bool +wormhole_put(struct wormref * const ref, struct kv * const kv); + + extern bool +wormhole_merge(struct wormref * const ref, const struct kref * const kref, + kv_merge_func uf, void * const priv); + + extern bool +wormhole_inpr(struct wormref * const ref, const struct kref * const key, + kv_inp_func uf, void * const priv); + + extern bool +wormhole_inpw(struct wormref * const ref, const struct kref * const key, + kv_inp_func uf, void * const priv); + + extern bool +wormhole_del(struct wormref * const ref, const struct kref * const key); + + extern u64 +wormhole_delr(struct wormref * const ref, const struct kref * const start, + const struct kref * const end); + + extern struct wormhole_iter * +wormhole_iter_create(struct wormref * const ref); + + extern void +wormhole_iter_seek(struct wormhole_iter * const iter, const struct kref * const key); + + extern bool +wormhole_iter_valid(struct wormhole_iter * const iter); + + extern struct kv * +wormhole_iter_peek(struct wormhole_iter * const iter, struct kv * const out); + + extern bool +wormhole_iter_kref(struct wormhole_iter * const iter, struct kref * const kref); + + extern bool +wormhole_iter_kvref(struct wormhole_iter * const iter, struct kvref * const kvref); + + extern void +wormhole_iter_skip1(struct wormhole_iter * const iter); + + extern void +wormhole_iter_skip(struct wormhole_iter * const iter, const u32 nr); + + extern struct kv * +wormhole_iter_next(struct wormhole_iter * const iter, struct kv * const out); + + extern bool +wormhole_iter_inp(struct wormhole_iter * const iter, kv_inp_func uf, void * const priv); + + extern void +wormhole_iter_park(struct wormhole_iter * const iter); + + extern void 
+wormhole_iter_destroy(struct wormhole_iter * const iter); + + extern struct wormref * +wormhole_ref(struct wormhole * const map); + + extern struct wormhole * +wormhole_unref(struct wormref * const ref); + + extern void +wormhole_park(struct wormref * const ref); + + extern void +wormhole_resume(struct wormref * const ref); + + extern void +wormhole_refresh_qstate(struct wormref * const ref); + +// clean with more threads + extern void +wormhole_clean_th(struct wormhole * const map, const u32 nr_threads); + + extern void +wormhole_clean(struct wormhole * const map); + + extern void +wormhole_destroy(struct wormhole * const map); + +// safe API (no need to refresh qstate) + + extern struct kv * +whsafe_get(struct wormref * const ref, const struct kref * const key, struct kv * const out); + + extern bool +whsafe_probe(struct wormref * const ref, const struct kref * const key); + + extern bool +whsafe_put(struct wormref * const ref, struct kv * const kv); + + extern bool +whsafe_merge(struct wormref * const ref, const struct kref * const kref, + kv_merge_func uf, void * const priv); + + extern bool +whsafe_inpr(struct wormref * const ref, const struct kref * const key, + kv_inp_func uf, void * const priv); + + extern bool +whsafe_inpw(struct wormref * const ref, const struct kref * const key, + kv_inp_func uf, void * const priv); + + extern bool +whsafe_del(struct wormref * const ref, const struct kref * const key); + + extern u64 +whsafe_delr(struct wormref * const ref, const struct kref * const start, + const struct kref * const end); + +// use wormhole_iter_create + extern void +whsafe_iter_seek(struct wormhole_iter * const iter, const struct kref * const key); + + extern struct kv * +whsafe_iter_peek(struct wormhole_iter * const iter, struct kv * const out); + +// use wormhole_iter_valid +// use wormhole_iter_peek +// use wormhole_iter_kref +// use wormhole_iter_kvref +// use wormhole_iter_skip1 +// use wormhole_iter_skip +// use wormhole_iter_next +// use 
wormhole_iter_inp + + extern void +whsafe_iter_park(struct wormhole_iter * const iter); + + extern void +whsafe_iter_destroy(struct wormhole_iter * const iter); + + extern struct wormref * +whsafe_ref(struct wormhole * const map); + +// use wormhole_unref + +// unsafe API + + extern struct kv * +whunsafe_get(struct wormhole * const map, const struct kref * const key, struct kv * const out); + + extern bool +whunsafe_probe(struct wormhole * const map, const struct kref * const key); + + extern bool +whunsafe_put(struct wormhole * const map, struct kv * const kv); + + extern bool +whunsafe_merge(struct wormhole * const map, const struct kref * const kref, + kv_merge_func uf, void * const priv); + + extern bool +whunsafe_inp(struct wormhole * const map, const struct kref * const key, + kv_inp_func uf, void * const priv); + + extern bool +whunsafe_del(struct wormhole * const map, const struct kref * const key); + + extern u64 +whunsafe_delr(struct wormhole * const map, const struct kref * const start, + const struct kref * const end); + + extern struct wormhole_iter * +whunsafe_iter_create(struct wormhole * const map); + + extern void +whunsafe_iter_seek(struct wormhole_iter * const iter, const struct kref * const key); + +// unsafe iter_valid: use wormhole_iter_valid +// unsafe iter_peek: use wormhole_iter_peek +// unsafe iter_kref: use wormhole_iter_kref + + extern void +whunsafe_iter_skip1(struct wormhole_iter * const iter); + + extern void +whunsafe_iter_skip(struct wormhole_iter * const iter, const u32 nr); + + extern struct kv * +whunsafe_iter_next(struct wormhole_iter * const iter, struct kv * const out); + +// unsafe iter_inp: use wormhole_iter_inp + + extern void +whunsafe_iter_destroy(struct wormhole_iter * const iter); + + extern void +wormhole_fprint(struct wormhole * const map, FILE * const out); + +extern const struct kvmap_api kvmap_api_wormhole; +extern const struct kvmap_api kvmap_api_whsafe; +extern const struct kvmap_api kvmap_api_whunsafe; +// }}} 
wormhole + +// wh {{{ + extern struct wormhole * +wh_create(void); + + extern struct wormref * +wh_ref(struct wormhole * const wh); + + extern void +wh_unref(struct wormref * const ref); + + extern void +wh_park(struct wormref * const ref); + + extern void +wh_resume(struct wormref * const ref); + + extern void +wh_clean(struct wormhole * const map); + + extern void +wh_destroy(struct wormhole * const map); + + extern bool +wh_put(struct wormref * const ref, const void * const kbuf, const u32 klen, + const void * const vbuf, const u32 vlen); + + extern bool +wh_del(struct wormref * const ref, const void * const kbuf, const u32 klen); + + extern bool +wh_probe(struct wormref * const ref, const void * const kbuf, const u32 klen); + + extern bool +wh_get(struct wormref * const ref, const void * const kbuf, const u32 klen, + void * const vbuf_out, const u32 vbuf_size, u32 * const vlen_out); + + extern bool +wh_inpr(struct wormref * const ref, const void * const kbuf, const u32 klen, + kv_inp_func uf, void * const priv); + + extern bool +wh_inpw(struct wormref * const ref, const void * const kbuf, const u32 klen, + kv_inp_func uf, void * const priv); + + extern bool +wh_merge(struct wormref * const ref, const void * const kbuf, const u32 klen, + kv_merge_func uf, void * const priv); + + extern u64 +wh_delr(struct wormref * const ref, const void * const kbuf_start, const u32 klen_start, + const void * const kbuf_end, const u32 klen_end); + + extern struct wormhole_iter * +wh_iter_create(struct wormref * const ref); + + extern void +wh_iter_seek(struct wormhole_iter * const iter, const void * const kbuf, const u32 klen); + + extern bool +wh_iter_valid(struct wormhole_iter * const iter); + + extern bool +wh_iter_peek(struct wormhole_iter * const iter, + void * const kbuf_out, const u32 kbuf_size, u32 * const klen_out, + void * const vbuf_out, const u32 vbuf_size, u32 * const vlen_out); + + extern void +wh_iter_skip1(struct wormhole_iter * const iter); + + extern void 
+wh_iter_skip(struct wormhole_iter * const iter, const u32 nr); + + extern bool +wh_iter_inp(struct wormhole_iter * const iter, kv_inp_func uf, void * const priv); + + extern void +wh_iter_park(struct wormhole_iter * const iter); + + extern void +wh_iter_destroy(struct wormhole_iter * const iter); +// }}} wh + +#ifdef __cplusplus +} +#endif +// vim:fdm=marker diff --git a/test/MassTrie-beta/wormhole/wh.py b/test/MassTrie-beta/wormhole/wh.py new file mode 100644 index 00000000..e744cec8 --- /dev/null +++ b/test/MassTrie-beta/wormhole/wh.py @@ -0,0 +1,192 @@ +#!/usr/bin/python3 + +# +# Copyright (c) 2016--2021 Wu, Xingbo +# +# All rights reserved. No warranty, explicit or implicit, provided. +# + +import msgpack +from ctypes import * # CDLL and c_xxx types + +# libwh {{{ +# Change this path when necessary +libwh = CDLL("./libwh.so") + +# create +libwh.wh_create.argtypes = [] +libwh.wh_create.restype = c_void_p + +# close (no return value) +libwh.wh_destroy.argtypes = [c_void_p] + +# ref +libwh.wh_ref.argtypes = [c_void_p] +libwh.wh_ref.restype = c_void_p + +# unref +libwh.wh_unref.argtypes = [c_void_p] + +# put +libwh.wh_put.argtypes = [c_void_p, c_char_p, c_uint, c_char_p, c_uint] +libwh.wh_put.restype = c_bool + +# get +libwh.wh_get.argtypes = [c_void_p, c_char_p, c_uint, c_char_p, c_uint, c_void_p] +libwh.wh_get.restype = c_bool + +# probe +libwh.wh_probe.argtypes = [c_void_p, c_char_p, c_uint] +libwh.wh_probe.restype = c_bool + +# del +libwh.wh_del.argtypes = [c_void_p, c_char_p, c_uint] +libwh.wh_del.restype = c_bool + +# iter_create +libwh.wh_iter_create.argtypes = [c_void_p] +libwh.wh_iter_create.restype = c_void_p + +# iter_seek +libwh.wh_iter_seek.argtypes = [c_void_p, c_char_p, c_uint] + +# iter_valid +libwh.wh_iter_valid.argtypes = [c_void_p] +libwh.wh_iter_valid.restype = c_bool + +# iter_skip1 +libwh.wh_iter_skip1.argtypes = [c_void_p] + +# iter_skip +libwh.wh_iter_skip.argtypes = [c_void_p, c_uint] + +# iter_peek +libwh.wh_iter_peek.argtypes = 
[c_void_p, c_char_p, c_uint, c_void_p, c_char_p, c_uint, c_void_p] +libwh.wh_iter_peek.restype = c_bool + +# iter_park +libwh.wh_iter_park.argtypes = [c_void_p] + +# iter_destroy +libwh.wh_iter_destroy.argtypes = [c_void_p] +# }}} libwh + +# class {{{ +class Wh: + def __init__(self, maxklen=256, maxvlen=8192): + self.whptr = libwh.wh_create() + self.kbufsz = maxklen + self.vbufsz = maxvlen + + # user must call explicitly + def destroy(self): + libwh.wh_destroy(self.whptr) + + def ref(self): + return WhRef(self.whptr, self.kbufsz, self.vbufsz) + +class WhRef: + def __init__(self, whptr, kbufsz, vbufsz): + self.refptr = libwh.wh_ref(whptr) + self.kbufsz = kbufsz + self.vbufsz = vbufsz + self.vbuf = create_string_buffer(self.vbufsz) + + # user must call explicitly + def unref(self): + libwh.wh_unref(self.refptr) + + def iter(self): + return WhIter(self.refptr, self.kbufsz, self.vbufsz) + + # key: python string; value: any (hierarchical) python object + def put(self, key, value): + binkey = key.encode() + binvalue = msgpack.packb(value) + return libwh.wh_put(self.refptr, binkey, c_uint(len(binkey)), binvalue, c_uint(len(binvalue))) + + # return the value as a python object + def get(self, key): + binkey = key.encode() + vlen = c_uint() + ret = libwh.wh_get(self.refptr, binkey, len(binkey), self.vbuf, self.vbufsz, byref(vlen)) + if ret and vlen.value <= self.vbufsz: + return msgpack.unpackb(self.vbuf.value) + else: + return None + + def delete(self, key): + binkey = key.encode() + return libwh.wh_del(self.refptr, binkey, c_uint(len(binkey))) + + def probe(self, key): + binkey = key.encode() + return libwh.wh_probe(self.refptr, binkey, c_uint(len(binkey))) + +class WhIter: + def __init__(self, refptr, kbufsz, vbufsz): + self.iptr = libwh.wh_iter_create(refptr) + self.kbufsz = kbufsz + self.vbufsz = vbufsz + self.kbuf = create_string_buffer(kbufsz) + self.vbuf = create_string_buffer(vbufsz) + + # user must call explicitly + def destroy(self): + 
libwh.wh_iter_destroy(self.iptr) + + def seek(self, key): + if key is None: + libwh.wh_iter_seek(self.iptr, None, c_uint(0)) + else: + binkey = key.encode() + libwh.wh_iter_seek(self.iptr, binkey, c_uint(len(binkey))) + + def valid(self): + return libwh.wh_iter_valid(self.iptr) + + def skip1(self): + libwh.wh_iter_skip1(self.iptr) + + def skip(self, nr): + libwh.wh_iter_skip(self.iptr, c_uint(nr)) + + # return (key, value) pair or None + def peek(self): + klen = c_uint() + vlen = c_uint() + ret = libwh.wh_iter_peek(self.iptr, self.kbuf, self.kbufsz, byref(klen), self.vbuf, self.vbufsz, byref(vlen)) + if ret and klen.value <= self.kbufsz and vlen.value <= self.vbufsz: + self.kbuf[klen.value] = b'\x00' + return (self.kbuf.value.decode(), klen.value, msgpack.unpackb(self.vbuf.value), vlen.value) + else: + return None + +# }}} class + +# examples +wh1 = Wh(32, 1024) +ref1 = wh1.ref() # take a ref for kv operations + +ref1.put("Hello", "pywh") +ref1.put("key1", "value1") +ref1.put("key2", "value2") +ref1.put("key3", {"xxx":"valuex", "yyy":"valuey"}) +ref1.delete("key2") + +rget = ref1.get("Hello") +print(rget) + +# don't use ref when iterating +iter1 = ref1.iter() +iter1.seek(None) +while iter1.valid(): + r = iter1.peek() + print(r) + iter1.skip1() + +iter1.destroy() # must destroy all iters before unref +ref1.unref() # must unref all refs before close() +wh1.destroy() + +# vim:fdm=marker diff --git a/test/MassTrie-beta/wormhole/wh.strip b/test/MassTrie-beta/wormhole/wh.strip new file mode 100644 index 00000000..e7b3971f --- /dev/null +++ b/test/MassTrie-beta/wormhole/wh.strip @@ -0,0 +1,161 @@ +-K key_size +-K key_size_align +-K kref_compare +-K kref_kv_compare +-K kref_kv_match +-K kref_lcp +-K kref_match +-K kref_null +-K kref_ref_hash32 +-K kref_ref_kv +-K kref_ref_kv_hash32 +-K kref_ref_raw +-K kref_update_hash32 +-K kv_compare +-K kv_compare_ptrs +-K kv_crc32c +-K kv_crc32c_extend +-K kv_create +-K kv_create_kref +-K kv_create_str +-K kv_create_str_str +-K kv_dup 
+-K kv_dup2 +-K kv_dup2_key +-K kv_dup2_key_prefix +-K kv_dup_key +-K kv_key_lcp +-K kv_kptr +-K kv_kptr_c +-K kv_kref +-K kvmap_api_whsafe +-K kvmap_api_whunsafe +-K kvmap_api_wormhole +-K kvmap_dump_keys +-K kvmap_inp_steal_kv +-K kvmap_kv_del +-K kvmap_kv_delr +-K kvmap_kv_get +-K kvmap_kv_inpr +-K kvmap_kv_inpw +-K kvmap_kv_iter_seek +-K kvmap_kv_merge +-K kvmap_kv_probe +-K kvmap_kv_put +-K kvmap_mm_dup +-K kvmap_mm_free_free +-K kvmap_mm_free_noop +-K kvmap_mm_in_dup +-K kvmap_mm_in_noop +-K kvmap_mm_ndf +-K kvmap_mm_out_dup +-K kvmap_mm_out_noop +-K kvmap_raw_del +-K kvmap_raw_get +-K kvmap_raw_inpr +-K kvmap_raw_inpw +-K kvmap_raw_iter_seek +-K kvmap_raw_probe +-K kvmap_ref +-K kvmap_unref +-K kv_match +-K kv_match_full +-K kv_null +-K kv_print +-K kv_qsort +-K kvref_dup2_key +-K kvref_dup2_kv +-K kv_refill +-K kv_refill_hex32 +-K kv_refill_hex64 +-K kv_refill_hex64_klen +-K kv_refill_kref +-K kv_refill_kref_v +-K kv_refill_str +-K kv_refill_str_str +-K kv_refill_u64 +-K kv_refill_value +-K kvref_kv_compare +-K kvref_ref_kv +-K kv_size +-K kv_size_align +-K kv_update_hash +-K kv_vptr +-K kv_vptr_c +-K wh_clean +-K wh_create +-K wh_del +-K wh_delr +-K wh_destroy +-K wh_get +-K wh_inpr +-K wh_inpw +-K wh_iter_create +-K wh_iter_destroy +-K wh_iter_inp +-K wh_iter_park +-K wh_iter_peek +-K wh_iter_seek +-K wh_iter_skip +-K wh_iter_valid +-K wh_merge +-K wh_park +-K wh_probe +-K wh_ref +-K wh_resume +-K whsafe_del +-K whsafe_delr +-K whsafe_get +-K whsafe_inpr +-K whsafe_inpw +-K whsafe_iter_destroy +-K whsafe_iter_park +-K whsafe_iter_seek +-K whsafe_merge +-K whsafe_probe +-K whsafe_ref +-K whsafe_put +-K wh_put +-K wh_unref +-K whunsafe_create +-K whunsafe_del +-K whunsafe_delr +-K whunsafe_get +-K whunsafe_inp +-K whunsafe_iter_create +-K whunsafe_iter_destroy +-K whunsafe_iter_next +-K whunsafe_iter_seek +-K whunsafe_iter_skip +-K whunsafe_merge +-K whunsafe_probe +-K whunsafe_put +-K wormhole_clean +-K wormhole_create +-K wormhole_del +-K wormhole_delr 
+-K wormhole_destroy +-K wormhole_fprint +-K wormhole_get +-K wormhole_inpr +-K wormhole_inpw +-K wormhole_iter_create +-K wormhole_iter_destroy +-K wormhole_iter_inp +-K wormhole_iter_kref +-K wormhole_iter_kvref +-K wormhole_iter_next +-K wormhole_iter_park +-K wormhole_iter_peek +-K wormhole_iter_seek +-K wormhole_iter_skip +-K wormhole_iter_valid +-K wormhole_kvmap_api_create +-K wormhole_merge +-K wormhole_park +-K wormhole_probe +-K wormhole_ref +-K wormhole_refresh_qstate +-K wormhole_resume +-K wormhole_put +-K wormhole_unref diff --git a/test/unit-dboindex.cc b/test/unit-dboindex.cc index 86f5cf6b..f5ddd1dc 100644 --- a/test/unit-dboindex.cc +++ b/test/unit-dboindex.cc @@ -1,781 +1,1671 @@ #include "DB_index.hh" + #include "DB_structs.hh" + #include "DB_params.hh" +#include + + + struct coarse_grained_row { + enum class NamedColumn : int { aa = 0, bb, cc }; + + uint64_t aa; + uint64_t bb; + uint64_t cc; + + coarse_grained_row() : aa(), bb(), cc() {} + + coarse_grained_row(uint64_t a, uint64_t b, uint64_t c) + : aa(a), bb(b), cc(c) {} + }; + + struct key_type { + uint64_t id; + + explicit key_type(uint64_t key) : id(bench::bswap(key)) {} + operator lcdf::Str() const { + return lcdf::Str((const char *)this, sizeof(*this)); + } + }; + + // using example_row from VersionSelector.hh + + namespace bench { + + template <> + struct SplitParams { + using split_type_list = std::tuple; + using layout_type = typename SplitMvObjectBuilder::type; + static constexpr size_t num_splits = std::tuple_size::value; + + static constexpr auto split_builder = std::make_tuple( + [](const coarse_grained_row& in) -> coarse_grained_row { + coarse_grained_row out; + out.aa = in.aa; + out.bb = in.bb; + out.cc = in.cc; + return out; + } + ); + + static constexpr auto split_merger = std::make_tuple( + [](coarse_grained_row* out, const coarse_grained_row& in) -> void { + out->aa = in.aa; + out->bb = in.bb; + out->cc = in.cc; + } + ); + + static constexpr auto map = [](int col_n) -> int { 
+ (void)col_n; + return 0; + }; + }; + + template + class RecordAccessor { + public: + const uint64_t& aa() const { + return impl().aa_impl(); + } + + const uint64_t& bb() const { + return impl().bb_impl(); + } + + const uint64_t& cc() const { + return impl().cc_impl(); + } + + void copy_into(coarse_grained_row* dst) const { + return impl().copy_into_impl(dst); + } + + private: + const A& impl() const { + return *static_cast(this); + } + }; + + template <> + class UniRecordAccessor : public RecordAccessor, coarse_grained_row> { + public: + UniRecordAccessor(const coarse_grained_row* const vptr) : vptr_(vptr) {} + + private: + const uint64_t& aa_impl() const { + return vptr_->aa; + } + + const uint64_t& bb_impl() const { + return vptr_->bb; + } + + const uint64_t& cc_impl() const { + return vptr_->cc; + } + + void copy_into_impl(coarse_grained_row* dst) const { + if (vptr_) { + dst->aa = vptr_->aa; + dst->bb = vptr_->bb; + dst->cc = vptr_->cc; + } + } + + const coarse_grained_row* vptr_; + friend RecordAccessor, coarse_grained_row>; + }; + + template <> + class SplitRecordAccessor : public RecordAccessor, coarse_grained_row> { + public: + static constexpr size_t num_splits = SplitParams::num_splits; + + SplitRecordAccessor(const std::array& vptrs) + : vptr_0_(reinterpret_cast(vptrs[0])) {} + + private: + const uint64_t& aa_impl() const { + return vptr_0_->aa; + } + + const uint64_t& bb_impl() const { + return vptr_0_->bb; + } + + const uint64_t& cc_impl() const { + return vptr_0_->cc; + } + + void copy_into_impl(coarse_grained_row* dst) const { + if (vptr_0_) { + dst->aa = vptr_0_->aa; + dst->bb = vptr_0_->bb; + dst->cc = vptr_0_->cc; + } + } + + const coarse_grained_row* vptr_0_; + + friend RecordAccessor, coarse_grained_row>; + }; + + template <> + struct SplitParams { + using split_type_list = std::tuple; + using layout_type = typename SplitMvObjectBuilder::type; + static constexpr size_t num_splits = std::tuple_size::value; + + static constexpr auto 
split_builder = std::make_tuple( + [](const example_row& in) -> example_row { + example_row out; + out.d_ytd = in.d_ytd; + out.d_payment_cnt = in.d_payment_cnt; + out.d_date = in.d_date; + out.d_tax = in.d_tax; + out.d_next_oid = in.d_next_oid; + return out; + } + ); + + static constexpr auto split_merger = std::make_tuple( + [](example_row* out, const example_row& in) -> void { + out->d_ytd = in.d_ytd; + out->d_payment_cnt = in.d_payment_cnt; + out->d_date = in.d_date; + out->d_tax = in.d_tax; + out->d_next_oid = in.d_next_oid; + } + ); + + static constexpr auto map = [](int col_n) -> int { + (void)col_n; + return 0; + }; + }; + + template + class RecordAccessor { + public: + const uint32_t& d_ytd() const { + return impl().d_ytd_impl(); + } + + const uint32_t& d_payment_cnt() const { + return impl().d_payment_cnt_impl(); + } + + const uint32_t& d_date() const { + return impl().d_date_impl(); + } + + const uint32_t& d_tax() const { + return impl().d_tax_impl(); + } + + const uint32_t& d_next_oid() const { + return impl().d_next_oid_impl(); + } + + void copy_into(example_row* dst) const { + return impl().copy_into_impl(dst); + } + + private: + const A& impl() const { + return *static_cast(this); + } + }; + + template <> + class UniRecordAccessor : public RecordAccessor, example_row> { + public: + UniRecordAccessor(const example_row* const vptr) : vptr_(vptr) {} + + private: + const uint32_t& d_ytd_impl() const { + return vptr_->d_ytd; + } + + const uint32_t& d_payment_cnt_impl() const { + return vptr_->d_payment_cnt; + } + + const uint32_t& d_date_impl() const { + return vptr_->d_date; + } + + const uint32_t& d_tax_impl() const { + return vptr_->d_tax; + } + + const uint32_t& d_next_oid_impl() const { + return vptr_->d_next_oid; + } + + void copy_into_impl(example_row* dst) const { + if (vptr_) { + dst->d_ytd = vptr_->d_ytd; + dst->d_payment_cnt = vptr_->d_payment_cnt; + dst->d_date = vptr_->d_date; + dst->d_tax = vptr_->d_tax; + dst->d_next_oid = vptr_->d_next_oid; 
+ } + } + + const example_row* vptr_; + friend RecordAccessor, example_row>; + }; -template <> + + +template <> + class SplitRecordAccessor : public RecordAccessor, example_row> { + public: + static constexpr size_t num_splits = SplitParams::num_splits; + + SplitRecordAccessor(const std::array& vptrs) + : vptr_0_(reinterpret_cast(vptrs[0])) {} + + private: + const uint32_t& d_ytd_impl() const { + return vptr_0_->d_ytd; + } + + const uint32_t& d_payment_cnt_impl() const { + return vptr_0_->d_payment_cnt; + } + + const uint32_t& d_date_impl() const { + return vptr_0_->d_date; + } + + const uint32_t& d_tax_impl() const { + return vptr_0_->d_tax; + } + + const uint32_t& d_next_oid_impl() const { + return vptr_0_->d_next_oid; + } + + void copy_into_impl(example_row* dst) const { + if (vptr_0_) { + dst->d_ytd = vptr_0_->d_ytd; + dst->d_payment_cnt = vptr_0_->d_payment_cnt; + dst->d_date = vptr_0_->d_date; + dst->d_tax = vptr_0_->d_tax; + dst->d_next_oid = vptr_0_->d_next_oid; + } + } + + const example_row* vptr_0_; + + friend RecordAccessor, example_row>; + }; + + }; // namespace bench + + using CoarseIndex = bench::ordered_index; + using FineIndex = bench::ordered_index; + using access_t = bench::access_t; + using RowAccess = bench::RowAccess; + + using MVIndex = bench::mvcc_ordered_index; + + template + void init_cindex(IndexType& ci) { + for (uint64_t i = 1; i <= 10; ++i) + ci.nontrans_put(key_type(i), coarse_grained_row(i, i, i)); + } + + void init_findex(FineIndex& fi) { + example_row row; + row.d_ytd = 3000; + row.d_tax = 10; + row.d_date = 200; + row.d_next_oid = 100; + row.d_payment_cnt = 50; + + for (uint64_t i = 1; i <= 10; ++i) + fi.nontrans_put(key_type(i), row); + } + + void test_coarse_basic() { + typedef CoarseIndex::NamedColumn nc; + CoarseIndex ci; + ci.thread_init(); + + init_cindex(ci); + + { + TestTransaction t(0); + auto [success, found, row, value] = ci.select_split_row(key_type(1), {{nc::aa, access_t::read}}); + (void) row; + assert(success && 
found); + assert(value.aa() == 1); + assert(t.try_commit()); + } + + { + TestTransaction t(0); + auto [success, found, row, value] = ci.select_split_row(key_type(1), {{nc::aa, access_t::update}}); + (void) row; + assert(success && found); + auto new_row = Sto::tx_alloc(); + value.copy_into(new_row); + new_row->aa = 2; + ci.update_row(row, new_row); + assert(t.try_commit()); + } + + { + TestTransaction t1(1); + auto [success, found, row, value] = ci.select_split_row(key_type(1), {{nc::aa, access_t::read}}); + (void) row; + assert(success && found); + assert(value.aa() == 2); + assert(t1.try_commit()); + } - printf("pass %s\n", __FUNCTION__); + + + //printf("pass %s\n", __FUNCTION__); + } + + void test_coarse_read_my_split() { + typedef CoarseIndex::NamedColumn nc; + CoarseIndex ci; + ci.thread_init(); + + init_cindex(ci); + + { + TestTransaction t(0); + auto [success, found, row, value] = ci.select_split_row(key_type(20), {{nc::aa, access_t::read}}); + (void) row; + (void) value; + assert(success && !found); + for (int i = 0; i < 10; ++i) { + auto r = Sto::tx_alloc(); + new (r) coarse_grained_row(i, i, i); + ci.insert_row(key_type(10 + i), r); + } + assert(t.try_commit()); + } - printf("pass %s\n", __FUNCTION__); + + + //printf("pass %s\n", __FUNCTION__); + } + + void test_coarse_conflict0() { + typedef CoarseIndex::NamedColumn nc; + CoarseIndex ci; + ci.thread_init(); + + init_cindex(ci); + + { + TestTransaction t1(0); + { + auto [success, found, row, value] = ci.select_split_row(key_type(1), {{nc::aa, access_t::read}}); + (void) row; + assert(success && found); + assert(value.aa() == 1); + } + + TestTransaction t2(1); + { + auto [success, found, row, value] = ci.select_split_row(key_type(1), {{nc::aa, access_t::update}}); + assert(success && found); + auto new_row = Sto::tx_alloc(); + value.copy_into(new_row); + new_row->aa = 2; + ci.update_row(row, new_row); + assert(t2.try_commit()); + } + + t1.use(); + assert(!t1.try_commit()); + } + + { + TestTransaction 
t1(0); + coarse_grained_row row_value(100, 100, 100); + { + auto [success, found] = ci.insert_row(key_type(100), &row_value); + assert(success && !found); + } + + TestTransaction t2(0); + { + auto [success, found, row, value] = ci.select_split_row(key_type(100), {{nc::aa, access_t::read}}); + (void) row; + (void) value; + assert(!success || !found); + } + + t1.use(); + assert(t1.try_commit()); + } - printf("pass %s\n", __FUNCTION__); + + + //printf("pass %s\n", __FUNCTION__); + } + + void test_coarse_conflict1() { + typedef CoarseIndex::NamedColumn nc; + CoarseIndex ci; + ci.thread_init(); + + init_cindex(ci); + + { + TestTransaction t1(0); + { + auto [success, found, row, value] = ci.select_split_row(key_type(1), {{nc::aa, access_t::read}}); + (void) row; + assert(success && found); + assert(value.aa() == 1); + } + + TestTransaction t2(1); + { + auto [success, found, row, value] = ci.select_split_row(key_type(1), {{nc::bb, access_t::update}}); + assert(success && found); + auto new_row = Sto::tx_alloc(); + value.copy_into(new_row); + new_row->aa = 2; // Will get installed + new_row->bb = 2; + ci.update_row(row, new_row); + assert(t2.try_commit()); + + t1.use(); + assert(value.aa() == 2); + assert(!t1.try_commit()); // expected coarse-grained behavior + } + } - printf("pass %s\n", __FUNCTION__); + + + //printf("pass %s\n", __FUNCTION__); + } + + void test_fine_conflict0() { + typedef FineIndex::NamedColumn nc; + FineIndex fi; + fi.thread_init(); + + init_findex(fi); + + { + TestTransaction t1(0); + { + auto [success, found, row, value] = fi.select_split_row(key_type(1), {{nc::ytd, access_t::read}}); + (void) row; + assert(success && found); + assert(value.d_ytd() == 3000); + } + + TestTransaction t2(1); + { + auto [success, found, row, value] = fi.select_split_row(key_type(1), {{nc::ytd, access_t::update}}); + assert(success && found); + auto new_row = Sto::tx_alloc(); + value.copy_into(new_row); + new_row->d_ytd += 10; + fi.update_row(row, new_row); + 
assert(t2.try_commit()); + + t1.use(); + assert(value.d_ytd() == 3010); + assert(!t1.try_commit()); + } + } - printf("pass %s\n", __FUNCTION__); + + + //printf("pass %s\n", __FUNCTION__); + } + + void test_fine_conflict1() { + typedef FineIndex::NamedColumn nc; + FineIndex fi; + fi.thread_init(); + + init_findex(fi); + + { + TestTransaction t1(0); + { + auto [success, found, row, value] = fi.select_split_row(key_type(1), {{nc::ytd, access_t::read}}); + (void) row; + assert(success && found); + assert(value.d_ytd() == 3000); + } + + TestTransaction t2(1); + { + auto [success, found, row, value] = fi.select_split_row(key_type(1), {{nc::payment_cnt, access_t::update}}); + assert(success && found); + auto new_row = Sto::tx_alloc(); + value.copy_into(new_row); + new_row->d_ytd += 10; + new_row->d_payment_cnt += 1; + fi.update_row(row, new_row); + assert(t2.try_commit()); + + t1.use(); + assert(value.d_ytd() == 3000); // unspecified modifications are not installed + assert(value.d_payment_cnt() == 51); + assert(!t1.try_commit()); // not able to commit due to hierarchical versions + } + } - printf("pass %s\n", __FUNCTION__); + + + //printf("pass %s\n", __FUNCTION__); + } + + void test_fine_conflict2() { + typedef FineIndex::NamedColumn nc; + FineIndex fi; + fi.thread_init(); + + init_findex(fi); + + { + TestTransaction t1(0); + { + auto [success, found, row, value] = fi.select_split_row(key_type(1), {{nc::tax, access_t::read}}); + (void) row; + assert(success && found); + assert(value.d_tax() == 10); + } + + TestTransaction t2(1); + { + auto [success, found, row, value] = fi.select_split_row(key_type(1), {{nc::ytd, access_t::update}}); + assert(success && found); + auto new_row = Sto::tx_alloc(); + value.copy_into(new_row); + new_row->d_ytd += 10; + new_row->d_payment_cnt += 1; + fi.update_row(row, new_row); + assert(t2.try_commit()); + + t1.use(); + assert(value.d_ytd() == 3010); + assert(value.d_payment_cnt() == 50); // unspecified modifications are not installed + 
assert(t1.try_commit()); // can commit because of fine-grained versions + } + } - printf("pass %s\n", __FUNCTION__); + + + //printf("pass %s\n", __FUNCTION__); + } + + void test_fine_delete0() { + typedef FineIndex::NamedColumn nc; + FineIndex fi; + fi.thread_init(); + + init_findex(fi); + + { + TestTransaction t1(0); + { + auto [success, found] = fi.delete_row(key_type(1)); + assert(success && found); + } + + TestTransaction t2(1); + { + auto [success, found, row, value] = fi.select_split_row(key_type(1), {{nc::tax, access_t::update}}); + (void) row; + assert(success && found); + assert(value.d_tax() == 10); + + auto new_row = Sto::tx_alloc(); + value.copy_into(new_row); + fi.update_row(row, new_row); + } + + assert(t2.try_commit()); + + t1.use(); + assert(!t1.try_commit()); + } + + { + TestTransaction t1(0); + { + auto [success, found] = fi.delete_row(key_type(2)); + assert(success && found); + } + + TestTransaction t2(1); + { + auto [success, found, row, value] = fi.select_split_row(key_type(2), {{nc::tax, access_t::update}}); + (void) row; + assert(success && found); + assert(value.d_tax() == 10); + + auto new_row = Sto::tx_alloc(); + value.copy_into(new_row); + fi.update_row(row, new_row); + } + + assert(t1.try_commit()); + + t2.use(); + assert(!t2.try_commit()); + } - printf("pass %s\n", __FUNCTION__); + + + //printf("pass %s\n", __FUNCTION__); + } + + void test_fine_delete1() { + typedef FineIndex::NamedColumn nc; + FineIndex fi; + fi.thread_init(); + + init_findex(fi); + + { + TestTransaction t1(0); + { + auto [success, found, row, value] = fi.select_split_row(key_type(1), {{nc::tax, access_t::update}}); + (void) row; + assert(success && found); + assert(value.d_tax() == 10); + + auto new_row = Sto::tx_alloc(); + value.copy_into(new_row); + fi.update_row(row, new_row); + } + + TestTransaction t2(1); + { + auto [success, found] = fi.delete_row(key_type(1)); + assert(success && found); + } + + assert(t2.try_commit()); + + t1.use(); + assert(!t1.try_commit()); 
+ } + + { + TestTransaction t1(0); + { + auto [success, found, row, value] = fi.select_split_row(key_type(2), {{nc::tax, access_t::update}}); + (void) row; + assert(success && found); + assert(value.d_tax() == 10); + + auto new_row = Sto::tx_alloc(); + value.copy_into(new_row); + fi.update_row(row, new_row); + } + + TestTransaction t2(1); + { + auto [success, found] = fi.delete_row(key_type(2)); + assert(success && found); + } + + assert(t1.try_commit()); + + t2.use(); + assert(!t2.try_commit()); + } - printf("pass %s\n", __FUNCTION__); + + + //printf("pass %s\n", __FUNCTION__); + } + + void test_mvcc_snapshot() { + typedef CoarseIndex::NamedColumn nc; + MVIndex mi; + mi.thread_init(); + + init_cindex(mi); + + { + TestTransaction t1(0); + { + auto [success, found, row, value] = mi.select_split_row(key_type(1), {{nc::aa, access_t::read}}); + (void) row; + assert(success && found); + assert(value.aa() == 1); + } + + TestTransaction t2(1); + { + auto [success, found, row, value] = mi.select_split_row(key_type(1), {{nc::aa, access_t::update}}); + assert(success && found); + auto new_row = Sto::tx_alloc(); + value.copy_into(new_row); + new_row->aa = 2; + mi.update_row(row, new_row); + assert(t2.try_commit()); + + t1.use(); + assert(t1.try_commit()); + } + } + + { + TestTransaction t1(0); + { + coarse_grained_row row_value(100, 100, 100); + auto [success, found] = mi.insert_row(key_type(100), &row_value); + assert(success && !found); + } + + TestTransaction t2(0); + { + auto [success, found, row, value] = mi.select_split_row(key_type(100), {{nc::aa, access_t::read}}); + (void) row; + (void) value; + assert(!success || !found); + + t1.use(); + assert(t1.try_commit()); + } + } - printf("pass %s\n", __FUNCTION__); + + + //printf("pass %s\n", __FUNCTION__); + } + + int main() { + +auto start = std::chrono::steady_clock::now(); + + for(int i=0;i<1000;i++) + test_coarse_basic(); + + auto end = std::chrono::steady_clock::now(); + + std::cout<<"The average elapsed time for 
test_coarse_basic with masstree is "<< + + std::chrono::duration_cast(end-start).count()/1000<<" ns"<(end-start).count()/1000<<" ns"<(end-start).count()/1000<<" ns"<(end-start).count()/1000<<" ns"<(end-start).count()/1000<<" ns"<(end-start).count()<<" ns"<(end-start).count()<<" ns"<(end-start).count()/1000<<" ns"<(end-start).count()/1000<<" ns"< + +struct MTrie_coarse_grained_row +{ + + enum class NamedColumn : int + { + aa = 0, + bb, + cc + }; + + uint64_t aa; + + uint64_t bb; + + uint64_t cc; + + MTrie_coarse_grained_row() : aa(), bb(), cc() {} + + MTrie_coarse_grained_row(uint64_t a, uint64_t b, uint64_t c) + + : aa(a), bb(b), cc(c) + { + } +}; + +struct key_type +{ + + uint64_t id; + + explicit key_type(uint64_t key) : id(bench::bswap(key)) {} + + operator lcdf::Str() const + { + + return lcdf::Str((const char *)this, sizeof(*this)); + } +}; + +int j = 0; + +// using example_row from VersionSelector.hh + +namespace bench +{ + + template <> + + struct SplitParams + { + + using split_type_list = std::tuple; + + using layout_type = typename SplitMvObjectBuilder::type; + + static constexpr size_t num_splits = std::tuple_size::value; + + static constexpr auto split_builder = std::make_tuple( + + [](const MTrie_coarse_grained_row &in) -> MTrie_coarse_grained_row + { + MTrie_coarse_grained_row out; + + out.aa = in.aa; + + out.bb = in.bb; + + out.cc = in.cc; + + return out; + } + + ); + + static constexpr auto split_merger = std::make_tuple( + + [](MTrie_coarse_grained_row *out, const MTrie_coarse_grained_row &in) -> void + { + out->aa = in.aa; + + out->bb = in.bb; + + out->cc = in.cc; + } + + ); + + static constexpr auto map = [](int col_n) -> int + { + (void)col_n; + + return 0; + }; + }; + + template + + class RecordAccessor + { + + public: + const uint64_t &aa() const + { + + return impl().aa_impl(); + } + + const uint64_t &bb() const + { + + return impl().bb_impl(); + } + + const uint64_t &cc() const + { + + return impl().cc_impl(); + } + + void 
copy_into(MTrie_coarse_grained_row *dst) const + { + + return impl().copy_into_impl(dst); + } + + private: + const A &impl() const + { + + return *static_cast(this); + } + }; + + template <> + + class UniRecordAccessor : public RecordAccessor, MTrie_coarse_grained_row> + { + + public: + UniRecordAccessor(const MTrie_coarse_grained_row *const vptr) : vptr_(vptr) {} + + private: + const uint64_t &aa_impl() const + { + + return vptr_->aa; + } + + const uint64_t &bb_impl() const + { + + return vptr_->bb; + } + + const uint64_t &cc_impl() const + { + + return vptr_->cc; + } + + void copy_into_impl(MTrie_coarse_grained_row *dst) const + { + + if (vptr_) + { + + dst->aa = vptr_->aa; + + dst->bb = vptr_->bb; + + dst->cc = vptr_->cc; + } + } + + const MTrie_coarse_grained_row *vptr_; + + friend RecordAccessor, MTrie_coarse_grained_row>; + }; + + template <> + + class SplitRecordAccessor : public RecordAccessor, MTrie_coarse_grained_row> + { + + public: + static constexpr size_t num_splits = SplitParams::num_splits; + + SplitRecordAccessor(const std::array &vptrs) + + : vptr_0_(reinterpret_cast(vptrs[0])) + { + } + + private: + const uint64_t &aa_impl() const + { + + return vptr_0_->aa; + } + + const uint64_t &bb_impl() const + { + + return vptr_0_->bb; + } + + const uint64_t &cc_impl() const + { + + return vptr_0_->cc; + } + + void copy_into_impl(MTrie_coarse_grained_row *dst) const + { + + if (vptr_0_) + { + + dst->aa = vptr_0_->aa; + + dst->bb = vptr_0_->bb; + + dst->cc = vptr_0_->cc; + } + } + + const MTrie_coarse_grained_row *vptr_0_; + + friend RecordAccessor, MTrie_coarse_grained_row>; + }; + + template <> + + struct SplitParams + { + + using split_type_list = std::tuple; + + using layout_type = typename SplitMvObjectBuilder::type; + + static constexpr size_t num_splits = std::tuple_size::value; + + static constexpr auto split_builder = std::make_tuple( + + [](const example_row &in) -> example_row + { + example_row out; + + out.d_ytd = in.d_ytd; + + out.d_payment_cnt 
= in.d_payment_cnt; + + out.d_date = in.d_date; + + out.d_tax = in.d_tax; + + out.d_next_oid = in.d_next_oid; + + return out; + } + + ); + + static constexpr auto split_merger = std::make_tuple( + + [](example_row *out, const example_row &in) -> void + { + out->d_ytd = in.d_ytd; + + out->d_payment_cnt = in.d_payment_cnt; + + out->d_date = in.d_date; + + out->d_tax = in.d_tax; + + out->d_next_oid = in.d_next_oid; + } + + ); + + static constexpr auto map = [](int col_n) -> int + { + (void)col_n; + + return 0; + }; + }; + + template + + class RecordAccessor + { + + public: + const uint32_t &d_ytd() const + { + + return impl().d_ytd_impl(); + } + + const uint32_t &d_payment_cnt() const + { + + return impl().d_payment_cnt_impl(); + } + + const uint32_t &d_date() const + { + + return impl().d_date_impl(); + } + + const uint32_t &d_tax() const + { + + return impl().d_tax_impl(); + } + + const uint32_t &d_next_oid() const + { + + return impl().d_next_oid_impl(); + } + + void copy_into(example_row *dst) const + { + + return impl().copy_into_impl(dst); + } + + private: + const A &impl() const + { + + return *static_cast(this); + } + }; + + template <> + + class UniRecordAccessor : public RecordAccessor, example_row> + { + + public: + UniRecordAccessor(const example_row *const vptr) : vptr_(vptr) {} + + private: + const uint32_t &d_ytd_impl() const + { + + return vptr_->d_ytd; + } + + const uint32_t &d_payment_cnt_impl() const + { + + return vptr_->d_payment_cnt; + } + + const uint32_t &d_date_impl() const + { + + return vptr_->d_date; + } + + const uint32_t &d_tax_impl() const + { + + return vptr_->d_tax; + } + + const uint32_t &d_next_oid_impl() const + { + + return vptr_->d_next_oid; + } + + void copy_into_impl(example_row *dst) const + { + + if (vptr_) + { + + dst->d_ytd = vptr_->d_ytd; + + dst->d_payment_cnt = vptr_->d_payment_cnt; + + dst->d_date = vptr_->d_date; + + dst->d_tax = vptr_->d_tax; + + dst->d_next_oid = vptr_->d_next_oid; + } + } + + const example_row 
*vptr_; + + friend RecordAccessor, example_row>; + }; + + template <> + + class SplitRecordAccessor : public RecordAccessor, example_row> + { + + public: + static constexpr size_t num_splits = SplitParams::num_splits; + + SplitRecordAccessor(const std::array &vptrs) + + : vptr_0_(reinterpret_cast(vptrs[0])) + { + } + + private: + const uint32_t &d_ytd_impl() const + { + + return vptr_0_->d_ytd; + } + + const uint32_t &d_payment_cnt_impl() const + { + + return vptr_0_->d_payment_cnt; + } + + const uint32_t &d_date_impl() const + { + + return vptr_0_->d_date; + } + + const uint32_t &d_tax_impl() const + { + + return vptr_0_->d_tax; + } + + const uint32_t &d_next_oid_impl() const + { + + return vptr_0_->d_next_oid; + } + + void copy_into_impl(example_row *dst) const + { + + if (vptr_0_) + { + + dst->d_ytd = vptr_0_->d_ytd; + + dst->d_payment_cnt = vptr_0_->d_payment_cnt; + + dst->d_date = vptr_0_->d_date; + + dst->d_tax = vptr_0_->d_tax; + + dst->d_next_oid = vptr_0_->d_next_oid; + } + } + + const example_row *vptr_0_; + + friend RecordAccessor, example_row>; + }; + +}; // namespace bench + +using CoarseIndex = bench::MTrie_ordered_index; + +using FineIndex = bench::MTrie_ordered_index; + +using access_t = bench::access_t; + +using RowAccess = bench::RowAccess; + +using MVIndex = bench::MTrie_mvcc_ordered_index; + +template + +void init_cindex(IndexType &ci) +{ + + for (uint64_t i = 1; i <= 10; ++i) + + ci.nontrans_put(key_type(i), MTrie_coarse_grained_row(i, i, i)); +} + +void init_findex(FineIndex &fi) +{ + + example_row row; + + row.d_ytd = 3000; + + row.d_tax = 10; + + row.d_date = 200; + + row.d_next_oid = 100; + + row.d_payment_cnt = 50; + + for (uint64_t i = 1; i <= 10; ++i) + + fi.nontrans_put(key_type(i), row); +} + +void test_coarse_basic() +{ + + typedef CoarseIndex::NamedColumn nc; + + CoarseIndex ci; + + ci.thread_init(); + + init_cindex(ci); + + { + + TestTransaction t(0); + + auto [success, found, row, value] = ci.select_split_row(key_type(1), {{nc::aa, 
access_t::read}}); + + (void)row; + + assert(success && found); + + // std::cout<<"value().aa() = "<(); + + value.copy_into(new_row); + + new_row->aa = 2; + + ci.update_row(row, new_row); + + assert(t.try_commit()); + } + + { + + TestTransaction t1(1); + + auto [success, found, row, value] = ci.select_split_row(key_type(1), {{nc::aa, access_t::read}}); + + (void)row; + + assert(success && found); + + assert(value.aa() == 2); + + assert(t1.try_commit()); + } + + // printf("pass %s\n", __FUNCTION__); +} + +void test_coarse_read_my_split() +{ + + typedef CoarseIndex::NamedColumn nc; + + CoarseIndex ci; + + ci.thread_init(); + + init_cindex(ci); + + { + + TestTransaction t(0); + + auto [success, found, row, value] = ci.select_split_row(key_type(20), {{nc::aa, access_t::read}}); + + (void)row; + + (void)value; + + // if only one iteration is performed, + // the value shouldn't be found pre-insertion + // if it is run multiple times, the MassTrie acts as a cache + // for quick performances + + if (j == 0) + assert(success && !found); + + for (int i = 0; i < 10; ++i) + { + + auto r = Sto::tx_alloc(); + + new (r) MTrie_coarse_grained_row(i, i, i); + + ci.insert_row(key_type(10 + i), r); + } + + assert(t.try_commit()); + } + + // printf("pass %s\n", __FUNCTION__); +} + +void test_coarse_conflict0() +{ + + typedef CoarseIndex::NamedColumn nc; + + CoarseIndex ci; + + ci.thread_init(); + + init_cindex(ci); + + { + + TestTransaction t1(0); + + { + + auto [success, found, row, value] = ci.select_split_row(key_type(1), {{nc::aa, access_t::read}}); + + (void)row; + + assert(success && found); + + assert(value.aa() == 1); + } + + TestTransaction t2(1); + + { + + auto [success, found, row, value] = ci.select_split_row(key_type(1), {{nc::aa, access_t::update}}); + + assert(success && found); + + auto new_row = Sto::tx_alloc(); + + value.copy_into(new_row); + + new_row->aa = 2; + + ci.update_row(row, new_row); + + assert(t2.try_commit()); + } + + t1.use(); + + 
assert(!t1.try_commit()); + } + + { + + TestTransaction t1(0); + + MTrie_coarse_grained_row row_value(100, 100, 100); + + { + + auto [success, found] = ci.insert_row(key_type(100), &row_value); + // if only one iteration is performed, + // the value shouldn't be found pre-insertion + // if it is run multiple times, the MassTrie acts as a cache + // for quick performances + if (j == 0) + assert(success && !found); + } + + TestTransaction t2(0); + + { + + auto [success, found, row, value] = ci.select_split_row(key_type(100), {{nc::aa, access_t::read}}); + + (void)row; + + (void)value; + + // if only one iteration is performed, + // the value shouldn't be found pre-insertion + // if it is run multiple times, the MassTrie acts as a cache + // for quick performances + if (j == 0) + assert(!success || !found); + } + + t1.use(); + + assert(t1.try_commit()); + } + + // printf("pass %s\n", __FUNCTION__); +} + +void test_coarse_conflict1() +{ + + typedef CoarseIndex::NamedColumn nc; + + CoarseIndex ci; + + ci.thread_init(); + + init_cindex(ci); + + { + + TestTransaction t1(0); + + { + + auto [success, found, row, value] = ci.select_split_row(key_type(1), {{nc::aa, access_t::read}}); + + (void)row; + + assert(success && found); + + assert(value.aa() == 1); + } + + TestTransaction t2(1); + + { + + auto [success, found, row, value] = ci.select_split_row(key_type(1), {{nc::bb, access_t::update}}); + + assert(success && found); + + auto new_row = Sto::tx_alloc(); + + value.copy_into(new_row); + + new_row->aa = 2; // Will get installed + + new_row->bb = 2; + + ci.update_row(row, new_row); + + assert(t2.try_commit()); + + t1.use(); + + assert(value.aa() == 2); + + assert(!t1.try_commit()); // expected coarse-grained behavior + } + } + + // printf("pass %s\n", __FUNCTION__); +} + +void test_fine_conflict0() +{ + + typedef FineIndex::NamedColumn nc; + + FineIndex fi; + + fi.thread_init(); + + init_findex(fi); + + { + + TestTransaction t1(0); + + { + + auto [success, found, row, 
value] = fi.select_split_row(key_type(1), {{nc::ytd, access_t::read}}); + + (void)row; + + assert(success && found); + + assert(value.d_ytd() == 3000); + } + + TestTransaction t2(1); + + { + + auto [success, found, row, value] = fi.select_split_row(key_type(1), {{nc::ytd, access_t::update}}); + + assert(success && found); + + auto new_row = Sto::tx_alloc(); + + value.copy_into(new_row); + + new_row->d_ytd += 10; + + fi.update_row(row, new_row); + + assert(t2.try_commit()); + + t1.use(); + + assert(value.d_ytd() == 3010); + + assert(!t1.try_commit()); + } + } + + // printf("pass %s\n", __FUNCTION__); +} + +void test_fine_conflict1() +{ + + typedef FineIndex::NamedColumn nc; + + FineIndex fi; + + fi.thread_init(); + + init_findex(fi); + + { + + TestTransaction t1(0); + + { + + auto [success, found, row, value] = fi.select_split_row(key_type(1), {{nc::ytd, access_t::read}}); + + (void)row; + + assert(success && found); + + assert(value.d_ytd() == 3000); + } + + TestTransaction t2(1); + + { + + auto [success, found, row, value] = fi.select_split_row(key_type(1), {{nc::payment_cnt, access_t::update}}); + + assert(success && found); + + auto new_row = Sto::tx_alloc(); + + value.copy_into(new_row); + + new_row->d_ytd += 10; + + new_row->d_payment_cnt += 1; + + fi.update_row(row, new_row); + + assert(t2.try_commit()); + + t1.use(); + + assert(value.d_ytd() == 3000); // unspecified modifications are not installed + + assert(value.d_payment_cnt() == 51); + + assert(!t1.try_commit()); // not able to commit due to hierarchical versions + } + } + + // printf("pass %s\n", __FUNCTION__); +} + +void test_fine_conflict2() +{ + + typedef FineIndex::NamedColumn nc; + + FineIndex fi; + + fi.thread_init(); + + init_findex(fi); + + { + + TestTransaction t1(0); + + { + + auto [success, found, row, value] = fi.select_split_row(key_type(1), {{nc::tax, access_t::read}}); + + (void)row; + + assert(success && found); + + assert(value.d_tax() == 10); + } + + TestTransaction t2(1); + + { + + 
auto [success, found, row, value] = fi.select_split_row(key_type(1), {{nc::ytd, access_t::update}}); + + assert(success && found); + + auto new_row = Sto::tx_alloc(); + + value.copy_into(new_row); + + new_row->d_ytd += 10; + + new_row->d_payment_cnt += 1; + + fi.update_row(row, new_row); + + assert(t2.try_commit()); + + t1.use(); + + assert(value.d_ytd() == 3010); + + assert(value.d_payment_cnt() == 50); // unspecified modifications are not installed + + assert(t1.try_commit()); // can commit because of fine-grained versions + } + } + + // printf("pass %s\n", __FUNCTION__); +} + +void test_fine_delete0() +{ + + typedef FineIndex::NamedColumn nc; + + FineIndex fi; + + fi.thread_init(); + + init_findex(fi); + + { + + TestTransaction t1(0); + + { + + auto [success, found] = fi.delete_row(key_type(1)); + + // std::cout<<"success ="<(); + + value.copy_into(new_row); + + fi.update_row(row, new_row); + } + + TestTransaction t2(1); + + { + + auto [success, found] = fi.delete_row(key_type(1)); + + assert(success && found); + } + + assert(t2.try_commit()); + + t1.use(); + + assert(!t1.try_commit()); + } + + { + + TestTransaction t1(0); + + { + + auto [success, found, row, value] = fi.select_split_row(key_type(2), {{nc::tax, access_t::update}}); + + (void)row; + + assert(success && found); + + assert(value.d_tax() == 10); + + auto new_row = Sto::tx_alloc(); + + value.copy_into(new_row); + + fi.update_row(row, new_row); + } + + TestTransaction t2(1); + + { + + auto [success, found] = fi.delete_row(key_type(2)); + + assert(success && found); + } + + assert(t1.try_commit()); + + t2.use(); + + assert(!t2.try_commit()); + } + + // printf("pass %s\n", __FUNCTION__); +} + +void test_get() +{ + + typedef CoarseIndex::NamedColumn nc; + + CoarseIndex ci; + + ci.thread_init(); + + init_cindex(ci); + + { + + TestTransaction t1(0); + + { + + ci.nontrans_get(key_type(1)); + + assert(t1.try_commit()); + } + } + + // printf("pass %s\n", __FUNCTION__); +} + +/****/ + +int main() +{ + + auto 
start = std::chrono::steady_clock::now(); + + for (j = 0; j < 1000; j++) + + test_coarse_basic(); + + auto end = std::chrono::steady_clock::now(); + + std::cout << "The average elapsed time for test_coarse_basic with MassTrie is " << + + std::chrono::duration_cast(end - start).count() / 1000 << " ns" << std::endl; + + start = std::chrono::steady_clock::now(); + + for (j = 0; j < 1000; j++) + + test_coarse_read_my_split(); + + end = std::chrono::steady_clock::now(); + + std::cout << "The average elapsed time for test_coarse_read_my_split with MassTrie is " << + + std::chrono::duration_cast(end - start).count() / 1000 << " ns" << std::endl; + + start = std::chrono::steady_clock::now(); + + for (j = 0; j < 1000; j++) + + test_coarse_conflict0(); + + end = std::chrono::steady_clock::now(); + + std::cout << "The average elapsed time for test_coarse_conflict0 with MassTrie is " << + + std::chrono::duration_cast(end - start).count() / 1000 << " ns" << std::endl; + + start = std::chrono::steady_clock::now(); + + for (j = 0; j < 1000; j++) + + test_coarse_conflict1(); + + end = std::chrono::steady_clock::now(); + + std::cout << "The average elapsed time for test_coarse_conflict1 with MassTrie is " << + + std::chrono::duration_cast(end - start).count() / 1000 << " ns" << std::endl; + + start = std::chrono::steady_clock::now(); + + for (j = 0; j < 1000; j++) + + test_fine_conflict0(); + + end = std::chrono::steady_clock::now(); + + std::cout << "The average elapsed time for test_fine_conflict0 with MassTrie is " << + + std::chrono::duration_cast(end - start).count() / 1000 << " ns" << std::endl; + + start = std::chrono::steady_clock::now(); + + for (j = 0; j < 1000; j++) + + test_fine_conflict1(); + + end = std::chrono::steady_clock::now(); + + std::cout << "The average elapsed time for test_fine_conflict1 with MassTrie is " << + + std::chrono::duration_cast(end - start).count() << " ns" << std::endl; + + start = std::chrono::steady_clock::now(); + + for (j = 0; j < 1000; 
j++) + + test_fine_conflict2(); + + end = std::chrono::steady_clock::now(); + + std::cout << "The average elapsed time for test_fine_conflict2 with MassTrie is " << + + std::chrono::duration_cast(end - start).count() << " ns" << std::endl; + + start = std::chrono::steady_clock::now(); + + for (j = 0; j < 1000; j++) + + test_fine_delete0(); + + end = std::chrono::steady_clock::now(); + + std::cout << "The average elapsed time for test_fine_delete0 with MassTrie is " << + + std::chrono::duration_cast(end - start).count() / 1000 << " ns" << std::endl; + + start = std::chrono::steady_clock::now(); + + for (j = 0; j < 1000; j++) + + test_fine_delete1(); + + end = std::chrono::steady_clock::now(); + + std::cout << "The average elapsed time for test_fine_delete1 with MassTrie is " << + + std::chrono::duration_cast(end - start).count() / 1000 << " ns" << std::endl; + + // test_get(); + + printf("All tests pass!\n"); + + std::thread advancer; // empty thread because we have no advancer thread + + Transaction::rcu_release_all(advancer, 2); + + return 0; +}