Skip to content

Commit 5c58094

Browse files
committed
release
1 parent 8662708 commit 5c58094

28 files changed

+22662
-0
lines changed

.gitignore

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
*
2+
!*.c
3+
!*.h
4+
!Makefile*
5+
!.gitignore
6+
!.gitmodules
7+
!c
8+
!*_benchmark
9+
!*.md

Makefile

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# Makefile
2+
# rules (always with .out)
3+
# SRC-X.out += abc # extra source: abc.c
4+
# MOD-X.out += abc # extra module: abc.c abc.h
5+
# ASM-X.out += abc # extra assembly: abc.S
6+
# DEP-X.out += abc # extra dependency: abc
7+
# FLG-X.out += -finline # extra flags
8+
# LIB-X.out += abc # extra -labc options
9+
10+
# X.out : xyz.h xyz.c # for extra dependences that are to be compiled/linked.
11+
12+
VPATH += .:c/
13+
14+
# X => X.out
15+
TARGETS += test_flextree test_flexfile test_flexdb
16+
# X => X.c only
17+
SOURCES +=
18+
# X => X.S only
19+
ASSMBLY +=
20+
# X => X.c X.h
21+
MODULES += c/lib c/kv c/ord generic flextree flexfile flexdb
22+
# X => X.h
23+
HEADERS +=
24+
25+
# EXTERNSRC/EXTERNDEP do not belong to this repo.
26+
# extern-src will be linked
27+
EXTERNSRC +=
28+
# extern-dep will not be linked
29+
EXTERNDEP +=
30+
31+
FLG +=
32+
LIB += rt m uring
33+
34+
ifeq ($(LEVELDB),y)
35+
FLG += -DLEVELDB
36+
LIB += leveldb
37+
endif
38+
ifeq ($(ROCKSDB),y)
39+
FLG += -DROCKSDB
40+
LIB += rocksdb
41+
endif
42+
ifeq ($(LMDB),y)
43+
FLG += -DLMDB
44+
LIB += lmdb
45+
endif
46+
ifeq ($(KVELL),y)
47+
FLG += -DKVELL -L.
48+
LIB += kvell
49+
endif
50+
51+
# when $ make FORKER_PAPI=y
52+
ifeq ($(strip $(FORKER_PAPI)),y)
53+
LIB += papi
54+
FLG += -DFORKER_PAPI
55+
endif
56+
57+
include c/Makefile.common
58+
59+
# Shared-library build of the flexfile code plus the wrapper layer.
# Compiles wrapper.c, flextree.c, flexfile.c, generic.c and c/lib.c in a single
# $(CCC) invocation with -shared -fPIC, then links every entry of $(LIB) and
# $(LIB-$@) as -l options, followed by -ldl.
# ALLFLG and ALLLIB are assigned through $(eval) when the recipe is expanded.
libflexfile.so : Makefile Makefile.common c/lib.h c/lib.c flextree.h \
60+
flextree.c flexfile.h flexfile.c generic.h generic.c wrapper.c wrapper.h
61+
$(eval ALLFLG := $(CSTD) $(EXTRA) $(FLG) -shared -fPIC)
62+
$(eval ALLLIB := $(addprefix -l,$(LIB) $(LIB-$@)))
63+
$(CCC) $(ALLFLG) -o $@ wrapper.c flextree.c flexfile.c generic.c c/lib.c $(ALLLIB) -ldl

README.md

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
# FlexTree, FlexSpace and FlexDB
2+
3+
This repository contains the reference implementation of FlexTree, FlexSpace and FlexDB presented in the paper
4+
"Building an Efficient Key-value Store Using Lightweight In-place Updates".
5+
6+
*More details of the paper are expected to be updated here.*
7+
8+
## Repository Structure
9+
10+
Note: the terms used in the implementation differ somewhat from those used in the paper.
11+
The following table shows the mapping between the paper terms and the API prefixes in the code.
12+
13+
| Paper Term | Code Term |
14+
| --- | --- |
15+
| FlexTree | `flextree` |
16+
| Sorted Extent Array | `brute_force` |
17+
| FlexSpace | `flexfile` |
18+
| FlexSpace Segment | `block` and `BLOCK` in macros |
19+
| FlexDB | `flexdb` |
20+
21+
The content of this repository is organized as follows:
22+
- `c/`: external library dependencies (including the thread-safe skip list used as FlexDB's MemTable)
23+
- `*_benchmark/`: experiment code for FlexTree, FlexSpace and FlexDB (Paper Section 6.1 to 6.3)
24+
- `flextree.h`, `flextree.c`: FlexTree implementation and its APIs, including reference code of sorted extent array
25+
- `flexfile.h`, `flexfile.c`: FlexSpace implementation and its APIs
26+
- `flexdb.h`, `flexdb.c`: FlexDB implementation and its APIs
27+
- `generic.h`, `generic.c`: A generic syscall wrapper for different platforms (only Linux currently)
28+
- `wrapper.h`, `wrapper.c`: A library that implements a POSIX-file-like interface for FlexSpace
29+
(not coupled with its core functionalities)
30+
- `test_flextree.c`, `test_flexfile.c`, `test_flexdb.c`: Test cases for each component's core functionalities
31+
(correctness only)
32+
- `LICENSE`, `Makefile`: as their names suggest
33+
34+
## Implementation Details
35+
36+
You can refer to the header files for more details of the implementation.
37+
38+
### FlexTree and FlexSpace
39+
40+
The FlexTree implementation supports storing the tree in the main memory and using CoW to checkpoint the in-memory
41+
tree into an on-disk file.
42+
The code also contains a naive implementation of a sorted extent array to verify the correctness of FlexTree.
43+
The test program for FlexTree contains ten micro-benchmarks to test its functionalities.
44+
45+
The FlexSpace implementation provides a set of file-like APIs to support
46+
read/(over)write/insert-range/collapse-range/sync operations.
47+
It is backed with the functionalities of log-structured space allocation,
48+
segment-based garbage collection and crash recovery mechanisms.
49+
The test program for FlexSpace tests the basic data I/O operations.
50+
Specifically, the implementation also contains an extra set of APIs that support
51+
setting a tag at a specific logical address.
52+
A tag is a 16-bit value and it shifts with the data.
53+
54+
*Please note that the current implementation of FlexSpace and FlexTree is not thread-safe.*
55+
56+
### FlexDB
57+
58+
The FlexDB implementation provides a fully-functional persistent key-value store.
59+
The code contains the implementation of a volatile sparse KV index, an interval cache,
60+
a skip-list-based MemTable, write-ahead logging and the corresponding crash recovery mechanisms.
61+
FlexDB supports regular KV operations including write/update (`PUT`), point query (`GET`), range query (`SCAN`)
62+
and deletion (`DELETE`).
63+
You can refer to `flexdb.h` to better understand the implementation of FlexDB, as well as the APIs provided.
64+
65+
*The implementation of FlexDB is thread-safe and can be used with multiple concurrent threads.*
66+
67+
## Demo
68+
69+
The test programs for each component are a good starting point to demonstrate the basic usage.
70+
To compile the test programs, simply run `make`, which builds their executable binaries.
71+
The codebase is verified to compile with the following environment and dependencies:
72+
73+
- Linux kernel 5.10.32 LTS (support io_uring)
74+
- clang 12.0.1
75+
- jemalloc 5.2.1
76+
- liburing 2.0
77+
78+
The basic usage of each testing program is:
79+
80+
- `test_flextree.out`: this program tests the basic functionalities of the FlexTree implementation, and it uses sorted
81+
extent array as reference to verify the correctness. It contains ten test cases and each can be run by using the
82+
command `./test_flextree.out <n>` where `n` is the test ID (0-9). Note that most tests are conducted in DRAM only but
83+
a few tests will use `/tmp` (though it is usually memory-resident as well). You can refer to the code to see the content
84+
of each test.
85+
- `test_flexfile.out`: this program is a minimal test to verify the correctness of insertions and deletions in a
86+
FlexSpace. It creates a FlexSpace in `/tmp`, performs a few I/O operations on it, and verifies the results against the
87+
expected output. You can simply run `./test_flexfile.out` to see if the outputs are correct.
88+
- `test_flexdb.out`: this program tests the basic functionality of FlexDB. It creates a store in `/tmp` and performs a
89+
range of regular KV operations on it. To run the program, simply use `./test_flexdb.out`.

c/Makefile.common

Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
1+
#usage: include Makefile.common at the end of your Makefile
2+
3+
# no builtin rules/vars (CC, CXX, etc. are still defined but will be empty)
4+
MAKEFLAGS += -r -R
5+
6+
HDR = $(addsuffix .h,$(MODULES) $(HEADERS))
7+
SRC = $(addsuffix .c,$(MODULES) $(SOURCES))
8+
ASM = $(addsuffix .S,$(ASSMBLY))
9+
# Object files for the J=o build: one .o per module, plain source and assembly
# file. The assembly list is spelled ASSMBLY (as in the ASM definition), so it
# must be referenced by that name; $(ASSEMBLY) is never set and expands to nothing.
OBJ = $(addsuffix .o,$(MODULES) $(SOURCES) $(ASSMBLY))
10+
DEP = Makefile.common Makefile $(HDR) $(EXTERNDEP) $(EXTERNSRC)
11+
BIN = $(addsuffix .out,$(TARGETS))
12+
DIS = $(addsuffix .dis,$(TARGETS))
13+
14+
# clang:
15+
# EXTRA="-Rpass=loop-vectorize" # IDs loops that were successfully V-ed
16+
# EXTRA="-Rpass-missed=loop-vectorize" # IDs loops that failed V
17+
# EXTRA="-Rpass-analysis=loop-vectorize" # IDs the statements that caused V to fail
18+
# EXTRA="-Rpass=\ *" # remarks for all passes
19+
# other passes: https://llvm.org/docs/Passes.html
20+
21+
O ?= rg
22+
23+
# predefined OPT: make O={rg,r,ns,0g,2g,3g,p,0s,3s,t,cov,mc,hc,wn,stk}
24+
ifeq ($O,rg) # make O=rg
25+
OPT ?= -DNDEBUG -g3 -O3 -flto -fno-stack-protector
26+
else ifeq ($O,r) # make O=r (for release)
27+
OPT ?= -DNDEBUG -O3 -flto -fno-stack-protector
28+
else ifeq ($O,ns) # make O=ns (no signal handlers)
29+
OPT ?= -DNDEBUG -O3 -flto -fno-stack-protector -DNOSIGNAL
30+
else ifeq ($O,0g) # make O=0g
31+
OPT ?= -g3 -O0 -fno-inline
32+
else ifeq ($O,2g) # make O=2g
33+
OPT ?= -g3 -O2
34+
else ifeq ($O,3g) # make O=3g
35+
OPT ?= -g3 -O3 -flto -fno-inline
36+
else ifeq ($O,p) # make O=p (profiling: rg+noinline)
37+
OPT ?= -DNDEBUG -g3 -O3 -flto -fno-stack-protector -fno-inline
38+
else ifeq ($O,0s) # make O=0s (address sanitizer)
39+
OPT ?= -g3 -O0 -fno-inline -fsanitize=address -fno-omit-frame-pointer -DHEAPCHECKING
40+
else ifeq ($O,3s) # make O=3s (address sanitizer)
41+
OPT ?= -g3 -O3 -fno-inline -fsanitize=address -fno-omit-frame-pointer -DHEAPCHECKING
42+
else ifeq ($O,t) # make O=t (thread sanitizer)
43+
OPT ?= -g3 -O1 -fno-inline -fsanitize=thread -fno-stack-protector
44+
else ifeq ($O,cov) # make O=cov (for gcov)
45+
OPT ?= -g3 -DNDEBUG -O0 --coverage
46+
CCC = gcc
47+
else ifeq ($O,mc) # make O=mc (for valgrind memcheck)
48+
OPT ?= -g3 -O1 -fno-inline -DHEAPCHECKING
49+
ARCH ?= broadwell
50+
else ifeq ($O,hc) # make O=hc (for gperftools heapcheck)
51+
OPT ?= -g3 -O1 -fno-inline
52+
LIB += tcmalloc
53+
else ifeq ($O,wn) # more warning
54+
OPT ?= -g3 -O3 -Wvla -Wformat=2 -Wconversion -Wstrict-prototypes -Wmissing-prototypes
55+
else ifeq ($O,stk) # check stack usage with gcc
56+
OPT ?= -g3 -O3 -DNDEBUG -fstack-usage
57+
CCC = gcc
58+
endif
59+
60+
# malloc: g:glibc, t:tcmalloc, j:jemalloc
61+
M ?= g
62+
63+
ifeq ($M,t)
64+
LIB += tcmalloc
65+
FLG += -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free
66+
else ifeq ($M,j)
67+
LIB += jemalloc
68+
endif
69+
70+
UNAME_S := $(shell uname -s)
71+
ifeq ($(UNAME_S),Linux)
72+
CHECK_S := -D__linux__
73+
LIB += rt
74+
else ifeq ($(UNAME_S),FreeBSD)
75+
CHECK_S := -D__FreeBSD__
76+
FLG += -I/usr/local/include -L/usr/local/lib
77+
LIB += rt
78+
LIB += execinfo
79+
TPUT := /usr/local/bin/tput
80+
else ifeq ($(UNAME_S),Darwin)
81+
CHECK_S := -D__APPLE__ -D__MACH__
82+
# do nothing
83+
else
84+
$(error "Supported Platforms: Linux, FreeBSD, Darwin")
85+
endif
86+
TPUT ?= tput
87+
88+
CCC ?= clang
89+
CSTD = -std=gnu18
90+
XCC ?= clang++
91+
XSTD = -std=gnu++17
92+
93+
UNAME_M := $(shell uname -m)
94+
ifeq ($(UNAME_M),aarch64) # "native" does not work for clang@aarch64
95+
CHECK_M := -D__aarch64__
96+
ARCH ?= armv8-a+crc
97+
else ifeq ($(UNAME_M),arm64) # "native" does not work for clang@aarch64
98+
CHECK_M := -D__aarch64__
99+
ARCH ?= armv8-a+crc
100+
else ifeq ($(UNAME_M),x86_64)
101+
CHECK_M := -D__x86_64__
102+
ARCH ?= native
103+
else ifeq ($(UNAME_M),amd64) # freebsd
104+
CHECK_M := -D__x86_64__
105+
ARCH ?= native
106+
else
107+
$(error "Supported Platforms: aarch64, x86_64")
108+
endif
109+
110+
TUNE ?= native
111+
112+
NBI += memcpy memmove memcmp
113+
114+
# minimal requirement on x86_64: -march=nehalem
115+
# minimal requirement on aarch64: -march=armv8-a+crc
116+
FLG += -march=$(ARCH) -mtune=$(TUNE)
117+
FLG += -pthread -Wall -Wextra -Wshadow #-Weverything
118+
FLG += $(addprefix -fno-builtin-,$(NBI))
119+
FLG += $(OPT)
120+
121+
ifneq ($(shell $(CCC) --version 2>/dev/null | grep clang),)
122+
FLG += -ferror-limit=3
123+
CCCTYPE := clang
124+
else ifneq ($(shell $(CCC) --version 2>/dev/null | grep gcc),)
125+
FLG += -fmax-errors=3
126+
FLG += -Wno-unknown-pragmas
127+
CCCTYPE := gcc
128+
else
129+
$(error "Supported Compilers: clang, gcc")
130+
endif
131+
132+
ifeq ($(CCCTYPE),clang)
133+
CCINST = /usr/lib/clang/$(shell $(CCC) --version 2>/dev/null | awk '/^clang/ { print $$3 }')
134+
CCINC = $(CCINST)/include
135+
else ifeq ($(CCCTYPE),gcc)
136+
CCINST = /usr/lib/gcc/$(shell $(CCC) -dumpmachine)/$(shell $(CCC) -dumpversion)
137+
CCINC = $(CCINST)/include $(CCINST)/include-fixed
138+
endif
139+
# NOTE(review): this plain `=` replaces the compiler-specific CCINC chosen in the
# clang/gcc conditional just above, so only these two system directories reach
# the header probes, `check` and `tags`. Confirm whether `+=` was intended.
CCINC = /usr/include /usr/local/include
140+
141+
ifneq ($(shell find $(CCINC) -name backtrace-supported.h 2>/dev/null),)
142+
LIB += backtrace
143+
FLG += -DBACKTRACE
144+
endif
145+
146+
ifneq ($(shell find $(CCINC) -name liburing.h 2>/dev/null),)
147+
LIB += uring
148+
FLG += -DLIBURING
149+
endif
150+
151+
152+
# uniq: order-preserving de-duplication of the word list $1.
# Emits the first word, then recurses on the list with every copy of that word
# filtered out; an empty list ends the recursion and yields nothing.
uniq = $(if $1,$(firstword $1) $(call uniq,$(filter-out $(firstword $1),$1)))
153+
magentatxt := $(shell $(TPUT) setaf 5)
154+
greentxt := $(shell $(TPUT) setaf 2)
155+
bluetxt := $(shell $(TPUT) setaf 4)
156+
normaltxt := $(shell $(TPUT) sgr0)
157+
158+
.PHONY : bin dis def clean cleanx check tags
159+
160+
bin : $(BIN)
161+
dis : $(DIS) bin
162+
.DEFAULT_GOAL = bin
163+
.SECONDEXPANSION:
164+
165+
ifeq ($(J),o)
166+
# DANGER. Don't use unless it works!
167+
# build from .o files but target-specific flags are missing in %.o : %.x
168+
%.out : %.o $(OBJ) $$(addsuffix .o,$$(SRC-$$@) $$(MOD-$$@) $$(ASM-$$@))
169+
$(eval ALLFLG := $(CSTD) $(EXTRA) $(FLG) $(FLG-$*) $(FLG-$*.o) $(FLG-$@) -rdynamic)
170+
$(eval ALLLIB := $(addprefix -l,$(LIB) $(LIB-$@)))
171+
$(CCC) $(ALLFLG) -o $@ $^ $(ALLLIB)
172+
#
173+
else # default: all-in-one command
174+
%.out : %.c $(SRC) $(ASM) $(DEP) $$(DEP-$$@) $$(addsuffix .c,$$(SRC-$$@) $$(MOD-$$@)) $$(addsuffix .h,$$(HDR-$$@) $$(MOD-$$@)) $$(addsuffix .S,$$(ASM-$$@))
175+
$(eval ALLSRC := $(SRC) $(addsuffix .c,$(SRC-$@) $(MOD-$@)) $(ASM) $(addsuffix .S,$(ASM-$@)))
176+
$(eval UNIQSRC := $(call uniq,$(ALLSRC)))
177+
$(eval ALLFLG := $(CSTD) $(EXTRA) $(FLG) $(FLG-$@) -rdynamic)
178+
$(eval ALLLIB := $(addprefix -l,$(LIB) $(LIB-$@)))
179+
@printf '$(bluetxt)$@$(magentatxt) <= $(greentxt)$< $(UNIQSRC)$(normaltxt)\n'
180+
$(CCC) $(ALLFLG) -o $@ $< $(UNIQSRC) $(ALLLIB)
181+
#
182+
endif
183+
184+
185+
%.dis : %.out
186+
objdump -SlwtC $< 1>$@ 2>/dev/null
187+
188+
%.o : %.cc $(DEP) $$(DEP-$$@) $$(addsuffix .h,$$(HDR-$$@) $$(MOD-$$@))
189+
$(XCC) $(XSTD) $(EXTRA) $(FLG) $(FLG-$*) $(FLG-$@) -o $@ -c $<
190+
191+
%.o : %.c $(DEP) $$(DEP-$$@) $$(addsuffix .h,$$(HDR-$$@) $$(MOD-$$@))
192+
$(CCC) $(CSTD) $(EXTRA) $(FLG) $(FLG-$*) $(FLG-$@) -o $@ -c $<
193+
194+
%.o : %.S $(DEP) $$(DEP-$$@) $$(addsuffix .h,$$(HDR-$$@) $$(MOD-$$@))
195+
$(CCC) $(CSTD) $(EXTRA) $(FLG) $(FLG-$*) $(FLG-$@) -o $@ -c $<
196+
197+
%.s : %.c $(DEP) $$(DEP-$$@) $$(addsuffix .h,$$(HDR-$$@) $$(MOD-$$@))
198+
$(CCC) $(CSTD) $(EXTRA) $(FLG) $(FLG-$*) $(FLG-$*.o) -S -o $@ -c $<
199+
200+
def :
201+
$(CCC) $(FLG) -dM -E - </dev/null
202+
203+
clean :
204+
rm -rf *.out *.dis *.o *.so *.gcda *.gcno *.gcov *.dSYM
205+
206+
cleanx : clean
207+
rm -rf $(EXTERNDEP) $(EXTERNSRC)
208+
209+
check :
210+
cppcheck $(addprefix -I ,$(CCINC)) \
211+
-q $(CHECK_M) $(CHECK_S) -DNDEBUG -U__cplusplus \
212+
--std=c11 --language=c --platform=unix64 \
213+
--enable=warning,style,performance,portability,information --inline-suppr .
214+
215+
tags :
216+
ctags -R . /usr/include /usr/local/include $(CCINC)

0 commit comments

Comments
 (0)