Skip to content

Commit bed0dad

Browse files
authored
Compress/Uncompress without Encryption (#4)
Allow both encryption and compression to be optional and refactor for better memory allocation management
1 parent 65bdece commit bed0dad

File tree

4 files changed

+140
-107
lines changed

4 files changed

+140
-107
lines changed

README.md

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,9 @@ _For easy reference, it will be assumed that the SQLite source code is in the `s
3535

3636
#### Build a Static Library
3737
1. Create a temporary `build` directory and `cd` to it.
38-
1. Copy `sqlite3.c` and `cevfs.c` to the `build` directory.
38+
1. Copy `sqlite3.c`, `cevfs.c` and `cevfs.h` to the `build` directory.
3939
1. Combine the files: `cat sqlite3.c cevfs.c > cevfs-all.c`
40-
1. Compile: `clang -c cevfs-all.c -o sqlite3.o -oS`
40+
1. Compile: `clang -c cevfs-all.c -o sqlite3.o -Os`
4141
1. Create static lib: `libtool -static sqlite3.o -o sqlite3.a`
4242

4343
### Creating a Command-Line Build Tool
@@ -46,7 +46,8 @@ If you are using macOS, you can use the `cevfs_build` example which implements c
4646
Copy the following files to your temporary build directory:
4747
- sqlite/sqlite3.c
4848
- cevfs/cevfs.c
49-
- cevfs_build/cevfs_build.c
49+
- cevfs\_build/cevfs\_build.c
50+
- cevfs_build/xMethods.c
5051

5152
Build:
5253
```
@@ -66,8 +67,17 @@ parameters:
6667
- **KEY**: encryption key
6768

6869
E.g.:
70+
71+
```
72+
./cevfs_build myDatabase.db myNewDatabase.db default "x'2F3A995FCE317EA22F3A995FCE317EA22F3A995FCE317EA22F3A995FCE317EA2'"
73+
```
74+
75+
(The hex key consists of 32 pairs of hex digits, i.e. 64 hex characters representing 32 bytes.)
76+
77+
You can also try different block sizes and compare the sizes of the resulting databases to see which one uses the least space. To specify the block size, specify the destination path using a URI and append `?block_size=<block size>`:
78+
6979
```
70-
./cevfs_build myDatabase.db myNewDatabase.db default "x'2F3A995FCE317EA2...'"
80+
./cevfs_build myDatabase.db "file:///absolute/path/to/myNewDatabase.db?block_size=4096" default "x'2F3A995FCE317EA2...'"
7181
```
7282

7383
### Creating a Custom Version of SQLite
@@ -88,6 +98,8 @@ build> $ cat sqlite3.c cevfs.c cevfs_mod.c > cevfs-all.c
8898
build> $ clang cevfs-all.c shell.c -DSQLITE_ENABLE_CEROD=1 -DHAVE_READLINE=1 -O2 -o sqlite3 -lz -lreadline
8999
```
90100

101+
_If you get errors related to implicit declaration of functions under C99, you can add `-Wno-implicit-function-declaration` to disable them._
102+
91103
Then, to open a CEVFS database:
92104

93105
```

cevfs/cevfs-all.c

Lines changed: 0 additions & 10 deletions
This file was deleted.

cevfs/cevfs.c

Lines changed: 122 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -40,16 +40,20 @@ SOFTWARE.
4040
#include <CommonCrypto/CommonDigest.h>
4141
#include <CommonCrypto/CommonCryptor.h>
4242

43-
// Size of standard Sqlite3 pager header
44-
#define CEVFS_DB_HEADER1_SZ 100
45-
// Size of cevfs-specific pager header
46-
#define CEVFS_DB_HEADER2_SZ 100
43+
// Standard Sqlite3 pager header
4744
#define CEVFS_DB_HEADER1_OFST 000
45+
#define CEVFS_DB_HEADER1_SZ 100
46+
47+
// cevfs-specific pager header
4848
#define CEVFS_DB_HEADER2_OFST CEVFS_DB_HEADER1_OFST+CEVFS_DB_HEADER1_SZ
49-
// Offset to master map table
50-
#define CEVFS_DB_MMTBL_OFST CEVFS_DB_HEADER2_OFST+CEVFS_DB_HEADER2_SZ
49+
#define CEVFS_DB_HEADER2_SZ 100
50+
51+
// Total header size
5152
#define CEVFS_DB_HEADER_SIZE (CEVFS_DB_HEADER1_SZ + CEVFS_DB_HEADER2_SZ)
5253

54+
// Offset to master map table, starts just after header
55+
#define CEVFS_DB_MMTBL_OFST CEVFS_DB_HEADER2_OFST+CEVFS_DB_HEADER2_SZ
56+
5357
#define CEVFS_FILE_SCHEMA_NO 1
5458
#define CEVFS_FIRST_MAPPED_PAGE 3
5559

@@ -266,7 +270,6 @@ static char * cevfsMapPath(cevfs_file *pFile, const char *zName, bool *bMustRele
266270
static const char *zTail = "btree";
267271
if (bMustRelease) *bMustRelease = false;
268272
if( strstr(zName, "-journal")==0 ){
269-
bMustRelease = false;
270273
return (char *)zName;
271274
}
272275
char *zUppJournalPath = pFile ? pFile->zUppJournalPath : NULL;
@@ -614,7 +617,7 @@ static int cevfsLoadHeader(cevfs_file *p){
614617

615618
static CevfsMemPage *memPageFromDbPage(DbPage *pDbPage, Pgno mappedPgno){
616619
CevfsMemPage* pPg = (CevfsMemPage *)sqlite3PagerGetExtra(pDbPage);
617-
if(mappedPgno != pPg->pgno ){
620+
if( mappedPgno != pPg->pgno ){
618621
pPg->pgno = mappedPgno;
619622
pPg->pDbPage = pDbPage;
620623
pPg->dbHdrOffset = mappedPgno==1 ? CEVFS_DB_HEADER_SIZE : 0;
@@ -749,7 +752,7 @@ void cevfsAllocCmpPageSpace(
749752
//u32 realPageSize = pFile->pageSize - (header->currPgno == 1 ? CEVFS_DB_HEADER_SIZE : 0);
750753
header->currPageOfst += cmpSz;
751754
if( header->currPageOfst > /*realPageSize*/ pFile->pageSize ){
752-
// current page can't hold anymore, start new page.
755+
// current page can't hold any more, start new page.
753756
ofst = 0;
754757
header->currPageOfst = cmpSz;
755758
// Make sure to not use a pgno that we allocated to a pagemap page.
@@ -841,7 +844,7 @@ static int cevfsPageMapSet(
841844

842845
if( (rc = cevfsPageMapGet(pFile, uppOfst, outUppPgno, outLwrPgno, outCmpOfst, &oldCmpSz, &ix))==SQLITE_OK ){
843846
/*
844-
** We found a map entry. It's either a placeholder entry that need valid data,
847+
** We found a map entry. It's either a placeholder entry that needs valid data,
845848
** an outdated entry that needs updating, or a valid up-to-date entry.
846849
** If the entry needs updating, we will reuse the space used to hold the previously compressed
847850
** data if the compressed data now takes up less space or allocate a new space at the end of
@@ -960,29 +963,40 @@ static int cevfsRead(
960963
Pgno uppPgno, mappedPgno;
961964
CevfsCmpOfst cmprPgOfst;
962965
CevfsCmpSize uCmpPgSz;
966+
963967
if( (rc = cevfsPageMapGet(p, iOfst, &uppPgno, &mappedPgno, &cmprPgOfst, &uCmpPgSz, NULL)) == SQLITE_OK ){
964-
if( rc==SQLITE_OK && (rc = sqlite3PagerGet(p->pPager, mappedPgno, &pPage, 0)) == SQLITE_OK ){
968+
if( rc==SQLITE_OK &&
969+
(rc = sqlite3PagerGet(p->pPager, mappedPgno, &pPage, 0))==SQLITE_OK
970+
){
971+
void *pDecBuf = NULL;
972+
void *pUncBuf = NULL;
973+
void *pDstData = NULL;
974+
965975
CevfsMemPage *pMemPage = memPageFromDbPage(pPage, mappedPgno);
966976
CEVFS_PRINTF(
967977
pInfo, "%s.xRead(%s,pgno=%u->%u,ofst=%08lld->%u,amt=%d->%u)",
968978
pInfo->zVfsName, p->zFName, uppPgno, mappedPgno, iOfst, cmprPgOfst, iAmt, uCmpPgSz
969979
);
970980
assert( uCmpPgSz > 0 );
981+
971982
size_t iDstAmt = uppPgSz;
972-
void *pUncBuf = sqlite3_malloc((int)iDstAmt);
973-
void *pCmpBuf = sqlite3_malloc(uCmpPgSz);
974-
int bSuccess = 0;
975-
976-
if( pUncBuf ){
977-
// If using encryption, the IV is stored first followed by the enctypted data
978-
void *iv =
979-
(char *)pMemPage->aData
980-
+pMemPage->dbHdrOffset
981-
+pMemPage->pgHdrOffset
982-
+cmprPgOfst;
983-
984-
// decrypt
985-
if( p->bEncryptionEnabled ){
983+
int bSuccess = 1;
984+
985+
void *pSrcData =
986+
(char *)pMemPage->aData
987+
+pMemPage->dbHdrOffset
988+
+pMemPage->pgHdrOffset
989+
+cmprPgOfst;
990+
991+
// src = dst, assuming no encryption or compression
992+
pDstData = pSrcData;
993+
994+
if( p->bEncryptionEnabled ){
995+
// The IV is stored first, followed by the encrypted data
996+
void *iv = pSrcData;
997+
998+
pDecBuf = sqlite3_malloc(uCmpPgSz);
999+
if( pDecBuf ){
9861000
void *srcData = iv+p->nEncIvSz;
9871001
size_t nDataInSize = uCmpPgSz-p->nEncIvSz;
9881002
size_t nFinalSz;
@@ -992,38 +1006,36 @@ static int cevfsRead(
9921006
srcData, // dataIn
9931007
nDataInSize, // data-in length
9941008
iv, // IvIn
995-
pCmpBuf, // dataOut; result is written here.
1009+
pDecBuf, // dataOut; result is written here.
9961010
uCmpPgSz, // The size of the dataOut buffer in bytes
9971011
&nFinalSz // On successful return, the number of bytes written to dataOut.
9981012
);
9991013

10001014
if( bSuccess ){
10011015
uCmpPgSz = nFinalSz;
1002-
}
1003-
}else{
1004-
bSuccess = true;
1005-
pCmpBuf = iv;
1006-
}
1007-
1008-
// uncompress
1009-
if( bSuccess ){
1010-
if( p->bCompressionEnabled ){
1011-
bSuccess = p->vfsMethods.xUncompress(pInfo->pCtx, pUncBuf, &iDstAmt, pCmpBuf, (int)uCmpPgSz);
1012-
if( !bSuccess ){
1013-
return CEVFS_ERROR_DECOMPRESSION_FAILED;
1014-
}
1016+
pSrcData = pDstData = pDecBuf;
1017+
}else rc=CEVFS_ERROR_DECRYPTION_FAILED;
1018+
}else rc=SQLITE_NOMEM;
1019+
} // encryption
1020+
1021+
if( p->bCompressionEnabled && bSuccess && bSuccess && rc==SQLITE_OK ){
1022+
pUncBuf = sqlite3_malloc((int)iDstAmt);
1023+
if( pUncBuf ){
1024+
bSuccess = p->vfsMethods.xUncompress(pInfo->pCtx, pUncBuf, &iDstAmt, pSrcData, (int)uCmpPgSz);
1025+
if( bSuccess ){
10151026
assert( iDstAmt==uppPgSz );
1016-
}else{
1017-
pUncBuf = pCmpBuf;
1018-
}
1019-
sqlite3_free(pCmpBuf);
1020-
u16 uBufOfst = iOfst % uppPgSz;
1021-
memcpy(zBuf, pUncBuf+uBufOfst, iAmt);
1022-
}else{
1023-
rc = CEVFS_ERROR_DECRYPTION_FAILED;
1024-
}
1025-
sqlite3_free(pUncBuf);
1026-
}else rc = SQLITE_NOMEM;
1027+
pDstData = pUncBuf;
1028+
}else rc=CEVFS_ERROR_DECOMPRESSION_FAILED;
1029+
}else rc=SQLITE_NOMEM;
1030+
}
1031+
1032+
if( bSuccess && rc==SQLITE_OK ){
1033+
u16 uBufOfst = iOfst % uppPgSz;
1034+
memcpy(zBuf, pDstData+uBufOfst, iAmt);
1035+
}
1036+
1037+
if( pDecBuf ) sqlite3_free( pDecBuf );
1038+
if( pUncBuf ) sqlite3_free( pUncBuf );
10271039
sqlite3PagerUnref(pPage);
10281040
}
10291041
}else{
@@ -1050,65 +1062,84 @@ static int cevfsWrite(
10501062
){
10511063
cevfs_file *p = (cevfs_file *)pFile;
10521064
cevfs_info *pInfo = p->pInfo;
1053-
int rc;
1065+
int rc = SQLITE_OK;
10541066

10551067
if( p->pPager ){
1056-
DbPage *pPage;
1057-
Pgno uppPgno, mappedPgno;
1068+
if( p->bReadOnly ) rc = SQLITE_READONLY;
1069+
else{
1070+
void *pCmpBuf = NULL;
1071+
void *pEncBuf = NULL;
1072+
void *pSrcData = (void *)zBuf;
1073+
size_t nSrcAmt = iAmt;
1074+
int bSuccess = 1;
1075+
1076+
if( p->bCompressionEnabled ){
1077+
size_t nDest = p->vfsMethods.xCompressBound(pInfo->pCtx, nSrcAmt);
1078+
pCmpBuf = sqlite3_malloc((int)nDest);
1079+
if( pCmpBuf ){
1080+
bSuccess = p->vfsMethods.xCompress(pInfo->pCtx, pCmpBuf, &nDest, pSrcData, nSrcAmt);
1081+
if( bSuccess ){
1082+
pSrcData = pCmpBuf;
1083+
nSrcAmt = nDest;
1084+
}
1085+
}else rc=SQLITE_NOMEM;
1086+
}
10581087

1059-
if( !p->bReadOnly ){
1060-
// compress
1061-
size_t nDest = p->vfsMethods.xCompressBound(pInfo->pCtx, iAmt);
1062-
void *pCmpBuf = sqlite3_malloc((int)nDest);
1063-
if( pCmpBuf ){
1064-
CevfsCmpOfst cmprPgOfst;
1065-
p->vfsMethods.xCompress(pInfo->pCtx, pCmpBuf, &nDest, (void *)zBuf, iAmt);
1066-
1067-
// encrypt
1068-
void *pEncBuf = NULL;
1088+
if( p->bEncryptionEnabled && bSuccess ){
10691089
size_t tmp_csz = 0;
1070-
int bSuccess = 0;
1071-
10721090
void *iv = sqlite3_malloc((int)p->nEncIvSz);
1073-
if( p->bEncryptionEnabled ){
1091+
if( iv ){
10741092
bSuccess = p->vfsMethods.xEncrypt(
10751093
pInfo->pCtx,
1076-
pCmpBuf, // dataIn
1077-
nDest, // data-in length
1094+
pSrcData, // dataIn
1095+
nSrcAmt, // data-in length
10781096
iv, // IV out
10791097
&pEncBuf, // dataOut; result is written here.
10801098
&tmp_csz, // On successful return, the number of bytes written to dataOut.
10811099
sqlite3_malloc
10821100
);
1083-
}
1101+
if( bSuccess && pEncBuf ){
1102+
// Join IV and pEncBuf. If IV is greater than p->nEncIvSz, it will be truncated.
1103+
void *pIvEncBuf = NULL;
1104+
CevfsCmpSize uIvEncSz = p->nEncIvSz+tmp_csz;
1105+
pIvEncBuf = sqlite3_realloc(iv, (int)(uIvEncSz));
1106+
memcpy(pIvEncBuf+p->nEncIvSz, pEncBuf, tmp_csz);
1107+
sqlite3_free(pEncBuf);
1108+
pSrcData = pEncBuf = pIvEncBuf;
1109+
nSrcAmt = uIvEncSz;
1110+
}else rc=CEVFS_ERROR_ENCRYPTION_FAILED;
1111+
}else rc=SQLITE_NOMEM;
1112+
}
10841113

1085-
if( bSuccess && pEncBuf ){
1086-
// Join IV and pEncBuf. If IV is greater than pInfo->nEncIvSz, it will be truncated.
1087-
void *pIvEncBuf = sqlite3_realloc(iv, (int)(p->nEncIvSz+tmp_csz));
1088-
memcpy(pIvEncBuf+p->nEncIvSz, pEncBuf, tmp_csz);
1114+
// Make sure dest/lwr page size is large enough for incoming page of data
1115+
assert( nSrcAmt <= p->pageSize );
1116+
if( rc==SQLITE_OK ){
1117+
if( nSrcAmt <= p->pageSize ){
1118+
DbPage *pPage;
1119+
Pgno uppPgno, mappedPgno;
1120+
CevfsCmpOfst cmprPgOfst;
10891121

1090-
CevfsCmpSize uIvEncSz = tmp_csz + p->nEncIvSz;
1091-
cevfsPageMapSet(p, iOfst, uIvEncSz, &uppPgno, &mappedPgno, &cmprPgOfst);
1122+
cevfsPageMapSet(p, iOfst, nSrcAmt, &uppPgno, &mappedPgno, &cmprPgOfst);
10921123

10931124
// write
10941125
if( (rc = sqlite3PagerGet(p->pPager, mappedPgno, &pPage, 0))==SQLITE_OK ){
10951126
CevfsMemPage *pMemPage = memPageFromDbPage(pPage, mappedPgno);
1096-
if( (rc = cevfsPagerWrite(p, pPage))==SQLITE_OK ){
1127+
if( rc==SQLITE_OK && (rc = cevfsPagerWrite(p, pPage))==SQLITE_OK ){
10971128
CEVFS_PRINTF(
10981129
pInfo,
10991130
"%s.xWrite(%s, pgno=%u->%u, offset=%08lld->%06lu, amt=%06d->%06d)",
11001131
pInfo->zVfsName, p->zFName,
11011132
uppPgno, mappedPgno,
11021133
iOfst, (unsigned long)(pMemPage->dbHdrOffset+pMemPage->pgHdrOffset+cmprPgOfst),
1103-
iAmt, uIvEncSz
1134+
iAmt, nSrcAmt
11041135
);
11051136
memcpy(
11061137
pMemPage->aData
11071138
+pMemPage->dbHdrOffset
11081139
+pMemPage->pgHdrOffset
11091140
+cmprPgOfst,
1110-
pIvEncBuf,
1111-
uIvEncSz
1141+
pSrcData,
1142+
nSrcAmt
11121143
);
11131144

11141145
// Keep track of sizes of upper and lower pagers
@@ -1117,13 +1148,12 @@ static int cevfsWrite(
11171148
}
11181149
sqlite3PagerUnref(pPage);
11191150
}
1120-
sqlite3_free(pIvEncBuf);
1121-
}else{
1122-
rc = CEVFS_ERROR_ENCRYPTION_FAILED;
1123-
}
1124-
sqlite3_free(pCmpBuf);
1125-
}else rc = SQLITE_NOMEM;
1126-
}else rc = SQLITE_READONLY;
1151+
}else rc=CEVFS_ERROR_PAGE_SIZE_TOO_SMALL;
1152+
}
1153+
1154+
if( pEncBuf ) sqlite3_free( pEncBuf );
1155+
if( pCmpBuf ) sqlite3_free( pCmpBuf );
1156+
}
11271157
}else{
11281158
CEVFS_PRINTF(pInfo, "%s.xWrite(%s, offset=%08lld, amt=%06d)", pInfo->zVfsName, p->zFName, iOfst, iAmt);
11291159
rc = p->pReal->pMethods->xWrite(p->pReal, zBuf, iAmt, iOfst);
@@ -1837,15 +1867,16 @@ int cevfs_build(
18371867

18381868
DbPage *pPage1 = NULL;
18391869
// import all pages
1840-
for(Pgno i=0; i<pageCount; i++){
1870+
for(Pgno pgno=0; pgno<pageCount; pgno++){
18411871
// read source page
18421872
DbPage *pPage;
1843-
rc = sqlite3PagerGet(pPager, i+1, &pPage, 0);
1873+
rc = sqlite3PagerGet(pPager, pgno+1, &pPage, /* flags */ 0);
18441874
if( rc==SQLITE_OK ){
1845-
// write destination page
1875+
// get pointer to source page data
18461876
void *pData = sqlite3PagerGetData(pPage);
1847-
rc = cevfsWrite((sqlite3_file *)pInfo->pFile, pData, pageSize, pageSize*i);
1848-
if (i==0) {
1877+
// write destination page
1878+
rc = cevfsWrite((sqlite3_file *)pInfo->pFile, pData, pageSize, pageSize*pgno);
1879+
if( pgno==0 ){
18491880
// To be deallocated later
18501881
pPage1 = pPage;
18511882
}else{

0 commit comments

Comments
 (0)