From 80bb84f0e75881d2d99c09601d2b4f5f39add1ff Mon Sep 17 00:00:00 2001 From: ThomasHickman Date: Mon, 11 Sep 2017 16:45:44 +0100 Subject: [PATCH 1/9] Add in-memory I/O --- hfile.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++++-- htslib/hfile.h | 17 +++++++-- test/hfile.c | 32 +++++++++++++++++ 3 files changed, 140 insertions(+), 4 deletions(-) diff --git a/hfile.c b/hfile.c index b0c5eba30..bef731681 100644 --- a/hfile.c +++ b/hfile.c @@ -404,7 +404,7 @@ off_t hseek(hFILE *fp, off_t offset, int whence) { off_t curpos, pos; - if (writebuffer_is_nonempty(fp)) { + if (writebuffer_is_nonempty(fp) && fp->mobile) { int ret = flush_buffer(fp); if (ret < 0) return ret; } @@ -615,6 +615,47 @@ static hFILE *hopen_fd(const char *filename, const char *mode) return NULL; } +static hFILE *hpreload_fd(const char *filename, const char *mode) +{ + if(!strchr(mode, 'r')) + { + return NULL; + } + + hFILE_fd *fp = NULL; + FILE *file = fopen(filename, mode); + if (!file) goto error; + + fseek(file, 0, SEEK_END); + int len = ftell(file); + fseek(file, 0, SEEK_SET); + + char* buffer = malloc(len); + if(buffer == NULL) + { + errno = ENOMEM; + goto error; + } + if(fread(buffer, 1, len, file) != len) + { + errno = EIO; + goto error; + } + + fp = (hFILE_fd *) hfile_init_fixed(sizeof (hFILE_fd), mode, buffer, len, len); + if (fp == NULL) goto error; + + fp->fd = fileno(file); + fp->is_socket = 0; + fp->base.backend = &fd_backend; + return &fp->base; + +error: + if (file) { int save = errno; (void) fclose(file); errno = save; } + hfile_destroy((hFILE *) fp); + return NULL; +} + hFILE *hdopen(int fd, const char *mode) { hFILE_fd *fp = (hFILE_fd*) hfile_init(sizeof (hFILE_fd), mode, blksize(fd)); @@ -821,6 +862,41 @@ static int init_add_plugin(void *obj, int (*init)(struct hFILE_plugin *), return 0; } +hFILE *hopenv_mem(const char *filename, const char *mode, va_list args) +{ + char* buffer = va_arg(args, char*); + size_t sz = va_arg(args, size_t); + va_end(args); + + hFILE_mem *fp = 
(hFILE_mem *) hfile_init_fixed(sizeof(hFILE_mem), mode, buffer, sz, sz); + + fp->base.backend = &mem_backend; + + return &fp->base; +} + +int hfile_mem_get_buffer(hFILE *file, char **buffer, size_t *length){ + if(file->backend != &mem_backend) { + errno = EINVAL; + return -1; + } + + *buffer = file->buffer; + *length = file->buffer - file->limit; + + return 0; +} + +int hfile_plugin_init_mem(struct hFILE_plugin *self) +{ + // mem files are declared remote so they work with a tabix index + static const struct hFILE_scheme_handler handler = + {NULL, hfile_always_remote, "mem", 2000 + 50, hopenv_mem}; + self->name = "mem"; + hfile_add_scheme_handler("mem", &handler); + return 0; +} + static void load_hfile_plugins() { static const struct hFILE_scheme_handler @@ -833,6 +909,7 @@ static void load_hfile_plugins() hfile_add_scheme_handler("data", &data); hfile_add_scheme_handler("file", &file); init_add_plugin(NULL, hfile_plugin_init_net, "knetfile"); + init_add_plugin(NULL, hfile_plugin_init_mem, "mem"); #ifdef ENABLE_PLUGINS struct hts_path_itr path; @@ -879,11 +956,25 @@ static hFILE *hopen_unknown_scheme(const char *fname, const char *mode) return fp; } +static hFILE *hopenv_unknown_scheme(const char *fname, const char *mode, va_list args) +{ + char* method_type = va_arg(args, char*); + va_end(args); + if(!strcmp(method_type, "preload")){ + errno = EPROTONOSUPPORT; + return NULL; + } + + hFILE *fp = hpreload_fd(fname, mode); + if (fp == NULL && errno == ENOENT) errno = EPROTONOSUPPORT; + return fp; +} + /* Returns the appropriate handler, or NULL if the string isn't an URL. 
*/ static const struct hFILE_scheme_handler *find_scheme_handler(const char *s) { static const struct hFILE_scheme_handler unknown_scheme = - { hopen_unknown_scheme, hfile_always_local, "built-in", 0 }; + { hopen_unknown_scheme, hfile_always_local, "built-in", 2000 + 50, hopenv_unknown_scheme }; char scheme[12]; int i; diff --git a/htslib/hfile.h b/htslib/hfile.h index fa8971842..d214a2d2a 100644 --- a/htslib/hfile.h +++ b/htslib/hfile.h @@ -202,7 +202,7 @@ hread(hFILE *fp, void *buffer, size_t nbytes) if (n > nbytes) n = nbytes; memcpy(buffer, fp->begin, n); fp->begin += n; - return (n == nbytes)? (ssize_t) n : hread2(fp, buffer, nbytes, n); + return (n == nbytes || !fp->mobile)? (ssize_t) n : hread2(fp, buffer, nbytes, n); } /// Write a character to the stream @@ -239,7 +239,15 @@ static inline ssize_t HTS_RESULT_USED hwrite(hFILE *fp, const void *buffer, size_t nbytes) { extern ssize_t hwrite2(hFILE *, const void *, size_t, size_t); - + extern int hfile_set_blksize(hFILE *fp, size_t bufsiz); + + if(!fp->mobile){ + if (fp->limit - fp->begin < nbytes){ + hfile_set_blksize(fp, fp->limit - fp->buffer + nbytes); + fp->end = fp->limit; + } + } + size_t n = fp->limit - fp->begin; if (n > nbytes) n = nbytes; memcpy(fp->begin, buffer, n); @@ -254,6 +262,11 @@ This includes low-level flushing such as via `fdatasync(2)`. 
*/ int hflush(hFILE *fp) HTS_RESULT_USED; +/// For hfile_mem: get the internal buffer and it's size from a hfile +/** @return 0 if successful, or -1 if an error occurred +*/ +int hfile_mem_get_buffer(hFILE *file, char **buffer, size_t *length); + #ifdef __cplusplus } #endif diff --git a/test/hfile.c b/test/hfile.c index 577b8171b..a065356a0 100644 --- a/test/hfile.c +++ b/test/hfile.c @@ -202,6 +202,38 @@ int main(void) if ((c = hgetc(fin)) != EOF) fail("chars: hgetc (EOF) returned %d", c); if (hclose(fin) != 0) fail("hclose(test/hfile_chars.tmp) for reading"); + fin = hopen("test/hfile_chars.tmp", "r:", "preload"); + if (fin == NULL) fail("preloading hopen(\"test/hfile_chars.tmp\") for reading"); + for (i = 0; i < 256; i++) + if ((c = hgetc(fin)) != i) + fail("preloading chars: hgetc (%d = 0x%x) returned %d = 0x%x", i, i, c, c); + if ((c = hgetc(fin)) != EOF) fail("preloading chars: hgetc (EOF) returned %d", c); + if (hclose(fin) != 0) fail("preloading hclose(test/hfile_chars.tmp) for reading"); + + char* test_string = strdup("Test string"); + fin = hopen("mem:", "r:", test_string, 12); + if (fin == NULL) fail("hopen(\"mem:\", \"r:\", ...)"); + if (hread(fin, buffer, 12) != 12) + fail("hopen('mem:', 'r') failed read"); + if(strcmp(buffer, test_string) != 0) + fail("hopen('mem:', 'r') missread '%s' != '%s'", buffer, test_string); + if (hclose(fin) != 0) fail("hclose mem for reading"); + + test_string = strdup("Test string"); + fin = hopen("mem:", "wr:", test_string, 12); + if (fin == NULL) fail("hopen(\"mem:\", \"w:\", ...)"); + if (hseek(fin, -1, SEEK_END) < 0) + fail("hopen('mem:', 'wr') failed seek"); + if (hwrite(fin, strdup(" extra"), 7) != 7) + fail("hopen('mem:', 'wr') failed write"); + if (hseek(fin, 0, SEEK_SET) < 0) + fail("hopen('mem:', 'wr') failed seek"); + if (hread(fin, buffer, 18) != 18) + fail("hopen('mem:', 'wr') failed read"); + if (strcmp(buffer, "Test string extra") != 0) + fail("hopen('mem:', 'wr') misswrote '%s' != '%s'", buffer, "Test string 
extra"); + if (hclose(fin) != 0) fail("hclose mem for writing"); + fin = hopen("data:,hello, world!%0A", "r"); if (fin == NULL) fail("hopen(\"data:...\")"); n = hread(fin, buffer, 300); From 58fb9795b3cd97d3ae8864b882a5ad7a18791bd4 Mon Sep 17 00:00:00 2001 From: ThomasHickman Date: Tue, 12 Sep 2017 09:40:42 +0100 Subject: [PATCH 2/9] Add better error handling in hpreload_fd --- hfile.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/hfile.c b/hfile.c index bef731681..7ff4aafca 100644 --- a/hfile.c +++ b/hfile.c @@ -626,21 +626,13 @@ static hFILE *hpreload_fd(const char *filename, const char *mode) FILE *file = fopen(filename, mode); if (!file) goto error; - fseek(file, 0, SEEK_END); + if(fseek(file, 0, SEEK_END) != 0) goto error; int len = ftell(file); fseek(file, 0, SEEK_SET); char* buffer = malloc(len); - if(buffer == NULL) - { - errno = ENOMEM; - goto error; - } - if(fread(buffer, 1, len, file) != len) - { - errno = EIO; - goto error; - } + if(buffer == NULL) goto error; + if(fread(buffer, 1, len, file) != len) goto error; fp = (hFILE_fd *) hfile_init_fixed(sizeof (hFILE_fd), mode, buffer, len, len); if (fp == NULL) goto error; From 29a6789e90d645ca261854b083f022db58f6b0c6 Mon Sep 17 00:00:00 2001 From: ThomasHickman Date: Tue, 12 Sep 2017 09:40:58 +0100 Subject: [PATCH 3/9] Add create_hfile_mem --- hfile.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/hfile.c b/hfile.c index 7ff4aafca..1ec114147 100644 --- a/hfile.c +++ b/hfile.c @@ -744,6 +744,15 @@ static int cmp_prefix(const char *key, const char *s) return 0; } +static hFILE *create_hfile_mem(char* buffer, const char* mode, size_t buf_filled, size_t buf_size) +{ + hFILE_mem *fp = (hFILE_mem *) hfile_init_fixed(sizeof(hFILE_mem), mode, buffer, buf_filled, buf_size); + if (fp == NULL) { free(buffer); return NULL; } + + fp->base.backend = &mem_backend; + return &fp->base; +} + static hFILE *hopen_mem(const char *url, const char *mode) { 
size_t length, size; @@ -768,6 +777,8 @@ static hFILE *hopen_mem(const char *url, const char *mode) hts_decode_percent(buffer, &length, data); } + return create_hfile_mem(buffer, mode, length, size); + hFILE_mem *fp = (hFILE_mem *) hfile_init_fixed(sizeof (hFILE_mem), mode, buffer, length, size); if (fp == NULL) { free(buffer); return NULL; } @@ -860,11 +871,7 @@ hFILE *hopenv_mem(const char *filename, const char *mode, va_list args) size_t sz = va_arg(args, size_t); va_end(args); - hFILE_mem *fp = (hFILE_mem *) hfile_init_fixed(sizeof(hFILE_mem), mode, buffer, sz, sz); - - fp->base.backend = &mem_backend; - - return &fp->base; + return create_hfile_mem(buffer, mode, sz, sz); } int hfile_mem_get_buffer(hFILE *file, char **buffer, size_t *length){ From 5cd9e07ed627f3cbb5fe0263a714af0a6b2fcb8a Mon Sep 17 00:00:00 2001 From: ThomasHickman Date: Tue, 12 Sep 2017 09:42:00 +0100 Subject: [PATCH 4/9] Move mem backend code to the correct section --- hfile.c | 62 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/hfile.c b/hfile.c index 1ec114147..524add66d 100644 --- a/hfile.c +++ b/hfile.c @@ -787,6 +787,37 @@ static hFILE *hopen_mem(const char *url, const char *mode) return &fp->base; } +hFILE *hopenv_mem(const char *filename, const char *mode, va_list args) +{ + char* buffer = va_arg(args, char*); + size_t sz = va_arg(args, size_t); + va_end(args); + + return create_hfile_mem(buffer, mode, sz, sz); +} + +int hfile_mem_get_buffer(hFILE *file, char **buffer, size_t *length){ + if(file->backend != &mem_backend) { + errno = EINVAL; + return -1; + } + + *buffer = file->buffer; + *length = file->buffer - file->limit; + + return 0; +} + +int hfile_plugin_init_mem(struct hFILE_plugin *self) +{ + // mem files are declared remote so they work with a tabix index + static const struct hFILE_scheme_handler handler = + {NULL, hfile_always_remote, "mem", 2000 + 50, hopenv_mem}; + self->name = "mem"; + 
hfile_add_scheme_handler("mem", &handler); + return 0; +} + /***************************************** * Plugin and hopen() backend dispatcher * @@ -865,37 +896,6 @@ static int init_add_plugin(void *obj, int (*init)(struct hFILE_plugin *), return 0; } -hFILE *hopenv_mem(const char *filename, const char *mode, va_list args) -{ - char* buffer = va_arg(args, char*); - size_t sz = va_arg(args, size_t); - va_end(args); - - return create_hfile_mem(buffer, mode, sz, sz); -} - -int hfile_mem_get_buffer(hFILE *file, char **buffer, size_t *length){ - if(file->backend != &mem_backend) { - errno = EINVAL; - return -1; - } - - *buffer = file->buffer; - *length = file->buffer - file->limit; - - return 0; -} - -int hfile_plugin_init_mem(struct hFILE_plugin *self) -{ - // mem files are declared remote so they work with a tabix index - static const struct hFILE_scheme_handler handler = - {NULL, hfile_always_remote, "mem", 2000 + 50, hopenv_mem}; - self->name = "mem"; - hfile_add_scheme_handler("mem", &handler); - return 0; -} - static void load_hfile_plugins() { static const struct hFILE_scheme_handler From 39720156ff6c1e8ddc51ae8621da85245dec1df7 Mon Sep 17 00:00:00 2001 From: ThomasHickman Date: Tue, 12 Sep 2017 14:42:40 +0100 Subject: [PATCH 5/9] Fix issues with mem hfile --- hfile.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/hfile.c b/hfile.c index 524add66d..b91a32b5e 100644 --- a/hfile.c +++ b/hfile.c @@ -617,7 +617,7 @@ static hFILE *hopen_fd(const char *filename, const char *mode) static hFILE *hpreload_fd(const char *filename, const char *mode) { - if(!strchr(mode, 'r')) + if(mode == NULL || !strchr(mode, 'r')) { return NULL; } @@ -747,7 +747,8 @@ static int cmp_prefix(const char *key, const char *s) static hFILE *create_hfile_mem(char* buffer, const char* mode, size_t buf_filled, size_t buf_size) { hFILE_mem *fp = (hFILE_mem *) hfile_init_fixed(sizeof(hFILE_mem), mode, buffer, buf_filled, buf_size); - if (fp == NULL) { free(buffer); 
return NULL; } + if (fp == NULL) + return NULL; fp->base.backend = &mem_backend; return &fp->base; @@ -778,13 +779,6 @@ static hFILE *hopen_mem(const char *url, const char *mode) } return create_hfile_mem(buffer, mode, length, size); - - hFILE_mem *fp = (hFILE_mem *) - hfile_init_fixed(sizeof (hFILE_mem), mode, buffer, length, size); - if (fp == NULL) { free(buffer); return NULL; } - - fp->base.backend = &mem_backend; - return &fp->base; } hFILE *hopenv_mem(const char *filename, const char *mode, va_list args) From 3d113770185140522a658600050184ae2701f99e Mon Sep 17 00:00:00 2001 From: ThomasHickman Date: Wed, 13 Sep 2017 12:19:33 +0100 Subject: [PATCH 6/9] Free hFILE buffer when erroring --- hfile.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/hfile.c b/hfile.c index b91a32b5e..2c2dfeeba 100644 --- a/hfile.c +++ b/hfile.c @@ -777,8 +777,14 @@ static hFILE *hopen_mem(const char *url, const char *mode) if (buffer == NULL) return NULL; hts_decode_percent(buffer, &length, data); } + hFILE* hf; - return create_hfile_mem(buffer, mode, length, size); + if(!(hf = create_hfile_mem(buffer, mode, length, size))){ + free(buffer); + return NULL; + } + + return hf; } hFILE *hopenv_mem(const char *filename, const char *mode, va_list args) @@ -787,7 +793,14 @@ hFILE *hopenv_mem(const char *filename, const char *mode, va_list args) size_t sz = va_arg(args, size_t); va_end(args); - return create_hfile_mem(buffer, mode, sz, sz); + hFILE* hf; + + if(!(hf = create_hfile_mem(buffer, mode, sz, sz))){ + free(buffer); + return NULL; + } + + return hf; } int hfile_mem_get_buffer(hFILE *file, char **buffer, size_t *length){ From 3656732f31a5b773d3e75701a0395454cdb3ece6 Mon Sep 17 00:00:00 2001 From: ThomasHickman Date: Fri, 22 Sep 2017 16:42:40 +0100 Subject: [PATCH 7/9] Stop freeing hfile mem buffers in hclose --- hfile.c | 4 +++- test/hfile.c | 10 ++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/hfile.c b/hfile.c index 
2c2dfeeba..16547d4d0 100644 --- a/hfile.c +++ b/hfile.c @@ -138,10 +138,12 @@ hFILE *hfile_init_fixed(size_t struct_size, const char *mode, return fp; } +static const struct hFILE_backend mem_backend; + void hfile_destroy(hFILE *fp) { int save = errno; - if (fp) free(fp->buffer); + if (fp && fp->backend != &mem_backend) free(fp->buffer); free(fp); errno = save; } diff --git a/test/hfile.c b/test/hfile.c index a065356a0..2c1a1e721 100644 --- a/test/hfile.c +++ b/test/hfile.c @@ -217,7 +217,13 @@ int main(void) fail("hopen('mem:', 'r') failed read"); if(strcmp(buffer, test_string) != 0) fail("hopen('mem:', 'r') missread '%s' != '%s'", buffer, test_string); + char* internal_buf; + size_t interval_buf_len; + if(hfile_mem_get_buffer(fin, &internal_buf, &interval_buf_len) != 0){ + fail("hopen('mem:', 'r') failed to get internal buffer"); + } if (hclose(fin) != 0) fail("hclose mem for reading"); + free(internal_buf); test_string = strdup("Test string"); fin = hopen("mem:", "wr:", test_string, 12); @@ -232,7 +238,11 @@ int main(void) fail("hopen('mem:', 'wr') failed read"); if (strcmp(buffer, "Test string extra") != 0) fail("hopen('mem:', 'wr') misswrote '%s' != '%s'", buffer, "Test string extra"); + if(hfile_mem_get_buffer(fin, &internal_buf, &interval_buf_len) != 0){ + fail("hopen('mem:', 'wr') failed to get internal buffer"); + } if (hclose(fin) != 0) fail("hclose mem for writing"); + free(internal_buf); fin = hopen("data:,hello, world!%0A", "r"); if (fin == NULL) fail("hopen(\"data:...\")"); From fde4dcfcaeb012377f14343688df4e74cd622681 Mon Sep 17 00:00:00 2001 From: James Bonfield Date: Thu, 7 Dec 2017 12:23:54 +0000 Subject: [PATCH 8/9] Fixes to PR#590. The buffer used in preload is now freed. If we want to take ownership of it and avoid it being freed, the new hfile_mem_steal_buffer function can be called. Also changed the prototype of hfile_mem_get_buffer to return the buffer directly instead of via a pointer to a pointer as it's simpler and permits inline usage. 
Changed preload to work on more than straight hFILE_fd, although in the current incarnation it still won't work on anything that attempts to use the varargs hopen interface for anything else. --- hfile.c | 134 +++++++++++++++++++++++++++---------------------- htslib/hfile.h | 17 ++++++- test/hfile.c | 9 ++-- 3 files changed, 93 insertions(+), 67 deletions(-) diff --git a/hfile.c b/hfile.c index 16547d4d0..158b805a0 100644 --- a/hfile.c +++ b/hfile.c @@ -83,15 +83,15 @@ then there is a non-empty read buffer, and if begin == end then both buffers are empty. In all cases, the stream's file position indicator corresponds to the position pointed to by begin. -The above is the normal scenario of a mobile window. For in-memory streams, -a fixed (immobile) buffer can be used as the full contents without any separate -backend behind it. These always have at_eof set, offset set to 0, need no -read() method, and should just return EINVAL for seek(): +The above is the normal scenario of a mobile window. For in-memory +streams (eg via hfile_init_fixed) the buffer can be used as the full +contents without any separate backend behind it. These always have at_eof +set, offset set to 0, need no read() method, and should just return EINVAL +for seek(): abcdefghijkLMNOPQRSTUVWXYZ------ ^buffer ^begin ^end ^limit - -Use hfile_init_fixed() to create one of these. */ +*/ hFILE *hfile_init(size_t struct_size, const char *mode, size_t capacity) { @@ -143,7 +143,7 @@ static const struct hFILE_backend mem_backend; void hfile_destroy(hFILE *fp) { int save = errno; - if (fp && fp->backend != &mem_backend) free(fp->buffer); + if (fp) free(fp->buffer); free(fp); errno = save; } @@ -617,36 +617,44 @@ static hFILE *hopen_fd(const char *filename, const char *mode) return NULL; } -static hFILE *hpreload_fd(const char *filename, const char *mode) -{ - if(mode == NULL || !strchr(mode, 'r')) - { - return NULL; +// Loads the contents of filename to produced a read-only, in memory, +// immobile hfile.
fp is the already opened file. We always close this +// input fp, irrespective of whether we error or whether we return a new +// immobile hfile. +static hFILE *hopen_preload(hFILE *fp) { + hFILE *mem_fp; + char *buf = NULL; + off_t buf_sz = 0, buf_a = 0, buf_inc = 8192, len; + + for (;;) { + if (buf_a - buf_sz < 5000) { + buf_a += buf_inc; + char *t = realloc(buf, buf_a); + if (!t) goto err; + buf = t; + if (buf_inc < 1000000) buf_inc *= 1.3; + } + len = hread(fp, buf+buf_sz, buf_a-buf_sz); + if (len > 0) + buf_sz += len; + else + break; } - - hFILE_fd *fp = NULL; - FILE *file = fopen(filename, mode); - if (!file) goto error; - if(fseek(file, 0, SEEK_END) != 0) goto error; - int len = ftell(file); - fseek(file, 0, SEEK_SET); + if (len < 0) goto err; + mem_fp = hfile_init_fixed(sizeof(hFILE), "r", buf, buf_sz, buf_a); + if (!mem_fp) goto err; + mem_fp->backend = &mem_backend; - char* buffer = malloc(len); - if(buffer == NULL) goto error; - if(fread(buffer, 1, len, file) != len) goto error; - - fp = (hFILE_fd *) hfile_init_fixed(sizeof (hFILE_fd), mode, buffer, len, len); - if (fp == NULL) goto error; - - fp->fd = fileno(file); - fp->is_socket = 0; - fp->base.backend = &fd_backend; - return &fp->base; + if (hclose(fp) < 0) { + hclose_abruptly(mem_fp); + goto err; + } + return mem_fp; -error: - if (file) { int save = errno; (void) fclose(file); errno = save; } - hfile_destroy((hFILE *) fp); + err: + free(buf); + hclose_abruptly(fp); return NULL; } @@ -796,7 +804,7 @@ hFILE *hopenv_mem(const char *filename, const char *mode, va_list args) va_end(args); hFILE* hf; - + if(!(hf = create_hfile_mem(buffer, mode, sz, sz))){ free(buffer); return NULL; @@ -805,16 +813,23 @@ hFILE *hopenv_mem(const char *filename, const char *mode, va_list args) return hf; } -int hfile_mem_get_buffer(hFILE *file, char **buffer, size_t *length){ - if(file->backend != &mem_backend) { +char *hfile_mem_get_buffer(hFILE *file, size_t *length) { + if (file->backend != &mem_backend) { errno = EINVAL; 
- return -1; + return NULL; } - *buffer = file->buffer; - *length = file->buffer - file->limit; + if (length) + *length = file->buffer - file->limit; - return 0; + return file->buffer; +} + +char *hfile_mem_steal_buffer(hFILE *file, size_t *length) { + char *buf = hfile_mem_get_buffer(file, length); + if (buf) + file->buffer = NULL; + return buf; } int hfile_plugin_init_mem(struct hFILE_plugin *self) @@ -964,25 +979,11 @@ static hFILE *hopen_unknown_scheme(const char *fname, const char *mode) return fp; } -static hFILE *hopenv_unknown_scheme(const char *fname, const char *mode, va_list args) -{ - char* method_type = va_arg(args, char*); - va_end(args); - if(!strcmp(method_type, "preload")){ - errno = EPROTONOSUPPORT; - return NULL; - } - - hFILE *fp = hpreload_fd(fname, mode); - if (fp == NULL && errno == ENOENT) errno = EPROTONOSUPPORT; - return fp; -} - /* Returns the appropriate handler, or NULL if the string isn't an URL. */ static const struct hFILE_scheme_handler *find_scheme_handler(const char *s) { static const struct hFILE_scheme_handler unknown_scheme = - { hopen_unknown_scheme, hfile_always_local, "built-in", 2000 + 50, hopenv_unknown_scheme }; + { hopen_unknown_scheme, hfile_always_local, "built-in", 0 }; char scheme[12]; int i; @@ -1007,21 +1008,34 @@ static const struct hFILE_scheme_handler *find_scheme_handler(const char *s) hFILE *hopen(const char *fname, const char *mode, ...) 
{ + hFILE *fp = NULL; + const struct hFILE_scheme_handler *handler = find_scheme_handler(fname); if (handler) { - if (strchr(mode, ':') == NULL) return handler->open(fname, mode); + if (strchr(mode, ':') == NULL) fp = handler->open(fname, mode); else if (handler->priority >= 2000 && handler->vopen) { - hFILE *fp; va_list arg; va_start(arg, mode); fp = handler->vopen(fname, mode, arg); va_end(arg); - return fp; } else { errno = ENOTSUP; return NULL; } } - else if (strcmp(fname, "-") == 0) return hopen_fd_stdinout(mode); - else return hopen_fd(fname, mode); + else if (strcmp(fname, "-") == 0) fp = hopen_fd_stdinout(mode); + else fp = hopen_fd(fname, mode); + + if (!fp) return NULL; + + if (strchr(mode, 'r') && strchr(mode, ':')) { + va_list arg; + va_start(arg, mode); + const char *argtype = va_arg(arg, const char *); + if (strcmp(argtype, "preload") == 0) + fp = hopen_preload(fp); + va_end(arg); + } + + return fp; } int hfile_always_local (const char *fname) { return 0; } diff --git a/htslib/hfile.h b/htslib/hfile.h index d214a2d2a..5b53b0386 100644 --- a/htslib/hfile.h +++ b/htslib/hfile.h @@ -263,9 +263,22 @@ This includes low-level flushing such as via `fdatasync(2)`. int hflush(hFILE *fp) HTS_RESULT_USED; /// For hfile_mem: get the internal buffer and it's size from a hfile -/** @return 0 if successful, or -1 if an error occurred +/** @return buffer if successful, or NULL if an error occurred + +The buffer returned should not be freed as this will happen when the +hFILE is closed. +*/ +char *hfile_mem_get_buffer(hFILE *file, size_t *length); + +/// For hfile_mem: get the internal buffer and it's size from a hfile. +/** @return buffer if successful, or NULL if an error occurred + +This is similar to hfile_mem_get_buffer except that ownership of the +buffer is granted to the caller, who now has responsibility for freeing +it. From this point onwards, the hFILE should not be used for any +purpose other than closing. 
*/ -int hfile_mem_get_buffer(hFILE *file, char **buffer, size_t *length); +char *hfile_mem_steal_buffer(hFILE *file, size_t *length); #ifdef __cplusplus } diff --git a/test/hfile.c b/test/hfile.c index 2c1a1e721..affdc85f8 100644 --- a/test/hfile.c +++ b/test/hfile.c @@ -219,18 +219,17 @@ int main(void) fail("hopen('mem:', 'r') missread '%s' != '%s'", buffer, test_string); char* internal_buf; size_t interval_buf_len; - if(hfile_mem_get_buffer(fin, &internal_buf, &interval_buf_len) != 0){ + if((internal_buf = hfile_mem_get_buffer(fin, &interval_buf_len)) == NULL){ fail("hopen('mem:', 'r') failed to get internal buffer"); } if (hclose(fin) != 0) fail("hclose mem for reading"); - free(internal_buf); test_string = strdup("Test string"); fin = hopen("mem:", "wr:", test_string, 12); if (fin == NULL) fail("hopen(\"mem:\", \"w:\", ...)"); if (hseek(fin, -1, SEEK_END) < 0) fail("hopen('mem:', 'wr') failed seek"); - if (hwrite(fin, strdup(" extra"), 7) != 7) + if (hwrite(fin, " extra", 7) != 7) fail("hopen('mem:', 'wr') failed write"); if (hseek(fin, 0, SEEK_SET) < 0) fail("hopen('mem:', 'wr') failed seek"); @@ -238,11 +237,11 @@ int main(void) fail("hopen('mem:', 'wr') failed read"); if (strcmp(buffer, "Test string extra") != 0) fail("hopen('mem:', 'wr') misswrote '%s' != '%s'", buffer, "Test string extra"); - if(hfile_mem_get_buffer(fin, &internal_buf, &interval_buf_len) != 0){ + if((internal_buf = hfile_mem_steal_buffer(fin, &interval_buf_len)) == NULL){ fail("hopen('mem:', 'wr') failed to get internal buffer"); } - if (hclose(fin) != 0) fail("hclose mem for writing"); free(internal_buf); + if (hclose(fin) != 0) fail("hclose mem for writing"); fin = hopen("data:,hello, world!%0A", "r"); if (fin == NULL) fail("hopen(\"data:...\")"); From 5fa67e97b2bb60371049fa5f94ad7fa14720f0bc Mon Sep 17 00:00:00 2001 From: ThomasHickman Date: Tue, 12 Dec 2017 17:10:11 +0000 Subject: [PATCH 9/9] Create preload scheme Replaces the varargs method of specifying that the url should be
preloaded. hopen_preload has also been renamed to hpreload to make way for a hopen_preload function that acts as an interface for hfile_add_scheme_handler --- hfile.c | 24 +++++++++++++----------- test/hfile.c | 4 ++-- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/hfile.c b/hfile.c index 158b805a0..48a124469 100644 --- a/hfile.c +++ b/hfile.c @@ -621,7 +621,7 @@ static hFILE *hopen_fd(const char *filename, const char *mode) // immobile hfile. fp is the already opened file. We always close this // input fp, irrespective of whether we error or whether we return a new // immobile hfile. -static hFILE *hopen_preload(hFILE *fp) { +static hFILE *hpreload(hFILE *fp) { hFILE *mem_fp; char *buf = NULL; off_t buf_sz = 0, buf_a = 0, buf_inc = 8192, len; @@ -658,6 +658,15 @@ static hFILE *hopen_preload(hFILE *fp) { return NULL; } +static int is_preload_url_remote(const char *url){ + return hisremote(url + 8); // len("preload:") = 8 +} + +static hFILE *hopen_preload(const char *url, const char *mode){ + hFILE* fp = hopen(url + 8, mode); + return hpreload(fp); +} + hFILE *hdopen(int fd, const char *mode) { hFILE_fd *fp = (hFILE_fd*) hfile_init(sizeof (hFILE_fd), mode, blksize(fd)); @@ -924,13 +933,15 @@ static void load_hfile_plugins() { static const struct hFILE_scheme_handler data = { hopen_mem, hfile_always_local, "built-in", 80 }, - file = { hopen_fd_fileuri, hfile_always_local, "built-in", 80 }; + file = { hopen_fd_fileuri, hfile_always_local, "built-in", 80 }, + preload = { hopen_preload, is_preload_url_remote, "built-in", 80 }; schemes = kh_init(scheme_string); if (schemes == NULL) abort(); hfile_add_scheme_handler("data", &data); hfile_add_scheme_handler("file", &file); + hfile_add_scheme_handler("preload", &preload); init_add_plugin(NULL, hfile_plugin_init_net, "knetfile"); init_add_plugin(NULL, hfile_plugin_init_mem, "mem"); @@ -1026,15 +1037,6 @@ hFILE *hopen(const char *fname, const char *mode, ...) 
if (!fp) return NULL; - if (strchr(mode, 'r') && strchr(mode, ':')) { - va_list arg; - va_start(arg, mode); - const char *argtype = va_arg(arg, const char *); - if (strcmp(argtype, "preload") == 0) - fp = hopen_preload(fp); - va_end(arg); - } - return fp; } diff --git a/test/hfile.c b/test/hfile.c index affdc85f8..7e09ba03c 100644 --- a/test/hfile.c +++ b/test/hfile.c @@ -202,8 +202,8 @@ int main(void) if ((c = hgetc(fin)) != EOF) fail("chars: hgetc (EOF) returned %d", c); if (hclose(fin) != 0) fail("hclose(test/hfile_chars.tmp) for reading"); - fin = hopen("test/hfile_chars.tmp", "r:", "preload"); - if (fin == NULL) fail("preloading hopen(\"test/hfile_chars.tmp\") for reading"); + fin = hopen("preload:test/hfile_chars.tmp", "r"); + if (fin == NULL) fail("preloading \"test/hfile_chars.tmp\" for reading"); for (i = 0; i < 256; i++) if ((c = hgetc(fin)) != i) fail("preloading chars: hgetc (%d = 0x%x) returned %d = 0x%x", i, i, c, c);