Skip to content

Commit c4c0f8d

Browse files
committed
Import upstream htslib/samtools 1.22.1
For each package, remove the old copy and re-import it from the upstream release: `rm -rf htslib; python3 devtools/import.py htslib .../htslib-1.22.1`, then `rm -rf samtools; python3 devtools/import.py samtools .../samtools-1.22.1`. Take care to preserve the local change that removes the regeneration of htscodecs.mk from htslib/Makefile.
1 parent 1190ab7 commit c4c0f8d

25 files changed

+253
-140
lines changed

README.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ as it resolves non-python dependencies and uses pre-configured
2525
compilation options. Especially for OS X this will potentially save a
2626
lot of trouble.
2727

28-
The current version of pysam wraps 3rd-party code from htslib-1.22, samtools-1.22, and bcftools-1.22.
28+
The current version of pysam wraps 3rd-party code from htslib-1.22.1, samtools-1.22.1, and bcftools-1.22.
2929

3030
Pysam is available through `PyPI <https://pypi.org/project/pysam/>`_.
3131
To install, type::

doc/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@
5656
# Included at the end of each rst file
5757
rst_epilog = '''
5858
.. _pysam: https://github.com/pysam-developers/pysam
59-
.. _samtools: https://www.htslib.org/doc/1.22/samtools.html
59+
.. _samtools: https://www.htslib.org/doc/1.22.1/samtools.html
6060
.. _bcftools: https://www.htslib.org/doc/1.22/bcftools.html
6161
.. _htslib: https://www.htslib.org/
6262
.. _tabix: https://www.htslib.org/doc/tabix.html

doc/index.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ This module provides a low-level wrapper around the htslib_ C-API as
1818
using cython and a high-level, pythonic API for convenient access to
1919
the data within genomic file formats.
2020

21-
The current version wraps *htslib-1.22*, *samtools-1.22*, and *bcftools-1.22*.
21+
The current version wraps *htslib-1.22.1*, *samtools-1.22.1*, and *bcftools-1.22*.
2222

2323
To install the latest release, type::
2424

htslib/configure

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#! /bin/sh
22
# Guess values for system-dependent variables and create Makefiles.
3-
# Generated by GNU Autoconf 2.72 for HTSlib 1.22.
3+
# Generated by GNU Autoconf 2.72 for HTSlib 1.22.1.
44
#
55
# Report bugs to <samtools-help@lists.sourceforge.net>.
66
#
@@ -609,8 +609,8 @@ MAKEFLAGS=
609609
# Identity of this package.
610610
PACKAGE_NAME='HTSlib'
611611
PACKAGE_TARNAME='htslib'
612-
PACKAGE_VERSION='1.22'
613-
PACKAGE_STRING='HTSlib 1.22'
612+
PACKAGE_VERSION='1.22.1'
613+
PACKAGE_STRING='HTSlib 1.22.1'
614614
PACKAGE_BUGREPORT='samtools-help@lists.sourceforge.net'
615615
PACKAGE_URL='http://www.htslib.org/'
616616

@@ -1312,7 +1312,7 @@ if test "$ac_init_help" = "long"; then
13121312
# Omit some internal or obsolete options to make the list less imposing.
13131313
# This message is too long to be a string in the A/UX 3.1 sh.
13141314
cat <<_ACEOF
1315-
'configure' configures HTSlib 1.22 to adapt to many kinds of systems.
1315+
'configure' configures HTSlib 1.22.1 to adapt to many kinds of systems.
13161316
13171317
Usage: $0 [OPTION]... [VAR=VALUE]...
13181318
@@ -1378,7 +1378,7 @@ fi
13781378

13791379
if test -n "$ac_init_help"; then
13801380
case $ac_init_help in
1381-
short | recursive ) echo "Configuration of HTSlib 1.22:";;
1381+
short | recursive ) echo "Configuration of HTSlib 1.22.1:";;
13821382
esac
13831383
cat <<\_ACEOF
13841384
@@ -1491,7 +1491,7 @@ fi
14911491
test -n "$ac_init_help" && exit $ac_status
14921492
if $ac_init_version; then
14931493
cat <<\_ACEOF
1494-
HTSlib configure 1.22
1494+
HTSlib configure 1.22.1
14951495
generated by GNU Autoconf 2.72
14961496
14971497
Copyright (C) 2023 Free Software Foundation, Inc.
@@ -1818,7 +1818,7 @@ cat >config.log <<_ACEOF
18181818
This file contains any messages produced by compilers while
18191819
running configure, to aid debugging if configure makes a mistake.
18201820
1821-
It was created by HTSlib $as_me 1.22, which was
1821+
It was created by HTSlib $as_me 1.22.1, which was
18221822
generated by GNU Autoconf 2.72. Invocation command line was
18231823
18241824
$ $0$ac_configure_args_raw
@@ -6941,7 +6941,7 @@ then :
69416941
ref_cache="enabled"
69426942
else case e in #(
69436943
e)
6944-
if test "x$enable_ref_cache" = check
6944+
if test "x$enable_ref_cache" = xcheck
69456945
then :
69466946

69476947
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: ref-cache not enabled: requires libcurl" >&5
@@ -8851,7 +8851,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
88518851
# report actual input values of CONFIG_FILES etc. instead of their
88528852
# values after options handling.
88538853
ac_log="
8854-
This file was extended by HTSlib $as_me 1.22, which was
8854+
This file was extended by HTSlib $as_me 1.22.1, which was
88558855
generated by GNU Autoconf 2.72. Invocation command line was
88568856
88578857
CONFIG_FILES = $CONFIG_FILES
@@ -8924,7 +8924,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
89248924
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
89258925
ac_cs_config='$ac_cs_config_escaped'
89268926
ac_cs_version="\\
8927-
HTSlib config.status 1.22
8927+
HTSlib config.status 1.22.1
89288928
configured by $0, generated by GNU Autoconf 2.72,
89298929
with options \\"\$ac_cs_config\\"
89308930

htslib/configure.ac

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -645,7 +645,7 @@ AS_IF([test "x$enable_ref_cache" != xno],
645645
[AS_CASE([$PLATFORM],
646646
[Darwin | default],[
647647
AS_IF([test "x$libcurl" = xenabled], [ref_cache="enabled"], [
648-
AS_IF([test "x$enable_ref_cache" = check], [
648+
AS_IF([test "x$enable_ref_cache" = xcheck], [
649649
AC_MSG_WARN([ref-cache not enabled: requires libcurl])
650650
],[
651651
MSG_ERROR([ref-cache not enabled

htslib/cram/cram_codecs.c

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3575,12 +3575,18 @@ static int cram_byte_array_stop_decode_char(cram_slice *slice, cram_codec *c,
35753575

35763576
cp = (char *)b->data + b->idx;
35773577
if (out) {
3578+
// memccpy equivalent but without copying the terminating byte
3579+
ssize_t term = MIN(*out_size, b->uncomp_size - b->idx);
35783580
while ((ch = *cp) != (char)c->u.byte_array_stop.stop) {
3579-
if (cp - (char *)b->data >= b->uncomp_size)
3580-
return -1;
3581+
if (term-- < 0)
3582+
break;
35813583
*out++ = ch;
35823584
cp++;
35833585
}
3586+
3587+
// Attempted overrun on input or output
3588+
if (ch != (char)c->u.byte_array_stop.stop)
3589+
return -1;
35843590
} else {
35853591
// Consume input, but produce no output
35863592
while ((ch = *cp) != (char)c->u.byte_array_stop.stop) {

htslib/cram/cram_decode.c

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1275,7 +1275,7 @@ static int cram_decode_seq(cram_fd *fd, cram_container *c, cram_slice *s,
12751275

12761276
switch(op) {
12771277
case 'S': { // soft clip: IN
1278-
int32_t out_sz2 = 1;
1278+
int32_t out_sz2 = cr->len ? cr->len-(pos-1) : 1;
12791279
int have_sc = 0;
12801280

12811281
if (cig_len) {
@@ -1431,7 +1431,7 @@ static int cram_decode_seq(cram_fd *fd, cram_container *c, cram_slice *s,
14311431
}
14321432

14331433
case 'I': { // Insertion (several bases); IN
1434-
int32_t out_sz2 = 1;
1434+
int32_t out_sz2 = cr->len ? cr->len-(pos-1) : 1;
14351435

14361436
if (cig_len && cig_op != BAM_CINS) {
14371437
cigar[ncigar++] = (cig_len<<4) + cig_op;
@@ -1473,7 +1473,7 @@ static int cram_decode_seq(cram_fd *fd, cram_container *c, cram_slice *s,
14731473
}
14741474

14751475
case 'b': { // Several bases
1476-
int32_t len = 1;
1476+
int32_t len = cr->len ? cr->len-(pos-1) : 1;
14771477

14781478
if (cig_len && cig_op != BAM_CMATCH) {
14791479
cigar[ncigar++] = (cig_len<<4) + cig_op;
@@ -1523,7 +1523,7 @@ static int cram_decode_seq(cram_fd *fd, cram_container *c, cram_slice *s,
15231523
}
15241524

15251525
case 'q': { // Several quality values
1526-
int32_t len = 1;
1526+
int32_t len = cr->len ? cr->len - (pos-1) : 1;
15271527

15281528
if (cig_len && cig_op != BAM_CMATCH) {
15291529
cigar[ncigar++] = (cig_len<<4) + cig_op;
@@ -2298,9 +2298,13 @@ int cram_decode_slice(cram_fd *fd, cram_container *c, cram_slice *s,
22982298
// However it's likely that this also saves memory as own growth
22992299
// factor (*=1.5) is never applied.
23002300
{
2301+
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
2302+
int qsize=0, nsize=0, q_id=0;
2303+
#else
23012304
int qsize, nsize, q_id;
23022305
cram_decode_estimate_sizes(c->comp_hdr, s, &qsize, &nsize, &q_id);
23032306
//fprintf(stderr, "qsize=%d nsize=%d\n", qsize, nsize);
2307+
#endif
23042308

23052309
if (qsize && (ds & CRAM_RL)) BLOCK_RESIZE_EXACT(s->seqs_blk, qsize+1);
23062310
if (qsize && (ds & CRAM_RL)) BLOCK_RESIZE_EXACT(s->qual_blk, qsize+1);
@@ -2639,7 +2643,7 @@ int cram_decode_slice(cram_fd *fd, cram_container *c, cram_slice *s,
26392643
cr->name_len = 0;
26402644

26412645
if (c->comp_hdr->read_names_included) {
2642-
int32_t out_sz2 = 1;
2646+
int32_t out_sz2 = 1; // block auto grows in decode()
26432647

26442648
// Read directly into name cram_block
26452649
cr->name = BLOCK_SIZE(s->name_blk);
@@ -2800,15 +2804,15 @@ int cram_decode_slice(cram_fd *fd, cram_container *c, cram_slice *s,
28002804
/* Fake up dynamic string growth and appending */
28012805
if (ds & CRAM_RL) {
28022806
cr->seq = BLOCK_SIZE(s->seqs_blk);
2803-
BLOCK_GROW(s->seqs_blk, cr->len);
2807+
BLOCK_RESIZE(s->seqs_blk, cr->seq + cr->len);
28042808
seq = (char *)BLOCK_END(s->seqs_blk);
28052809
BLOCK_SIZE(s->seqs_blk) += cr->len;
28062810

28072811
if (!seq)
28082812
goto block_err;
28092813

28102814
cr->qual = BLOCK_SIZE(s->qual_blk);
2811-
BLOCK_GROW(s->qual_blk, cr->len);
2815+
BLOCK_RESIZE(s->qual_blk, cr->qual + cr->len);
28122816
qual = (char *)BLOCK_END(s->qual_blk);
28132817
BLOCK_SIZE(s->qual_blk) += cr->len;
28142818

htslib/cram/cram_io.c

Lines changed: 38 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1899,24 +1899,21 @@ static char *cram_compress_by_method(cram_slice *s, char *in, size_t in_size,
18991899
return NULL;
19001900
}
19011901

1902-
19031902
/*
1904-
* Compresses a block using one of two different zlib strategies. If we only
1905-
* want one choice set strat2 to be -1.
1906-
*
1907-
* The logic here is that sometimes Z_RLE does a better job than Z_FILTERED
1908-
* or Z_DEFAULT_STRATEGY on quality data. If so, we'd rather use it as it is
1909-
* significantly faster.
1910-
*
1911-
* Method and level -1 implies defaults, as specified in cram_fd.
1903+
* A copy of cram_compress_block2 with added recursion detection.
1904+
* This is only called for error handling where the auto-tuning has failed.
1905+
* The simplest way of doing this is recursion + an additional argument, but
1906+
* we didn't want to complicate the existing code hence this is static.
19121907
*/
1913-
int cram_compress_block2(cram_fd *fd, cram_slice *s,
1914-
cram_block *b, cram_metrics *metrics,
1915-
int method, int level) {
1908+
static int cram_compress_block3(cram_fd *fd, cram_slice *s,
1909+
cram_block *b, cram_metrics *metrics,
1910+
int method, int level,
1911+
int recurse) {
19161912

19171913
if (!b)
19181914
return 0;
19191915

1916+
int orig_method = method;
19201917
char *comp = NULL;
19211918
size_t comp_size = 0;
19221919
int strat;
@@ -2249,8 +2246,23 @@ int cram_compress_block2(cram_fd *fd, cram_slice *s,
22492246
b->content_id, &comp_size, method,
22502247
method == GZIP_1 ? 1 : level,
22512248
strat);
2252-
if (!comp)
2249+
if (!comp) {
2250+
// Our cached best method failed, but maybe another works?
2251+
// Rerun with trial mode engaged again.
2252+
if (!recurse) {
2253+
hts_log_warning("Compressed block ID %d method %s failed, "
2254+
"redoing trial", b->content_id,
2255+
cram_block_method2str(method));
2256+
pthread_mutex_lock(&fd->metrics_lock);
2257+
metrics->trial = NTRIALS;
2258+
metrics->next_trial = TRIAL_SPAN;
2259+
metrics->revised_method = orig_method;
2260+
pthread_mutex_unlock(&fd->metrics_lock);
2261+
return cram_compress_block3(fd, s, b, metrics, method,
2262+
level, 1);
2263+
}
22532264
return -1;
2265+
}
22542266

22552267
if (comp_size < b->uncomp_size) {
22562268
free(b->data);
@@ -2290,6 +2302,19 @@ int cram_compress_block2(cram_fd *fd, cram_slice *s,
22902302

22912303
return 0;
22922304
}
2305+
2306+
/*
2307+
* Compresses a block using a selection of compression codecs and options.
2308+
* The best is learnt and used for subsequent slices, periodically resampling.
2309+
*
2310+
* Method and level -1 implies defaults, as specified in cram_fd.
2311+
*/
2312+
int cram_compress_block2(cram_fd *fd, cram_slice *s,
2313+
cram_block *b, cram_metrics *metrics,
2314+
int method, int level) {
2315+
return cram_compress_block3(fd, s, b, metrics, method, level, 0);
2316+
}
2317+
22932318
int cram_compress_block(cram_fd *fd, cram_block *b, cram_metrics *metrics,
22942319
int method, int level) {
22952320
return cram_compress_block2(fd, NULL, b, metrics, method, level);

htslib/cram/cram_io.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,12 @@ static inline int block_resize(cram_block *b, size_t len) {
227227
if (b->alloc > len)
228228
return 0;
229229

230+
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
231+
// Removal of extra padding causes many more reallocs, but detects
232+
// more buffer overruns.
233+
return block_resize_exact(b, len?len:1);
234+
#endif
235+
230236
size_t alloc = b->alloc+800;
231237
alloc = MAX(alloc + (alloc>>2), len);
232238
return block_resize_exact(b, alloc);

htslib/hfile_libcurl.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1222,7 +1222,8 @@ libcurl_open(const char *url, const char *modes, http_headers *headers)
12221222

12231223
// Avoid many repeated CWD calls with FTP, instead requesting the filename
12241224
// by full path (but not strictly compliant with RFC1738).
1225-
err |= curl_easy_setopt(fp->easy, CURLOPT_FTP_FILEMETHOD, CURLFTPMETHOD_NOCWD);
1225+
err |= curl_easy_setopt(fp->easy, CURLOPT_FTP_FILEMETHOD,
1226+
(long) CURLFTPMETHOD_NOCWD);
12261227

12271228
if (mode == 'r') {
12281229
err |= curl_easy_setopt(fp->easy, CURLOPT_WRITEFUNCTION, recv_callback);

0 commit comments

Comments
 (0)