@@ -3336,50 +3336,26 @@ static int vcf_parse_info(kstring_t *str, const bcf_hdr_t *h, bcf1_t *v, char *p
33363336
33373337 v -> n_info = 0 ;
33383338 if (* (q - 1 ) == ';' ) * (q - 1 ) = 0 ;
3339-
3340- // Parsing consists of processing key=value; or key; so we only need
3341- // to track the next = and ; symbols, plus end of string.
3342- // We could process 1 char at a time, but this is inefficient compared
3343- // to strchr which can process word by word. Hence even doing two strchrs
3344- // is quicker than byte by byte processing.
3345- char * next_equals = strchr (p , '=' );
3346- char * next_semicolon = strchr (p , ';' );
3347- r = p ;
3348- while (* r ) {
3349- // Look for key=value or just key.
3350- char * val , * end , * from ;
3351- key = r ;
3352- if (next_equals && (!next_semicolon || next_equals < next_semicolon )) {
3353- // key=value;
3354- * next_equals = 0 ;
3355- from = val = next_equals + 1 ;
3356- // Prefetching d->keys[hash] helps here provided we avoid
3357- // computing hash twice (needs API change), but not universally.
3358- // It may be significant for other applications though so it's
3359- // something to consider for the future.
3360- } else {
3361- // key;
3362- val = NULL ;
3363- from = key ;
3364- }
3365-
3366- // Update location of next ; and if used =
3367- if (next_semicolon ) {
3368- end = next_semicolon ;
3369- r = end + 1 ;
3370- next_semicolon = strchr (end + 1 , ';' );
3371- if (val )
3372- next_equals = strchr (end , '=' );
3373- } else {
3374- // find nul location, starting from key or val.
3375- r = end = from + strlen (from );
3339+ for (r = key = p ;; ++ r ) {
3340+ int c ;
3341+ char * val , * end ;
3342+ while (* r > '=' || (* r != ';' && * r != '=' && * r != 0 )) r ++ ;
3343+ if (v -> n_info == UINT16_MAX ) {
3344+ hts_log_error ("Too many INFO entries at %s:%" PRIhts_pos ,
3345+ bcf_seqname_safe (h ,v ), v -> pos + 1 );
3346+ v -> errcode |= BCF_ERR_LIMITS ;
3347+ goto fail ;
33763348 }
3349+ val = end = NULL ;
3350+ c = * r ; * r = 0 ;
3351+ if (c == '=' ) {
3352+ val = r + 1 ;
33773353
3378- * end = 0 ;
3379-
3380- // We've now got key and val (maybe NULL), so process it
3354+ for (end = val ; * end != ';' && * end != 0 ; ++ end );
3355+ c = * end ; * end = 0 ;
3356+ } else end = r ;
3357+ if ( !* key ) { if (c == 0 ) break ; r = end ; key = r + 1 ; continue ; } // faulty VCF, ";;" in the INFO
33813358 k = kh_get (vdict , d , key );
3382- // 15 is default (unknown) type. See bcf_idinfo_def at top
33833359 if (k == kh_end (d ) || kh_val (d , k ).info [BCF_HL_INFO ] == 15 )
33843360 {
33853361 hts_log_warning ("INFO '%s' is not defined in the header, assuming Type=String" , key );
@@ -3481,8 +3457,8 @@ static int vcf_parse_info(kstring_t *str, const bcf_hdr_t *h, bcf1_t *v, char *p
34813457 } else {
34823458 bcf_enc_vint (str , n_val , a_val , -1 );
34833459 }
3484- if (n_val == 1 && (val1 != bcf_int32_missing || is_int64 ) &&
3485- memcmp (key , "END" , 4 ) == 0 )
3460+ if (n_val == 1 && (val1 != bcf_int32_missing || is_int64 )
3461+ && memcmp (key , "END" , 4 ) == 0 )
34863462 {
34873463 if ( val1 <= v -> pos )
34883464 {
@@ -3508,6 +3484,9 @@ static int vcf_parse_info(kstring_t *str, const bcf_hdr_t *h, bcf1_t *v, char *p
35083484 bcf_enc_vfloat (str , n_val , val_f );
35093485 }
35103486 }
3487+ if (c == 0 ) break ;
3488+ r = end ;
3489+ key = r + 1 ;
35113490 }
35123491
35133492 free (a_val );
0 commit comments