Skip to content

Commit a121c84

Browse files
daviesrob authored and whitwham committed
Prevent out-of-memory reports when fuzzing
* Limit max. IDX numbers in VCF to prevent large allocations
* Limit max. sum of shared_len + indiv_len in bcf_read1_core()
* Limit max. header size in bcf_hdr_read()
* Limit max. header size in cram_read_SAM_hdr()
* Limit max. header size in bam_hdr_read()
* Limit max. n_targets in bam_hdr_read()
* Limit max. number of landmarks in cram_read_container()
* Limit max. number of huffman codes in cram_huffman_decode_init()
* Limit max. record size in sam_realloc_bam_data()

Adds a header where the memory limit for fuzzing can be set. This involves a bit more work, but there is benefit to having this in one clearly defined place.
1 parent 9c7e845 commit a121c84

File tree

6 files changed

+100
-7
lines changed

6 files changed

+100
-7
lines changed

Makefile

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,7 @@ cram_samtools_h = cram/cram_samtools.h $(htslib_sam_h)
252252
cram_structs_h = cram/cram_structs.h $(htslib_thread_pool_h) $(htslib_cram_h) cram/string_alloc.h cram/mFILE.h $(htslib_khash_h)
253253
cram_open_trace_file_h = cram/open_trace_file.h cram/mFILE.h
254254
bcf_sr_sort_h = bcf_sr_sort.h $(htslib_synced_bcf_reader_h) $(htslib_kbitset_h)
255+
fuzz_settings_h = fuzz_settings.h
255256
header_h = header.h cram/string_alloc.h cram/pooled_alloc.h $(htslib_khash_h) $(htslib_kstring_h) $(htslib_sam_h)
256257
hfile_internal_h = hfile_internal.h $(htslib_hts_defs_h) $(htslib_hfile_h) $(textutils_internal_h)
257258
hts_internal_h = hts_internal.h $(htslib_hts_h) $(textutils_internal_h)
@@ -456,8 +457,8 @@ hfile_s3.o hfile_s3.pico: hfile_s3.c config.h $(hfile_internal_h) $(htslib_hts_h
456457
hts.o hts.pico: hts.c config.h os/lzma_stub.h $(htslib_hts_h) $(htslib_bgzf_h) $(cram_h) $(htslib_hfile_h) $(htslib_hts_endian_h) version.h config_vars.h $(hts_internal_h) $(hfile_internal_h) $(sam_internal_h) $(htslib_hts_expr_h) $(htslib_hts_os_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_ksort_h) $(htslib_tbx_h) $(htscodecs_htscodecs_h)
457458
hts_expr.o hts_expr.pico: hts_expr.c config.h $(htslib_hts_expr_h) $(htslib_hts_log_h) $(textutils_internal_h)
458459
hts_os.o hts_os.pico: hts_os.c config.h $(htslib_hts_defs_h) os/rand.c
459-
vcf.o vcf.pico: vcf.c config.h $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_tbx_h) $(htslib_hfile_h) $(hts_internal_h) $(htslib_khash_str2int_h) $(htslib_kstring_h) $(htslib_sam_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_hts_endian_h)
460-
sam.o sam.pico: sam.c config.h $(htslib_hts_defs_h) $(htslib_sam_h) $(htslib_bgzf_h) $(cram_h) $(hts_internal_h) $(sam_internal_h) $(htslib_hfile_h) $(htslib_hts_endian_h) $(htslib_hts_expr_h) $(header_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_kstring_h)
460+
vcf.o vcf.pico: vcf.c config.h $(fuzz_settings_h) $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_tbx_h) $(htslib_hfile_h) $(hts_internal_h) $(htslib_khash_str2int_h) $(htslib_kstring_h) $(htslib_sam_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_hts_endian_h)
461+
sam.o sam.pico: sam.c config.h $(fuzz_settings_h) $(htslib_hts_defs_h) $(htslib_sam_h) $(htslib_bgzf_h) $(cram_h) $(hts_internal_h) $(sam_internal_h) $(htslib_hfile_h) $(htslib_hts_endian_h) $(htslib_hts_expr_h) $(header_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_kstring_h)
461462
sam_mods.o sam_mods.pico: sam_mods.c config.h $(htslib_sam_h) $(textutils_internal_h)
462463
tbx.o tbx.pico: tbx.c config.h $(htslib_tbx_h) $(htslib_bgzf_h) $(htslib_hts_endian_h) $(hts_internal_h) $(htslib_khash_h)
463464
faidx.o faidx.pico: faidx.c config.h $(htslib_bgzf_h) $(htslib_faidx_h) $(htslib_hfile_h) $(htslib_khash_h) $(htslib_kstring_h) $(hts_internal_h)
@@ -475,12 +476,12 @@ probaln.o probaln.pico: probaln.c config.h $(htslib_hts_h)
475476
realn.o realn.pico: realn.c config.h $(htslib_hts_h) $(htslib_sam_h)
476477
textutils.o textutils.pico: textutils.c config.h $(htslib_hfile_h) $(htslib_kstring_h) $(htslib_sam_h) $(hts_internal_h)
477478

478-
cram/cram_codecs.o cram/cram_codecs.pico: cram/cram_codecs.c config.h $(htslib_hts_endian_h) $(htscodecs_varint_h) $(htscodecs_pack_h) $(htscodecs_rle_h) $(cram_h)
479+
cram/cram_codecs.o cram/cram_codecs.pico: cram/cram_codecs.c config.h $(fuzz_settings_h) $(htslib_hts_endian_h) $(htscodecs_varint_h) $(htscodecs_pack_h) $(htscodecs_rle_h) $(cram_h)
479480
cram/cram_decode.o cram/cram_decode.pico: cram/cram_decode.c config.h $(cram_h) $(cram_os_h) $(htslib_hts_h)
480481
cram/cram_encode.o cram/cram_encode.pico: cram/cram_encode.c config.h $(cram_h) $(cram_os_h) $(sam_internal_h) $(htslib_hts_h) $(htslib_hts_endian_h) $(textutils_internal_h)
481482
cram/cram_external.o cram/cram_external.pico: cram/cram_external.c config.h $(htscodecs_rANS_static4x16_h) $(htslib_hfile_h) $(cram_h)
482483
cram/cram_index.o cram/cram_index.pico: cram/cram_index.c config.h $(htslib_bgzf_h) $(htslib_hfile_h) $(hts_internal_h) $(cram_h) $(cram_os_h)
483-
cram/cram_io.o cram/cram_io.pico: cram/cram_io.c config.h os/lzma_stub.h $(cram_h) $(cram_os_h) $(htslib_hts_h) $(cram_open_trace_file_h) $(htscodecs_rANS_static_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_arith_dynamic_h) $(htscodecs_tokenise_name3_h) $(htscodecs_fqzcomp_qual_h) $(htscodecs_varint_h) $(htslib_hfile_h) $(htslib_bgzf_h) $(htslib_faidx_h) $(hts_internal_h)
484+
cram/cram_io.o cram/cram_io.pico: cram/cram_io.c config.h os/lzma_stub.h $(fuzz_settings_h) $(cram_h) $(cram_os_h) $(htslib_hts_h) $(cram_open_trace_file_h) $(htscodecs_rANS_static_h) $(htscodecs_rANS_static4x16_h) $(htscodecs_arith_dynamic_h) $(htscodecs_tokenise_name3_h) $(htscodecs_fqzcomp_qual_h) $(htscodecs_varint_h) $(htslib_hfile_h) $(htslib_bgzf_h) $(htslib_faidx_h) $(hts_internal_h)
484485
cram/cram_stats.o cram/cram_stats.pico: cram/cram_stats.c config.h $(cram_h) $(cram_os_h)
485486
cram/mFILE.o cram/mFILE.pico: cram/mFILE.c config.h $(htslib_hts_log_h) $(cram_os_h) cram/mFILE.h
486487
cram/open_trace_file.o cram/open_trace_file.pico: cram/open_trace_file.c config.h $(cram_os_h) $(cram_open_trace_file_h) $(cram_misc_h) $(htslib_hfile_h) $(htslib_hts_log_h) $(htslib_hts_h)

cram/cram_codecs.c

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4444
#include <errno.h>
4545
#include <stddef.h>
4646

47+
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
48+
#include "../fuzz_settings.h"
49+
#endif
50+
4751
#include "../htslib/hts_endian.h"
4852

4953
#if defined(HAVE_EXTERNAL_LIBHTSCODECS)
@@ -2795,7 +2799,12 @@ cram_codec *cram_huffman_decode_init(cram_block_compression_hdr *hdr,
27952799
errno = ENOMEM;
27962800
return NULL;
27972801
}
2798-
2802+
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
2803+
if (ncodes > FUZZ_ALLOC_LIMIT / sizeof(*codes)) {
2804+
errno = ENOMEM;
2805+
return NULL;
2806+
}
2807+
#endif
27992808
h = calloc(1, sizeof(*h));
28002809
if (!h)
28012810
return NULL;

cram/cram_io.c

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
6969
#define crc32(a,b,c) libdeflate_crc32((a),(b),(c))
7070
#endif
7171

72+
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
73+
#include "../fuzz_settings.h"
74+
#endif
75+
7276
#include "cram.h"
7377
#include "os.h"
7478
#include "../htslib/hts.h"
@@ -3857,7 +3861,13 @@ cram_container *cram_read_container(cram_fd *fd) {
38573861
return NULL;
38583862

38593863
*c = c2;
3860-
3864+
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
3865+
if (c->num_landmarks > FUZZ_ALLOC_LIMIT/sizeof(int32_t)) {
3866+
fd->err = errno = ENOMEM;
3867+
cram_free_container(c);
3868+
return NULL;
3869+
}
3870+
#endif
38613871
if (c->num_landmarks && !(c->landmark = malloc(c->num_landmarks * sizeof(int32_t)))) {
38623872
fd->err = errno;
38633873
cram_free_container(c);
@@ -4689,6 +4699,11 @@ sam_hdr_t *cram_read_SAM_hdr(cram_fd *fd) {
46894699
if (-1 == int32_decode(fd, &header_len))
46904700
return NULL;
46914701

4702+
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
4703+
if (header_len > FUZZ_ALLOC_LIMIT)
4704+
return NULL;
4705+
#endif
4706+
46924707
/* Alloc and read */
46934708
if (header_len < 0 || NULL == (header = malloc((size_t) header_len+1)))
46944709
return NULL;

fuzz_settings.h

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/* fuzz_settings.h -- fuzz-tester specific definitions
2+
3+
Copyright (C) 2023 Genome Research Ltd.
4+
5+
Author: Rob Davies <rmd@sanger.ac.uk>
6+
7+
Permission is hereby granted, free of charge, to any person obtaining a copy
8+
of this software and associated documentation files (the "Software"), to deal
9+
in the Software without restriction, including without limitation the rights
10+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11+
copies of the Software, and to permit persons to whom the Software is
12+
furnished to do so, subject to the following conditions:
13+
14+
The above copyright notice and this permission notice shall be included in
15+
all copies or substantial portions of the Software.
16+
17+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20+
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22+
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23+
DEALINGS IN THE SOFTWARE. */
24+
25+
#ifndef HTSLIB_FUZZ_SETTINGS_H
26+
#define HTSLIB_FUZZ_SETTINGS_H
27+
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
28+
29+
#ifndef FUZZ_ALLOC_LIMIT
30+
// By default libfuzzer reports out-of-memory on allocations > 2 Gbytes
31+
#define FUZZ_ALLOC_LIMIT 2000000000ULL
32+
#endif
33+
34+
#endif
35+
#endif

sam.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@ DEALINGS IN THE SOFTWARE. */
3737
#include <inttypes.h>
3838
#include <unistd.h>
3939

40+
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
41+
#include "fuzz_settings.h"
42+
#endif
43+
4044
// Suppress deprecation message for cigar_tab, which we initialise
4145
#include "htslib/hts_defs.h"
4246
#undef HTS_DEPRECATED
@@ -251,6 +255,9 @@ sam_hdr_t *bam_hdr_read(BGZF *fp)
251255

252256
bufsize = h->l_text + 1;
253257
if (bufsize < h->l_text) goto nomem; // so large that adding 1 overflowed
258+
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
259+
if (bufsize > FUZZ_ALLOC_LIMIT) goto nomem;
260+
#endif
254261
h->text = (char*)malloc(bufsize);
255262
if (!h->text) goto nomem;
256263
h->text[h->l_text] = 0; // make sure it is NULL terminated
@@ -264,6 +271,10 @@ sam_hdr_t *bam_hdr_read(BGZF *fp)
264271
if (h->n_targets < 0) goto invalid;
265272

266273
// read reference sequence names and lengths
274+
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
275+
if (h->n_targets > (FUZZ_ALLOC_LIMIT - bufsize)/(sizeof(char*)+sizeof(uint32_t)))
276+
goto nomem;
277+
#endif
267278
if (h->n_targets > 0) {
268279
h->target_name = (char**)calloc(h->n_targets, sizeof(char*));
269280
if (!h->target_name) goto nomem;
@@ -425,6 +436,12 @@ int sam_realloc_bam_data(bam1_t *b, size_t desired)
425436
errno = ENOMEM; // Not strictly true but we can't store the size
426437
return -1;
427438
}
439+
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
440+
if (new_m_data > FUZZ_ALLOC_LIMIT) {
441+
errno = ENOMEM;
442+
return -1;
443+
}
444+
#endif
428445
if ((bam_get_mempolicy(b) & BAM_USER_OWNS_DATA) == 0) {
429446
new_data = realloc(b->data, new_m_data);
430447
} else {

vcf.c

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@ DEALINGS IN THE SOFTWARE. */
3737
#include <inttypes.h>
3838
#include <errno.h>
3939

40+
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
41+
#include "fuzz_settings.h"
42+
#endif
43+
4044
#include "htslib/vcf.h"
4145
#include "htslib/bgzf.h"
4246
#include "htslib/tbx.h"
@@ -703,6 +707,11 @@ static int bcf_hdr_set_idx(bcf_hdr_t *hdr, int dict_type, const char *tag, bcf_i
703707
}
704708

705709
new_n = idinfo->id >= hdr->n[dict_type] ? idinfo->id+1 : hdr->n[dict_type];
710+
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
711+
// hts_resize() can attempt to allocate up to 2 * requested items
712+
if (new_n > FUZZ_ALLOC_LIMIT/(2 * sizeof(bcf_idpair_t)))
713+
return -1;
714+
#endif
706715
if (hts_resize(bcf_idpair_t, new_n, &hdr->m[dict_type],
707716
&hdr->id[dict_type], HTS_RESIZE_CLEAR)) {
708717
return -1;
@@ -1489,6 +1498,9 @@ bcf_hdr_t *bcf_hdr_read(htsFile *hfp)
14891498
if (bgzf_read(fp, buf, 4) != 4) goto fail;
14901499
hlen = buf[0] | (buf[1] << 8) | (buf[2] << 16) | ((size_t) buf[3] << 24);
14911500
if (hlen >= SIZE_MAX) { errno = ENOMEM; goto fail; }
1501+
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
1502+
if (hlen > FUZZ_ALLOC_LIMIT) { errno = ENOMEM; goto fail; }
1503+
#endif
14921504
htxt = (char*)malloc(hlen + 1);
14931505
if (!htxt) goto fail;
14941506
if (bgzf_read(fp, htxt, hlen) != hlen) goto fail;
@@ -1615,8 +1627,12 @@ static inline int bcf_read1_core(BGZF *fp, bcf1_t *v)
16151627
shared_len = le_to_u32(x);
16161628
if (shared_len < 24) return -2;
16171629
shared_len -= 24; // to exclude six 32-bit integers
1618-
if (ks_resize(&v->shared, shared_len ? shared_len : 1) != 0) return -2;
16191630
indiv_len = le_to_u32(x + 4);
1631+
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
1632+
// ks_resize() normally allocates 1.5 * requested size to allow for growth
1633+
if ((uint64_t) shared_len + indiv_len > FUZZ_ALLOC_LIMIT / 3 * 2) return -2;
1634+
#endif
1635+
if (ks_resize(&v->shared, shared_len ? shared_len : 1) != 0) return -2;
16201636
if (ks_resize(&v->indiv, indiv_len ? indiv_len : 1) != 0) return -2;
16211637
v->rid = le_to_i32(x + 8);
16221638
v->pos = le_to_u32(x + 12);

0 commit comments

Comments
 (0)