Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 24 additions & 6 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -229,9 +229,14 @@ PROTECT_FREE_BIG_ZONES = -DPROTECT_FREE_BIG_ZONES=0
## incurs a small performance cost
MASK_PTRS = -DMASK_PTRS=1

## IsoAlloc uses ARM64 Neon instructions where possible. You can
## explicitly disable that here
## IsoAlloc uses ARM64 Neon instructions where possible. Automatically
## enabled on ARM/AArch64 hosts, disabled everywhere else.
## Detect the host architecture once at parse time (':=' so the shell
## command runs exactly once, not on every expansion).
ARCH := $(shell uname -m)
## 'aarch64' covers Linux ARM64; 'arm%' covers 'arm64' (macOS) and
## 32-bit 'armv7l'-style values reported by uname -m.
ifneq ($(filter aarch64 arm%,$(ARCH)),)
## ARM/AArch64 host: leave Neon support enabled
DONT_USE_NEON := -DDONT_USE_NEON=0
else
## Non-ARM host: Neon instructions are unavailable, disable them
DONT_USE_NEON := -DDONT_USE_NEON=1
endif

## We start with the standard C++ specifics but giving
## the liberty to choose the gnu++* variants and/or
Expand Down Expand Up @@ -350,9 +355,22 @@ library: clean
## ABORT_ON_UNOWNED_PTR=0 silently drops pointers not owned by isoalloc
## (e.g. those allocated by libc before the isoalloc constructor fires)
## instead of aborting. All other flags are identical to 'library'.
library_perf: ABORT_ON_UNOWNED_PTR = -DABORT_ON_UNOWNED_PTR=0
library_perf: clean
@echo "make library_perf"
## Target-specific override: build the release library but with
## ABORT_ON_UNOWNED_PTR=0 so pointers not owned by isoalloc (e.g. chunks
## allocated by libc before the isoalloc constructor runs) are silently
## dropped instead of aborting. Everything else matches 'library'.
library_less_strict: ABORT_ON_UNOWNED_PTR = -DABORT_ON_UNOWNED_PTR=0
library_less_strict: clean
@echo "make library_less_strict"
$(CC) $(CFLAGS) $(LIBRARY) $(OPTIMIZE) $(OS_FLAGS) $(C_SRCS) -o $(BUILD_DIR)/$(LIBNAME)
$(STRIP)

## Build a performance-optimized library with the most expensive security
## features disabled. Intended for benchmarking and performance measurement.
Comment on lines +364 to +365
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hey, sounds like cheating to me :P

## All other flags inherit from the top-level defaults.
## Benchmark build: target-specific overrides disable the most expensive
## security features before reusing the normal library build line.
##   DISABLE_CANARY=1        - no canary chunk creation/verification
##   PRE_POPULATE_PAGES=1    - presumably pre-faults zone pages; TODO confirm in conf.h
##   RANDOMIZE_FREELIST=0    - skip free list randomization
##   MASK_PTRS=0             - skip XOR-masking of zone metadata pointers
##   ABORT_ON_UNOWNED_PTR=0  - don't abort on pointers isoalloc doesn't own
library_benchmark: DISABLE_CANARY = -DDISABLE_CANARY=1
library_benchmark: PRE_POPULATE_PAGES = -DPRE_POPULATE_PAGES=1
library_benchmark: RANDOMIZE_FREELIST = -DRANDOMIZE_FREELIST=0
library_benchmark: MASK_PTRS = -DMASK_PTRS=0
library_benchmark: ABORT_ON_UNOWNED_PTR = -DABORT_ON_UNOWNED_PTR=0
library_benchmark: clean
@echo "make library_benchmark"
$(CC) $(CFLAGS) $(LIBRARY) $(OPTIMIZE) $(OS_FLAGS) $(C_SRCS) -o $(BUILD_DIR)/$(LIBNAME)
$(STRIP)

Expand Down Expand Up @@ -456,7 +474,7 @@ libc_sanity_tests: clean library_debug_unit_tests
$(CC) $(CFLAGS) $(EXE_CFLAGS) $(DEBUG_LOG_FLAGS) $(GDB_FLAGS) $(OS_FLAGS) tests/memcpy_sanity.c $(ISO_ALLOC_PRINTF_SRC) -o $(BUILD_DIR)/memcpy_sanity $(LDFLAGS)
$(CC) $(CFLAGS) $(EXE_CFLAGS) $(DEBUG_LOG_FLAGS) $(GDB_FLAGS) $(OS_FLAGS) tests/memmove_sanity.c $(ISO_ALLOC_PRINTF_SRC) -o $(BUILD_DIR)/memmove_sanity $(LDFLAGS)
$(CC) $(CFLAGS) $(EXE_CFLAGS) $(DEBUG_LOG_FLAGS) $(GDB_FLAGS) $(OS_FLAGS) tests/bzero_sanity.c $(ISO_ALLOC_PRINTF_SRC) -o $(BUILD_DIR)/bzero_sanity $(LDFLAGS)
build/memset_sanity ; build/memcpy_sanity; build/memmove_sanity; build/bzero_sanity ;
LD_LIBRARY_PATH=build/ build/memset_sanity ; LD_LIBRARY_PATH=build/ build/memcpy_sanity; LD_LIBRARY_PATH=build/ build/memmove_sanity; LD_LIBRARY_PATH=build/ build/bzero_sanity

fuzz_test: clean library_debug_unit_tests
@echo "make fuzz_test"
Expand Down
54 changes: 33 additions & 21 deletions PERFORMANCE.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,16 @@ If you know your program will not require multi-threaded access to IsoAlloc you

`DISABLE_CANARY` can be set to 1 to disable the creation and verification of canary chunks. This removes a useful security feature but will significantly improve performance and RSS.

`MASK_PTRS` is enabled by default and causes the `user_pages_start` and `bitmap_start` pointers stored in every zone's metadata to be XOR'd with a per-zone random secret between alloc and free operations. This protects against attackers who can read or corrupt zone metadata. Each alloc and free pays a small cost for these XOR operations. Setting `MASK_PTRS=0` removes this overhead at the cost of this security property.

`CANARY_COUNT_DIV` in `conf.h` controls what fraction of chunks in a zone are reserved as canaries. It is used as a right-shift on the total chunk count: `chunk_count >> CANARY_COUNT_DIV`. The default value of 7 reserves less than 1% of chunks. Increasing this value reduces canary density and frees more chunks for user allocations; decreasing it increases security coverage at the cost of usable memory.

`ZONE_ALLOC_RETIRE` in `conf.h` controls how frequently zones are retired and replaced. A zone is retired once it has completed `ZONE_ALLOC_RETIRE * max_chunk_count_for_zone` total alloc/free cycles. Lowering this value causes zones to be replaced more often, reducing the window for use-after-free exploitation but increasing the frequency of zone creation. `BIG_ZONE_ALLOC_RETIRE` is the equivalent for big zones.

`SMALL_MEM_STARTUP` reduces the number and size of default zones created at startup. This decreases initial RSS at the cost of more frequent zone creation for programs with diverse allocation sizes.

`STRONG_SIZE_ISOLATION` enforces stricter isolation by size class. When enabled, chunk sizes are rounded up to a smaller set of size buckets, which increases isolation between differently-sized allocations. This may increase per-allocation waste but reduces cross-size heap exploitation primitives.

By default IsoAlloc will attempt to use Huge Pages (for both Linux and Mac OS) for any allocations that are a multiple of 2 mb in size. This is the default huge page size on most systems but it might not be on yours. On Linux you can check the value for your system by running the following command:

```
Expand Down Expand Up @@ -143,35 +153,37 @@ iso_realloc/iso_free 1834784 tests completed in 0.901481 seconds
The following benchmarks were collected from [mimalloc-bench](https://github.com/daanx/mimalloc-bench) with the default configuration of IsoAlloc. As you can see from the data IsoAlloc is competitive with other allocators for some benchmarks but clearly falls behind on others. For any benchmark that IsoAlloc scores poorly on I was able to tweak its build to improve the CPU time and memory consumption. It's worth noting that IsoAlloc was able to stay competitive even with performing many security checks not present in other allocators. Please note these are 'best case' measurements, not averages.

```
make library_benchmark

#------------------------------------------------------------------
# test alloc time rss user sys page-faults page-reclaims
cfrac je 02.99 4912 2.99 0.00 0 454
cfrac mi 03.01 2484 3.00 0.00 0 346
cfrac iso 05.84 26616 5.75 0.09 0 6502
cfrac je 03.07 4552 3.06 0.00 0 454
cfrac mi 02.97 2484 2.96 0.00 0 347
cfrac iso 04.78 30612 4.69 0.09 0 7503

espresso je 02.52 4872 2.50 0.01 0 538
espresso mi 02.46 3060 2.45 0.01 0 3637
espresso iso 03.65 69876 3.56 0.09 0 21695
espresso je 02.51 4872 2.50 0.01 0 540
espresso mi 02.43 3032 2.42 0.01 0 3630
espresso iso 03.16 69608 3.07 0.07 0 30334

barnes je 01.62 60268 1.59 0.02 0 16687
barnes mi 01.71 57672 1.68 0.02 0 16550
barnes iso 01.66 74628 1.62 0.03 0 20851
barnes je 01.71 59900 1.67 0.03 0 16686
barnes mi 01.65 57672 1.62 0.02 0 16550
barnes iso 01.65 74812 1.62 0.03 0 20849

gs je 00.16 37592 0.15 0.01 0 5808
gs mi 00.16 32588 0.13 0.02 0 5109
gs iso 00.23 71152 0.16 0.07 0 19698
gs je 00.17 37748 0.15 0.01 0 5814
gs mi 00.16 33888 0.14 0.01 0 5109
gs iso 00.22 68136 0.15 0.06 0 18916

larsonN je 1.171 266596 98.81 0.92 0 409842
larsonN mi 1.016 299768 99.38 0.44 0 83755
larsonN iso 918.582 126528 99.64 0.37 0 31368
larsonN je 1.188 261884 98.91 0.92 0 421848
larsonN mi 1.016 299752 99.53 0.38 0 80202
larsonN iso 1328.904 121096 6.15 69.78 0 30219

rocksdb je 02.48 162424 2.05 0.63 0 38384
rocksdb mi 02.48 159812 2.04 0.66 0 37464
rocksdb iso 02.74 197220 2.49 0.55 0 46815
rocksdb je 02.46 162340 2.05 0.63 0 38383
rocksdb mi 02.33 160156 1.92 0.63 0 37585
rocksdb iso 02.96 195948 2.64 0.66 0 46584

redis je 3.180 9496 0.14 0.02 0 1538
redis mi 3.080 7088 0.12 0.03 0 1256
redis iso 6.880 52816 0.31 0.05 0 16317
redis je 3.160 9492 0.13 0.02 0 1528
redis mi 2.780 7084 0.12 0.02 0 1257
redis iso 7.579 50516 0.35 0.05 0 15187
```

IsoAlloc isn't quite ready for performance sensitive server workloads. However it's more than fast enough for client side mobile/desktop applications with risky C/C++ attack surfaces. These environments have threat models similar to what IsoAlloc was designed for.
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ When enabled, the `CPU_PIN` feature will restrict allocations from a given zone
* A chunk can be permanently free'd with a call to `iso_free_permanently`.
* If `SANITIZE_CHUNKS` is set all user chunks are cleared when passed to `iso_free` with the constant `0xde`.
* When freeing a chunk the canary in adjacent chunks above/below are verified.
* Some important zone metadata pointers are masked in-between `iso_alloc` and `iso_free` operations.
* When `MASK_PTRS` is enabled (default) the `user_pages_start` and `bitmap_start` pointers stored in zone metadata are XOR'd with a per-zone random secret between alloc and free operations, making them useless to an attacker who reads or corrupts zone metadata.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be nice to explicit where the "per-zone random secret" is stored.

* Passing a pointer to `iso_free` that was not allocated with `iso_alloc` will abort.
* Pointers passed to `iso_free` must be 8 byte aligned, and a multiple of the zone chunk size.
* The free bit slot cache provides a chunk quarantine or delayed free mechanism.
Expand All @@ -76,6 +76,7 @@ When enabled, the `CPU_PIN` feature will restrict allocations from a given zone
* Randomized hints are passed to `mmap` to ensure contiguous page ranges are not allocated.
* When `ABORT_ON_NULL` is enabled IsoAlloc will abort instead of returning `NULL`.
* By default `NO_ZERO_ALLOCATIONS` will return a pointer to a page marked `PROT_NONE` for all `0` sized allocations.
* When `ABORT_ON_UNOWNED_PTR` is enabled (default) IsoAlloc will abort whenever it is passed a pointer it does not own.
* When `ABORT_NO_ENTROPY` is enabled IsoAlloc will abort when it can't gather enough entropy.
* When `RANDOMIZE_FREELIST` is enabled IsoAlloc will randomize the free list upon creation. May have a perf hit.
* Zones are retired and replaced after they've allocated and freed a specific number of chunks. This is calculated as `ZONE_ALLOC_RETIRE * max_chunk_count_for_zone`.
Expand All @@ -94,6 +95,8 @@ The Makefile targets are very simple:

`make library` - Builds a release version of the library without C++ support

`make library_less_strict` - Builds a release library with `ABORT_ON_UNOWNED_PTR=0`. Recommended when using IsoAlloc via `LD_PRELOAD`.

`make library_debug` - Builds a debug version of the library

`make library_debug_no_output` - Builds a debug version of the library with no logging output
Expand Down
13 changes: 8 additions & 5 deletions include/iso_alloc_ds.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#define SZ_TO_ZONE_LOOKUP_IDX(size) size >> 4

#define CHUNK_TO_ZONE_TABLE_SZ (65535 * sizeof(uint16_t))
#define ADDR_TO_CHUNK_TABLE(p) (((uintptr_t) p >> 32) & 0xffff)
#define ADDR_TO_CHUNK_TABLE(p) (((uintptr_t) p >> 22) & 0xffff)

typedef int64_t bit_slot_t;
typedef int64_t bitmap_index_t;
Expand All @@ -36,7 +36,7 @@ typedef struct {
int64_t next_free_bit_slot; /* The last bit slot returned by get_next_free_bit_slot */
uint64_t canary_secret; /* Each zone has its own canary secret */
uint64_t pointer_mask; /* Each zone has its own pointer protection secret */
bitmap_index_t max_bitmap_idx; /* Max bitmap index for this bitmap */
uint16_t max_bitmap_idx; /* Max bitmap index for this bitmap */
uint32_t chunk_size; /* Size of chunks managed by this zone */
free_bit_slot_t free_bit_slots_usable; /* The oldest members of the free cache are served first */
free_bit_slot_t free_bit_slots_index; /* Tracks how many entries in the cache are filled */
Expand All @@ -50,7 +50,7 @@ typedef struct {
uint8_t cpu_core; /* What CPU core this zone is pinned to */
#endif
/* Warm/cold fields: accessed less frequently */
uint32_t bitmap_size; /* Size of the bitmap in bytes */
uint16_t bitmap_size; /* Size of the bitmap in bytes */
uint32_t af_count; /* Increment/Decrement with each alloc/free operation */
uint32_t chunk_count; /* Total number of chunks in this zone */
uint32_t alloc_count; /* Total number of lifetime allocations */
Expand Down Expand Up @@ -133,9 +133,12 @@ typedef struct {
* it can find the next zone that holds the same size
* chunks. The lookup table helps us find the first zone
* that holds a specific size in O(1) time */
zone_lookup_table_t zone_lookup_table[ZONE_LOOKUP_TABLE_SZ];
/* Array sized to cover indices 0..(SMALL_SIZE_MAX>>4) inclusive, then
* rounded to a multiple of 4 entries so the array occupies a whole
* number of 8-byte words and bitmaps[] remains naturally aligned. */
zone_lookup_table_t zone_lookup_table[(SMALL_SIZE_MAX >> 4) + 4];
/* For chunk sizes >= 1024 our bitmap size is smaller
* than a page. This optimization preallocates pages to
 * than a page. This optimization preallocates pages to
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tog?

* hold multiple bitmaps for these zones */
iso_alloc_bitmap_t bitmaps[sizeof(small_bitmap_sizes) / sizeof(int)];
uint64_t zone_handle_mask;
Expand Down
14 changes: 11 additions & 3 deletions include/iso_alloc_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,14 @@ extern uint32_t g_page_size_shift;
static_assert(SMALLEST_CHUNK_SZ >= 16, "SMALLEST_CHUNK_SZ is too small, must be at least 16");
static_assert(SMALL_SIZE_MAX <= 131072, "SMALL_SIZE_MAX is too big, cannot exceed 131072");

/* bitmap_size = (ZONE_USER_SIZE / SMALLEST_CHUNK_SZ) * BITS_PER_CHUNK / BITS_PER_BYTE
* max_bitmap_idx = bitmap_size / sizeof(uint64_t)
* Both fields are uint16_t in iso_alloc_zone_t, so verify they fit. */
static_assert((ZONE_USER_SIZE * BITS_PER_CHUNK / BITS_PER_BYTE / SMALLEST_CHUNK_SZ) <= UINT16_MAX,
"bitmap_size overflows uint16_t: SMALLEST_CHUNK_SZ is too small (must be > 16)");
static_assert((ZONE_USER_SIZE * BITS_PER_CHUNK / BITS_PER_BYTE / SMALLEST_CHUNK_SZ / sizeof(uint64_t)) <= UINT16_MAX,
"max_bitmap_idx overflows uint16_t: SMALLEST_CHUNK_SZ is too small");

#if THREAD_SUPPORT
#if USE_SPINLOCK
extern atomic_flag root_busy_flag;
Expand Down Expand Up @@ -368,7 +376,7 @@ INTERNAL_HIDDEN INLINE void populate_zone_cache(iso_alloc_zone_t *zone);
INTERNAL_HIDDEN INLINE void flush_chunk_quarantine(void);
INTERNAL_HIDDEN INLINE void clear_zone_cache(void);
INTERNAL_HIDDEN iso_alloc_big_zone_t *iso_find_big_zone(void *p, bool remove);
INTERNAL_HIDDEN iso_alloc_zone_t *is_zone_usable(iso_alloc_zone_t *zone, size_t size);
INTERNAL_HIDDEN FLATTEN iso_alloc_zone_t *is_zone_usable(iso_alloc_zone_t *zone, size_t size);
INTERNAL_HIDDEN iso_alloc_zone_t *find_suitable_zone(size_t size);
INTERNAL_HIDDEN iso_alloc_zone_t *iso_new_zone(size_t size, bool internal);
INTERNAL_HIDDEN iso_alloc_zone_t *_iso_new_zone(size_t size, bool internal, int32_t index);
Expand All @@ -377,7 +385,7 @@ INTERNAL_HIDDEN iso_alloc_zone_t *iso_find_zone_range(void *p);
INTERNAL_HIDDEN iso_alloc_zone_t *search_chunk_lookup_table(const void *p);
INTERNAL_HIDDEN bit_slot_t iso_scan_zone_free_slot_slow(iso_alloc_zone_t *zone);
INTERNAL_HIDDEN bit_slot_t iso_scan_zone_free_slot(iso_alloc_zone_t *zone);
INTERNAL_HIDDEN bit_slot_t get_next_free_bit_slot(iso_alloc_zone_t *zone);
INTERNAL_HIDDEN INLINE bit_slot_t get_next_free_bit_slot(iso_alloc_zone_t *zone);
INTERNAL_HIDDEN iso_alloc_root *iso_alloc_new_root(void);
INTERNAL_HIDDEN bool is_pow2(uint64_t sz);
INTERNAL_HIDDEN bool _is_zone_retired(iso_alloc_zone_t *zone);
Expand Down Expand Up @@ -408,7 +416,7 @@ INTERNAL_HIDDEN void *_untag_ptr(void *p, iso_alloc_zone_t *zone);
INTERNAL_HIDDEN void _free_big_zone_list(iso_alloc_big_zone_t *head);
INTERNAL_HIDDEN ASSUME_ALIGNED void *_iso_big_alloc(size_t size);
INTERNAL_HIDDEN ASSUME_ALIGNED void *_iso_alloc(iso_alloc_zone_t *zone, size_t size);
INTERNAL_HIDDEN ASSUME_ALIGNED void *_iso_alloc_bitslot_from_zone(bit_slot_t bitslot, iso_alloc_zone_t *zone);
INTERNAL_HIDDEN INLINE ASSUME_ALIGNED void *_iso_alloc_bitslot_from_zone(bit_slot_t bitslot, iso_alloc_zone_t *zone);
INTERNAL_HIDDEN ASSUME_ALIGNED void *_iso_calloc(size_t nmemb, size_t size);
INTERNAL_HIDDEN void *_iso_alloc_ptr_search(void *n, bool poison);
INTERNAL_HIDDEN INLINE uint64_t us_rand_uint64(uint64_t *seed);
Expand Down
Loading
Loading