@@ -43,6 +43,7 @@ tests := mallocbug tst-malloc tst-valloc tst-calloc tst-obstack \
tst-tcfree1 tst-tcfree2 tst-tcfree3 \
tst-safe-linking \
tst-mallocalign1 \
+ tst-memalign-2
tests-static := \
tst-interpose-static-nothread \
@@ -72,7 +73,7 @@ test-srcs = tst-mtrace
# with MALLOC_CHECK_=3 because they expect a specific failure.
tests-exclude-malloc-check = tst-malloc-check tst-malloc-usable \
tst-mxfast tst-safe-linking \
- tst-compathooks-off tst-compathooks-on
+ tst-compathooks-off tst-compathooks-on tst-memalign-2
# Run all tests with MALLOC_CHECK_=3
tests-malloc-check = $(filter-out $(tests-exclude-malloc-check) \
@@ -3557,6 +3557,30 @@ _mid_memalign (size_t alignment, size_t bytes, void *address)
alignment = a;
}
+#if USE_TCACHE
+ {
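+    /* First see whether the chunk at the head of the matching tcache
+       bin happens to satisfy the alignment already, in which case it
+       can be returned without taking the arena lock.  Only the head
+       entry is inspected: tcache bins are singly-linked LIFO lists,
+       and popping a chunk from the middle would be costly.  */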
+    size_t tbytes = checked_request2size (bytes);
+ if (tbytes == 0)
+ {
+ __set_errno (ENOMEM);
+ return NULL;
+ }
+ size_t tc_idx = csize2tidx (tbytes);
+
+ MAYBE_INIT_TCACHE ();
+
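+    /* ALIGNMENT is a power of two at this point (it was rounded up
+       above if necessary), so the mask test below is a valid
+       alignment check.  */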
+ if (tc_idx < mp_.tcache_bins
+ && tcache
+ && tcache->counts[tc_idx] > 0
+        && ((intptr_t) tcache->entries[tc_idx] & (alignment - 1)) == 0)
+ {
+ void *victim = tcache_get (tc_idx);
+ return tag_new_usable (victim);
+ }
+ }
+#endif
+
if (SINGLE_THREAD_P)
{
p = _int_memalign (&main_arena, alignment, bytes);
@@ -4937,6 +4961,43 @@ _int_realloc (mstate av, mchunkptr oldp, INTERNAL_SIZE_T oldsize,
------------------------------ memalign ------------------------------
*/
+/* Returns 0 if the chunk neither is nor contains the requested
+   aligned sub-chunk; otherwise returns the amount of "waste" that
+   trimming it would produce.  BYTES is the *user* byte size, not the
+   chunk byte size.  */
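+/* For example: a chunk with 0x120 usable bytes holding a 0x100-byte
+   request at an already aligned position reports 0x20 bytes of
+   waste, and an exact fit reports 1 rather than 0 so that it is not
+   mistaken for "not usable".  */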
+static size_t
+chunk_ok_for_memalign (mchunkptr p, size_t alignment, size_t bytes)
+{
+ void *m = chunk2mem (p);
+ INTERNAL_SIZE_T size = memsize (p);
+ void *aligned_m = m;
+
+ if (__glibc_unlikely (misaligned_chunk (p)))
+ malloc_printerr ("_int_memalign(): unaligned chunk detected");
+
+ aligned_m = PTR_ALIGN_UP (m, alignment);
+
+ INTERNAL_SIZE_T front_extra = (intptr_t) aligned_m - (intptr_t) m;
+
+  /* We can't trim off the front as it's too small: a split-off
+     leader must itself be a valid free chunk of at least MINSIZE
+     bytes.  */
+ if (front_extra > 0 && front_extra < MINSIZE)
+ return 0;
+
+ /* If it's a perfect fit, it's an exception to the return value rule
+ (we would return zero waste, which looks like "not usable"), so
+ handle it here by returning a small non-zero value instead. */
+ if (size == bytes && front_extra == 0)
+ return 1;
+
+ /* If the block we need fits in the chunk, calculate total waste. */
+ if (size > bytes + front_extra)
+ return size - bytes;
+
+ /* Can't use this chunk. */
+ return 0;
+}
+
+/* BYTES is user requested bytes, not requested chunksize bytes. */
static void *
_int_memalign (mstate av, size_t alignment, size_t bytes)
{
@@ -4950,8 +5011,7 @@ _int_memalign (mstate av, size_t alignment, size_t bytes)
mchunkptr remainder; /* spare room at end to split off */
unsigned long remainder_size; /* its size */
INTERNAL_SIZE_T size;
-
-
+ mchunkptr victim;
nb = checked_request2size (bytes);
if (nb == 0)
@@ -4960,29 +5020,142 @@ _int_memalign (mstate av, size_t alignment, size_t bytes)
return NULL;
}
- /*
- Strategy: find a spot within that chunk that meets the alignment
+  /* We can't check the tcache here because we hold the arena lock,
+     which the tcache code doesn't expect.  The caller
+     (_mid_memalign) is expected to have checked the tcache
+     already.  */
+
+ /* Strategy: search the bins looking for an existing block that
+ meets our needs. We scan a range of bins from "exact size" to
+ "just under 2x", spanning the small/large barrier if needed. If
+ we don't find anything in those bins, the common malloc code will
+ scan starting at 2x. */
+
+ /* This will be set if we found a candidate chunk. */
+ victim = NULL;
+
+  /* Fast bins are singly-linked, so removing a chunk from the middle
+     of a list is hard, and fastbin chunks are unlikely to meet our
+     alignment requirements anyway.  We have not experimented with
+     searching the fast bins for aligned chunks.  */
+
+ int first_bin_index;
+ int first_largebin_index;
+ int last_bin_index;
+
+ if (in_smallbin_range (nb))
+ first_bin_index = smallbin_index (nb);
+ else
+ first_bin_index = largebin_index (nb);
+
+ if (in_smallbin_range (nb * 2))
+ last_bin_index = smallbin_index (nb * 2);
+ else
+ last_bin_index = largebin_index (nb * 2);
+
+ first_largebin_index = largebin_index (MIN_LARGE_SIZE);
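+  /* Bin indices below first_largebin_index are small bins; those at
+     or above it are large bins, which are sorted by size and need
+     unlink_chunk to keep their nextsize links consistent.  */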
+
+ int victim_index; /* its bin index */
+
+ for (victim_index = first_bin_index;
+ victim_index < last_bin_index;
+ victim_index ++)
+ {
+ victim = NULL;
+
+ if (victim_index < first_largebin_index)
+ {
+	  /* Check small bins.  All chunks in a small bin are the same
+	     size, but they are kept in a doubly-linked list, so any
+	     one of them can be unlinked.  */
+
+ mchunkptr fwd; /* misc temp for linking */
+ mchunkptr bck; /* misc temp for linking */
+
+ bck = bin_at (av, victim_index);
+ fwd = bck->fd;
+ while (fwd != bck)
+ {
+ if (chunk_ok_for_memalign (fwd, alignment, bytes) > 0)
+ {
+ victim = fwd;
+
+		  /* Unlink it.  Small-bin chunks carry no nextsize
+		     links, so plain doubly-linked-list removal
+		     suffices.  */
+ victim->fd->bk = victim->bk;
+ victim->bk->fd = victim->fd;
+ break;
+ }
+
+ fwd = fwd->fd;
+ }
+ }
+ else
+ {
+ /* Check large bins. */
+ mchunkptr fwd; /* misc temp for linking */
+ mchunkptr bck; /* misc temp for linking */
+ mchunkptr best = NULL;
+ size_t best_size = 0;
+
+ bck = bin_at (av, victim_index);
+ fwd = bck->fd;
+
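+	  /* Large bins are sorted by size, largest first, so stop as
+	     soon as the chunks are too small; among the usable
+	     chunks, remember the one that wastes the least.  */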
+ while (fwd != bck)
+ {
+	      size_t extra;
+
+ if (chunksize (fwd) < nb)
+ break;
+ extra = chunk_ok_for_memalign (fwd, alignment, bytes);
+ if (extra > 0
+ && (extra <= best_size || best == NULL))
+ {
+ best = fwd;
+ best_size = extra;
+ }
+
+ fwd = fwd->fd;
+ }
+ victim = best;
+
+ if (victim != NULL)
+ {
+ unlink_chunk (av, victim);
+ break;
+ }
+ }
+
+ if (victim != NULL)
+ break;
+ }
+
+ /* Strategy: find a spot within that chunk that meets the alignment
request, and then possibly free the leading and trailing space.
- */
+ This strategy is incredibly costly and can lead to external
+ fragmentation if header and footer chunks are unused. */
- /* Call malloc with worst case padding to hit alignment. */
+ if (victim != NULL)
+ {
+ p = victim;
+ m = chunk2mem (p);
+ set_inuse (p);
+ }
+ else
+ {
+ /* Call malloc with worst case padding to hit alignment. */
- m = (char *) (_int_malloc (av, nb + alignment + MINSIZE));
+ m = (char *) (_int_malloc (av, nb + alignment + MINSIZE));
- if (m == 0)
- return 0; /* propagate failure */
+ if (m == 0)
+ return 0; /* propagate failure */
- p = mem2chunk (m);
+ p = mem2chunk (m);
+ }
if ((((unsigned long) (m)) % alignment) != 0) /* misaligned */
-
- { /*
- Find an aligned spot inside chunk. Since we need to give back
- leading space in a chunk of at least MINSIZE, if the first
- calculation places us at a spot with less than MINSIZE leader,
- we can move to the next aligned spot -- we've allocated enough
- total room so that this is always possible.
- */
+ {
+ /* Find an aligned spot inside chunk. Since we need to give back
+ leading space in a chunk of at least MINSIZE, if the first
+ calculation places us at a spot with less than MINSIZE leader,
+ we can move to the next aligned spot -- we've allocated enough
+ total room so that this is always possible. */
brk = (char *) mem2chunk (((unsigned long) (m + alignment - 1)) &
- ((signed long) alignment));
if ((unsigned long) (brk - (char *) (p)) < MINSIZE)
new file mode 100644
@@ -0,0 +1,115 @@
+/* Test for memalign chunk reuse.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <errno.h>
+#include <malloc.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <array_length.h>
+
+#include <support/check.h>
+
+typedef struct TestCase {
+ size_t size;
+ size_t alignment;
+ void *ptr1;
+ void *ptr2;
+} TestCase;
+
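+/* Allocations small enough that freeing them lands in the tcache;
+   the second memalign of the same size and alignment should return
+   the chunk just freed.  */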
+static TestCase tcache_allocs[] = {
+ { 24, 8, NULL, NULL },
+ { 24, 16, NULL, NULL },
+ { 128, 32, NULL, NULL }
+};
+#define TN array_length (tcache_allocs)
+
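+/* Allocations too large for the tcache; after they are freed and
+   sorted out of the unsorted list, memalign should find suitably
+   aligned chunks in the large bins.  */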
+static TestCase large_allocs[] = {
+ { 23450, 64, NULL, NULL },
+ { 23450, 64, NULL, NULL },
+ { 23550, 64, NULL, NULL },
+ { 23550, 64, NULL, NULL },
+ { 23650, 64, NULL, NULL },
+ { 23650, 64, NULL, NULL },
+ { 33650, 64, NULL, NULL },
+ { 33650, 64, NULL, NULL }
+};
+#define LN array_length (large_allocs)
+
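+/* Sink for allocations we do not free, so the compiler cannot
+   optimize them away and the heap stays fragmented.  */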
+void *p;
+
+static int
+do_test (void)
+{
+ int i, j;
+ int count;
+
+ /* TCache test. */
+
+ for (i = 0; i < TN; ++ i)
+ {
+ tcache_allocs[i].ptr1 = memalign (tcache_allocs[i].alignment, tcache_allocs[i].size);
+ free (tcache_allocs[i].ptr1);
+      /* This should return the same chunk as was just freed.  */
+ tcache_allocs[i].ptr2 = memalign (tcache_allocs[i].alignment, tcache_allocs[i].size);
+ free (tcache_allocs[i].ptr2);
+
+ TEST_VERIFY (tcache_allocs[i].ptr1 == tcache_allocs[i].ptr2);
+ }
+
+ /* Large bins test. */
+
+ for (i = 0; i < LN; ++ i)
+ {
+ large_allocs[i].ptr1 = memalign (large_allocs[i].alignment, large_allocs[i].size);
+ /* Keep chunks from combining by fragmenting the heap. */
+ p = malloc (512);
+ }
+
+ for (i = 0; i < LN; ++ i)
+ free (large_allocs[i].ptr1);
+
+ /* Force the unsorted bins to be scanned and moved to small/large
+ bins. */
+ p = malloc (60000);
+
+ for (i = 0; i < LN; ++ i)
+ large_allocs[i].ptr2 = memalign (large_allocs[i].alignment, large_allocs[i].size);
+
+ count = 0;
+ for (i = 0; i < LN; ++ i)
+ {
+ int ok = 0;
+ for (j = 0; j < LN; ++ j)
+ if (large_allocs[i].ptr1 == large_allocs[j].ptr2)
+ ok = 1;
+ if (ok == 1)
+ count ++;
+ }
+
+ /* The allocation algorithm is complicated outside of the memalign
+ logic, so just make sure it's working for most of the
+ allocations. This avoids possible boundary conditions with
+ empty/full heaps. */
+ TEST_VERIFY (count > LN / 2);
+
+ return 0;
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../test-skeleton.c"