This patch is not necessary for the bug fix; it just makes concurrency
code review easier (it removes a data race and an overflow from a broken
code path).
dlopen can OOM crash anyway in _dl_resize_dtv, and it is probably
better to crash than to leave half-set-up modules around.
2016-11-30 Szabolcs Nagy <szabolcs.nagy@arm.com>
* elf/dl-tls.c (_dl_add_to_slotinfo): Call oom on allocation failure
instead of signaling an error.
diff --git a/elf/dl-tls.c b/elf/dl-tls.c
index 60f4c1d..2feee67 100644
--- a/elf/dl-tls.c
+++ b/elf/dl-tls.c
@@ -959,18 +959,10 @@ _dl_add_to_slotinfo (struct link_map *l)
+ TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo));
if (listp == NULL)
{
- /* We ran out of memory. We will simply fail this
- call but don't undo anything we did so far. The
- application will crash or be terminated anyway very
- soon. */
-
- /* We have to do this since some entries in the dtv
- slotinfo array might already point to this
- generation. */
- ++GL(dl_tls_generation);
-
- _dl_signal_error (ENOMEM, "dlopen", NULL, N_("\
-cannot create TLS data structures"));
+ /* We ran out of memory in dlopen while updating tls structures.
+ TODO: side-effects should be rolled back and the failure should
+ be reported to the caller, but that's hard. */
+ oom ();
}
listp->len = TLS_SLOTINFO_SURPLUS;
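As an aside, the fail-fast pattern used above (terminate on allocation
failure instead of trying to report ENOMEM after shared bookkeeping has
already been partially updated) can be shown with a small, stand-alone
sketch; the slot_list type, extend_slot_list and oom_abort below are
hypothetical names, not glibc internals.

/* Minimal sketch of the fail-fast idea above, not glibc code: once
   shared bookkeeping has been partially updated, reporting ENOMEM to
   the caller would leave inconsistent state behind, so terminating
   the process is the simpler and safer option.  */
#include <stdio.h>
#include <stdlib.h>

struct slot_list
{
  struct slot_list *next;
  size_t len;
};

/* Stand-in for an oom () style helper: print and terminate.  */
static void
oom_abort (const char *what)
{
  fprintf (stderr, "fatal: out of memory in %s\n", what);
  abort ();
}

/* Grow the list; on allocation failure do not try to undo earlier
   bookkeeping, just crash cleanly.  */
static struct slot_list *
extend_slot_list (struct slot_list *prev, size_t len)
{
  struct slot_list *listp = malloc (sizeof *listp);
  if (listp == NULL)
    oom_abort ("extend_slot_list");
  listp->next = NULL;
  listp->len = len;
  prev->next = listp;
  return listp;
}

int
main (void)
{
  struct slot_list head = { NULL, 0 };
  struct slot_list *tail = extend_slot_list (&head, 16);
  printf ("new list entry with room for %zu slots\n", tail->len);
  return 0;
}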
This fixes a subset of the issues described in
https://sourceware.org/ml/libc-alpha/2016-11/msg01026.html
without adding locks to pthread_create.
Only races between dlopen and pthread_create were considered,
and the asserts that tried to check for concurrency issues
were removed.
The patch is incomplete because the dlclose, tls access and
dl_iterate_phdr related code paths are not modified.
To make the patch complete, dlclose should be updated in a
similar fashion to dlopen.  Alternatively, pthread_create
may take the GL(dl_load_write_lock) to synchronize with dlclose,
or the GL(dl_load_lock) to synchronize with both dlopen and
dlclose (that would simplify the concurrency design, but
increase contention on those locks).
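For illustration, here is a minimal, self-contained sketch of that
lock-based alternative.  It does not use the real glibc types or lock
macros; the dl_state struct, dl_lock mutex, register_module and
init_thread_tls functions are hypothetical stand-ins.  The point is
only the trade-off: the reader takes the same lock the writer holds,
so no atomics are needed, at the cost of extra contention on every
thread creation.

/* Hypothetical sketch of the lock-based design mentioned above, not
   glibc code: the pthread_create-side initialization takes the same
   lock the dlopen-side update holds, so plain loads and stores are
   race free, at the cost of extra lock contention.  */
#include <pthread.h>
#include <stdio.h>

struct dl_state
{
  size_t generation;   /* Bumped by each load/unload.  */
  size_t max_modid;    /* Highest module id currently assigned.  */
};

static struct dl_state state;
static pthread_mutex_t dl_lock = PTHREAD_MUTEX_INITIALIZER;

/* dlopen-side update, analogous to dl_open_worker.  */
static void
register_module (void)
{
  pthread_mutex_lock (&dl_lock);
  state.max_modid++;
  state.generation++;
  pthread_mutex_unlock (&dl_lock);
}

/* pthread_create-side read, analogous to _dl_allocate_tls_init:
   taking the lock gives a consistent snapshot of both fields.  */
static size_t
init_thread_tls (void)
{
  pthread_mutex_lock (&dl_lock);
  size_t gen = state.generation;
  pthread_mutex_unlock (&dl_lock);
  return gen;
}

int
main (void)
{
  register_module ();
  printf ("observed generation: %zu\n", init_thread_tls ());
  return 0;
}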
2016-11-30 Szabolcs Nagy <szabolcs.nagy@arm.com>
[BZ #19329]
* elf/dl-open.c (dl_open_worker): Write GL(dl_tls_generation)
atomically.
* elf/dl-tls.c (_dl_allocate_tls_init): Read GL(dl_tls_generation),
GL(dl_tls_max_dtv_idx), slotinfo entries and listp->next atomically.
Remove assertions that cannot be guaranteed.
(_dl_add_to_slotinfo): Write the slotinfo entries and listp->next
atomically.
diff --git a/elf/dl-open.c b/elf/dl-open.c
index f5ca261..e6f6e0b 100644
--- a/elf/dl-open.c
+++ b/elf/dl-open.c
@@ -524,9 +524,17 @@ dl_open_worker (void *a)
}
/* Bump the generation number if necessary. */
- if (any_tls && __builtin_expect (++GL(dl_tls_generation) == 0, 0))
- _dl_fatal_printf (N_("\
+ if (any_tls)
+ {
+ /* This cannot be in a data-race so non-atomic load is valid too. */
+ size_t newgen = atomic_load_relaxed (&GL(dl_tls_generation)) + 1;
+ /* Synchronize with _dl_allocate_tls_init (see notes there) and
+ avoid storing an overflowed counter. */
+ if (__builtin_expect (newgen == 0, 0))
+ _dl_fatal_printf (N_("\
TLS generation counter wrapped! Please report this."));
+ atomic_store_release (&GL(dl_tls_generation), newgen);
+ }
/* We need a second pass for static tls data, because _dl_update_slotinfo
must not be run while calls to _dl_add_to_slotinfo are still pending. */
diff --git a/elf/dl-tls.c b/elf/dl-tls.c
index 2feee67..7dbee0d 100644
--- a/elf/dl-tls.c
+++ b/elf/dl-tls.c
@@ -470,6 +470,36 @@ _dl_resize_dtv (dtv_t *dtv)
}
+/*
+CONCURRENCY NOTES
+
+dlopen (and dlclose) holds the GL(dl_load_lock) while writing shared state,
+which may be concurrently read by pthread_create and tls access without taking
+the lock, so atomic access should be used. The shared state:
+
+ GL(dl_tls_max_dtv_idx) - max modid assigned, (modid can be reused).
+ GL(dl_tls_generation) - generation count, incremented by dlopen and dlclose.
+ GL(dl_tls_dtv_slotinfo_list) - list of entries, contains generation count
+ and link_map for each module with a modid.
+
+A module gets a modid assigned if it has tls, a modid identifies a slotinfo
+entry and it is the index of the corresponding dtv slot. The generation count
+is assigned to slotinfo entries of a newly loaded or unloaded module and its
+newly loaded or unloaded dependencies.
+
+TODO: dlclose may free memory read by a concurrent pthread_create or tls
+access. This is broken now, so it is assumed that dlclose does not free
+link_map structures while pthread_create or __tls_get_addr is reading them.
+
+pthread_create calls _dl_allocate_tls_init (before creating the new thread),
+which should guarantee that the dtv is in a consistent state at the end:
+
+All slotinfo updates with generation <= dtv[0].counter are reflected in the
+dtv and arbitrary later module unloads may also be reflected as unallocated
+entries. (Note: a modid reuse implies a module unload and accessing tls in
+an unloaded module is undefined.)
+*/
+
void *
internal_function
_dl_allocate_tls_init (void *result)
@@ -482,12 +512,24 @@ _dl_allocate_tls_init (void *result)
struct dtv_slotinfo_list *listp;
size_t total = 0;
size_t maxgen = 0;
+ /* Synchronizes with the increments in dl_{open,close}_worker.
+ Slotinfo updates of this generation are sequenced before the
+ write we read from here. */
+ size_t gen_count = atomic_load_acquire (&GL(dl_tls_generation));
+ /* Either reads from the last write that is sequenced before the
+ generation counter increment we synchronized with or a write
+ made by a later dlopen/dlclose. dlclose may decrement this,
+ but only if related modules are unloaded. So it is an upper
+ bound on non-unloaded modids up to gen_count generation. */
+ size_t dtv_slots = atomic_load_relaxed (&GL(dl_tls_max_dtv_idx));
/* Check if the current dtv is big enough. */
- if (dtv[-1].counter < GL(dl_tls_max_dtv_idx))
+ if (dtv[-1].counter < dtv_slots)
{
/* Resize the dtv. */
dtv = _dl_resize_dtv (dtv);
+ /* _dl_resize_dtv rereads GL(dl_tls_max_dtv_idx) which may decrease. */
+ dtv_slots = dtv[-1].counter;
/* Install this new dtv in the thread data structures. */
INSTALL_DTV (result, &dtv[-1]);
@@ -504,22 +546,33 @@ _dl_allocate_tls_init (void *result)
for (cnt = total == 0 ? 1 : 0; cnt < listp->len; ++cnt)
{
struct link_map *map;
+ size_t gen;
void *dest;
/* Check for the total number of used slots. */
- if (total + cnt > GL(dl_tls_max_dtv_idx))
+ if (total + cnt > dtv_slots)
break;
- map = listp->slotinfo[cnt].map;
+ /* Synchronize with _dl_add_to_slotinfo and remove_slotinfo. */
+ map = atomic_load_acquire (&listp->slotinfo[cnt].map);
if (map == NULL)
/* Unused entry. */
continue;
+ /* Consistent generation count with the map read above.
+ Inconsistent gen may be read if the entry is being reused,
+ in which case it is larger than gen_count and we skip it. */
+ gen = atomic_load_relaxed (&listp->slotinfo[cnt].gen);
+ if (gen > gen_count)
+ /* New entry. */
+ continue;
+
/* Keep track of the maximum generation number. This might
not be the generation counter. */
- assert (listp->slotinfo[cnt].gen <= GL(dl_tls_generation));
- maxgen = MAX (maxgen, listp->slotinfo[cnt].gen);
+ maxgen = MAX (maxgen, gen);
+ /* TODO: concurrent dlclose may free map which would break
+ the rest of the code below. */
dtv[map->l_tls_modid].pointer.val = TLS_DTV_UNALLOCATED;
dtv[map->l_tls_modid].pointer.to_free = NULL;
@@ -549,11 +602,15 @@ _dl_allocate_tls_init (void *result)
}
total += cnt;
- if (total >= GL(dl_tls_max_dtv_idx))
+ if (total > dtv_slots)
break;
- listp = listp->next;
- assert (listp != NULL);
+ /* Synchronize with _dl_add_to_slotinfo. */
+ listp = atomic_load_acquire (&listp->next);
+ /* dtv_slots is an upper bound on the number of entries we care
+ about, the list may end sooner. */
+ if (listp == NULL)
+ break;
}
/* The DTV version is up-to-date now. */
@@ -954,7 +1011,7 @@ _dl_add_to_slotinfo (struct link_map *l)
the first slot. */
assert (idx == 0);
- listp = prevp->next = (struct dtv_slotinfo_list *)
+ listp = (struct dtv_slotinfo_list *)
malloc (sizeof (struct dtv_slotinfo_list)
+ TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo));
if (listp == NULL)
@@ -969,9 +1026,17 @@ _dl_add_to_slotinfo (struct link_map *l)
listp->next = NULL;
memset (listp->slotinfo, '\0',
TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo));
+ /* Add the new list item and synchronize with _dl_allocate_tls_init. */
+ atomic_store_release (&prevp->next, listp);
}
/* Add the information into the slotinfo data structure. */
- listp->slotinfo[idx].map = l;
- listp->slotinfo[idx].gen = GL(dl_tls_generation) + 1;
+
+ /* This cannot be in a data-race so non-atomic load would be valid too. */
+ size_t newgen = atomic_load_relaxed (&GL(dl_tls_generation)) + 1;
+ /* TODO: Concurrent readers may see an overflowed gen, which is bad,
+ but overflow is guaranteed to crash the dlopen that is executing. */
+ atomic_store_relaxed (&listp->slotinfo[idx].gen, newgen);
+ /* Synchronize with _dl_allocate_tls_init (see notes there). */
+ atomic_store_release (&listp->slotinfo[idx].map, l);
}
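To see the acquire/release pairing from the patch in isolation, here is
a stand-alone sketch using C11 atomics; the slot array, publish_slot and
scan_slots names are hypothetical and the data structure is heavily
simplified.  It only demonstrates the protocol: the writer fills an
entry, publishes the map pointer with a release store and then bumps the
generation with a release store, while the reader acquire-loads the
generation and each map pointer and skips entries whose generation is
newer than the one it observed.

/* Stand-alone illustration of the release/acquire protocol used in
   the patch (hypothetical names, not glibc code).  */
#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

#define NSLOTS 8

struct slot
{
  _Atomic size_t gen;       /* Generation the entry belongs to.  */
  _Atomic (void *) map;     /* Non-NULL once the entry is valid.  */
};

static struct slot slots[NSLOTS];
static _Atomic size_t generation;

/* Writer side (dlopen, serialized by the loader lock): fill the entry,
   publish the map pointer with a release store, then bump the
   generation with a release store so a reader that observes the new
   generation also observes the entry.  */
static void
publish_slot (size_t idx, void *map)
{
  size_t newgen = atomic_load_explicit (&generation, memory_order_relaxed) + 1;
  atomic_store_explicit (&slots[idx].gen, newgen, memory_order_relaxed);
  atomic_store_explicit (&slots[idx].map, map, memory_order_release);
  atomic_store_explicit (&generation, newgen, memory_order_release);
}

/* Reader side (pthread_create, without the lock): acquire-load the
   generation, then acquire-load each map pointer and skip entries
   that are newer than the observed generation or still unset.  */
static size_t
scan_slots (void)
{
  size_t seen = 0;
  size_t gen_count = atomic_load_explicit (&generation, memory_order_acquire);
  for (size_t i = 0; i < NSLOTS; i++)
    {
      void *map = atomic_load_explicit (&slots[i].map, memory_order_acquire);
      if (map == NULL)
        continue;             /* Unused entry.  */
      size_t gen = atomic_load_explicit (&slots[i].gen, memory_order_relaxed);
      if (gen > gen_count)
        continue;             /* Entry from a later dlopen; skip.  */
      seen++;
    }
  return seen;
}

int
main (void)
{
  int dummy;
  publish_slot (0, &dummy);
  printf ("entries visible at current generation: %zu\n", scan_slots ());
  return 0;
}

This also shows why the relaxed load of the gen field is sufficient in
the patch: an entry that is being reused concurrently can only carry a
generation larger than gen_count, and such entries are skipped.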