diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs
index d19d88bdc..a20d365af 100644
--- a/.git-blame-ignore-revs
+++ b/.git-blame-ignore-revs
@@ -1,2 +1,3 @@
 6901dc07127f54c060ec4046e21d05ccd7f437ab
 3ddc9da40f8b34565c90d17ef83a9ef95a9deb18
+d1196d946c6551b205791f47ee952412e1a3e9bc
diff --git a/config/qthread_check_hwloc.m4 b/config/qthread_check_hwloc.m4
index 997dc051f..dd00ef3ab 100644
--- a/config/qthread_check_hwloc.m4
+++ b/config/qthread_check_hwloc.m4
@@ -21,13 +21,6 @@ AC_DEFUN([QTHREAD_CHECK_HWLOC], [
                 [AS_HELP_STRING([--disable-hwloc-has-distance],
                                 [Disable hwloc distance support])])
 
-  AC_ARG_WITH([hwloc-get-topology-function],
-              [AS_HELP_STRING([--with-hwloc-get-topology-function=[[name]]],
-                              [specify function to get hwloc topology (otherwise uses hwloc_topology_init and hwloc_topology_load)])])
-
-  AS_IF([test "x$with_hwloc_get_topology_function" != x],
-        AC_DEFINE_UNQUOTED([HWLOC_GET_TOPOLOGY_FUNCTION],[$with_hwloc_get_topology_function],
-                           [Define to specify function that returns hwloc topology]))
   hwloc_saved_CPPFLAGS="$CPPFLAGS"
   hwloc_saved_LDFLAGS="$LDFLAGS"
   AS_IF([test "x$with_hwloc" != x],
diff --git a/include/qt_affinity.h b/include/qt_affinity.h
index bc989e60f..761afb3d5 100644
--- a/include/qt_affinity.h
+++ b/include/qt_affinity.h
@@ -42,6 +42,8 @@ void INTERNAL qt_topology_init(qthread_shepherd_id_t *nshepherds,
                                qthread_worker_id_t *nworkerspershep,
                                size_t *hw_par);
 
+void INTERNAL qt_topology_deinit(void);
+
 /**
  * qt_affinity_init() - initialize affinity layer
  * @nbshepherds: User hint for number of shepherds to use.
@@ -54,6 +56,9 @@ void INTERNAL qt_topology_init(qthread_shepherd_id_t *nshepherds,
 void INTERNAL qt_affinity_init(qthread_shepherd_id_t *nbshepherds,
                                qthread_worker_id_t *nbworkers,
                                size_t *hw_par);
+
+void INTERNAL qt_affinity_deinit(void);
+
 /**
  * qt_affinity_set() - bind a worker to a set of resources
  * @me: The worker to bind.
diff --git a/include/qt_atomics.h b/include/qt_atomics.h
index 32f9de39b..2ac887ede 100644
--- a/include/qt_atomics.h
+++ b/include/qt_atomics.h
@@ -175,6 +175,18 @@ static inline int QTHREAD_TRYLOCK_TRY(qt_spin_trylock_t *x) {
     qassert(pthread_cond_destroy(&(c)), 0);                                    \
     qassert(pthread_mutex_destroy(&(c##_lock)), 0);                            \
   } while (0)
+#ifdef NDEBUG
+#define QTHREAD_COND_WAIT(c)                                                   \
+  do {                                                                         \
+    struct timespec t;                                                         \
+    struct timeval n;                                                          \
+    gettimeofday(&n, NULL);                                                    \
+    t.tv_nsec = (n.tv_usec * 1000) + 500000000;                                \
+    t.tv_sec = n.tv_sec + ((t.tv_nsec >= 1000000000) ? 1 : 0);                 \
+    t.tv_nsec -= ((t.tv_nsec >= 1000000000) ? 1000000000 : 0);                 \
+    pthread_cond_timedwait(&(c), &(c##_lock), &t);                             \
+  } while (0)
+#else
 #define QTHREAD_COND_WAIT(c)                                                   \
   do {                                                                         \
     struct timespec t;                                                         \
@@ -186,6 +198,7 @@ static inline int QTHREAD_TRYLOCK_TRY(qt_spin_trylock_t *x) {
     int val = pthread_cond_timedwait(&(c), &(c##_lock), &t);                   \
     assert(!(val == EINVAL || val == EPERM));                                  \
   } while (0)
+#endif
 #define QTHREAD_COND_WAIT_DUO(c, m)                                            \
   do {                                                                         \
     struct timespec t;                                                         \
diff --git a/include/qt_hash.h b/include/qt_hash.h
index 1b220bb88..649445398 100644
--- a/include/qt_hash.h
+++ b/include/qt_hash.h
@@ -77,6 +77,9 @@ int INTERNAL qt_hash_remove(qt_hash h, qt_key_t const key);
  */
 int INTERNAL qt_hash_remove_locked(qt_hash h, qt_key_t const key);
 
+int INTERNAL qt_hash_pop(void **val, qt_hash h, qt_key_t const key);
+int INTERNAL qt_hash_pop_locked(void **val, qt_hash h, qt_key_t const key);
+
 /*!
  * @fn qt_hash_get(qt_hash h,
  *                 const qt_key_t key)
diff --git a/src/affinity/binders.c b/src/affinity/binders.c
index 40d090820..ea5e375df 100644
--- a/src/affinity/binders.c
+++ b/src/affinity/binders.c
@@ -83,10 +83,6 @@ static int qt_affinity_compact(int num_workers, hwloc_obj_t obj) {
 void INTERNAL qt_affinity_init(qthread_shepherd_id_t *nbshepherds,
                                qthread_worker_id_t *nbworkers,
                                size_t *hw_par) {
-#ifdef HWLOC_GET_TOPOLOGY_FUNCTION
-  extern void *HWLOC_GET_TOPOLOGY_FUNCTION;
-  topology = (hwloc_topology_t)HWLOC_GET_TOPOLOGY_FUNCTION;
-#endif
   // Note: the lack of a teardown routine will cause topology initialization
   // to be skipped if qthreads is re-initialized
   if (topology == NULL) {
@@ -100,7 +96,7 @@ void INTERNAL qt_affinity_init(qthread_shepherd_id_t *nbshepherds,
     qt_internal_get_env_num("NUM_WORKERS_PER_SHEPHERD", 0, 0);
   size_t num_workers = qt_internal_get_env_num("HWPAR", 0, 0);
   char const *affinity = qt_internal_get_env_str("LAYOUT", "NOT_SET");
-  hwloc_const_bitmap_t allowed =
+  hwloc_bitmap_t allowed =
     hwloc_bitmap_dup(hwloc_topology_get_allowed_cpuset(topology));
   if (!allowed) {
     printf("hwloc detection of allowed cpus failed\n");
@@ -125,11 +121,11 @@ void INTERNAL qt_affinity_init(qthread_shepherd_id_t *nbshepherds,
 
   workers.binds = qt_malloc(sizeof(hwloc_cpuset_t) * workers.num);
   for (int i = 0; i < workers.num; i++) {
-    workers.binds[i] = hwloc_bitmap_alloc();
     workers.binds[i] = hwloc_bitmap_dup(allowed);
   }
   hwloc_obj_t obj =
     hwloc_get_obj_inside_cpuset_by_depth(topology, allowed, 0, 0);
+  hwloc_bitmap_free(allowed);
   if (affinity && strcmp("COMPACT", affinity) == 0) {
     qt_affinity_compact(workers.num, obj);
   }
@@ -188,6 +184,15 @@ void INTERNAL qt_affinity_init(qthread_shepherd_id_t *nbshepherds,
   }
 }
 
+void INTERNAL qt_affinity_deinit() {
+  if (sheps.binds) {
+    for (int i = 0; i < sheps.num; i++) { hwloc_bitmap_free(sheps.binds[i]); }
+    qt_free(sheps.binds);
+  }
+  for (int i = 0; i < workers.num; i++) { hwloc_bitmap_free(workers.binds[i]); }
+  qt_free(workers.binds);
+}
+
 void INTERNAL qt_affinity_set(qthread_worker_t *me,
                               unsigned int nworkerspershep) {
   hwloc_set_cpubind(
diff --git a/src/affinity/common.c b/src/affinity/common.c
index 65ee82209..e57b3fae3 100644
--- a/src/affinity/common.c
+++ b/src/affinity/common.c
@@ -83,3 +83,5 @@ void INTERNAL qt_topology_init(qthread_shepherd_id_t *nshepherds,
   *nworkerspershep = num_wps;
   *hw_par = num_workers;
 }
+
+void INTERNAL qt_topology_deinit(void) { qt_affinity_deinit(); }
diff --git a/src/affinity/hwloc.c b/src/affinity/hwloc.c
index c29f42ce7..7e215a81b 100644
--- a/src/affinity/hwloc.c
+++ b/src/affinity/hwloc.c
@@ -86,10 +86,6 @@ void INTERNAL qt_affinity_init(qthread_shepherd_id_t *nbshepherds,
                                qthread_worker_id_t *nbworkers,
                                size_t *hw_par) { /*{{{ */
   if (qthread_cas(&initialized, 0, 1) == 0) {
-#ifdef HWLOC_GET_TOPOLOGY_FUNCTION
-    extern void *HWLOC_GET_TOPOLOGY_FUNCTION;
-    topology = (hwloc_topology_t)HWLOC_GET_TOPOLOGY_FUNCTION;
-#endif
     if (topology == NULL) {
       qassert(hwloc_topology_init(&topology), 0);
       qassert(hwloc_topology_load(topology), 0);
@@ -265,6 +261,8 @@ void INTERNAL qt_affinity_init(qthread_shepherd_id_t *nbshepherds,
   }
 } /*}}} */
 
+void INTERNAL qt_affinity_deinit(void) {}
+
 #ifdef QTHREAD_HAVE_MEM_AFFINITY
 void INTERNAL qt_affinity_mem_tonode(void *addr,
                                      size_t bytes,
diff --git a/src/affinity/no.c b/src/affinity/no.c
index fa5beac4d..2f0d8bd32 100644
--- a/src/affinity/no.c
+++ b/src/affinity/no.c
@@ -15,6 +15,8 @@ void INTERNAL qt_affinity_init(qthread_shepherd_id_t *nbshepherds,
   if (*nbworkers == 0) { *nbworkers = 1; }
 } /*}}} */
 
+void INTERNAL qt_affinity_deinit(void) {}
+
 void INTERNAL qt_affinity_set(qthread_worker_t *me, unsigned int Q_UNUSED(nw)) {
 }
diff --git a/src/hashmap.c b/src/hashmap.c
index fd6b24283..dbf7f84ef 100644
--- a/src/hashmap.c
+++ b/src/hashmap.c
@@ -391,6 +391,54 @@ int INTERNAL qt_hash_remove_locked(qt_hash h, qt_key_t const key) { /*{{{*/
   return 1;
 } /*}}}*/
 
+int INTERNAL qt_hash_pop(void **val, qt_hash h, qt_key_t const key) { /*{{{*/
+  int ret;
+
+  assert(h);
+  if (h->lock) { QTHREAD_FASTLOCK_LOCK(h->lock); }
+  ret = qt_hash_pop_locked(val, h, key);
+  if (h->lock) { QTHREAD_FASTLOCK_UNLOCK(h->lock); }
+  return ret;
+} /*}}}*/
+
+int INTERNAL qt_hash_pop_locked(void **val,
+                                qt_hash h,
+                                qt_key_t const key) { /*{{{*/
+  hash_entry *p;
+  void **value;
+
+  assert(h);
+  if ((key == KEY_DELETED) || (key == KEY_NULL)) {
+    if (h->has_key[(uintptr_t)key] == 0) {
+      *val = NULL;
+      return 0;
+    } else {
+      h->has_key[(uintptr_t)key] = 0;
+      h->value[(uintptr_t)key] = NULL;
+    }
+    *val = NULL;
+    return 1;
+  }
+  value = qt_hash_internal_find(h, key);
+  if (value == NULL) {
+    *val = NULL;
+    return 0;
+  }
+  *val = atomic_load_explicit((void *_Atomic *)value, memory_order_relaxed);
+  p = (hash_entry *)(value - 1); // sneaky way to recover the hash_entry ptr
+  atomic_store_explicit(&p->key, KEY_DELETED, memory_order_relaxed);
+  size_t deletes_loc =
+    atomic_fetch_add_explicit(&h->deletes, 1u, memory_order_relaxed) + 1u;
+  size_t population_loc =
+    atomic_fetch_sub_explicit(&h->population, 1u, memory_order_relaxed) - 1u;
+  if (deletes_loc + population_loc >= h->tidy_up_size) {
+    brehash(h, h->num_entries);
+  } else if (h->population < h->shrink_size) {
+    brehash(h, h->num_entries / 2);
+  }
+  return 1;
+} /*}}}*/
+
 void INTERNAL *qt_hash_get(qt_hash h, qt_key_t const key) { /*{{{*/
   void *ret;
 
diff --git a/src/locks.c b/src/locks.c
index a356f7244..b55cb09c3 100644
--- a/src/locks.c
+++ b/src/locks.c
@@ -49,9 +49,11 @@ static int lock_hashmap_put(aligned_t const *key, qthread_spinlock_t *val) {
 
 static int lock_hashmap_remove(aligned_t const *key) {
   if (!qthread_spinlock_buckets) return QTHREAD_OPFAIL;
-
-  if (qt_hash_remove(qthread_spinlock_buckets[LOCKBIN(key)], key))
+  void *val = NULL;
+  if (qt_hash_pop(&val, qthread_spinlock_buckets[LOCKBIN(key)], key)) {
+    if (val) qt_free(val);
     return QTHREAD_SUCCESS;
+  }
   return QTHREAD_OPFAIL;
 }
diff --git a/src/qthread.c b/src/qthread.c
index c8e9c0423..9ede8ab69 100644
--- a/src/qthread.c
+++ b/src/qthread.c
@@ -1202,6 +1202,8 @@ void API_FUNC qthread_finalize(void) { /*{{{ */
   qlib = NULL;
   TLS_DELETE(shepherd_structs);
 
+  qt_topology_deinit();
+
 #ifndef NDEBUG
   MACHINE_FENCE;
   qthread_library_initialized = 0;
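
The teardown hooks above (qt_affinity_deinit / qt_topology_deinit, called from qthread_finalize) exist so that the hwloc cpusets allocated in qt_affinity_init() are released at shutdown rather than leaked or silently reused, which is the situation the removed "lack of a teardown routine" comment in binders.c described. The sketch below is a minimal, hypothetical illustration, not part of the patch: it shows the initialize/finalize/initialize cycle this teardown path is aimed at, and it assumes only the public qthreads API (qthread_initialize, qthread_num_workers, qthread_finalize) and that re-initialization after finalize is supported by the build in use.

/* re_init_sketch.c - illustration only, not part of this diff.
 * With this change, qthread_finalize() reaches qt_topology_deinit(), which
 * (in the binders affinity backend) frees the per-shepherd and per-worker
 * hwloc cpusets, so a later qthread_initialize() rebuilds the topology from
 * scratch instead of skipping initialization. */
#include <stdio.h>
#include <qthread/qthread.h>

int main(void) {
  /* First library lifetime: topology is detected and worker bindings are
   * allocated in qt_affinity_init(). */
  if (qthread_initialize() != QTHREAD_SUCCESS) return 1;
  printf("first init: %u workers\n", (unsigned)qthread_num_workers());
  qthread_finalize(); /* affinity/topology state is torn down here */

  /* Second lifetime: starts from a clean slate rather than stale bindings. */
  if (qthread_initialize() != QTHREAD_SUCCESS) return 1;
  printf("second init: %u workers\n", (unsigned)qthread_num_workers());
  qthread_finalize();
  return 0;
}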