Skip to content

Commit

Permalink
UCS/RCACHE: Use built-in atomics for rwlock
Browse files Browse the repository at this point in the history
  • Loading branch information
Artemy-Mellanox committed Dec 2, 2024
1 parent 1c22ffc commit 36702e8
Show file tree
Hide file tree
Showing 6 changed files with 134 additions and 36 deletions.
1 change: 1 addition & 0 deletions src/ucs/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ nobase_dist_libucs_la_HEADERS = \
type/param.h \
type/init_once.h \
type/spinlock.h \
type/rwlock.h \
type/status.h \
type/thread_mode.h \
type/cpu_set.h \
Expand Down
49 changes: 20 additions & 29 deletions src/ucs/memory/rcache.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <ucs/sys/string.h>
#include <ucs/arch/cpu.h>
#include <ucs/type/spinlock.h>
#include <ucs/type/rwlock.h>
#include <ucs/vfs/base/vfs_obj.h>
#include <ucm/api/ucm.h>

Expand Down Expand Up @@ -424,7 +425,7 @@ void ucs_mem_region_destroy_internal(ucs_rcache_t *rcache,
UCS_STATS_UPDATE_COUNTER(rcache->stats, UCS_RCACHE_DEREGS, 1);

if (drop_lock) {
pthread_rwlock_unlock(&rcache->pgt_lock);
ucs_rwlock_write_unlock(&rcache->pgt_lock);
}

UCS_PROFILE_NAMED_CALL_VOID_ALWAYS("mem_dereg",
Expand All @@ -433,7 +434,7 @@ void ucs_mem_region_destroy_internal(ucs_rcache_t *rcache,
region);

if (drop_lock) {
pthread_rwlock_wrlock(&rcache->pgt_lock);
ucs_rwlock_write_lock(&rcache->pgt_lock);
}
}

Expand Down Expand Up @@ -493,14 +494,14 @@ static inline void ucs_rcache_region_put_internal(ucs_rcache_t *rcache,

/* Destroy region and de-register memory */
if (flags & UCS_RCACHE_REGION_PUT_FLAG_TAKE_PGLOCK) {
pthread_rwlock_wrlock(&rcache->pgt_lock);
ucs_rwlock_write_lock(&rcache->pgt_lock);
}

ucs_mem_region_destroy_internal(rcache, region,
flags & UCS_RCACHE_REGION_PUT_FLAG_TAKE_PGLOCK);

if (flags & UCS_RCACHE_REGION_PUT_FLAG_TAKE_PGLOCK) {
pthread_rwlock_unlock(&rcache->pgt_lock);
ucs_rwlock_write_unlock(&rcache->pgt_lock);
}
}

Expand Down Expand Up @@ -649,15 +650,15 @@ static void ucs_rcache_unmapped_callback(ucm_event_type_t event_type,
* no rcache operations are performed to clean it.
*/
if (!(rcache->params.flags & UCS_RCACHE_FLAG_SYNC_EVENTS) &&
!pthread_rwlock_trywrlock(&rcache->pgt_lock)) {
!ucs_rwlock_write_trylock(&rcache->pgt_lock)) {
/* coverity[double_lock] */
ucs_rcache_invalidate_range(rcache, start, end,
UCS_RCACHE_REGION_PUT_FLAG_ADD_TO_GC);
UCS_STATS_UPDATE_COUNTER(rcache->stats, UCS_RCACHE_UNMAPS, 1);
/* coverity[double_lock] */
ucs_rcache_check_inv_queue(rcache, UCS_RCACHE_REGION_PUT_FLAG_ADD_TO_GC);
/* coverity[double_unlock] */
pthread_rwlock_unlock(&rcache->pgt_lock);
ucs_rwlock_write_unlock(&rcache->pgt_lock);
return;
}

Expand Down Expand Up @@ -703,11 +704,11 @@ static void ucs_rcache_purge(ucs_rcache_t *rcache)
/* Lock must be held in write mode */
static void ucs_rcache_clean(ucs_rcache_t *rcache)
{
pthread_rwlock_wrlock(&rcache->pgt_lock);
ucs_rwlock_write_lock(&rcache->pgt_lock);
/* coverity[double_lock]*/
ucs_rcache_check_inv_queue(rcache, 0);
ucs_rcache_check_gc_list(rcache, 1);
pthread_rwlock_unlock(&rcache->pgt_lock);
ucs_rwlock_write_unlock(&rcache->pgt_lock);
}

/* Lock must be held in write mode */
Expand Down Expand Up @@ -940,7 +941,7 @@ ucs_status_t ucs_rcache_create_region(ucs_rcache_t *rcache, void *address,
ucs_trace_func("rcache=%s, address=%p, length=%zu", rcache->name, address,
length);

pthread_rwlock_wrlock(&rcache->pgt_lock);
ucs_rwlock_write_lock(&rcache->pgt_lock);

retry:
/* Align to page size */
Expand Down Expand Up @@ -1061,7 +1062,7 @@ ucs_status_t ucs_rcache_create_region(ucs_rcache_t *rcache, void *address,
*region_p = region;
out_unlock:
/* coverity[double_unlock]*/
pthread_rwlock_unlock(&rcache->pgt_lock);
ucs_rwlock_write_unlock(&rcache->pgt_lock);
return status;
}

Expand All @@ -1082,7 +1083,7 @@ ucs_status_t ucs_rcache_get(ucs_rcache_t *rcache, void *address, size_t length,
ucs_trace_func("rcache=%s, address=%p, length=%zu", rcache->name, address,
length);

pthread_rwlock_rdlock(&rcache->pgt_lock);
ucs_rwlock_read_lock(&rcache->pgt_lock);
UCS_STATS_UPDATE_COUNTER(rcache->stats, UCS_RCACHE_GETS, 1);
if (ucs_queue_is_empty(&rcache->inv_q)) {
pgt_region = UCS_PROFILE_CALL(ucs_pgtable_lookup, &rcache->pgtable,
Expand All @@ -1096,12 +1097,12 @@ ucs_status_t ucs_rcache_get(ucs_rcache_t *rcache, void *address, size_t length,
ucs_rcache_region_lru_get(rcache, region);
*region_p = region;
UCS_STATS_UPDATE_COUNTER(rcache->stats, UCS_RCACHE_HITS_FAST, 1);
pthread_rwlock_unlock(&rcache->pgt_lock);
ucs_rwlock_read_unlock(&rcache->pgt_lock);
return UCS_OK;
}
}
}
pthread_rwlock_unlock(&rcache->pgt_lock);
ucs_rwlock_read_unlock(&rcache->pgt_lock);

/* Fall back to slow version (with rw lock) in following cases:
* - invalidation list not empty
Expand Down Expand Up @@ -1132,7 +1133,7 @@ void ucs_rcache_region_invalidate(ucs_rcache_t *rcache,
comp = ucs_mpool_get(&rcache->mp);
ucs_spin_unlock(&rcache->lock);

pthread_rwlock_wrlock(&rcache->pgt_lock);
ucs_rwlock_write_lock(&rcache->pgt_lock);
if (comp != NULL) {
comp->func = cb;
comp->arg = arg;
Expand All @@ -1145,7 +1146,7 @@ void ucs_rcache_region_invalidate(ucs_rcache_t *rcache,
/* coverity[double_lock] */
ucs_rcache_region_invalidate_internal(rcache, region, 0);
/* coverity[double_unlock] */
pthread_rwlock_unlock(&rcache->pgt_lock);
ucs_rwlock_write_unlock(&rcache->pgt_lock);
UCS_STATS_UPDATE_COUNTER(rcache->stats, UCS_RCACHE_PUTS, 1);
}

Expand All @@ -1170,10 +1171,10 @@ static void ucs_rcache_before_fork(void)
* again on-demand.
* - Other use cases shouldn't be affected
*/
pthread_rwlock_wrlock(&rcache->pgt_lock);
ucs_rwlock_write_lock(&rcache->pgt_lock);
/* coverity[double_lock] */
ucs_rcache_invalidate_range(rcache, 0, UCS_PGT_ADDR_MAX, 0);
pthread_rwlock_unlock(&rcache->pgt_lock);
ucs_rwlock_write_unlock(&rcache->pgt_lock);
}
}
pthread_mutex_unlock(&ucs_rcache_global_context.lock);
Expand Down Expand Up @@ -1272,7 +1273,6 @@ static UCS_CLASS_INIT_FUNC(ucs_rcache_t, const ucs_rcache_params_t *params,
{
ucs_status_t status;
size_t mp_obj_size, mp_align;
int ret;
ucs_mpool_params_t mp_params;

if (params->region_struct_size < sizeof(ucs_rcache_region_t)) {
Expand All @@ -1294,16 +1294,10 @@ static UCS_CLASS_INIT_FUNC(ucs_rcache_t, const ucs_rcache_params_t *params,

self->params = *params;

ret = pthread_rwlock_init(&self->pgt_lock, NULL);
if (ret) {
ucs_error("pthread_rwlock_init() failed: %m");
status = UCS_ERR_INVALID_PARAM;
goto err_destroy_stats;
}

ucs_rwlock_init(&self->pgt_lock);
status = ucs_spinlock_init(&self->lock, 0);
if (status != UCS_OK) {
goto err_destroy_rwlock;
goto err_destroy_stats;
}

status = ucs_pgtable_init(&self->pgtable, ucs_rcache_pgt_dir_alloc,
Expand Down Expand Up @@ -1376,8 +1370,6 @@ static UCS_CLASS_INIT_FUNC(ucs_rcache_t, const ucs_rcache_params_t *params,
ucs_pgtable_cleanup(&self->pgtable);
err_destroy_inv_q_lock:
ucs_spinlock_destroy(&self->lock);
err_destroy_rwlock:
pthread_rwlock_destroy(&self->pgt_lock);
err_destroy_stats:
UCS_STATS_NODE_FREE(self->stats);
err_free_name:
Expand Down Expand Up @@ -1408,7 +1400,6 @@ static UCS_CLASS_CLEANUP_FUNC(ucs_rcache_t)
ucs_mpool_cleanup(&self->mp, 1);
ucs_pgtable_cleanup(&self->pgtable);
ucs_spinlock_destroy(&self->lock);
pthread_rwlock_destroy(&self->pgt_lock);
UCS_STATS_NODE_FREE(self->stats);
ucs_free(self->name);
ucs_free(self->distribution);
Expand Down
3 changes: 2 additions & 1 deletion src/ucs/memory/rcache_int.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <ucs/stats/stats.h>
#include <ucs/sys/ptr_arith.h>
#include <ucs/type/spinlock.h>
#include <ucs/type/rwlock.h>


#define ucs_rcache_region_log_lvl(_level, _message, ...) \
Expand Down Expand Up @@ -66,7 +67,7 @@ typedef struct ucs_rcache_distribution {
struct ucs_rcache {
ucs_rcache_params_t params; /**< rcache parameters (immutable) */

pthread_rwlock_t pgt_lock; /**< Protects the page table and all
ucs_rwlock_t pgt_lock; /**< Protects the page table and all
regions whose refcount is 0 */
ucs_pgtable_t pgtable; /**< page table to hold the regions */

Expand Down
4 changes: 2 additions & 2 deletions src/ucs/memory/rcache_vfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,9 @@ static void ucs_rcache_vfs_show_primitive(void *obj, ucs_string_buffer_t *strb,
{
ucs_rcache_t *rcache = obj;

pthread_rwlock_rdlock(&rcache->pgt_lock);
ucs_rwlock_read_lock(&rcache->pgt_lock);
ucs_vfs_show_primitive(obj, strb, arg_ptr, arg_u64);
pthread_rwlock_unlock(&rcache->pgt_lock);
ucs_rwlock_read_unlock(&rcache->pgt_lock);
}

static void ucs_rcache_vfs_init_regions_distribution(ucs_rcache_t *rcache)
Expand Down
105 changes: 105 additions & 0 deletions src/ucs/type/rwlock.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
/*
 * Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2024. ALL RIGHTS RESERVED.
 *
 * See file LICENSE for terms.
 */

#ifndef UCS_RWLOCK_H
#define UCS_RWLOCK_H

#include <errno.h> /* EBUSY */
#include <sched.h> /* sched_yield() */

/**
 * The ucs_rwlock_t type.
 *
 * Readers increment the counter by UCS_RWLOCK_READ (4).
 * A writer sets the UCS_RWLOCK_WRITE bit while holding the lock
 * and sets the UCS_RWLOCK_WAIT bit while waiting for it.
 *
 *  31                 2 1 0
 * +--------------------+-+-+
 * |      readers       | | |
 * +--------------------+-+-+
 *                       ^ ^
 *                       | |
 * WRITE: lock held  ----/ |
 * WAIT: writer pending  --/
 */

#define UCS_RWLOCK_WAIT  0x1 /* Writer is waiting */
#define UCS_RWLOCK_WRITE 0x2 /* Writer has the lock */
#define UCS_RWLOCK_MASK  (UCS_RWLOCK_WAIT | UCS_RWLOCK_WRITE)
                             /* Writer is waiting or has lock */
#define UCS_RWLOCK_READ  0x4 /* Reader increment */


/**
 * Read-write spinlock based on compiler built-in atomics.
 *
 * 'volatile' keeps the busy-wait loads from being hoisted out of the
 * spin loops; the __sync builtins provide full memory barriers on the
 * actual acquire/release transitions.
 */
typedef struct {
    volatile int l;
} ucs_rwlock_t;


/**
 * Acquire the lock for reading. Spins (yielding the CPU) while a writer
 * holds or waits for the lock, so writers are not starved by readers.
 */
static inline void ucs_rwlock_read_lock(ucs_rwlock_t *lock) {
    int x;

    while (1) {
        /* Wait until no writer holds or waits for the lock */
        while (lock->l & UCS_RWLOCK_MASK) {
            sched_yield();
        }

        /* Optimistically register as a reader */
        x = __sync_fetch_and_add(&lock->l, UCS_RWLOCK_READ);
        if (!(x & UCS_RWLOCK_MASK)) {
            return;
        }

        /* A writer slipped in first - back out and retry */
        __sync_fetch_and_sub(&lock->l, UCS_RWLOCK_READ);
    }
}


/**
 * Release a read lock previously taken by ucs_rwlock_read_lock().
 */
static inline void ucs_rwlock_read_unlock(ucs_rwlock_t *lock) {
    __sync_fetch_and_sub(&lock->l, UCS_RWLOCK_READ);
}


/**
 * Acquire the lock for writing. Spins (yielding the CPU) until all
 * readers and any other writer have released the lock.
 */
static inline void ucs_rwlock_write_lock(ucs_rwlock_t *lock) {
    int x;

    while (1) {
        x = lock->l;
        /* Lock is free or only the WAIT bit is set: try to take it.
         * NOTE: the CAS replaces the whole word, so it also clears a
         * WAIT bit set by another pending writer; that writer re-sets
         * it on its next retry below, so this only affects fairness. */
        if ((x < UCS_RWLOCK_WRITE) &&
            (__sync_val_compare_and_swap(&lock->l, x, UCS_RWLOCK_WRITE) == x)) {
            return;
        }

        /* Announce a pending writer so new readers back off */
        __sync_fetch_and_or(&lock->l, UCS_RWLOCK_WAIT);
        while (lock->l > UCS_RWLOCK_WAIT) {
            sched_yield();
        }
    }
}


/**
 * Try to acquire the lock for writing without blocking.
 *
 * @return 0 on success, -EBUSY if the lock is held by a reader or writer.
 */
static inline int ucs_rwlock_write_trylock(ucs_rwlock_t *lock) {
    int x;

    x = lock->l;
    if ((x < UCS_RWLOCK_WRITE) &&
        (__sync_val_compare_and_swap(&lock->l, x, UCS_RWLOCK_WRITE) == x)) {
        return 0;
    }

    return -EBUSY;
}


/**
 * Release a write lock. Subtracting (rather than storing 0) preserves a
 * WAIT bit set by another pending writer.
 */
static inline void ucs_rwlock_write_unlock(ucs_rwlock_t *lock) {
    __sync_fetch_and_sub(&lock->l, UCS_RWLOCK_WRITE);
}


/**
 * Initialize the lock to the unlocked state. Not thread-safe; must be
 * called before the lock is visible to other threads.
 */
static inline void ucs_rwlock_init(ucs_rwlock_t *lock) {
    lock->l = 0;
}

#endif
8 changes: 4 additions & 4 deletions test/gtest/ucs/test_rcache.cc
Original file line number Diff line number Diff line change
Expand Up @@ -972,9 +972,9 @@ UCS_TEST_F(test_rcache_stats, unmap_dereg_with_lock) {
* We can have more unmap events if releasing the region structure triggers
* releasing memory back to the OS.
*/
pthread_rwlock_wrlock(&m_rcache->pgt_lock);
ucs_rwlock_write_lock(&m_rcache->pgt_lock);
munmap(mem, size1);
pthread_rwlock_unlock(&m_rcache->pgt_lock);
ucs_rwlock_write_unlock(&m_rcache->pgt_lock);

EXPECT_GE(get_counter(UCS_RCACHE_UNMAPS), 1);
EXPECT_EQ(0, get_counter(UCS_RCACHE_UNMAP_INVALIDATES));
Expand Down Expand Up @@ -1026,9 +1026,9 @@ UCS_TEST_F(test_rcache_stats, hits_slow) {
r1 = get(mem2, size1);

/* generate unmap event under lock, to force using invalidation queue */
pthread_rwlock_rdlock(&m_rcache->pgt_lock);
ucs_rwlock_read_lock(&m_rcache->pgt_lock);
munmap(mem1, size1);
pthread_rwlock_unlock(&m_rcache->pgt_lock);
ucs_rwlock_read_unlock(&m_rcache->pgt_lock);

EXPECT_EQ(1, get_counter(UCS_RCACHE_UNMAPS));

Expand Down

0 comments on commit 36702e8

Please sign in to comment.