Skip to content

Commit

Permalink
prov/util: Integrate kdreg2 into libfabric
Browse files Browse the repository at this point in the history
kdreg2 is a Linux kernel module used to enable the libfabric MR cache
for FI_HMEM_SYSTEM.

Signed-off-by: Mike Uttormark <mike.uttormark@hpe.com>
Signed-off-by: Ian Ziemba <ian.ziemba@hpe.com>
  • Loading branch information
muttormark authored and iziemba committed Oct 17, 2024
1 parent 9c23ff0 commit 210a853
Show file tree
Hide file tree
Showing 5 changed files with 469 additions and 7 deletions.
3 changes: 2 additions & 1 deletion Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,8 @@ common_srcs = \
prov/coll/src/coll_eq.c \
prov/coll/src/coll_fabric.c \
prov/coll/src/coll_init.c \
prov/coll/src/coll.h
prov/coll/src/coll.h \
prov/util/src/kdreg2_mem_monitor.c

if MACOS
common_srcs += src/osx/osd.c
Expand Down
51 changes: 50 additions & 1 deletion configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -598,6 +598,53 @@ AC_ARG_ENABLE([restricted_dl],
AC_DEFINE_UNQUOTED([HAVE_RESTRICTED_DL], [$restricted_dl],
[Define to 1 to only look for dl providers under default location if FI_PROVIDER_PATH is not set])

dnl Check kdreg2 support.
dnl
dnl Shell variables produced by this section:
dnl   kdreg2_enabled            0 when the user switched kdreg2 off
dnl   have_kdreg2               1 when a usable kdreg2 header was found
dnl   have_kdreg2_include_path  1 when the header comes from --with-kdreg2=DIR
dnl                             rather than from linux/kdreg2.h
dnl
dnl --disable-kdreg2 skips header detection entirely.  --with-kdreg2=no
dnl and --without-kdreg2 also switch the monitor off: they clear
dnl have_kdreg2 and must not trip the header-not-found error below.
kdreg2_enabled=1
have_kdreg2=0
have_kdreg2_include_path=0

AC_ARG_ENABLE([kdreg2],
	[AS_HELP_STRING([--disable-kdreg2],
		[Determine whether kdreg2 memory monitor is disabled.])],
	[AS_IF([test "$enable_kdreg2" = "no"], [kdreg2_enabled=0])],
	[])

AS_IF([test $kdreg2_enabled -ne 0 ],
	[AC_CHECK_HEADER([linux/kdreg2.h], [have_kdreg2=1], [], [])
	 AC_ARG_WITH([kdreg2],
		[AS_HELP_STRING([--with-kdreg2=DIR],
			[Enable KDREG2 memory monitor.
			 Optional=<Path to kdreg2.h header file>.])],
		[AS_CASE(["$with_kdreg2"],
			["no"], [kdreg2_enabled=0
				 have_kdreg2=0],
			["yes"], [],
			[""], [],
			[CPPFLAGS="$CPPFLAGS -I$with_kdreg2"
			 AC_CHECK_HEADER([kdreg2.h],
				[have_kdreg2=1
				 have_kdreg2_include_path=1],
				[have_kdreg2=0],
				[])])
		 AS_IF([test $kdreg2_enabled -ne 0 && test $have_kdreg2 -eq 0],
			[AC_MSG_ERROR([KDREG2 header not found in $with_kdreg2. Cannot enable KDREG2 memory monitor.])])
		])
	])

AS_IF([test $kdreg2_enabled -eq 0],
	[AC_MSG_NOTICE([kdreg2 monitor disabled])],
	[AS_IF([test $have_kdreg2 -ne 0],
		[AC_MSG_NOTICE([kdreg2 present and enabled])])])

AC_DEFINE_UNQUOTED(HAVE_KDREG2, [$have_kdreg2],
	[Define to 1 if kdreg2.h is available.])

AC_DEFINE_UNQUOTED(HAVE_KDREG2_INCLUDE_PATH, [$have_kdreg2_include_path],
	[Define to 1 if kdreg2.h path is not <linux/kdreg2.h>.])

AC_DEFINE_UNQUOTED(HAVE_KDREG2_MONITOR, [$have_kdreg2],
	[Define to 1 to enable kdreg2 memory monitor])

dnl Check support to intercept syscalls
AC_CHECK_HEADERS_ONCE(elf.h sys/auxv.h)

Expand Down Expand Up @@ -888,16 +935,18 @@ AC_DEFINE_UNQUOTED(ENABLE_UFFD_MONITOR, [$enable_uffd],
default_monitor=""
bad_default="0"
AC_ARG_WITH([default-monitor],
[AS_HELP_STRING([--with-default-monitor=<memhooks|uffd|disabled>],
[AS_HELP_STRING([--with-default-monitor=<memhooks|uffd|kdreg2|disabled>],
[Select the default memory monitor.])],
dnl Map the requested monitor name onto default_monitor and reject unknown
dnl names.  Every pattern and action must be followed by a comma: without
dnl one the next pattern is merged into the preceding action and all later
dnl arguments shift by one position.
[AS_CASE([$with_default_monitor],
	[memhooks], [default_monitor=memhooks],
	[uffd], [default_monitor=uffd],
	[kdreg2], [default_monitor=kdreg2],
	[disabled], [default_monitor=disabled],
	[AC_MSG_ERROR([Unknown monitor specified: $with_default_monitor. Choices are memhooks, uffd, kdreg2, or disabled.])])
dnl Refuse a default monitor that this build cannot provide.  kdreg2 needs
dnl both the feature left enabled and a kdreg2 header found earlier.
AS_CASE([$default_monitor],
	[memhooks], [AS_IF([test "$enable_memhooks" != "1"], [bad_default=1])],
	[uffd], [AS_IF([test "$enable_uffd" != "1"], [bad_default=1])],
	[kdreg2], [AS_IF([test "$kdreg2_enabled" != "1" || test "$have_kdreg2" != "1"],
			 [bad_default=1])],
	[])
AS_IF([test "$bad_default" != "0"],
[AC_MSG_ERROR(["Default memory monitor is not available: $default_monitor."])])
Expand Down
38 changes: 36 additions & 2 deletions include/ofi_mr.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Copyright (c) 2017-2019 Intel Corporation, Inc. All rights reserved.
* Copyright (c) 2019-2021 Amazon.com, Inc. or its affiliates.
* All rights reserved.
* (C) Copyright 2020 Hewlett Packard Enterprise Development LP
* (C) Copyright 2020-2023 Hewlett Packard Enterprise Development LP
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
Expand Down Expand Up @@ -40,6 +40,8 @@
# include <config.h>
#endif /* HAVE_CONFIG_H */

struct ofi_mr;

#include <inttypes.h>
#include <stdbool.h>

Expand All @@ -48,6 +50,15 @@
#include <ofi_lock.h>
#include <ofi_list.h>
#include <ofi_tree.h>
#include <ofi_hmem.h>

/*
 * Pull in the kdreg2 header only when the kdreg2 monitor is compiled in.
 * HAVE_KDREG2_INCLUDE_PATH selects between a bare "kdreg2.h" and
 * "linux/kdreg2.h"; both macros are expected to come from config.h.
 */
#if HAVE_KDREG2_MONITOR
#if HAVE_KDREG2_INCLUDE_PATH
#include "kdreg2.h"
#else
#include "linux/kdreg2.h"
#endif
#endif

int ofi_open_mr_cache(uint32_t version, void *attr, size_t attr_len,
uint64_t flags, struct fid **fid, void *context);
Expand Down Expand Up @@ -128,6 +139,12 @@ struct ofi_mr_cache;
/*
 * Per-interface HMEM information, overlaid in a single union.
 * NOTE(review): presumably only the member matching the owning HMEM
 * interface/monitor is valid at any time -- confirm against the users.
 *
 * The kdreg2 member exists only when HAVE_KDREG2_MONITOR is non-zero, so
 * the union's contents (and possibly its size) depend on that setting.
 */
union ofi_mr_hmem_info {
uint64_t cuda_id;
uint64_t ze_id;
#if HAVE_KDREG2_MONITOR
/* kdreg2 state: a cookie plus the monitoring parameters, both using
 * types supplied by kdreg2.h. */
struct {
kdreg2_cookie_t cookie;
struct kdreg2_monitoring_params monitoring_params;
} kdreg2;
#endif
};

struct ofi_mr_entry {
Expand Down Expand Up @@ -229,6 +246,23 @@ struct ofi_memhooks {

extern struct ofi_mem_monitor *memhooks_monitor;

/*
* Kdreg2 monitor
*/

struct kdreg2_status_data;

/*
 * State for the kdreg2 memory monitor.
 *
 * Declared unconditionally: status_data is only held by pointer to the
 * forward-declared struct kdreg2_status_data, so this definition does not
 * need kdreg2.h to be included.
 */
struct ofi_kdreg2 {
/* Generic monitor object embedded as the first member. */
struct ofi_mem_monitor monitor;
/* NOTE(review): presumably the thread servicing kdreg2 events -- confirm. */
pthread_t thread;
/* NOTE(review): presumably the descriptor of the opened kdreg2 device -- confirm. */
int fd;
/* NOTE(review): looks like a pipe pair used to signal thread exit -- confirm. */
int exit_pipe[2];
/* Read-only view of kdreg2 status data; ownership not visible here. */
const struct kdreg2_status_data *status_data;
/* 64-bit atomic counter; by its name, the source of the next cookie value. */
ofi_atomic64_t next_cookie;
};

extern struct ofi_mem_monitor *kdreg2_monitor;

extern struct ofi_mem_monitor *cuda_monitor;
extern struct ofi_mem_monitor *cuda_ipc_monitor;
extern struct ofi_mem_monitor *rocr_monitor;
Expand Down Expand Up @@ -368,7 +402,7 @@ struct ofi_mr_cache {
struct ofi_rbmap tree;
struct dlist_entry lru_list;
struct dlist_entry dead_region_list;
pthread_mutex_t lock;
pthread_mutex_t lock;

size_t cached_cnt;
size_t cached_size;
Expand Down
Loading

0 comments on commit 210a853

Please sign in to comment.