Skip to content

Commit

Permalink
prov/util: Narrow uffd pagefault handler for non-backed writes only.
Browse files Browse the repository at this point in the history
Page faults come in 3 flavors: reads, writes and writes to protected
pages.  The only ones we can handle are writes to non-backed pages.

NETCASSINI-3603

Signed-off-by: Mike Uttormark <mike.uttormark@hpe.com>
  • Loading branch information
muttormark authored and iziemba committed Oct 9, 2024
1 parent 30c74d4 commit d2b5554
Show file tree
Hide file tree
Showing 2 changed files with 143 additions and 77 deletions.
31 changes: 31 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,37 @@ AS_IF([test $have_uffd -eq 1],
AC_DEFINE_UNQUOTED([HAVE_UFFD_UNMAP], [$have_uffd],
[Define to 1 if platform supports userfault fd unmap])

dnl Check uffd thread id support.
dnl
dnl Only attempted when the earlier userfaultfd check set have_uffd to 1.
dnl Note that this is a compile-only probe (AC_COMPILE_IFELSE): it verifies
dnl that <linux/userfaultfd.h> declares UFFD_FEATURE_THREAD_ID alongside the
dnl UNMAP/REMOVE/REMAP event features; the test program is never executed, so
dnl it does not prove that the running kernel accepts the feature.
dnl The result (0 or 1) is exported to C code as HAVE_UFFD_THREAD_ID.
have_uffd_thread_id=0
AS_IF([test $have_uffd -eq 1],
[AC_MSG_CHECKING([for userfaultfd thread id support])
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
#include <sys/types.h>
#include <linux/userfaultfd.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <fcntl.h>
#include <sys/ioctl.h>
]],
[[
int fd;
struct uffdio_api api_obj;
api_obj.api = UFFD_API;
api_obj.features = UFFD_FEATURE_THREAD_ID |
UFFD_FEATURE_EVENT_UNMAP |
UFFD_FEATURE_EVENT_REMOVE |
UFFD_FEATURE_EVENT_REMAP;
fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
return ioctl(fd, UFFDIO_API, &api_obj);
]])
],
[AC_MSG_RESULT([yes])
have_uffd_thread_id=1],
[AC_MSG_RESULT([no])])])

AC_DEFINE_UNQUOTED([HAVE_UFFD_THREAD_ID], [$have_uffd_thread_id],
[Define to 1 if platform supports userfault fd thread id])

dnl restricted DL open
restricted_dl=0
AC_ARG_ENABLE([restricted_dl],
Expand Down
189 changes: 112 additions & 77 deletions prov/util/src/util_mem_monitor.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Copyright (c) 2017-2021 Intel Inc. All rights reserved.
* Copyright (c) 2019-2021 Amazon.com, Inc. or its affiliates.
* All rights reserved.
* (C) Copyright 2020 Hewlett Packard Enterprise Development LP
* (C) Copyright 2020-2024 Hewlett Packard Enterprise Development LP
* Copyright (C) 2024 Cornelis Networks. All rights reserved.
*
* This software is available to you under a choice of one of two
Expand Down Expand Up @@ -554,6 +554,8 @@ void ofi_monitor_unsubscribe_no_op(struct ofi_mem_monitor *notifier,
#include <sys/ioctl.h>
#include <linux/userfaultfd.h>

static void ofi_uffd_pagefault_handler(struct uffd_msg *msg);

/* The userfault fd monitor requires for events that could
* trigger it to be handled outside of the monitor functions
* itself. When a fault occurs on a monitored region, the
Expand All @@ -566,12 +568,8 @@ void ofi_monitor_unsubscribe_no_op(struct ofi_mem_monitor *notifier,
static void *ofi_uffd_handler(void *arg)
{
struct uffd_msg msg;
struct uffdio_zeropage zp;
struct pollfd fds;
int ret;
int i;
void *address;
bool found;

fds.fd = uffd.fd;
fds.events = POLLIN;
Expand Down Expand Up @@ -612,78 +610,7 @@ static void *ofi_uffd_handler(void *arg)
(size_t) msg.arg.remap.len);
break;
case UFFD_EVENT_PAGEFAULT:

/* The event tells us the address of the fault
* (which can be anywhere on the page). It does not
* tell us the size of the page so we have to guess
* from the list of known page_sizes.
*
* We employ the standard resolution: install a zeroed page.
*/

address = (void *) (uintptr_t) msg.arg.pagefault.address;
found = false;

for (i = 0; i < num_page_sizes; ) {
/* setup a zeropage reqest for this pagesize */
zp.range.start = (uint64_t) (uintptr_t)
ofi_get_page_start(address, page_sizes[i]);
zp.range.len = (uint64_t) page_sizes[i];
zp.mode = 0;
zp.zeropage = 0;

ret = ioctl(uffd.fd, UFFDIO_ZEROPAGE, &zp);

if (0 == ret) { /* success */
found = true;
break;
}

/* Note: the documentation (man ioctl_userfaultfd) says
* that the ioctl() returns -1 on error and errno is set
* to indicate the error. It also says that the zeropage
* member of struct uffdio_zeropage is set to the negated
* error. The unit tests for uffd say
* real retval in uffdio_zeropage.zeropage
* so that's what we use here.
*/

if (-EAGAIN == zp.zeropage) {
/* This is a tough case. If the memory map is
* changing, the kernel returns EAGAIN before
* servicing the zeropage request. So the page
* fault has not been rectified. If we don't try
* again, the application will crash. If we add
* a maximum retry count we could still end up
* with an unresolved page fault.
*
* It's likely a kernel bug if it returns EAGAIN
* forever. So we retry until we get a return
* value from the ioctl that is not EAGAIN.
*/
continue;
}
i++;

if (-EINVAL == zp.zeropage) /* wrong page size */
continue;

/* If we get here we failed to install the zeroed
* page for this pagesize and it wasn't a size error.
* We could either stop trying or go on to the
* next pagesize. We choose to print a warning and try
* another pagesize.
*/

FI_DBG(&core_prov, FI_LOG_MR,
"Unable to install zeroed page of size %lu to rectify page fault."
" address = %p zeropage = %lld errno = %d\n",
page_sizes[i], address, zp.zeropage, errno);
}
if (!found)
FI_WARN(&core_prov, FI_LOG_MR,
"Unable to handle event UFFD_EVENT_PAGEFAULT for address %p.\n",
address);
ofi_uffd_pagefault_handler(&msg);
break;
default:
FI_WARN(&core_prov, FI_LOG_MR,
Expand All @@ -696,6 +623,114 @@ static void *ofi_uffd_handler(void *arg)
return NULL;
}

static void ofi_uffd_pagefault_handler(struct uffd_msg *msg)
{
struct uffdio_zeropage zp;
int i;
int ret;
void * const address = (void *) (uintptr_t) msg->arg.pagefault.address;
uint64_t const flags = (uint64_t) msg->arg.pagefault.flags;
#if HAVE_UFFD_THREAD_ID
uint32_t const ptid = (uint32_t) msg->arg.pagefault.feat.ptid;
#endif
/* ofi_uffd_register sets the mode to
* UFFDIO_REGISTER_MODE_MISSING. As a result, we can
* get read, write or write-protect notifications via
* UFFD_EVENT_PAGEFAULT. The only ones we can sensibly
* handle are writes to non-backed pages.
* (Read and write-protect nofications are likely
* application bugs.)
*/

if (UFFD_PAGEFAULT_FLAG_WRITE != flags) {
#if HAVE_UFFD_THREAD_ID
FI_WARN(&core_prov, FI_LOG_MR,
"UFFD pagefault with unrecognized flags: %lu, address %p, thread %u\n",
flags, address, ptid);
#else
FI_WARN(&core_prov, FI_LOG_MR,
"UFFD pagefault with unrecognized flags: %lu, address %p\n",
flags, address);
#endif
/* The faulting thread is halted at this point. In
* theory we could wake it up with UFFDIO_WAKE. In
* practice that requires the address range of the
* fault, information we don't have from the
* pagefault event.
*/

return;
}

/* The event tells us the address of the fault
* (which can be anywhere on the page). It does not
* tell us the size of the page so we have to guess
* from the list of known page_sizes.
*
* We employ the standard resolution: install a zeroed page.
*/

for (i = 0; i < num_page_sizes; ) {
/* setup a zeropage reqest for this pagesize */
zp.range.start = (uint64_t) (uintptr_t)
ofi_get_page_start(address, page_sizes[i]);
zp.range.len = (uint64_t) page_sizes[i];
zp.mode = 0;
zp.zeropage = 0;

ret = ioctl(uffd.fd, UFFDIO_ZEROPAGE, &zp);

if (0 == ret) /* success */
return;

/* Note: the documentation (man ioctl_userfaultfd) says
* that the ioctl() returns -1 on error and errno is set
* to indicate the error. It also says that the zeropage
* member of struct uffdio_zeropage is set to the negated
* error. The unit tests for uffd say
* real retval in uffdio_zeropage.zeropage
* so that's what we use here.
*/

if (-EAGAIN == zp.zeropage)
/* This is a tough case. If the memory map is
* changing, the kernel returns EAGAIN before
* installing the zeroed page. So the page
* fault has not been rectified. If we don't try
* again, the application will crash. If we add
* a maximum retry count we could still end up
* with an unresolved page fault.
*
* It's likely a kernel bug or (something else
* bad like OOM) if it returns EAGAIN forever.
* So we retry until we get something besides
* EAGAIN.
*/
continue; /* retry this page size */

i++; /* try next page size */

if (-EINVAL == zp.zeropage) /* wrong page size */
continue;

/* If we get here we failed to install the zeroed
* page for this page size and it wasn't a size error.
* We could either stop trying or go on to the
* next pagesize. We choose to print a message and try
* another page size.
*/

FI_DBG(&core_prov, FI_LOG_MR,
"Unable to install zeroed page of size %zu to handle page fault."
" address = %p zeropage = %lld errno = %d\n",
page_sizes[i], address, zp.zeropage, errno);
}

FI_WARN(&core_prov, FI_LOG_MR,
"Unable to handle event UFFD_EVENT_PAGEFAULT for address %p.\n",
address);
}

static int ofi_uffd_register(const void *addr, size_t len, size_t page_size)
{
struct uffdio_register reg;
Expand Down

0 comments on commit d2b5554

Please sign in to comment.