Skip to content

Commit

Permalink
Checkpoint restructuring of hardware resource types. (#98)
Browse files Browse the repository at this point in the history
Signed-off-by: Samuel K. Gutierrez <samuel@lanl.gov>
  • Loading branch information
samuelkgutierrez authored Mar 20, 2024
1 parent e63487c commit 8240172
Show file tree
Hide file tree
Showing 9 changed files with 112 additions and 144 deletions.
3 changes: 1 addition & 2 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2020-2023 Triad National Security, LLC
# Copyright (c) 2020-2024 Triad National Security, LLC
# All rights reserved.
#
# Copyright (c) 2020-2021 Lawrence Livermore National Security, LLC
Expand Down Expand Up @@ -29,7 +29,6 @@ add_library(
qvi-bbuff-rmi.h
qvi-context.h
qvi-task.h
qvi-devinfo.h
qvi-group.h
qvi-map.h
qvi-scope.h
Expand Down
2 changes: 1 addition & 1 deletion src/qvi-context.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
*/

#include "qvi-common.h" // IWYU pragma: keep

#include "qvi-context.h"
#include "qvi-utils.h"

int
qvi_context_new(
Expand Down
98 changes: 0 additions & 98 deletions src/qvi-devinfo.h

This file was deleted.

38 changes: 19 additions & 19 deletions src/qvi-hwloc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@
#include "qvi-nvml.h"
#include "qvi-rsmi.h"

static constexpr int pci_bus_id_buff_size = 16;
static constexpr int dev_name_buff_size = 32;
static constexpr int uuid_buff_size = 64;
static constexpr int PCI_BUS_ID_BUFF_SIZE = 16;
static constexpr int DEV_NAME_BUFF_SIZE = 32;
static constexpr int UUID_BUFF_SIZE = 64;

/** Device list type. */
using qvi_hwloc_dev_list_t = std::vector<
Expand Down Expand Up @@ -70,11 +70,11 @@ typedef struct qvi_hwloc_device_s {
/** CUDA/ROCm visible devices ID */
int visdev_id = QVI_HWLOC_DEVICE_INVISIBLE_ID;
/** Device name */
char name[dev_name_buff_size] = {'\0'};
char name[DEV_NAME_BUFF_SIZE] = {'\0'};
/** PCI bus ID */
char pci_bus_id[pci_bus_id_buff_size] = {'\0'};
char pci_bus_id[PCI_BUS_ID_BUFF_SIZE] = {'\0'};
/** UUID */
char uuid[uuid_buff_size] = {'\0'};
char uuid[UUID_BUFF_SIZE] = {'\0'};
/** Constructor */
qvi_hwloc_device_s(void)
{
Expand Down Expand Up @@ -377,19 +377,19 @@ set_general_device_info(
// Save device name.
int nw = snprintf(
device->name,
dev_name_buff_size,
DEV_NAME_BUFF_SIZE,
"%s", obj->name
);
if (nw >= dev_name_buff_size) {
if (nw >= DEV_NAME_BUFF_SIZE) {
return QV_ERR_INTERNAL;
}
// Set the PCI bus ID.
nw = snprintf(
device->pci_bus_id,
pci_bus_id_buff_size,
PCI_BUS_ID_BUFF_SIZE,
"%s", pci_bus_id
);
if (nw >= pci_bus_id_buff_size) {
if (nw >= PCI_BUS_ID_BUFF_SIZE) {
return QV_ERR_INTERNAL;
}
// Set visible device ID, if applicable.
Expand All @@ -407,10 +407,10 @@ set_gpu_device_info(
if (sscanf(obj->name, "rsmi%d", &id) == 1) {
device->smi = id;
int nw = snprintf(
device->uuid, uuid_buff_size, "%s",
device->uuid, UUID_BUFF_SIZE, "%s",
hwloc_obj_get_info_by_name(obj, "AMDUUID")
);
if (nw >= uuid_buff_size) {
if (nw >= UUID_BUFF_SIZE) {
return QV_ERR_INTERNAL;
}
return qvi_hwloc_rsmi_get_device_cpuset_by_device_id(
Expand All @@ -423,10 +423,10 @@ set_gpu_device_info(
if (sscanf(obj->name, "nvml%d", &id) == 1) {
device->smi = id;
int nw = snprintf(
device->uuid, uuid_buff_size, "%s",
device->uuid, UUID_BUFF_SIZE, "%s",
hwloc_obj_get_info_by_name(obj, "NVIDIAUUID")
);
if (nw >= uuid_buff_size) {
if (nw >= UUID_BUFF_SIZE) {
return QV_ERR_INTERNAL;
}
return qvi_hwloc_nvml_get_device_cpuset_by_pci_bus_id(
Expand All @@ -446,11 +446,11 @@ set_of_device_info(
) {
// TODO(skg) Get cpuset, if available.
int nw = snprintf(
device->uuid, uuid_buff_size, "%s",
device->uuid, UUID_BUFF_SIZE, "%s",
hwloc_obj_get_info_by_name(obj, "NodeGUID")
);
// Internal error because our buffer is too small.
if (nw >= uuid_buff_size) return QV_ERR_INTERNAL;
if (nw >= UUID_BUFF_SIZE) return QV_ERR_INTERNAL;
return QV_SUCCESS;
}

Expand All @@ -471,7 +471,7 @@ discover_all_devices(
continue;
}
// Try to get the PCI object.
char busid[pci_bus_id_buff_size] = {'\0'};
char busid[PCI_BUS_ID_BUFF_SIZE] = {'\0'};
hwloc_obj_t pci_obj = get_pci_busid(obj, busid, sizeof(busid));
if (!pci_obj) continue;
// Have we seen this device already? For example, opencl0d0 and cuda0
Expand Down Expand Up @@ -509,7 +509,7 @@ discover_gpu_devices(
continue;
}
// Try to get the PCI object.
char busid[pci_bus_id_buff_size] = {'\0'};
char busid[PCI_BUS_ID_BUFF_SIZE] = {'\0'};
hwloc_obj_t pci_obj = get_pci_busid(obj, busid, sizeof(busid));
if (!pci_obj) continue;

Expand Down Expand Up @@ -573,7 +573,7 @@ discover_nic_devices(
while ((obj = hwloc_get_next_osdev(hwl->topo, obj)) != nullptr) {
if (obj->attr->osdev.type != HWLOC_OBJ_OSDEV_OPENFABRICS) continue;
// Try to get the PCI object.
char busid[pci_bus_id_buff_size] = {'\0'};
char busid[PCI_BUS_ID_BUFF_SIZE] = {'\0'};
hwloc_obj_t pci_obj = get_pci_busid(obj, busid, sizeof(busid));
if (!pci_obj) continue;

Expand Down
31 changes: 23 additions & 8 deletions src/qvi-hwpool.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,6 @@
// is zero, then the resource is not in use. For devices, we can take a similar
// approach using the device IDs instead of the bit positions.

#include "qvi-common.h"

#include "qvi-hwpool.h"
#include "qvi-hwloc.h"
#include "qvi-utils.h"
Expand Down Expand Up @@ -93,14 +91,13 @@ struct qvi_hwpool_cpus_s : qvi_hwpool_resource_s {
int qvim_rc = QV_ERR_INTERNAL;
/** The cpuset of the maintained CPUs. */
qvi_hwloc_bitmap_t cpuset;

/** Constructor */
qvi_hwpool_cpus_s(void)
{
qvim_rc = qvi_construct_rc(cpuset);
}

virtual
~qvi_hwpool_cpus_s(void) = default;
/** Destructor */
virtual ~qvi_hwpool_cpus_s(void) = default;
};

struct qvi_hwpool_s {
Expand All @@ -109,7 +106,6 @@ struct qvi_hwpool_s {
qvi_hwpool_cpus_s cpus;
/** Device information. */
qvi_hwpool_devinfos_t devinfos;
// TODO(skg) Add owner to structure?
/** The obtained cpuset of this resource pool. */
hwloc_bitmap_t obcpuset = nullptr;

Expand Down Expand Up @@ -248,7 +244,7 @@ qvi_hwpool_add_device(
cstr_t uuid,
hwloc_const_cpuset_t affinity
) {
auto dinfo = std::make_shared<qvi_devinfo_t>(
auto dinfo = std::make_shared<qvi_hwpool_devinfo_s>(
type, id, pcibid, uuid, affinity
);
const int rc = qvi_construct_rc(dinfo);
Expand Down Expand Up @@ -485,6 +481,25 @@ qvi_hwpool_unpack(
return rc;
}

/**
 * Specialization of std::hash for qvi_hwpool_devinfo_s so that instances can
 * be used as keys in unordered containers such as std::unordered_set.
 *
 * Only the device ID and device type contribute to the hash value, which are
 * the same two fields compared by qvi_hwpool_devinfo_s::operator==.
 */
namespace std {
    template <>
    struct hash<qvi_hwpool_devinfo_s>
    {
        size_t
        operator()(const qvi_hwpool_devinfo_s &x) const
        {
            const int dev_id = x.id;
            const int dev_type = static_cast<int>(x.type);
            // Fold the (ID, type) pair into one integer, then hash it.
            const int64_t paired = qvi_cantor_pairing(dev_id, dev_type);
            return hash<int64_t>{}(paired);
        }
    };
}

/*
* vim: ft=cpp ts=4 sts=4 sw=4 expandtab
*/
57 changes: 54 additions & 3 deletions src/qvi-hwpool.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* -*- Mode: C++; c-basic-offset:4; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2022 Triad National Security, LLC
* Copyright (c) 2022-2024 Triad National Security, LLC
* All rights reserved.
*
* This file is part of the quo-vadis project. See the LICENSE file at the
Expand All @@ -17,11 +17,62 @@
#define QVI_HWPOOL_H

#include "qvi-common.h"
#include "qvi-devinfo.h"
#include "qvi-line.h"

/**
 * Device information.
 *
 * Owns heap copies of the PCI bus ID string, the UUID string, and the CPU
 * affinity bitmap passed to the constructor; all three are released in the
 * destructor. The outcome of construction is recorded in qvim_rc.
 *
 * NOTE(review): no copy or move operations are declared, so the implicitly
 * generated ones would shallow-copy the owned pointers and lead to a double
 * free. The visible uses hold instances through std::shared_ptr; confirm no
 * caller copies instances by value, or define/delete those operations.
 */
struct qvi_hwpool_devinfo_s {
    /** Construction status; stays QV_ERR_INTERNAL until the constructor sets it. */
    int qvim_rc = QV_ERR_INTERNAL;
    /** Device type. */
    qv_hw_obj_type_t type = QV_HW_OBJ_LAST;
    /** Device ID. */
    int id = 0;
    /** The PCI bus ID (owned, released with free()). */
    char *pci_bus_id = nullptr;
    /** UUID (owned, released with free()). */
    char *uuid = nullptr;
    /** The bitmap encoding CPU affinity (owned). */
    hwloc_bitmap_t affinity = nullptr;
    /**
     * Constructor.
     *
     * @param t Device type.
     * @param i Device ID.
     * @param pci_bus_id PCI bus ID string; copied.
     * @param uuid UUID string; copied.
     * @param c CPU affinity bitmap; duplicated.
     *
     * On return qvim_rc holds QV_ERR_OOR if either string copy failed,
     * otherwise the value returned by qvi_hwloc_bitmap_dup().
     */
    qvi_hwpool_devinfo_s(
        qv_hw_obj_type_t t,
        int i,
        cstr_t pci_bus_id,
        cstr_t uuid,
        hwloc_const_cpuset_t c
    ) : type(t)
      , id(i)
    {
        int nw = asprintf(&this->pci_bus_id, "%s", pci_bus_id);
        if (nw == -1) {
            // asprintf() leaves its output pointer undefined on failure, so
            // reset it to keep the destructor's free() well defined.
            this->pci_bus_id = nullptr;
            qvim_rc = QV_ERR_OOR;
            return;
        }

        nw = asprintf(&this->uuid, "%s", uuid);
        if (nw == -1) {
            // Same as above: never hand an undefined pointer to free().
            this->uuid = nullptr;
            qvim_rc = QV_ERR_OOR;
            return;
        }

        qvim_rc = qvi_hwloc_bitmap_dup(c, &affinity);
    }
    /** Destructor: releases the affinity bitmap and both owned strings. */
    ~qvi_hwpool_devinfo_s(void)
    {
        qvi_hwloc_bitmap_free(&affinity);
        free(pci_bus_id);
        free(uuid);
    }
    /**
     * Equality operator: two instances compare equal when both their device
     * ID and device type match; the other members are not compared.
     */
    bool
    operator==(const qvi_hwpool_devinfo_s &x) const
    {
        return id == x.id && type == x.type;
    }
};

using qvi_hwpool_devinfos_t = std::multimap<
int, std::shared_ptr<qvi_devinfo_t>
int, std::shared_ptr<qvi_hwpool_devinfo_s>
>;

struct qvi_hwpool_s;
Expand Down
2 changes: 1 addition & 1 deletion src/qvi-macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ do { \
} while (0)

/**
* Convenience wrapper around new(std::nothrow).
* Convenience macro for new(std::nothrow).
*/
#define qvi_new new(std::nothrow)

Expand Down
Loading

0 comments on commit 8240172

Please sign in to comment.