Skip to content
This repository has been archived by the owner on Dec 24, 2024. It is now read-only.

Fixing issue when user wraps around queue #30

Open
wants to merge 2 commits into
base: iree-aie
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 68 additions & 60 deletions rocrtst/suites/aie/aie_hsa_dispatch_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
#include "hsa/hsa.h"
#include "hsa/hsa_ext_amd.h"

// Number of times the test fills the AIE queue to its maximum size. The total
// packet count is the agent's maximum queue size multiplied by this value, so
// the packet slot (write index modulo queue size) is reused on every pass.
constexpr int NUM_WRAP_AROUNDS = 256;

namespace {

hsa_status_t get_agent(hsa_agent_t agent, std::vector<hsa_agent_t> *agents,
Expand Down Expand Up @@ -194,7 +196,7 @@ int main(int argc, char **argv) {
uint32_t aie_max_queue_size;
r = hsa_agent_get_info(aie_agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &aie_max_queue_size);
assert(r == HSA_STATUS_SUCCESS);
int num_pkts = aie_max_queue_size;
int num_pkts = aie_max_queue_size * NUM_WRAP_AROUNDS;

// Load the DPU and PDI files into a global pool that doesn't support kernel
// args (DEV BO).
Expand Down Expand Up @@ -237,70 +239,76 @@ int main(int argc, char **argv) {

uint64_t wr_idx = 0;
uint64_t packet_id = 0;
uint32_t pkt_iter = 0;

for (int wrap_around = 0; wrap_around < NUM_WRAP_AROUNDS; wrap_around++) {
for (int queue_iter = 0; queue_iter < aie_max_queue_size; queue_iter++) {
r = hsa_amd_memory_pool_allocate(global_kernarg_mem_pool, data_buffer_size, 0,
reinterpret_cast<void **>(&input[pkt_iter]));
assert(r == HSA_STATUS_SUCCESS);
r = hsa_amd_get_handle_from_vaddr(input[pkt_iter], &input_handle[pkt_iter]);
assert(r == HSA_STATUS_SUCCESS);
assert(input_handle[pkt_iter] != 0);

r = hsa_amd_memory_pool_allocate(global_kernarg_mem_pool, data_buffer_size, 0,
reinterpret_cast<void **>(&output[pkt_iter]));
assert(r == HSA_STATUS_SUCCESS);
r = hsa_amd_get_handle_from_vaddr(output[pkt_iter], &output_handle[pkt_iter]);
assert(r == HSA_STATUS_SUCCESS);
assert(output_handle[pkt_iter] != 0);

for (std::size_t i = 0; i < num_data_elements; i++) {
*(input[pkt_iter] + i) = i * (pkt_iter + 1);
*(output[pkt_iter] + i) = 0xDEFACE;
}

for (int pkt_iter = 0; pkt_iter < num_pkts; pkt_iter++) {
r = hsa_amd_memory_pool_allocate(global_kernarg_mem_pool, data_buffer_size, 0,
reinterpret_cast<void **>(&input[pkt_iter]));
assert(r == HSA_STATUS_SUCCESS);
r = hsa_amd_get_handle_from_vaddr(input[pkt_iter], &input_handle[pkt_iter]);
assert(r == HSA_STATUS_SUCCESS);
assert(input_handle[pkt_iter] != 0);

r = hsa_amd_memory_pool_allocate(global_kernarg_mem_pool, data_buffer_size, 0,
reinterpret_cast<void **>(&output[pkt_iter]));
assert(r == HSA_STATUS_SUCCESS);
r = hsa_amd_get_handle_from_vaddr(output[pkt_iter], &output_handle[pkt_iter]);
assert(r == HSA_STATUS_SUCCESS);
assert(output_handle[pkt_iter] != 0);

for (std::size_t i = 0; i < num_data_elements; i++) {
*(input[pkt_iter] + i) = i * (pkt_iter + 1);
*(output[pkt_iter] + i) = 0xDEFACE;
// Getting a slot in the queue
wr_idx = hsa_queue_add_write_index_relaxed(aie_queue, 1);
packet_id = wr_idx % aie_queue->size;

// Creating a packet to store the command
hsa_amd_aie_ert_packet_t *cmd_pkt = static_cast<hsa_amd_aie_ert_packet_t *>(
aie_queue->base_address) + packet_id;
assert(r == HSA_STATUS_SUCCESS);
cmd_pkt->state = HSA_AMD_AIE_ERT_STATE_NEW;
cmd_pkt->count = 0xA; // # of arguments to put in command
cmd_pkt->opcode = HSA_AMD_AIE_ERT_START_CU;
cmd_pkt->header.AmdFormat = HSA_AMD_PACKET_TYPE_AIE_ERT;
cmd_pkt->header.header = HSA_PACKET_TYPE_VENDOR_SPECIFIC
<< HSA_PACKET_HEADER_TYPE;

// Creating the payload for the packet
hsa_amd_aie_ert_start_kernel_data_t *cmd_payload = NULL;
assert(r == HSA_STATUS_SUCCESS);
r = hsa_amd_memory_pool_allocate(global_kernarg_mem_pool, 64, 0,
reinterpret_cast<void **>(&cmd_payload));
assert(r == HSA_STATUS_SUCCESS);
// Selecting the PDI to use with this command
cmd_payload->cu_mask = 0x1;
// Transaction opcode
cmd_payload->data[0] = 0x3;
cmd_payload->data[1] = 0x0;
cmd_payload->data[2] = instr_handle;
cmd_payload->data[3] = 0x0;
cmd_payload->data[4] = num_instr;
cmd_payload->data[5] = input_handle[pkt_iter];
cmd_payload->data[6] = 0;
cmd_payload->data[7] = output_handle[pkt_iter];
cmd_payload->data[8] = 0;
cmd_pkt->payload_data = reinterpret_cast<uint64_t>(cmd_payload);

// Keeping track of payloads so we can free them at the end
cmd_payloads[pkt_iter] = cmd_payload;

// Updating our packet count
pkt_iter++;
}

// Getting a slot in the queue
wr_idx = hsa_queue_add_write_index_relaxed(aie_queue, 1);
packet_id = wr_idx % aie_queue->size;

// Creating a packet to store the command
hsa_amd_aie_ert_packet_t *cmd_pkt = static_cast<hsa_amd_aie_ert_packet_t *>(
aie_queue->base_address) + packet_id;
assert(r == HSA_STATUS_SUCCESS);
cmd_pkt->state = HSA_AMD_AIE_ERT_STATE_NEW;
cmd_pkt->count = 0xA; // # of arguments to put in command
cmd_pkt->opcode = HSA_AMD_AIE_ERT_START_CU;
cmd_pkt->header.AmdFormat = HSA_AMD_PACKET_TYPE_AIE_ERT;
cmd_pkt->header.header = HSA_PACKET_TYPE_VENDOR_SPECIFIC
<< HSA_PACKET_HEADER_TYPE;

// Creating the payload for the packet
hsa_amd_aie_ert_start_kernel_data_t *cmd_payload = NULL;
assert(r == HSA_STATUS_SUCCESS);
r = hsa_amd_memory_pool_allocate(global_kernarg_mem_pool, 64, 0,
reinterpret_cast<void **>(&cmd_payload));
assert(r == HSA_STATUS_SUCCESS);
// Selecting the PDI to use with this command
cmd_payload->cu_mask = 0x1;
// Transaction opcode
cmd_payload->data[0] = 0x3;
cmd_payload->data[1] = 0x0;
cmd_payload->data[2] = instr_handle;
cmd_payload->data[3] = 0x0;
cmd_payload->data[4] = num_instr;
cmd_payload->data[5] = input_handle[pkt_iter];
cmd_payload->data[6] = 0;
cmd_payload->data[7] = output_handle[pkt_iter];
cmd_payload->data[8] = 0;
cmd_pkt->payload_data = reinterpret_cast<uint64_t>(cmd_payload);

// Keeping track of payloads so we can free them at the end
cmd_payloads[pkt_iter] = cmd_payload;
// Ringing the doorbell to dispatch each packet we added to
// the queue
hsa_signal_store_screlease(aie_queue->doorbell_signal, wr_idx);
}

// Ringing the doorbell to dispatch each packet we added to
// the queue
hsa_signal_store_screlease(aie_queue->doorbell_signal, wr_idx);

for (int pkt_iter = 0; pkt_iter < num_pkts; pkt_iter++) {
for (std::size_t i = 0; i < num_data_elements; i++) {
const auto expected = *(input[pkt_iter] + i) + 1;
Expand Down
4 changes: 2 additions & 2 deletions runtime/hsa-runtime/core/inc/amd_aie_aql_queue.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,8 @@ class AieAqlQueue : public core::Queue,

static hsa_status_t SubmitCmd(
uint32_t hw_ctx_handle, int fd, void *queue_base,
uint64_t read_dispatch_id, uint64_t write_dispatch_id,
std::unordered_map<uint32_t, void *> &vmem_handle_mappings);
volatile uint64_t &read_dispatch_id, volatile uint64_t write_dispatch_id,
uint64_t queue_size, std::unordered_map<uint32_t, void *> &vmem_handle_mappings);

/// @brief Creates a command BO and returns a pointer to the memory and
/// the corresponding handle
Expand Down
29 changes: 21 additions & 8 deletions runtime/hsa-runtime/core/runtime/amd_aie_aql_queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ void AieAqlQueue::StoreRelaxed(hsa_signal_value_t value) {

SubmitCmd(hw_ctx_handle_, fd, amd_queue_.hsa_queue.base_address,
amd_queue_.read_dispatch_id, amd_queue_.write_dispatch_id,
vmem_handle_mappings);
amd_queue_.hsa_queue.size, vmem_handle_mappings);
}

hsa_status_t AieAqlQueue::SyncBos(std::vector<uint32_t> &bo_args, int fd) {
Expand Down Expand Up @@ -333,18 +333,18 @@ hsa_status_t AieAqlQueue::CreateCmd(uint32_t size, uint32_t *handle,
}

hsa_status_t AieAqlQueue::SubmitCmd(
uint32_t hw_ctx_handle, int fd, void *queue_base, uint64_t read_dispatch_id,
uint64_t write_dispatch_id,
uint32_t hw_ctx_handle, int fd, void *queue_base, volatile uint64_t &read_dispatch_id,
volatile uint64_t write_dispatch_id, uint64_t queue_size,
std::unordered_map<uint32_t, void *> &vmem_handle_mappings) {
uint64_t cur_id = read_dispatch_id;
while (cur_id < write_dispatch_id) {
hsa_amd_aie_ert_packet_t *pkt =
static_cast<hsa_amd_aie_ert_packet_t *>(queue_base) + cur_id;

static_cast<hsa_amd_aie_ert_packet_t *>(queue_base) + (cur_id % queue_size);
// Get the packet header information
if (pkt->header.header != HSA_PACKET_TYPE_VENDOR_SPECIFIC ||
pkt->header.AmdFormat != HSA_AMD_PACKET_TYPE_AIE_ERT)
pkt->header.AmdFormat != HSA_AMD_PACKET_TYPE_AIE_ERT) {
return HSA_STATUS_ERROR;
}

// Get the payload information
switch (pkt->opcode) {
Expand All @@ -358,9 +358,16 @@ hsa_status_t AieAqlQueue::SubmitCmd(
// packets there are. All can be combined into a single chain.
int num_cont_start_cu_pkts = 1;
for (int peak_pkt_id = cur_id + 1; peak_pkt_id < write_dispatch_id; peak_pkt_id++) {
if (pkt->opcode != HSA_AMD_AIE_ERT_START_CU) {
hsa_amd_aie_ert_packet_t *peak_pkt =
static_cast<hsa_amd_aie_ert_packet_t *>(queue_base) + (peak_pkt_id % queue_size);

// Get the packet header information to make sure the packet is valid
if (peak_pkt->header.header != HSA_PACKET_TYPE_VENDOR_SPECIFIC ||
peak_pkt->header.AmdFormat != HSA_AMD_PACKET_TYPE_AIE_ERT ||
peak_pkt->opcode != HSA_AMD_AIE_ERT_START_CU) {
break;
}

num_cont_start_cu_pkts++;
}

Expand All @@ -369,7 +376,7 @@ hsa_status_t AieAqlQueue::SubmitCmd(

// Getting the current command packet
hsa_amd_aie_ert_packet_t *pkt =
static_cast<hsa_amd_aie_ert_packet_t *>(queue_base) + pkt_iter;
static_cast<hsa_amd_aie_ert_packet_t *>(queue_base) + (pkt_iter % queue_size);
hsa_amd_aie_ert_start_kernel_data_t *cmd_pkt_payload =
reinterpret_cast<hsa_amd_aie_ert_start_kernel_data_t *>(
pkt->payload_data);
Expand Down Expand Up @@ -400,6 +407,9 @@ hsa_status_t AieAqlQueue::SubmitCmd(
cmd_handles.push_back(cmd_bo_handle);
cmds.push_back(cmd);
cmd_sizes.push_back(cmd_size);

// Setting the pkt we just processed to invalid
pkt->header.header = HSA_PACKET_TYPE_INVALID;
}

// Creating a packet that contains the command chain
Expand Down Expand Up @@ -474,6 +484,9 @@ hsa_status_t AieAqlQueue::SubmitCmd(
}
}

// Updating the read_dispatch_id
read_dispatch_id = cur_id;

return HSA_STATUS_SUCCESS;
}

Expand Down
Loading