diff --git a/rocrtst/suites/aie/aie_hsa_dispatch_test.cc b/rocrtst/suites/aie/aie_hsa_dispatch_test.cc index 5d54d35ac..5da23d3a7 100644 --- a/rocrtst/suites/aie/aie_hsa_dispatch_test.cc +++ b/rocrtst/suites/aie/aie_hsa_dispatch_test.cc @@ -17,6 +17,8 @@ #include "hsa/hsa.h" #include "hsa/hsa_ext_amd.h" +constexpr int NUM_WRAP_AROUNDS = 256; + namespace { hsa_status_t get_agent(hsa_agent_t agent, std::vector *agents, @@ -194,7 +196,7 @@ int main(int argc, char **argv) { uint32_t aie_max_queue_size; r = hsa_agent_get_info(aie_agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &aie_max_queue_size); assert(r == HSA_STATUS_SUCCESS); - int num_pkts = aie_max_queue_size; + int num_pkts = aie_max_queue_size * NUM_WRAP_AROUNDS; // Load the DPU and PDI files into a global pool that doesn't support kernel // args (DEV BO). @@ -237,70 +239,76 @@ int main(int argc, char **argv) { uint64_t wr_idx = 0; uint64_t packet_id = 0; + uint32_t pkt_iter = 0; + + for (int wrap_around = 0; wrap_around < NUM_WRAP_AROUNDS; wrap_around++) { + for (int queue_iter = 0; queue_iter < aie_max_queue_size; queue_iter++) { + r = hsa_amd_memory_pool_allocate(global_kernarg_mem_pool, data_buffer_size, 0, + reinterpret_cast(&input[pkt_iter])); + assert(r == HSA_STATUS_SUCCESS); + r = hsa_amd_get_handle_from_vaddr(input[pkt_iter], &input_handle[pkt_iter]); + assert(r == HSA_STATUS_SUCCESS); + assert(input_handle[pkt_iter] != 0); + + r = hsa_amd_memory_pool_allocate(global_kernarg_mem_pool, data_buffer_size, 0, + reinterpret_cast(&output[pkt_iter])); + assert(r == HSA_STATUS_SUCCESS); + r = hsa_amd_get_handle_from_vaddr(output[pkt_iter], &output_handle[pkt_iter]); + assert(r == HSA_STATUS_SUCCESS); + assert(output_handle[pkt_iter] != 0); + + for (std::size_t i = 0; i < num_data_elements; i++) { + *(input[pkt_iter] + i) = i * (pkt_iter + 1); + *(output[pkt_iter] + i) = 0xDEFACE; + } - for (int pkt_iter = 0; pkt_iter < num_pkts; pkt_iter++) { - r = hsa_amd_memory_pool_allocate(global_kernarg_mem_pool, data_buffer_size, 0, - reinterpret_cast(&input[pkt_iter])); - assert(r == HSA_STATUS_SUCCESS); - r = hsa_amd_get_handle_from_vaddr(input[pkt_iter], &input_handle[pkt_iter]); - assert(r == HSA_STATUS_SUCCESS); - assert(input_handle[pkt_iter] != 0); - - r = hsa_amd_memory_pool_allocate(global_kernarg_mem_pool, data_buffer_size, 0, - reinterpret_cast(&output[pkt_iter])); - assert(r == HSA_STATUS_SUCCESS); - r = hsa_amd_get_handle_from_vaddr(output[pkt_iter], &output_handle[pkt_iter]); - assert(r == HSA_STATUS_SUCCESS); - assert(output_handle[pkt_iter] != 0); - - for (std::size_t i = 0; i < num_data_elements; i++) { - *(input[pkt_iter] + i) = i * (pkt_iter + 1); - *(output[pkt_iter] + i) = 0xDEFACE; + // Getting a slot in the queue + wr_idx = hsa_queue_add_write_index_relaxed(aie_queue, 1); + packet_id = wr_idx % aie_queue->size; + + // Creating a packet to store the command + hsa_amd_aie_ert_packet_t *cmd_pkt = static_cast( + aie_queue->base_address) + packet_id; + assert(r == HSA_STATUS_SUCCESS); + cmd_pkt->state = HSA_AMD_AIE_ERT_STATE_NEW; + cmd_pkt->count = 0xA; // # of arguments to put in command + cmd_pkt->opcode = HSA_AMD_AIE_ERT_START_CU; + cmd_pkt->header.AmdFormat = HSA_AMD_PACKET_TYPE_AIE_ERT; + cmd_pkt->header.header = HSA_PACKET_TYPE_VENDOR_SPECIFIC + << HSA_PACKET_HEADER_TYPE; + + // Creating the payload for the packet + hsa_amd_aie_ert_start_kernel_data_t *cmd_payload = NULL; + assert(r == HSA_STATUS_SUCCESS); + r = hsa_amd_memory_pool_allocate(global_kernarg_mem_pool, 64, 0, + reinterpret_cast(&cmd_payload)); + assert(r == HSA_STATUS_SUCCESS); + // Selecting the PDI to use with this command + cmd_payload->cu_mask = 0x1; + // Transaction opcode + cmd_payload->data[0] = 0x3; + cmd_payload->data[1] = 0x0; + cmd_payload->data[2] = instr_handle; + cmd_payload->data[3] = 0x0; + cmd_payload->data[4] = num_instr; + cmd_payload->data[5] = input_handle[pkt_iter]; + cmd_payload->data[6] = 0; + cmd_payload->data[7] = output_handle[pkt_iter]; + cmd_payload->data[8] = 0; + cmd_pkt->payload_data = reinterpret_cast(cmd_payload); + + // Keeping track of payloads so we can free them at the end + cmd_payloads[pkt_iter] = cmd_payload; + + // Updating out pkt count + pkt_iter++; } - // Getting a slot in the queue - wr_idx = hsa_queue_add_write_index_relaxed(aie_queue, 1); - packet_id = wr_idx % aie_queue->size; - - // Creating a packet to store the command - hsa_amd_aie_ert_packet_t *cmd_pkt = static_cast( - aie_queue->base_address) + packet_id; - assert(r == HSA_STATUS_SUCCESS); - cmd_pkt->state = HSA_AMD_AIE_ERT_STATE_NEW; - cmd_pkt->count = 0xA; // # of arguments to put in command - cmd_pkt->opcode = HSA_AMD_AIE_ERT_START_CU; - cmd_pkt->header.AmdFormat = HSA_AMD_PACKET_TYPE_AIE_ERT; - cmd_pkt->header.header = HSA_PACKET_TYPE_VENDOR_SPECIFIC - << HSA_PACKET_HEADER_TYPE; - - // Creating the payload for the packet - hsa_amd_aie_ert_start_kernel_data_t *cmd_payload = NULL; - assert(r == HSA_STATUS_SUCCESS); - r = hsa_amd_memory_pool_allocate(global_kernarg_mem_pool, 64, 0, - reinterpret_cast(&cmd_payload)); - assert(r == HSA_STATUS_SUCCESS); - // Selecting the PDI to use with this command - cmd_payload->cu_mask = 0x1; - // Transaction opcode - cmd_payload->data[0] = 0x3; - cmd_payload->data[1] = 0x0; - cmd_payload->data[2] = instr_handle; - cmd_payload->data[3] = 0x0; - cmd_payload->data[4] = num_instr; - cmd_payload->data[5] = input_handle[pkt_iter]; - cmd_payload->data[6] = 0; - cmd_payload->data[7] = output_handle[pkt_iter]; - cmd_payload->data[8] = 0; - cmd_pkt->payload_data = reinterpret_cast(cmd_payload); - - // Keeping track of payloads so we can free them at the end - cmd_payloads[pkt_iter] = cmd_payload; + // Ringing the doorbell to dispatch each packet we added to + // the queue + hsa_signal_store_screlease(aie_queue->doorbell_signal, wr_idx); } - // Ringing the doorbell to dispatch each packet we added to - // the queue - hsa_signal_store_screlease(aie_queue->doorbell_signal, wr_idx); - for (int pkt_iter = 0; pkt_iter < num_pkts; pkt_iter++) { for (std::size_t i = 0; i < num_data_elements; i++) { const auto expected = *(input[pkt_iter] + i) + 1; diff --git a/runtime/hsa-runtime/core/inc/amd_aie_aql_queue.h b/runtime/hsa-runtime/core/inc/amd_aie_aql_queue.h index 224b85d7c..c39c6bb09 100644 --- a/runtime/hsa-runtime/core/inc/amd_aie_aql_queue.h +++ b/runtime/hsa-runtime/core/inc/amd_aie_aql_queue.h @@ -164,8 +164,8 @@ class AieAqlQueue : public core::Queue, static hsa_status_t SubmitCmd( uint32_t hw_ctx_handle, int fd, void *queue_base, - uint64_t read_dispatch_id, uint64_t write_dispatch_id, - std::unordered_map &vmem_handle_mappings); + volatile uint64_t &read_dispatch_id, volatile uint64_t write_dispatch_id, + uint64_t queue_size, std::unordered_map &vmem_handle_mappings); /// @brief Creates a command BO and returns a pointer to the memory and // the corresponding handle diff --git a/runtime/hsa-runtime/core/runtime/amd_aie_aql_queue.cpp b/runtime/hsa-runtime/core/runtime/amd_aie_aql_queue.cpp index 283b5af60..38eb3e3a7 100644 --- a/runtime/hsa-runtime/core/runtime/amd_aie_aql_queue.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_aie_aql_queue.cpp @@ -238,7 +238,7 @@ void AieAqlQueue::StoreRelaxed(hsa_signal_value_t value) { SubmitCmd(hw_ctx_handle_, fd, amd_queue_.hsa_queue.base_address, amd_queue_.read_dispatch_id, amd_queue_.write_dispatch_id, - vmem_handle_mappings); + amd_queue_.hsa_queue.size, vmem_handle_mappings); } hsa_status_t AieAqlQueue::SyncBos(std::vector &bo_args, int fd) { @@ -333,18 +333,18 @@ hsa_status_t AieAqlQueue::CreateCmd(uint32_t size, uint32_t *handle, } hsa_status_t AieAqlQueue::SubmitCmd( - uint32_t hw_ctx_handle, int fd, void *queue_base, uint64_t read_dispatch_id, - uint64_t write_dispatch_id, + uint32_t hw_ctx_handle, int fd, void *queue_base, volatile uint64_t &read_dispatch_id, + volatile uint64_t write_dispatch_id, uint64_t queue_size, std::unordered_map &vmem_handle_mappings) { uint64_t cur_id = read_dispatch_id; while (cur_id < write_dispatch_id) { hsa_amd_aie_ert_packet_t *pkt = - static_cast(queue_base) + cur_id; - + static_cast(queue_base) + (cur_id % queue_size); // Get the packet header information if (pkt->header.header != HSA_PACKET_TYPE_VENDOR_SPECIFIC || - pkt->header.AmdFormat != HSA_AMD_PACKET_TYPE_AIE_ERT) + pkt->header.AmdFormat != HSA_AMD_PACKET_TYPE_AIE_ERT) { return HSA_STATUS_ERROR; + } // Get the payload information switch (pkt->opcode) { @@ -358,9 +358,16 @@ hsa_status_t AieAqlQueue::SubmitCmd( // packets there are. All can be combined into a single chain. int num_cont_start_cu_pkts = 1; for (int peak_pkt_id = cur_id + 1; peak_pkt_id < write_dispatch_id; peak_pkt_id++) { - if (pkt->opcode != HSA_AMD_AIE_ERT_START_CU) { + hsa_amd_aie_ert_packet_t *peak_pkt = + static_cast(queue_base) + (peak_pkt_id % queue_size); + + // Get the packet header information to make sure the packet is valid + if (peak_pkt->header.header != HSA_PACKET_TYPE_VENDOR_SPECIFIC || + peak_pkt->header.AmdFormat != HSA_AMD_PACKET_TYPE_AIE_ERT || + peak_pkt->opcode != HSA_AMD_AIE_ERT_START_CU) { break; } + num_cont_start_cu_pkts++; } @@ -369,7 +376,7 @@ hsa_status_t AieAqlQueue::SubmitCmd( // Getting the current command packet hsa_amd_aie_ert_packet_t *pkt = - static_cast(queue_base) + pkt_iter; + static_cast(queue_base) + (pkt_iter % queue_size); hsa_amd_aie_ert_start_kernel_data_t *cmd_pkt_payload = reinterpret_cast( pkt->payload_data); @@ -400,6 +407,9 @@ hsa_status_t AieAqlQueue::SubmitCmd( cmd_handles.push_back(cmd_bo_handle); cmds.push_back(cmd); cmd_sizes.push_back(cmd_size); + + // Setting the pkt we just processed to invalid + pkt->header.header = HSA_PACKET_TYPE_INVALID; } // Creating a packet that contains the command chain @@ -474,6 +484,9 @@ hsa_status_t AieAqlQueue::SubmitCmd( } } + // Updating the read_dispatch_id + read_dispatch_id = cur_id; + return HSA_STATUS_SUCCESS; }