From 700572cc1a31544568286262826a55eb3b9b9ccf Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Wed, 18 Dec 2024 10:27:58 -0800 Subject: [PATCH] Adding experimental iree_io_file_map_view API. (#19515) This allows for any iree_io_file_handle_t to be mapped into host memory on platforms where doing so is supported. Eventually this will be replaced with a proper mapping object and view API that will allow us to unify the ELF loading, file_io utils, and parameter handling APIs. --- .../Transforms/ImportParameters.cpp | 4 +- runtime/bindings/python/io.cc | 9 +- runtime/src/iree/base/internal/file_io.c | 4 +- runtime/src/iree/io/file_handle.c | 452 +++++++++++++++++- runtime/src/iree/io/file_handle.h | 103 +++- .../src/iree/io/formats/gguf/gguf_parser.c | 28 +- .../src/iree/io/formats/gguf/gguf_parser.h | 6 +- .../iree/io/formats/gguf/gguf_parser_test.cc | 12 +- .../src/iree/io/formats/irpa/irpa_parser.c | 27 +- .../src/iree/io/formats/irpa/irpa_parser.h | 6 +- .../iree/io/formats/irpa/irpa_parser_test.cc | 12 +- runtime/src/iree/io/formats/parser_registry.c | 9 +- runtime/src/iree/io/formats/parser_registry.h | 5 +- .../formats/safetensors/safetensors_parser.c | 32 +- .../formats/safetensors/safetensors_parser.h | 6 +- .../safetensors/safetensors_parser_test.cc | 9 +- runtime/src/iree/tooling/parameter_util.c | 3 +- 17 files changed, 654 insertions(+), 73 deletions(-) diff --git a/compiler/src/iree/compiler/Modules/IO/Parameters/Transforms/ImportParameters.cpp b/compiler/src/iree/compiler/Modules/IO/Parameters/Transforms/ImportParameters.cpp index 288495eac3d6..91b6e690819b 100644 --- a/compiler/src/iree/compiler/Modules/IO/Parameters/Transforms/ImportParameters.cpp +++ b/compiler/src/iree/compiler/Modules/IO/Parameters/Transforms/ImportParameters.cpp @@ -80,10 +80,12 @@ loadParameterIndex(ModuleOp moduleOp, StringRef path, return failure(); // Parse the archive as a particular format. 
+ iree_allocator_t hostAllocator = iree_allocator_system(); return handleRuntimeError( moduleOp, iree_io_parse_file_index(iree_make_string_view(path.data(), path.size()), - fileHandle->get(), parameterIndex), + fileHandle->get(), parameterIndex, + hostAllocator), "parsing parameter archive"); } diff --git a/runtime/bindings/python/io.cc b/runtime/bindings/python/io.cc index a558cb90a5b0..07218f585a4e 100644 --- a/runtime/bindings/python/io.cc +++ b/runtime/bindings/python/io.cc @@ -98,10 +98,11 @@ void ParameterIndexAddFromFileHandle(ParameterIndex &self, std::string &key, void ParameterIndexParseFileHandle(ParameterIndex &self, FileHandle &file_handle, std::string &format) { - CheckApiStatus(iree_io_parse_file_index( - iree_make_string_view(format.data(), format.size()), - file_handle.raw_ptr(), self.raw_ptr()), - "Could not parse parameter file index"); + CheckApiStatus( + iree_io_parse_file_index( + iree_make_string_view(format.data(), format.size()), + file_handle.raw_ptr(), self.raw_ptr(), iree_allocator_system()), + "Could not parse parameter file index"); } void ParameterIndexLoadFile(ParameterIndex &self, std::string &file_path, diff --git a/runtime/src/iree/base/internal/file_io.c b/runtime/src/iree/base/internal/file_io.c index 8ba489e63477..a0753f09a1fe 100644 --- a/runtime/src/iree/base/internal/file_io.c +++ b/runtime/src/iree/base/internal/file_io.c @@ -466,7 +466,9 @@ iree_status_t iree_file_create_mapped(const char* path, uint64_t file_size, IREE_TRACE_ZONE_BEGIN(z0); iree_file_contents_t* contents = NULL; - iree_allocator_malloc(allocator, sizeof(*contents), (void**)&contents); + IREE_RETURN_AND_END_ZONE_IF_ERROR( + z0, + iree_allocator_malloc(allocator, sizeof(*contents), (void**)&contents)); contents->allocator = allocator; iree_status_t status = iree_file_create_mapped_platform( diff --git a/runtime/src/iree/io/file_handle.c b/runtime/src/iree/io/file_handle.c index dea7fcaee8b1..40d5f8562b2d 100644 --- a/runtime/src/iree/io/file_handle.c +++ 
b/runtime/src/iree/io/file_handle.c @@ -12,11 +12,14 @@ #if IREE_FILE_IO_ENABLE #if defined(IREE_PLATFORM_WINDOWS) -#include // _commit +#include // _commit +#include // WerRegisterExcludedMemoryBlock #else -#include // fsync +#include // mmap +#include // fstat +#include // fsync #endif // IREE_PLATFORM_WINDOWS #endif // IREE_FILE_IO_ENABLE @@ -157,6 +160,451 @@ iree_io_file_handle_flush(iree_io_file_handle_t* handle) { return status; } +//===----------------------------------------------------------------------===// +// iree_io_file_mapping_t support +//===----------------------------------------------------------------------===// + +static iree_status_t iree_io_calculate_file_view_range( + uint64_t file_size, uint64_t offset, iree_host_size_t length, + iree_host_size_t* out_adjusted_length) { + *out_adjusted_length = 0; + + // Check if the start of the range runs off the end of the buffer. + if (IREE_UNLIKELY(offset > file_size)) { + return iree_make_status(IREE_STATUS_OUT_OF_RANGE, + "attempted to access an address off the end of the " + "file range (offset=%" PRIu64 ", length=%" PRIhsz + ", file size=%" PRIu64 ")", + offset, length, file_size); + } + + // Calculate the real length adjusted for our region within the allocation. + const iree_host_size_t adjusted_length = + length == IREE_HOST_SIZE_MAX ? file_size - offset : length; + if (adjusted_length == 0) { + // Fine (but silly) to have a zero length. + return iree_ok_status(); + } + + // Check if the end runs over the allocation. 
+ const uint64_t end = offset + adjusted_length - 1; + if (IREE_UNLIKELY(end >= file_size)) { + return iree_make_status(IREE_STATUS_OUT_OF_RANGE, + "attempted to access an address outside of the " + "file range (offset=%" PRIu64 + ", adjusted_length=%" PRIhsz ", end=%" PRIu64 + ", file size=%" PRIu64 ")", + offset, adjusted_length, end, file_size); + } + + *out_adjusted_length = adjusted_length; + return iree_ok_status(); +} + +static iree_status_t iree_io_file_mapping_from_host_allocation( + iree_byte_span_t buffer, uint64_t offset, iree_host_size_t length, + iree_byte_span_t* out_range) { + *out_range = iree_byte_span_empty(); + + iree_host_size_t adjusted_length = 0; + IREE_RETURN_IF_ERROR(iree_io_calculate_file_view_range( + (uint64_t)buffer.data_length, offset, length, &adjusted_length)); + + *out_range = iree_make_byte_span(buffer.data + offset, adjusted_length); + return iree_ok_status(); +} + +#if defined(IREE_PLATFORM_ANDROID) || defined(IREE_PLATFORM_IOS) || \ + defined(IREE_PLATFORM_LINUX) || defined(IREE_PLATFORM_MACOS) + +static iree_status_t iree_io_file_handle_to_fd( + iree_io_file_handle_primitive_t primitive, int* out_fd) { + *out_fd = -1; + switch (primitive.type) { + case IREE_IO_FILE_HANDLE_TYPE_FD: + *out_fd = primitive.value.fd; + return iree_ok_status(); + default: + return iree_make_status( + IREE_STATUS_UNIMPLEMENTED, + "no file descriptor available for file handles of type %d", + (int)primitive.type); + } +} + +static iree_status_t iree_io_platform_map_file_view( + iree_io_file_handle_primitive_t primitive, iree_io_file_access_t access, + uint64_t offset, iree_host_size_t length, + iree_io_file_mapping_flags_t flags, void** out_impl, + iree_byte_span_t* out_contents) { + *out_impl = NULL; + *out_contents = iree_byte_span_empty(); + + // Attempt to get a file descriptor from the provided IREE file handle. 
+ int fd = -1; + IREE_RETURN_IF_ERROR(iree_io_file_handle_to_fd(primitive, &fd), + "mapping file handle to file descriptor"); + + // Query file size. We don't support extending/truncating files today and make + // the user do that - we just allow the length to be IREE_HOST_SIZE_MAX to + // indicate the remaining file should be mapped. + struct stat file_stat = {0}; + if (fstat(fd, &file_stat) == -1) { + return iree_make_status(iree_status_code_from_errno(errno), + "unable to query file size"); + } + const uint64_t file_size = file_stat.st_size; + + // Validate and adjust view size if needed. + iree_host_size_t adjusted_length = 0; + IREE_RETURN_IF_ERROR(iree_io_calculate_file_view_range( + file_size, offset, length, &adjusted_length)); + + int prot = 0; + if (iree_all_bits_set(access, IREE_IO_FILE_ACCESS_READ)) { + prot |= PROT_READ; + } + if (iree_all_bits_set(access, IREE_IO_FILE_ACCESS_WRITE)) { + prot |= PROT_WRITE; + } + + int map_flags = 0; + if (iree_all_bits_set(flags, IREE_IO_FILE_MAPPING_FLAG_PRIVATE)) { + map_flags |= MAP_PRIVATE; + } else { + map_flags |= MAP_SHARED; + } +#if defined(MAP_HUGETLB) + if (iree_all_bits_set(flags, IREE_IO_FILE_MAPPING_FLAG_LARGE_PAGES)) { + map_flags |= MAP_HUGETLB; + } +#endif // MAP_HUGETLB + + // Map the memory. + void* ptr = mmap(NULL, adjusted_length, prot, map_flags, fd, offset); + if (ptr == MAP_FAILED) { + // Report the adjusted length that was actually requested from mmap: the + // caller-provided length may be IREE_HOST_SIZE_MAX ("rest of the file") + // and would otherwise wrap when added to the offset. + return iree_make_status(iree_status_code_from_errno(errno), + "failed to map file handle range %" PRIu64 + "-%" PRIu64 " (%" PRIhsz + " bytes) from file of %" PRIu64 " total bytes", + offset, offset + adjusted_length, adjusted_length, + file_size); + } + + // Pass hints to the memory manager - informational only. 
+ // madvise advice values are enumerants and not bit flags so they cannot be + // OR-ed together: each hint is issued with its own call. Failures are + // ignored as the hints are optional. + if (iree_all_bits_set(flags, IREE_IO_FILE_MAPPING_FLAG_SEQUENTIAL_ACCESS)) { + madvise(ptr, adjusted_length, MADV_SEQUENTIAL); + } +#if defined(MADV_DONTDUMP) + if (iree_all_bits_set(flags, IREE_IO_FILE_MAPPING_FLAG_EXCLUDE_FROM_DUMPS)) { + madvise(ptr, adjusted_length, MADV_DONTDUMP); + } +#endif // MADV_DONTDUMP + + *out_impl = ptr; + *out_contents = iree_make_byte_span(ptr, adjusted_length); + return iree_ok_status(); +} + +static void iree_io_platform_unmap_file_view(iree_io_file_mapping_flags_t flags, + void* impl, + iree_byte_span_t contents) { + if (impl) { + munmap(impl, (size_t)contents.data_length); + } +} + +#elif defined(IREE_PLATFORM_WINDOWS) + +static iree_status_t iree_io_file_handle_to_win32_handle( + iree_io_file_handle_primitive_t primitive, HANDLE* out_handle) { + *out_handle = INVALID_HANDLE_VALUE; + switch (primitive.type) { + case IREE_IO_FILE_HANDLE_TYPE_FD: + *out_handle = (HANDLE)_get_osfhandle(primitive.value.fd); + if (*out_handle == INVALID_HANDLE_VALUE) { + return iree_make_status( + IREE_STATUS_INVALID_ARGUMENT, + "file descriptor is not backed by a valid Win32 HANDLE"); + } + return iree_ok_status(); + default: + return iree_make_status( + IREE_STATUS_UNIMPLEMENTED, + "no Win32 HANDLE available for file handles of type %d", + (int)primitive.type); + } +} + +static iree_status_t iree_io_platform_map_file_view( + iree_io_file_handle_primitive_t primitive, iree_io_file_access_t access, + uint64_t offset, iree_host_size_t length, + iree_io_file_mapping_flags_t flags, void** out_impl, + iree_byte_span_t* out_contents) { + *out_impl = NULL; + *out_contents = iree_byte_span_empty(); + + // Attempt to get a Win32 HANDLE from the provided IREE file handle. + HANDLE handle = INVALID_HANDLE_VALUE; + IREE_RETURN_IF_ERROR(iree_io_file_handle_to_win32_handle(primitive, &handle), + "mapping file handle to win32 handle"); + + // Query file size. 
We don't support extending/truncating files today and make + // the user do that - we just allow the length to be IREE_HOST_SIZE_MAX to + // indicate the remaining file should be mapped. + FILE_STANDARD_INFO file_info = {0}; + if (!GetFileInformationByHandleEx(handle, FileStandardInfo, &file_info, + (DWORD)sizeof(file_info))) { + return iree_make_status(iree_status_code_from_win32_error(GetLastError()), + "failed to query file handle information"); + } + // EndOfFile is the logical length of the file in bytes. AllocationSize is + // the space reserved on disk and must not be used as the file size when + // validating and sizing the view. + const uint64_t file_size = file_info.EndOfFile.QuadPart; + + // Validate and adjust view size if needed. + iree_host_size_t adjusted_length = 0; + IREE_RETURN_IF_ERROR(iree_io_calculate_file_view_range( + file_size, offset, length, &adjusted_length)); + + // Create a file mapping object which will retain the file handle for the + // lifetime of the mapping. + DWORD protect = 0; + if (iree_all_bits_set(access, IREE_IO_FILE_ACCESS_WRITE)) { + protect |= PAGE_READWRITE; + } else if (iree_all_bits_set(access, IREE_IO_FILE_ACCESS_READ)) { + protect |= PAGE_READONLY; + } + if (iree_all_bits_set(flags, IREE_IO_FILE_MAPPING_FLAG_LARGE_PAGES)) { + protect |= SEC_LARGE_PAGES; + } + HANDLE mapping = + CreateFileMappingA(handle, NULL, protect, /*dwMaximumSizeHigh=*/0, + /*dwMaximumSizeLow=*/0, /*lpName=*/NULL); + if (!mapping) { + return iree_make_status(iree_status_code_from_win32_error(GetLastError()), + "failed to create file mapping for file handle"); + } + + // Map the requested range into the virtual address space of the process. 
+ // Read and write access are independent bits: a read+write request must + // produce FILE_MAP_READ | FILE_MAP_WRITE so that the view is writable. + DWORD desired_access = 0; + if (iree_all_bits_set(access, IREE_IO_FILE_ACCESS_READ)) { + desired_access |= FILE_MAP_READ; + } + if (iree_all_bits_set(access, IREE_IO_FILE_ACCESS_WRITE)) { + desired_access |= FILE_MAP_WRITE; + } + LARGE_INTEGER offset_li = {0}; + offset_li.QuadPart = offset; + void* ptr = MapViewOfFileEx(mapping, desired_access, offset_li.HighPart, + offset_li.LowPart, (SIZE_T)adjusted_length, + /*lpBaseAddress=*/NULL); + if (!ptr) { + // Capture the mapping failure before CloseHandle has a chance to replace + // the thread's last-error value. + const DWORD map_error = GetLastError(); + CloseHandle(mapping); + return iree_make_status( + iree_status_code_from_win32_error(map_error), + "failed to map file handle range %" PRIu64 "-%" PRIu64 " (%" PRIhsz + " bytes) from file of %" PRIu64 " total bytes", + offset, offset + adjusted_length, adjusted_length, file_size); + } + +#if defined(WER_MAX_REGISTERED_ENTRIES) && \ + WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_SYSTEM) + // If the user specified that we should exclude the contents from dumps then + // we need to tell Windows Error Reporting. Unfortunately the API is broken + // and only accepts a DWORD (it was added in Windows 10 **and uses a DWORD for + // size** :facepalm:). This is informational so we just try and maybe fail. + // Note that there's also a very small limit on the number of exclusions + // (WER_MAX_REGISTERED_ENTRIES = 512) so we can't just loop and try to exclude + // 4GB blocks in all cases. We try anyway, though. Maybe this isn't even + // useful - the docs are iffy. Oh well. 
+ if (iree_all_bits_set(flags, IREE_IO_FILE_MAPPING_FLAG_EXCLUDE_FROM_DUMPS)) { + iree_host_size_t bytes_excluded = 0; + iree_host_size_t bytes_remaining = adjusted_length; + while (bytes_remaining > 0) { + const DWORD bytes_to_exclude = iree_min(bytes_remaining, UINT32_MAX); + WerRegisterExcludedMemoryBlock((uint8_t*)ptr + bytes_excluded, + bytes_to_exclude); + bytes_excluded += bytes_to_exclude; + bytes_remaining -= bytes_to_exclude; + } + } +#endif // WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | + // WINAPI_PARTITION_SYSTEM) + + *out_impl = mapping; // transferred to caller + *out_contents = iree_make_byte_span(ptr, adjusted_length); + return iree_ok_status(); +} + +static void iree_io_platform_unmap_file_view(iree_io_file_mapping_flags_t flags, + void* impl, + iree_byte_span_t contents) { + if (contents.data) { + UnmapViewOfFile(contents.data); + } + +#if defined(WER_MAX_REGISTERED_ENTRIES) && \ + WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_SYSTEM) + if (contents.data && + iree_all_bits_set(flags, IREE_IO_FILE_MAPPING_FLAG_EXCLUDE_FROM_DUMPS)) { + WerUnregisterExcludedMemoryBlock(contents.data); + iree_host_size_t bytes_unexcluded = 0; + iree_host_size_t bytes_remaining = contents.data_length; + while (bytes_remaining > 0) { + const DWORD bytes_to_unexclude = iree_min(bytes_remaining, UINT32_MAX); + WerUnregisterExcludedMemoryBlock(contents.data + bytes_unexcluded); + bytes_unexcluded += bytes_to_unexclude; + bytes_remaining -= bytes_to_unexclude; + } + } +#endif // WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | + // WINAPI_PARTITION_SYSTEM) + + if (impl) { + CloseHandle((HANDLE)impl); + } +} + +#else + +static iree_status_t iree_io_platform_map_file_view( + iree_io_file_handle_primitive_t primitive, iree_io_file_access_t access, + uint64_t offset, iree_host_size_t length, + iree_io_file_mapping_flags_t flags, void** out_impl, + iree_byte_span_t* out_contents) { + *out_impl = NULL; + *out_contents = iree_byte_span_empty(); + return 
iree_make_status(IREE_STATUS_UNIMPLEMENTED, + "no support for mapping file views on this platform"); +} + +static void iree_io_platform_unmap_file_view(iree_io_file_mapping_flags_t flags, + void* impl, + iree_byte_span_t contents) {} + +#endif // IREE_PLATFORM_* + +//===----------------------------------------------------------------------===// +// iree_io_file_mapping_t +//===----------------------------------------------------------------------===// + +struct iree_io_file_mapping_t { + iree_atomic_ref_count_t ref_count; + iree_allocator_t host_allocator; + // File handle that owns the underlying file. Retained. + iree_io_file_handle_t* handle; + // Flags used when creating the mapping. + iree_io_file_mapping_flags_t flags; + // Platform-defined implementation handle. + // - mmap: base pointer returned from mmap + // - Win32: HANDLE returned by CreateFileMappingA + void* impl; + // Mapped contents in host memory. Access matches that requested on mapping. + iree_byte_span_t contents; +}; + +IREE_API_EXPORT iree_status_t iree_io_file_map_view( + iree_io_file_handle_t* handle, iree_io_file_access_t access, + uint64_t offset, iree_host_size_t length, + iree_io_file_mapping_flags_t flags, iree_allocator_t host_allocator, + iree_io_file_mapping_t** out_mapping) { + IREE_ASSERT_ARGUMENT(handle); + IREE_ASSERT_ARGUMENT(out_mapping); + *out_mapping = NULL; + IREE_TRACE_ZONE_BEGIN(z0); + IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, offset); + IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, length); + IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, flags); + + iree_io_file_mapping_t* mapping = NULL; + IREE_RETURN_AND_END_ZONE_IF_ERROR( + z0, iree_allocator_malloc(host_allocator, sizeof(*mapping), + (void**)&mapping)); + iree_atomic_ref_count_init(&mapping->ref_count); + mapping->host_allocator = host_allocator; + mapping->handle = handle; + iree_io_file_handle_retain(mapping->handle); + mapping->flags = flags; + mapping->contents = iree_byte_span_empty(); + + iree_status_t status = iree_ok_status(); + + 
// Special case for host allocations: we can directly use them (with + // translation). Otherwise we let the platform-specific logic take care of + // things (if it exists). + iree_io_file_handle_primitive_t primitive = + iree_io_file_handle_primitive(handle); + if (primitive.type == IREE_IO_FILE_HANDLE_TYPE_HOST_ALLOCATION) { + iree_byte_span_t file_buffer = primitive.value.host_allocation; + status = iree_io_file_mapping_from_host_allocation( + file_buffer, offset, length, &mapping->contents); + } else { + // Use platform APIs to map the file. + status = + iree_io_platform_map_file_view(primitive, access, offset, length, flags, + &mapping->impl, &mapping->contents); + } + + if (iree_status_is_ok(status)) { + *out_mapping = mapping; + } else { + iree_io_file_mapping_release(mapping); + } + IREE_TRACE_ZONE_END(z0); + return status; +} + +static void iree_io_file_mapping_destroy(iree_io_file_mapping_t* mapping) { + IREE_ASSERT_ARGUMENT(mapping); + IREE_TRACE_ZONE_BEGIN(z0); + iree_allocator_t host_allocator = mapping->host_allocator; + + if (mapping->impl) { + iree_io_platform_unmap_file_view(mapping->flags, mapping->impl, + mapping->contents); + } + + iree_io_file_handle_release(mapping->handle); + + iree_allocator_free(host_allocator, mapping); + + IREE_TRACE_ZONE_END(z0); +} + +IREE_API_EXPORT void iree_io_file_mapping_retain( + iree_io_file_mapping_t* mapping) { + if (IREE_LIKELY(mapping)) { + iree_atomic_ref_count_inc(&mapping->ref_count); + } +} + +IREE_API_EXPORT void iree_io_file_mapping_release( + iree_io_file_mapping_t* mapping) { + if (IREE_LIKELY(mapping) && + iree_atomic_ref_count_dec(&mapping->ref_count) == 1) { + iree_io_file_mapping_destroy(mapping); + } +} + +IREE_API_EXPORT iree_host_size_t +iree_io_file_mapping_length(const iree_io_file_mapping_t* mapping) { + IREE_ASSERT_ARGUMENT(mapping); + return mapping->contents.data_length; +} + +IREE_API_EXPORT iree_const_byte_span_t +iree_io_file_mapping_contents_ro(const iree_io_file_mapping_t* 
mapping) { + return iree_make_const_byte_span(mapping->contents.data, + mapping->contents.data_length); +} + +IREE_API_EXPORT iree_byte_span_t +iree_io_file_mapping_contents_rw(iree_io_file_mapping_t* mapping) { + IREE_ASSERT_ARGUMENT(mapping); + return mapping->contents; +} + //===----------------------------------------------------------------------===// // iree_io_stream_t utilities //===----------------------------------------------------------------------===// diff --git a/runtime/src/iree/io/file_handle.h b/runtime/src/iree/io/file_handle.h index 6d46ad559d4c..53e5a37594cc 100644 --- a/runtime/src/iree/io/file_handle.h +++ b/runtime/src/iree/io/file_handle.h @@ -21,13 +21,13 @@ extern "C" { //===----------------------------------------------------------------------===// // Bits defining which operations are allowed on a file. +typedef uint32_t iree_io_file_access_t; enum iree_io_file_access_bits_t { // Allows operations that read from the file. IREE_IO_FILE_ACCESS_READ = 1u << 0, // Allows operations that write to the file. IREE_IO_FILE_ACCESS_WRITE = 1u << 1, }; -typedef uint32_t iree_io_file_access_t; //===----------------------------------------------------------------------===// // iree_io_file_handle_primitive_t @@ -156,6 +156,107 @@ static inline iree_io_file_handle_primitive_value_t iree_io_file_handle_value( IREE_API_EXPORT iree_status_t iree_io_file_handle_flush(iree_io_file_handle_t* handle); +//===----------------------------------------------------------------------===// +// iree_io_file_mapping_t +//===----------------------------------------------------------------------===// +// EXPERIMENTAL: this API may change once proper memory objects and views are +// added to iree/base/. This may just end up as a thin wrapper around that +// lower-level API with more fancy features (address placement, commit/decommit, +// etc) left to the lower-level API. We may add new APIs here for flush/sync +// as required. 
+ +// Flags used to control the behavior of mapped file views. +typedef uint64_t iree_io_file_mapping_flags_t; +enum iree_io_file_mapping_flag_bits_t { + IREE_IO_FILE_MAPPING_FLAG_NONE = 0u, + + // Indicates that the memory access pattern of the view is mostly sequential. + // Hints to the system that an LRU page cache and sequential prefetching are + // likely to be worth it. + // + // Implemented by MADV_SEQUENTIAL. + IREE_IO_FILE_MAPPING_FLAG_SEQUENTIAL_ACCESS = 1ull << 0, + + // Enables large page support for the given view, if available. + // Certain mapping modes such as mapping of existing files or opening + // mappings from another process where the allocation was not made with large + // pages may not support large pages and the flag will be silently ignored. + // In either case the memory view will be padded to the + // iree_memory_info_t::large_page_size regardless of whether the pages are + // actually large to the system. + // + // Use large pages to reduce the overhead involved in accessing + // hot-but-non-localized memory views that may otherwise spend a significant + // amount of time/capacity maintaining the TLB. As the platform and + // machine-dependent large page size is often several orders of magnitude + // larger than the normal page size (MB vs. KB) care should be used to only + // apply this to large allocations. + // + // Implemented by FILE_MAP_LARGE_PAGES/MAP_HUGETLB, where available. + IREE_IO_FILE_MAPPING_FLAG_LARGE_PAGES = 1ull << 1, + + // Excludes the view memory from minidumps/coredumps. + // This is a hint that the memory in the ranges are not useful to include in + // dumps, such as large chunks of read-only file data (model weights, images, + // etc). + // + // Implemented by WerRegisterExcludedMemoryBlock/MADV_DONTDUMP, where + // available. + IREE_IO_FILE_MAPPING_FLAG_EXCLUDE_FROM_DUMPS = 1ull << 2, + + // Privately map the memory into the calling process. 
+ // Other processes that may hold a reference to the file will not see changes. + // This is not a guarantee but an optimization to possibly avoid non-trivial + // kernel overheads. + // + // Implemented by MAP_PRIVATE, where available. + IREE_IO_FILE_MAPPING_FLAG_PRIVATE = 1ull << 3, +}; + +// A mapped file view into host memory. +// +// Thread-safe; the mapping is immutable and may be accessed from any thread. +// The **contents** of the mapping in the file should be coherent across threads +// within the same process but may not be across threads in different processes. +typedef struct iree_io_file_mapping_t iree_io_file_mapping_t; + +// Maps a view of a file into host-accessible memory. +// The provided file |handle| is retained for the lifetime of the view. +// To map the entire file specify a range of [0, IREE_HOST_SIZE_MAX]. +// +// If the provided file |handle| is already available for use as a host pointer +// it is returned directly. +IREE_API_EXPORT iree_status_t iree_io_file_map_view( + iree_io_file_handle_t* handle, iree_io_file_access_t access, + uint64_t offset, iree_host_size_t length, + iree_io_file_mapping_flags_t flags, iree_allocator_t host_allocator, + iree_io_file_mapping_t** out_mapping); + +// Retains the file |mapping| for the caller. The backing file handle will be +// retained as well. +IREE_API_EXPORT void iree_io_file_mapping_retain( + iree_io_file_mapping_t* mapping); + +// Releases the file |mapping| and its reference to the backing file handle. +// If the mapping was the last remaining retainer of the handle it will be +// closed. +IREE_API_EXPORT void iree_io_file_mapping_release( + iree_io_file_mapping_t* mapping); + +// Returns the length of the mapped view in bytes. +IREE_API_EXPORT iree_host_size_t +iree_io_file_mapping_length(const iree_io_file_mapping_t* mapping); + +// Returns a host-accessible read-only pointer to the file mapping memory. +// Returns iree_const_byte_span_empty if the mapping is not readable. 
+IREE_API_EXPORT iree_const_byte_span_t +iree_io_file_mapping_contents_ro(const iree_io_file_mapping_t* mapping); + +// Returns a host-accessible read-write pointer to the file mapping memory. +// Returns iree_byte_span_empty if the mapping is not writable. +IREE_API_EXPORT iree_byte_span_t +iree_io_file_mapping_contents_rw(iree_io_file_mapping_t* mapping); + //===----------------------------------------------------------------------===// // iree_io_stream_t utilities //===----------------------------------------------------------------------===// diff --git a/runtime/src/iree/io/formats/gguf/gguf_parser.c b/runtime/src/iree/io/formats/gguf/gguf_parser.c index 92230270fd4c..a48a2066c490 100644 --- a/runtime/src/iree/io/formats/gguf/gguf_parser.c +++ b/runtime/src/iree/io/formats/gguf/gguf_parser.c @@ -727,26 +727,24 @@ static iree_status_t iree_io_parse_gguf_index_from_memory( } IREE_API_EXPORT iree_status_t iree_io_parse_gguf_index( - iree_io_file_handle_t* file_handle, iree_io_parameter_index_t* index) { + iree_io_file_handle_t* file_handle, iree_io_parameter_index_t* index, + iree_allocator_t host_allocator) { IREE_ASSERT_ARGUMENT(index); IREE_TRACE_ZONE_BEGIN(z0); - // Today we only support memory files. - // TODO(benvanik): support iree_io_stream_t wrapping for parsing the index. - if (iree_io_file_handle_type(file_handle) != - IREE_IO_FILE_HANDLE_TYPE_HOST_ALLOCATION) { - IREE_TRACE_ZONE_END(z0); - return iree_make_status(IREE_STATUS_UNIMPLEMENTED, - "non-memory gguf files not yet supported"); - } - iree_byte_span_t host_allocation = - iree_io_file_handle_primitive(file_handle).value.host_allocation; + // The parser requires a host pointer but will only reference the file handle + // in the index. 
+ iree_io_file_mapping_t* file_mapping = NULL; + IREE_RETURN_AND_END_ZONE_IF_ERROR( + z0, iree_io_file_map_view(file_handle, IREE_IO_FILE_ACCESS_READ, 0, + IREE_HOST_SIZE_MAX, + IREE_IO_FILE_MAPPING_FLAG_EXCLUDE_FROM_DUMPS, + host_allocator, &file_mapping)); iree_status_t status = iree_io_parse_gguf_index_from_memory( - file_handle, - iree_make_const_byte_span(host_allocation.data, - host_allocation.data_length), - index); + file_handle, iree_io_file_mapping_contents_ro(file_mapping), index); + + iree_io_file_mapping_release(file_mapping); IREE_TRACE_ZONE_END(z0); return status; diff --git a/runtime/src/iree/io/formats/gguf/gguf_parser.h b/runtime/src/iree/io/formats/gguf/gguf_parser.h index fc0064baf989..e570e0c57dbe 100644 --- a/runtime/src/iree/io/formats/gguf/gguf_parser.h +++ b/runtime/src/iree/io/formats/gguf/gguf_parser.h @@ -19,8 +19,12 @@ extern "C" { // // Specification: // https://github.com/ggerganov/ggml/blob/master/docs/gguf.md +// +// The provided |host_allocator| may be used for allocations during parsing and +// is allowed to be an arena. 
IREE_API_EXPORT iree_status_t iree_io_parse_gguf_index( - iree_io_file_handle_t* file_handle, iree_io_parameter_index_t* index); + iree_io_file_handle_t* file_handle, iree_io_parameter_index_t* index, + iree_allocator_t host_allocator); #ifdef __cplusplus } // extern "C" diff --git a/runtime/src/iree/io/formats/gguf/gguf_parser_test.cc b/runtime/src/iree/io/formats/gguf/gguf_parser_test.cc index 1845e7afce2d..2477806999d4 100644 --- a/runtime/src/iree/io/formats/gguf/gguf_parser_test.cc +++ b/runtime/src/iree/io/formats/gguf/gguf_parser_test.cc @@ -38,7 +38,8 @@ TEST(GgufFormatTest, Empty) { iree_io_parameter_index_create(iree_allocator_system(), &index)); iree_io_file_handle_t* file_handle = OpenTestFile("empty.gguf"); - IREE_ASSERT_OK(iree_io_parse_gguf_index(file_handle, index)); + IREE_ASSERT_OK( + iree_io_parse_gguf_index(file_handle, index, iree_allocator_system())); iree_io_file_handle_release(file_handle); iree_io_parameter_index_release(index); @@ -50,7 +51,8 @@ TEST(GgufFormatTest, SingleTensor) { iree_io_parameter_index_create(iree_allocator_system(), &index)); iree_io_file_handle_t* file_handle = OpenTestFile("single.gguf"); - IREE_ASSERT_OK(iree_io_parse_gguf_index(file_handle, index)); + IREE_ASSERT_OK( + iree_io_parse_gguf_index(file_handle, index, iree_allocator_system())); iree_io_file_handle_release(file_handle); const iree_io_parameter_index_entry_t* entry0 = NULL; @@ -71,7 +73,8 @@ TEST(GgufFormatTest, SingleTensorV2) { iree_io_parameter_index_create(iree_allocator_system(), &index)); iree_io_file_handle_t* file_handle = OpenTestFile("single_v2.gguf"); - IREE_ASSERT_OK(iree_io_parse_gguf_index(file_handle, index)); + IREE_ASSERT_OK( + iree_io_parse_gguf_index(file_handle, index, iree_allocator_system())); iree_io_file_handle_release(file_handle); const iree_io_parameter_index_entry_t* entry0 = NULL; @@ -91,7 +94,8 @@ TEST(GgufFormatTest, MultipleTensors) { iree_io_parameter_index_create(iree_allocator_system(), &index)); iree_io_file_handle_t* 
file_handle = OpenTestFile("multiple.gguf"); - IREE_ASSERT_OK(iree_io_parse_gguf_index(file_handle, index)); + IREE_ASSERT_OK( + iree_io_parse_gguf_index(file_handle, index, iree_allocator_system())); iree_io_file_handle_release(file_handle); const iree_io_parameter_index_entry_t* entry0 = NULL; diff --git a/runtime/src/iree/io/formats/irpa/irpa_parser.c b/runtime/src/iree/io/formats/irpa/irpa_parser.c index dd09b3a8b441..fe69a4600435 100644 --- a/runtime/src/iree/io/formats/irpa/irpa_parser.c +++ b/runtime/src/iree/io/formats/irpa/irpa_parser.c @@ -322,27 +322,26 @@ static iree_status_t iree_io_parse_irpa_index_from_memory( } IREE_API_EXPORT iree_status_t iree_io_parse_irpa_index( - iree_io_file_handle_t* file_handle, iree_io_parameter_index_t* index) { + iree_io_file_handle_t* file_handle, iree_io_parameter_index_t* index, + iree_allocator_t host_allocator) { IREE_ASSERT_ARGUMENT(index); IREE_TRACE_ZONE_BEGIN(z0); - // Today we only support memory files. - // TODO(benvanik): support iree_io_stream_t wrapping for parsing the index. - if (iree_io_file_handle_type(file_handle) != - IREE_IO_FILE_HANDLE_TYPE_HOST_ALLOCATION) { - IREE_TRACE_ZONE_END(z0); - return iree_make_status(IREE_STATUS_UNIMPLEMENTED, - "non-memory irpa files not yet supported"); - } - iree_byte_span_t host_allocation = - iree_io_file_handle_primitive(file_handle).value.host_allocation; + // The parser requires a host pointer but will only reference the file handle + // in the index. 
+ iree_io_file_mapping_t* file_mapping = NULL; + IREE_RETURN_AND_END_ZONE_IF_ERROR( + z0, iree_io_file_map_view(file_handle, IREE_IO_FILE_ACCESS_READ, 0, + IREE_HOST_SIZE_MAX, + IREE_IO_FILE_MAPPING_FLAG_EXCLUDE_FROM_DUMPS, + host_allocator, &file_mapping)); iree_status_t status = iree_io_parse_irpa_index_from_memory( - file_handle, - iree_make_const_byte_span(host_allocation.data, - host_allocation.data_length), + file_handle, iree_io_file_mapping_contents_ro(file_mapping), /*base_offset=*/0, index); + iree_io_file_mapping_release(file_mapping); + IREE_TRACE_ZONE_END(z0); return status; } diff --git a/runtime/src/iree/io/formats/irpa/irpa_parser.h b/runtime/src/iree/io/formats/irpa/irpa_parser.h index 4423105606c7..e5283fdc725c 100644 --- a/runtime/src/iree/io/formats/irpa/irpa_parser.h +++ b/runtime/src/iree/io/formats/irpa/irpa_parser.h @@ -16,8 +16,12 @@ extern "C" { #endif // __cplusplus // Parses an IREE archive file and merges its contained resources into |index|. +// +// The provided |host_allocator| may be used for allocations during parsing and +// is allowed to be an arena. 
IREE_API_EXPORT iree_status_t iree_io_parse_irpa_index( - iree_io_file_handle_t* file_handle, iree_io_parameter_index_t* index); + iree_io_file_handle_t* file_handle, iree_io_parameter_index_t* index, + iree_allocator_t host_allocator); #ifdef __cplusplus } // extern "C" diff --git a/runtime/src/iree/io/formats/irpa/irpa_parser_test.cc b/runtime/src/iree/io/formats/irpa/irpa_parser_test.cc index 944c2b406568..c6f3bc7909a1 100644 --- a/runtime/src/iree/io/formats/irpa/irpa_parser_test.cc +++ b/runtime/src/iree/io/formats/irpa/irpa_parser_test.cc @@ -38,7 +38,8 @@ TEST(IrpaFormatTest, Empty) { iree_io_parameter_index_create(iree_allocator_system(), &index)); iree_io_file_handle_t* file_handle = OpenTestFile("empty.irpa"); - IREE_ASSERT_OK(iree_io_parse_irpa_index(file_handle, index)); + IREE_ASSERT_OK( + iree_io_parse_irpa_index(file_handle, index, iree_allocator_system())); EXPECT_EQ(0, iree_io_parameter_index_count(index)); iree_io_file_handle_release(file_handle); @@ -51,7 +52,8 @@ TEST(IrpaFormatTest, SingleParameters) { iree_io_parameter_index_create(iree_allocator_system(), &index)); iree_io_file_handle_t* file_handle = OpenTestFile("single.irpa"); - IREE_ASSERT_OK(iree_io_parse_irpa_index(file_handle, index)); + IREE_ASSERT_OK( + iree_io_parse_irpa_index(file_handle, index, iree_allocator_system())); EXPECT_EQ(1, iree_io_parameter_index_count(index)); iree_io_file_handle_release(file_handle); @@ -73,7 +75,8 @@ TEST(IrpaFormatTest, MultipleParameters) { iree_io_parameter_index_create(iree_allocator_system(), &index)); iree_io_file_handle_t* file_handle = OpenTestFile("multiple.irpa"); - IREE_ASSERT_OK(iree_io_parse_irpa_index(file_handle, index)); + IREE_ASSERT_OK( + iree_io_parse_irpa_index(file_handle, index, iree_allocator_system())); EXPECT_EQ(2, iree_io_parameter_index_count(index)); iree_io_file_handle_release(file_handle); @@ -104,7 +107,8 @@ TEST(IrpaFormatTest, MixedDataAndSplats) { iree_io_parameter_index_create(iree_allocator_system(), &index)); 
iree_io_file_handle_t* file_handle = OpenTestFile("mixed.irpa"); - IREE_ASSERT_OK(iree_io_parse_irpa_index(file_handle, index)); + IREE_ASSERT_OK( + iree_io_parse_irpa_index(file_handle, index, iree_allocator_system())); EXPECT_EQ(4, iree_io_parameter_index_count(index)); iree_io_file_handle_release(file_handle); diff --git a/runtime/src/iree/io/formats/parser_registry.c b/runtime/src/iree/io/formats/parser_registry.c index 716f07f905b5..0c6bac6b0e08 100644 --- a/runtime/src/iree/io/formats/parser_registry.c +++ b/runtime/src/iree/io/formats/parser_registry.c @@ -13,7 +13,7 @@ IREE_API_EXPORT iree_status_t iree_io_parse_file_index( iree_string_view_t path, iree_io_file_handle_t* file_handle, - iree_io_parameter_index_t* index) { + iree_io_parameter_index_t* index, iree_allocator_t host_allocator) { IREE_TRACE_ZONE_BEGIN(z0); IREE_TRACE_ZONE_APPEND_TEXT(z0, path.data, path.size); @@ -28,11 +28,12 @@ IREE_API_EXPORT iree_status_t iree_io_parse_file_index( iree_status_t status = iree_ok_status(); if (iree_string_view_equal_case(extension, IREE_SV("irpa"))) { - status = iree_io_parse_irpa_index(file_handle, index); + status = iree_io_parse_irpa_index(file_handle, index, host_allocator); } else if (iree_string_view_equal_case(extension, IREE_SV("gguf"))) { - status = iree_io_parse_gguf_index(file_handle, index); + status = iree_io_parse_gguf_index(file_handle, index, host_allocator); } else if (iree_string_view_equal_case(extension, IREE_SV("safetensors"))) { - status = iree_io_parse_safetensors_index(file_handle, index); + status = + iree_io_parse_safetensors_index(file_handle, index, host_allocator); } else { status = iree_make_status( IREE_STATUS_UNIMPLEMENTED, diff --git a/runtime/src/iree/io/formats/parser_registry.h b/runtime/src/iree/io/formats/parser_registry.h index c982d08a3edd..404f8ec6a4ed 100644 --- a/runtime/src/iree/io/formats/parser_registry.h +++ b/runtime/src/iree/io/formats/parser_registry.h @@ -19,9 +19,12 @@ extern "C" { // |path| is used for 
logging and file format identification. It may either be // the original file path of |file_handle| or an extension (such as `irpa`). // Upon return any parameters in the file are appended to the |index|. +// +// The provided |host_allocator| may be used for allocations during parsing and +// is allowed to be an arena. IREE_API_EXPORT iree_status_t iree_io_parse_file_index( iree_string_view_t path, iree_io_file_handle_t* file_handle, - iree_io_parameter_index_t* index); + iree_io_parameter_index_t* index, iree_allocator_t host_allocator); #ifdef __cplusplus } // extern "C" diff --git a/runtime/src/iree/io/formats/safetensors/safetensors_parser.c b/runtime/src/iree/io/formats/safetensors/safetensors_parser.c index 04eda053d1ae..66c842cc2bde 100644 --- a/runtime/src/iree/io/formats/safetensors/safetensors_parser.c +++ b/runtime/src/iree/io/formats/safetensors/safetensors_parser.c @@ -501,26 +501,28 @@ static iree_status_t iree_io_parse_safetensors_index_from_memory( } IREE_API_EXPORT iree_status_t iree_io_parse_safetensors_index( - iree_io_file_handle_t* file_handle, iree_io_parameter_index_t* index) { + iree_io_file_handle_t* file_handle, iree_io_parameter_index_t* index, + iree_allocator_t host_allocator) { IREE_ASSERT_ARGUMENT(index); IREE_TRACE_ZONE_BEGIN(z0); - // Today we only support memory files. - // TODO(benvanik): support iree_io_stream_t wrapping for parsing the index. - if (iree_io_file_handle_type(file_handle) != - IREE_IO_FILE_HANDLE_TYPE_HOST_ALLOCATION) { - IREE_TRACE_ZONE_END(z0); - return iree_make_status(IREE_STATUS_UNIMPLEMENTED, - "non-memory safetensors files not yet supported"); - } - iree_byte_span_t host_allocation = - iree_io_file_handle_primitive(file_handle).value.host_allocation; + // The parser requires a host pointer but will only reference the file handle + // in the index. 
It'd be easy to change this parser to use stream-based + // reading as we could just read the header bytes and JSON blob into transient + // memory but the intent is that parameter parsing should not allocate large + // amounts of memory and this keeps the behavior consistent with other + // implementations. + iree_io_file_mapping_t* file_mapping = NULL; + IREE_RETURN_AND_END_ZONE_IF_ERROR( + z0, iree_io_file_map_view(file_handle, IREE_IO_FILE_ACCESS_READ, 0, + IREE_HOST_SIZE_MAX, + IREE_IO_FILE_MAPPING_FLAG_EXCLUDE_FROM_DUMPS, + host_allocator, &file_mapping)); iree_status_t status = iree_io_parse_safetensors_index_from_memory( - file_handle, - iree_make_const_byte_span(host_allocation.data, - host_allocation.data_length), - index); + file_handle, iree_io_file_mapping_contents_ro(file_mapping), index); + + iree_io_file_mapping_release(file_mapping); IREE_TRACE_ZONE_END(z0); return status; diff --git a/runtime/src/iree/io/formats/safetensors/safetensors_parser.h b/runtime/src/iree/io/formats/safetensors/safetensors_parser.h index c32c104f06a4..2ec2bcc82e65 100644 --- a/runtime/src/iree/io/formats/safetensors/safetensors_parser.h +++ b/runtime/src/iree/io/formats/safetensors/safetensors_parser.h @@ -31,8 +31,12 @@ extern "C" { // don't take that dependency for a testing tool. Users wanting to productionize // this should implement their own safetensors parser or use the rust one with // all the fun that entails. +// +// The provided |host_allocator| may be used for allocations during parsing and +// is allowed to be an arena. 
IREE_API_EXPORT iree_status_t iree_io_parse_safetensors_index( - iree_io_file_handle_t* file_handle, iree_io_parameter_index_t* index); + iree_io_file_handle_t* file_handle, iree_io_parameter_index_t* index, + iree_allocator_t host_allocator); #ifdef __cplusplus } // extern "C" diff --git a/runtime/src/iree/io/formats/safetensors/safetensors_parser_test.cc b/runtime/src/iree/io/formats/safetensors/safetensors_parser_test.cc index 44caf7bba949..821865c4be20 100644 --- a/runtime/src/iree/io/formats/safetensors/safetensors_parser_test.cc +++ b/runtime/src/iree/io/formats/safetensors/safetensors_parser_test.cc @@ -38,7 +38,8 @@ TEST(SafetensorsFormatTest, Empty) { iree_io_parameter_index_create(iree_allocator_system(), &index)); iree_io_file_handle_t* file_handle = OpenTestFile("empty.safetensors"); - IREE_ASSERT_OK(iree_io_parse_safetensors_index(file_handle, index)); + IREE_ASSERT_OK(iree_io_parse_safetensors_index(file_handle, index, + iree_allocator_system())); iree_io_file_handle_release(file_handle); iree_io_parameter_index_release(index); @@ -50,7 +51,8 @@ TEST(SafetensorsFormatTest, SingleTensor) { iree_io_parameter_index_create(iree_allocator_system(), &index)); iree_io_file_handle_t* file_handle = OpenTestFile("single.safetensors"); - IREE_ASSERT_OK(iree_io_parse_safetensors_index(file_handle, index)); + IREE_ASSERT_OK(iree_io_parse_safetensors_index(file_handle, index, + iree_allocator_system())); iree_io_file_handle_release(file_handle); const iree_io_parameter_index_entry_t* entry0 = NULL; @@ -70,7 +72,8 @@ TEST(SafetensorsFormatTest, MultipleTensors) { iree_io_parameter_index_create(iree_allocator_system(), &index)); iree_io_file_handle_t* file_handle = OpenTestFile("multiple.safetensors"); - IREE_ASSERT_OK(iree_io_parse_safetensors_index(file_handle, index)); + IREE_ASSERT_OK(iree_io_parse_safetensors_index(file_handle, index, + iree_allocator_system())); iree_io_file_handle_release(file_handle); const iree_io_parameter_index_entry_t* entry0 = NULL; diff 
--git a/runtime/src/iree/tooling/parameter_util.c b/runtime/src/iree/tooling/parameter_util.c index 07a52960ae0d..e311f4ff664a 100644 --- a/runtime/src/iree/tooling/parameter_util.c +++ b/runtime/src/iree/tooling/parameter_util.c @@ -106,7 +106,8 @@ static iree_status_t iree_io_append_parameter_file_to_index( z0, iree_io_open_parameter_file(path, host_allocator, &file_handle)); // Index the file based on its (inferred) format. - iree_status_t status = iree_io_parse_file_index(path, file_handle, index); + iree_status_t status = + iree_io_parse_file_index(path, file_handle, index, host_allocator); // Release our file reference - it's still retained by the index if it had any // parameters in it.