diff --git a/CHANGELOG.md b/CHANGELOG.md index 2e4533fd7..c635d9ec3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added `net:gethostname/0` on platforms with gethostname(3). - Added `socket:getopt/2` - Added `atomvm:subprocess/4` to perform pipe/fork/execve on POSIX platforms +- Added `externalterm_to_term_with_roots` to efficiently preserve roots when allocating memory for external terms. + +### Changed + +- Removed `externalterm_to_term_copy` added in [0.6.5] and introduced flags to `externalterm_to_term` to perform copy. ### Fixed diff --git a/src/libAtomVM/externalterm.c b/src/libAtomVM/externalterm.c index 10a3028f7..f6f998dd2 100644 --- a/src/libAtomVM/externalterm.c +++ b/src/libAtomVM/externalterm.c @@ -30,6 +30,7 @@ #include "bitstring.h" #include "defaultatoms.h" +#include "memory.h" #include "term.h" #include "unicode.h" #include "utils.h" @@ -81,20 +82,8 @@ static size_t compute_external_size(term t, GlobalContext *glb); static int externalterm_from_term(uint8_t **buf, size_t *len, term t, GlobalContext *glb); static int serialize_term(uint8_t *buf, term t, GlobalContext *glb); -/** - * @brief - * @param external_term buffer containing external term - * @param size size of the external_term - * @param ctx current context in which terms may be stored - * @param opts additional opts, such as ExternalTermToHeapFragment for storing parsed - * terms in a heap fragment. - * are stored in the context heap. - * @param bytes_read the number of bytes read off external_term in order to yield a term - * @param copy whether to copy binary data and atom strings (pass `true', unless `external_term' is a const binary and will not be deallocated) - * @return the parsed term - */ -static term externalterm_to_term_internal(const void *external_term, size_t size, Context *ctx, - ExternalTermOpts opts, size_t *bytes_read, bool copy) +term externalterm_to_term_with_roots(const void *external_term, size_t size, Context *ctx, + ExternalTermFlags flags, size_t *bytes_read, size_t num_roots, term *roots) { const uint8_t *external_term_buf = (const uint8_t *) external_term; @@ -107,42 +96,36 @@ static term externalterm_to_term_internal(const void *external_term, size_t size } size_t eterm_size; - int heap_usage = calculate_heap_usage(external_term_buf + 1, size - 1, &eterm_size, copy); + int heap_usage = calculate_heap_usage(external_term_buf + 1, size - 1, &eterm_size, flags & ExternalTermCopy); if (heap_usage == INVALID_TERM_SIZE) { return term_invalid_term(); } term result; - if (opts & ExternalTermToHeapFragment) { + if (flags & ExternalTermToHeapFragment) { // We need to allocate fragments as reading external terms from modules // is not accounted for by the compiler when it emits test_heap opcodes Heap heap; if (UNLIKELY(memory_init_heap(&heap, heap_usage) != MEMORY_GC_OK)) { return term_invalid_term(); } - result = parse_external_terms(external_term_buf + 1, &eterm_size, copy, &heap, ctx->global); + result = parse_external_terms(external_term_buf + 1, &eterm_size, flags & ExternalTermCopy, &heap, ctx->global); memory_heap_append_heap(&ctx->heap, &heap); } else { - if (UNLIKELY(memory_ensure_free(ctx, heap_usage) != MEMORY_GC_OK)) { + if (UNLIKELY(memory_ensure_free_with_roots(ctx, heap_usage, num_roots, roots, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) { fprintf(stderr, "Unable to ensure %zu free words in heap\n", eterm_size); return term_invalid_term(); } - result = parse_external_terms(external_term_buf + 1, &eterm_size, copy, &ctx->heap, ctx->global); + result = parse_external_terms(external_term_buf + 1, &eterm_size, flags & ExternalTermCopy, &ctx->heap, ctx->global); } *bytes_read = eterm_size + 1; return result; } -term externalterm_to_term(const void *external_term, size_t size, Context *ctx, ExternalTermOpts opts) -{ - size_t bytes_read = 0; - return externalterm_to_term_internal(external_term, size, ctx, opts, &bytes_read, false); -} - -term externalterm_to_term_copy(const void *external_term, size_t size, Context *ctx, ExternalTermOpts opts) +term externalterm_to_term(const void *external_term, size_t size, Context *ctx, ExternalTermFlags flags) { size_t bytes_read = 0; - return externalterm_to_term_internal(external_term, size, ctx, opts, &bytes_read, true); + return externalterm_to_term_with_roots(external_term, size, ctx, flags, &bytes_read, 0, NULL); } enum ExternalTermResult externalterm_from_binary(Context *ctx, term *dst, term binary, size_t *bytes_read) @@ -150,22 +133,26 @@ enum ExternalTermResult externalterm_from_binary(Context *ctx, term *dst, term b if (!term_is_binary(binary)) { return EXTERNAL_TERM_BAD_ARG; } - // - // Copy the binary data to a buffer (in case of GC) - // size_t len = term_binary_size(binary); const uint8_t *data = (const uint8_t *) term_binary_data(binary); - uint8_t *buf = malloc(len); - if (IS_NULL_PTR(buf)) { - fprintf(stderr, "Unable to allocate %zu bytes for binary buffer.\n", len); - return EXTERNAL_TERM_MALLOC; + term t; + if (term_is_heap_binary(binary)) { + // If the binary is a heap binary, we will copy its buffer. We cannot simply add it + // to roots because it may be moved and pointer will change between computation of size + // and decoding. + uint8_t *buf = malloc(len); + if (IS_NULL_PTR(buf)) { + fprintf(stderr, "Unable to allocate %zu bytes for binary buffer.\n", len); + return EXTERNAL_TERM_MALLOC; + } + memcpy(buf, data, len); + t = externalterm_to_term_with_roots(buf, len, ctx, ExternalTermCopy, bytes_read, 0, NULL); + free(buf); + } else { + // If the binary is not a heap binary, the pointer will not move during GC. So we don't + // need to copy data. + t = externalterm_to_term_with_roots(data, len, ctx, ExternalTermCopy, bytes_read, 1, &binary); } - memcpy(buf, data, len); - // - // convert - // - term t = externalterm_to_term_internal(buf, len, ctx, false, bytes_read, true); - free(buf); if (term_is_invalid_term(t)) { return EXTERNAL_TERM_BAD_ARG; } else { diff --git a/src/libAtomVM/externalterm.h b/src/libAtomVM/externalterm.h index a1c7082d1..6cc931152 100644 --- a/src/libAtomVM/externalterm.h +++ b/src/libAtomVM/externalterm.h @@ -47,8 +47,9 @@ enum ExternalTermResult typedef enum { ExternalTermNoOpts = 0, - ExternalTermToHeapFragment = 1 -} ExternalTermOpts; + ExternalTermToHeapFragment = 1, + ExternalTermCopy = 2, +} ExternalTermFlags; /** * @brief Gets a term from external term data. @@ -57,30 +58,34 @@ typedef enum * @param external_term the external term that will be deserialized. * @param size to allocate for term. * @param ctx the context that owns the memory that will be allocated. - * @param opts if non-zero, use a heap fragment to store the generated - * terms. Otherwise, use the heap in the provided context. Note that when using the - * context heap, this function may call the GC, if there is insufficient space to - * store the generated terms. + * @param flags options to deserialize data. + * The following flags are supported: + * - `ExternalTermToHeapFragment' : use a heap fragment to store the generated + * terms (default is to use the heap in the provided context). Note that when + * using the context heap, this function may call the GC, if there is + * insufficient space to store the generated terms. + * - `ExternalTermCopy' : copy (binary and atom) data. Suitable when + * `external_term' pointer is not a const literal. * @returns a term. */ term externalterm_to_term( - const void *external_term, size_t size, Context *ctx, ExternalTermOpts opts); + const void *external_term, size_t size, Context *ctx, ExternalTermFlags flags); /** - * @brief Gets a term from external term data, and makes a copy of all data. - * - * @details Deserialize an external term from external format and returns a term. - * @param external_term the external term that will be deserialized. - * @param size to allocate for term. - * @param ctx the context that owns the memory that will be allocated. - * @param opts if non-zero, use a heap fragment to store the generated - * terms. Otherwise, use the heap in the provided context. Note that when using the - * context heap, this function may call the GC, if there is insufficient space to - * store the generated terms. - * @returns a term. + * @brief Create a term from external term data. This variant is meant to be used + * for distribution or in cases where the number of bytes read is important. + * + * @param external_term buffer containing external term + * @param size size of the external_term + * @param ctx current context in which terms may be stored + * @param flags additional flags (see above) + * @param bytes_read the number of bytes read off external_term in order to yield a term + * @param num_roots number of roots when invoking GC + * @param roots roots to preserve when invoking GC + * @return the parsed term */ -term externalterm_to_term_copy( - const void *external_term, size_t size, Context *ctx, ExternalTermOpts opts); +term externalterm_to_term_with_roots(const void *external_term, size_t size, Context *ctx, + ExternalTermFlags flags, size_t *bytes_read, size_t num_roots, term *roots); /** * @brief Create a term from a binary. diff --git a/src/libAtomVM/term.h b/src/libAtomVM/term.h index acbf0bd16..e9e7fc60e 100644 --- a/src/libAtomVM/term.h +++ b/src/libAtomVM/term.h @@ -407,9 +407,9 @@ static inline bool term_is_binary(term t) } /** - * @brief Checks if a term is a binary + * @brief Checks if a term is a refc binary * - * @details Returns \c true if a term is a binary stored on the heap, otherwise \c false. + * @details Returns \c true if a term is a ref-counted binary, otherwise \c false. * @param t the term that will be checked. * @return \c true if check succeeds, \c false otherwise. */ @@ -425,6 +425,25 @@ static inline bool term_is_refc_binary(term t) return false; } +/** + * @brief Checks if a term is a heap binary + * + * @details Returns \c true if a term is a binary stored on the heap, otherwise \c false. + * @param t the term that will be checked. + * @return \c true if check succeeds, \c false otherwise. + */ +static inline bool term_is_heap_binary(term t) +{ + /* boxed: 10 */ + if (term_is_boxed(t)) { + const term *boxed_value = term_to_const_term_ptr(t); + int masked_value = boxed_value[0] & TERM_BOXED_TAG_MASK; + return masked_value == TERM_BOXED_HEAP_BINARY; + } + + return false; +} + static inline bool term_refc_binary_is_const(term t) { const term *boxed_value = term_to_const_term_ptr(t);