diff --git a/CHANGELOG.md b/CHANGELOG.md index f69e307510..876be4966f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Support for utf8 encoding to `*_to_atom` and `atom_to_*` functions - `binary_to_atom/1` and `atom_to_binary/1` that default to utf8 (they were introduced with OTP23) - Added Pico cmake option `AVM_WAIT_BOOTSEL_ON_EXIT` (default `ON`) to allow tools to use automated `BOOTSEL` mode after main application exits +- Use UTF-8 encoding for atoms when using `erlang:term_to_binary/1`, in conformance with OTP-26 ### Fixed diff --git a/doc/src/programmers-guide.md b/doc/src/programmers-guide.md index 2cab1a65d1..92d3c81c82 100644 --- a/doc/src/programmers-guide.md +++ b/doc/src/programmers-guide.md @@ -587,6 +587,26 @@ The currently supported keys are enumerated in the following table: See the `word_size` key in the [System APIs](#system-apis) section for information about how to find the number of bytes used in a machine word on the current platform. +### External Term Format + +The `erlang:term_to_binary/1` and `erlang:binary_to_term/1,2` functions can be used to serialize arbitrary term data into and out of binary data. These operations can be useful for applications that wish to share term data over some network protocol, such as HTTP or MQTT, or wish to store serialized term data in some permanent storage (e.g., non-volatile storage on ESP32 devices). + +For example, to convert a term to a binary, use `erlang:term_to_binary/1`, e.g., + + %% erlang + Term = ... + Binary = erlang:term_to_binary(Term), + +And to convert the binary back to a term, use `erlang:binary_to_term/1,2`, e.g., + + %% erlang + Binary = ... + {Term, _Used} = erlang:binary_to_term(Binary, [used]), + +By default, AtomVM will encode all atoms using UTF-8 encoding. This encoding is the default encoding for OTP-26 and later releases. 
+ +For more information about Erlang external term format, consult the [Erlang Documentation](https://www.erlang.org/doc/apps/erts/erl_ext_dist.html) + ### System APIs You can obtain system information about the AtomVM virtual machine via the [`erlang:system_info/1`](./apidocs/erlang/estdlib/erlang.md#system_info1) function, which takes an atom parameter designating the desired datum. Allowable parameters include @@ -1693,7 +1713,7 @@ The station mode configuration supports the following options: | `dhcp_hostname` | `string() \| binary()` | no | `atomvm-` where `` is the factory-assigned MAC-address of the device | DHCP hostname for the connecting device | ```{important} -The WiFi network to which you are connecting must support DHCP and IPv4. +The WiFi network to which you are connecting must support DHCP and IPv4. IPv6 addressing is not yet supported on AtomVM. ``` diff --git a/src/libAtomVM/bitstring.c b/src/libAtomVM/bitstring.c index 8ec5bd85d0..76a9c014c0 100644 --- a/src/libAtomVM/bitstring.c +++ b/src/libAtomVM/bitstring.c @@ -153,7 +153,7 @@ enum UnicodeTransformDecodeResult bitstring_utf8_decode(const uint8_t *buf, size v |= (buf[3] & 0x3F); // overlong encoding or invalid codepoint if (v <= 0x10000 || v > 0x10FFFF) { - return false; + return UnicodeTransformDecodeFail; } *c = v; *out_size = 4; @@ -165,7 +165,7 @@ enum UnicodeTransformDecodeResult bitstring_utf8_decode(const uint8_t *buf, size v |= (buf[2] & 0x3F); // overlong encoding or surrogate if (v < 0x800 || (v >= 0xD800 && v <= 0xDFFF)) { - return false; + return UnicodeTransformDecodeFail; } *c = v; *out_size = 3; @@ -176,7 +176,7 @@ enum UnicodeTransformDecodeResult bitstring_utf8_decode(const uint8_t *buf, size v |= (buf[1] & 0x3F); // overlong encoding if (v < 0x80) { - return false; + return UnicodeTransformDecodeFail; } *c = v; *out_size = 2; diff --git a/src/libAtomVM/externalterm.c b/src/libAtomVM/externalterm.c index 088d84fbf0..fd3ec2b9e8 100644 --- a/src/libAtomVM/externalterm.c +++ 
b/src/libAtomVM/externalterm.c @@ -273,11 +273,11 @@ static int serialize_term(uint8_t *buf, term t, GlobalContext *glb) size_t atom_len; atom_ref_t atom_ref = atom_table_get_atom_ptr_and_len(glb->atom_table, atom_index, &atom_len); if (!IS_NULL_PTR(buf)) { - buf[0] = ATOM_EXT; - WRITE_16_UNALIGNED(buf + 1, atom_len); - atom_table_write_bytes(glb->atom_table, atom_ref, atom_len, buf + 3); + buf[0] = SMALL_ATOM_UTF8_EXT; + buf[1] = atom_len; + atom_table_write_bytes(glb->atom_table, atom_ref, atom_len, buf + 2); } - return 3 + atom_len; + return 2 + atom_len; } else if (term_is_tuple(t)) { size_t arity = term_get_tuple_arity(t); diff --git a/tests/erlang_tests/test_binary_to_term.erl b/tests/erlang_tests/test_binary_to_term.erl index 0f786564f9..83d60ca8f8 100644 --- a/tests/erlang_tests/test_binary_to_term.erl +++ b/tests/erlang_tests/test_binary_to_term.erl @@ -32,18 +32,19 @@ start() -> % Starting from OTP-26, atoms are encoded as UTF-8 by default. - TermToBinaryOptions = - case erlang:system_info(machine) of - "BEAM" -> - case erlang:system_info(version) >= "13.2" of - true -> [{minor_version, 1}]; - false -> [] - end; - "ATOM" -> - [] - end, - test_reverse(foo, <<131, 100, 0, 3, 102, 111, 111>>, TermToBinaryOptions), - test_reverse(bar, <<131, 100, 0, 3, 98, 97, 114>>, TermToBinaryOptions), + test_reverse(foo, {<<131, 119, 3, 102, 111, 111>>, <<131, 100, 0, 3, 102, 111, 111>>}), + test_reverse(bar, {<<131, 119, 3, 98, 97, 114>>, <<131, 100, 0, 3, 98, 97, 114>>}), + test_reverse( + '∀x∃y.f(x,y)', + <<131, 119, 15, 226, 136, 128, 120, 226, 136, 131, 121, 46, 102, 40, 120, 44, 121, 41>>, + [] + ), + test_reverse( + ':アトムVM', + <<131, 119, 16, 58, 227, 130, 162, 227, 131, 136, 227, 131, 160, 239, 188, 182, 239, 188, + 173>>, + [] + ), test_reverse(128, <<131, 97, 128>>), test_reverse(257, <<131, 98, 0, 0, 1, 1>>), test_reverse(0, <<131, 97, 0>>), @@ -51,23 +52,57 @@ start() -> test_reverse(32768, <<131, 98, 0, 0, 128, 0>>), test_reverse(-32768, <<131, 98, 255, 255, 
128, 0>>), test_reverse( - {foo, bar}, - <<131, 104, 2, 100, 0, 3, 102, 111, 111, 100, 0, 3, 98, 97, 114>>, - TermToBinaryOptions + {foo, bar}, { + <<131, 104, 2, 119, 3, 102, 111, 111, 119, 3, 98, 97, 114>>, + <<131, 104, 2, 100, 0, 3, 102, 111, 111, 100, 0, 3, 98, 97, 114>> + } ), - test_reverse({foo, 0}, <<131, 104, 2, 100, 0, 3, 102, 111, 111, 97, 0>>, TermToBinaryOptions), + test_reverse({foo, 0}, { + <<131, 104, 2, 119, 3, 102, 111, 111, 97, 0>>, + <<131, 104, 2, 100, 0, 3, 102, 111, 111, 97, 0>> + }), test_reverse([], <<131, 106>>), test_reverse( - [{foo, 0}, {bar, 1}], - <<131, 108, 0, 0, 0, 2, 104, 2, 100, 0, 3, 102, 111, 111, 97, 0, 104, 2, 100, 0, 3, 98, 97, - 114, 97, 1, 106>>, - TermToBinaryOptions + [{foo, 0}, {bar, 1}], { + <<131, 108, 0, 0, 0, 2, 104, 2, 119, 3, 102, 111, 111, 97, 0, 104, 2, 119, 3, 98, 97, + 114, 97, 1, 106>>, + <<131, 108, 0, 0, 0, 2, 104, 2, 100, 0, 3, 102, 111, 111, 97, 0, 104, 2, 100, 0, 3, 98, + 97, 114, 97, 1, 106>> + } ), test_reverse( [improper | list], - <<131, 108, 0, 0, 0, 1, 100, 0, 8, 105, 109, 112, 114, 111, 112, 101, 114, 100, 0, 4, 108, - 105, 115, 116>>, - TermToBinaryOptions + { + <<131, 108, 0, 0, 0, 1, 119, 8, 105, 109, 112, 114, 111, 112, 101, 114, 119, 4, 108, + 105, 115, 116>>, + <<131, 108, 0, 0, 0, 1, 100, 0, 8, 105, 109, 112, 114, 111, 112, 101, 114, 100, 0, 4, + 108, 105, 115, 116>> + } + ), + test_reverse({foo, bar}, { + <<131, 104, 2, 119, 3, 102, 111, 111, 119, 3, 98, 97, 114>>, + <<131, 104, 2, 100, 0, 3, 102, 111, 111, 100, 0, 3, 98, 97, 114>> + }), + test_reverse({foo, 0}, { + <<131, 104, 2, 119, 3, 102, 111, 111, 97, 0>>, + <<131, 104, 2, 100, 0, 3, 102, 111, 111, 97, 0>> + }), + test_reverse([], <<131, 106>>), + test_reverse( + [{foo, 0}, {bar, 1}], { + <<131, 108, 0, 0, 0, 2, 104, 2, 119, 3, 102, 111, 111, 97, 0, 104, 2, 119, 3, 98, 97, + 114, 97, 1, 106>>, + <<131, 108, 0, 0, 0, 2, 104, 2, 100, 0, 3, 102, 111, 111, 97, 0, 104, 2, 100, 0, 3, 98, + 97, 114, 97, 1, 106>> + } + ), + test_reverse( + 
[improper | list], { + <<131, 108, 0, 0, 0, 1, 119, 8, 105, 109, 112, 114, 111, 112, 101, 114, 119, 4, 108, + 105, 115, 116>>, + <<131, 108, 0, 0, 0, 1, 100, 0, 8, 105, 109, 112, 114, 111, 112, 101, 114, 100, 0, 4, + 108, 105, 115, 116>> + } ), test_reverse(<<"foobar">>, <<131, 109, 0, 0, 0, 6, 102, 111, 111, 98, 97, 114>>), test_reverse(<<":アトムVM">>, <<131, 109, 0, 0, 0, 6, 58, 162, 200, 224, 54, 45>>), @@ -86,7 +121,7 @@ start() -> 57, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 48, 49, 50, 51, 52, 53>> ), - ok = test_external_function(TermToBinaryOptions), + ok = test_external_function(), {32768, 6} = erlang:binary_to_term(<<131, 98, 0, 0, 128, 0, 127>>, [used]), test_catenate_and_split([foo, bar, 128, {foo, bar}, [a, b, c, {d}]]), @@ -99,7 +134,16 @@ start() -> test_reverse(T, Interop) -> test_reverse(T, Interop, []). -test_reverse(T, Interop, Options) -> +test_reverse(T, {Utf8Interop, Latin1Interop}, Options) -> + case get_otp_version() of + X when is_integer(X) andalso X >= 26 -> + test_reverse(T, Utf8Interop, Options); + atomvm -> + test_reverse(T, Utf8Interop, Options); + _ -> + test_reverse(T, Latin1Interop, Options) + end; +test_reverse(T, Interop, Options) when is_binary(Interop) andalso is_list(Options) -> Bin = case Options of [] -> erlang:term_to_binary(T); @@ -173,18 +217,27 @@ mutate_bin(Bin, I) -> I2 = Ith bxor 16#FF, <>. 
-test_external_function(Options) -> +test_external_function() -> T = [fun ?MODULE:apply/2, fun ?MODULE:apply/3], Bin = - case Options of - [] -> erlang:term_to_binary(T); - _ -> erlang:term_to_binary(T, Options) + case get_otp_version() of + X when is_integer(X) andalso X >= 26 orelse X =:= atomvm -> + %% expect SMALL_ATOM_UTF8_EXT encoding + <<131, 108, 0, 0, 0, 2, 113, 119, 19, 116, 101, 115, 116, 95, 98, 105, 110, 97, 114, + 121, 95, 116, 111, 95, 116, 101, 114, 109, 119, 5, 97, 112, 112, 108, 121, 97, + 2, 113, 119, 19, 116, 101, 115, 116, 95, 98, 105, 110, 97, 114, 121, 95, 116, + 111, 95, 116, 101, 114, 109, 119, 5, 97, 112, 112, 108, 121, 97, 3, 106>>; + _ -> + %% expect ATOM_EXT encoding + <<131, 108, 0, 0, 0, 2, 113, 100, 0, 19, 116, 101, 115, 116, 95, 98, 105, 110, 97, + 114, 121, 95, 116, 111, 95, 116, 101, 114, 109, 100, 0, 5, 97, 112, 112, 108, + 121, 97, 2, 113, 100, 0, 19, 116, 101, 115, 116, 95, 98, 105, 110, 97, 114, 121, + 95, 116, 111, 95, 116, 101, 114, 109, 100, 0, 5, 97, 112, 112, 108, 121, 97, 3, + 106>> end, - Bin = - <<131, 108, 0, 0, 0, 2, 113, 100, 0, 19, 116, 101, 115, 116, 95, 98, 105, 110, 97, 114, 121, - 95, 116, 111, 95, 116, 101, 114, 109, 100, 0, 5, 97, 112, 112, 108, 121, 97, 2, 113, - 100, 0, 19, 116, 101, 115, 116, 95, 98, 105, 110, 97, 114, 121, 95, 116, 111, 95, 116, - 101, 114, 109, 100, 0, 5, 97, 112, 112, 108, 121, 97, 3, 106>>, + + Bin = erlang:term_to_binary(T), + [Fun2, Fun3] = binary_to_term(Bin), true = is_function(Fun2), true = is_function(Fun3), @@ -337,3 +390,11 @@ expect_badarg(Fun) -> _:badarg -> ok end. + +get_otp_version() -> + case erlang:system_info(machine) of + "BEAM" -> + list_to_integer(erlang:system_info(otp_release)); + _ -> + atomvm + end. 
diff --git a/tests/erlang_tests/test_gc.erl b/tests/erlang_tests/test_gc.erl index ad35707d20..84d652da27 100644 --- a/tests/erlang_tests/test_gc.erl +++ b/tests/erlang_tests/test_gc.erl @@ -26,7 +26,7 @@ start() -> {HeapSize, _} = make_a_big_heap(), MemorySize = erlang:process_info(self(), memory), true = erlang:garbage_collect(), - NewHeapSize = erlang:process_info(self(), heap_size), + {heap_size, NewHeapSize} = erlang:process_info(self(), heap_size), ok = case NewHeapSize < HeapSize of true -> ok; @@ -42,7 +42,7 @@ start() -> make_a_big_heap() -> LargeBlob = create_string(1024, []), - HeapSize = erlang:process_info(self(), heap_size), + {heap_size, HeapSize} = erlang:process_info(self(), heap_size), {HeapSize, length(LargeBlob)}. create_string(0, Accum) ->