From a44111ec5fc81199baf4d4ecca5c0dff4914cb8a Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Tue, 21 Jan 2025 23:13:32 +0100 Subject: [PATCH] Fix memory corruption caused by `binary:split/2,3` Signed-off-by: Paul Guyot --- CHANGELOG.md | 1 + src/libAtomVM/nifs.c | 6 +- src/libAtomVM/term.h | 2 +- tests/erlang_tests/test_binary_split.erl | 90 +++++++++++++++++------- tests/test.c | 2 +- 5 files changed, 71 insertions(+), 30 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5a3efa4fb..83fc96448 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,7 @@ certain VM instructions are used. - Fixed a double free when esp32 uart driver was closed, yielding an assert abort - Fixed compilation with latest debian gcc-arm-none-eabi - Fix `network:stop/0` on ESP32 so the network can be started again +- Fix a memory corruption caused by `binary:split/2,3` ## [0.6.5] - 2024-10-15 diff --git a/src/libAtomVM/nifs.c b/src/libAtomVM/nifs.c index 0a185988f..545564bd6 100644 --- a/src/libAtomVM/nifs.c +++ b/src/libAtomVM/nifs.c @@ -3121,15 +3121,19 @@ static term nif_binary_split(Context *ctx, int argc, term argv[]) size_t num_segments = 1; const char *temp_bin_data = bin_data; int temp_bin_size = bin_size; + size_t heap_size = 0; do { const char *found = (const char *) memmem(temp_bin_data, temp_bin_size, pattern_data, pattern_size); if (!found) break; num_segments++; + heap_size += CONS_SIZE + term_sub_binary_heap_size(argv[0], found - temp_bin_data); int next_search_offset = found - temp_bin_data + pattern_size; temp_bin_data += next_search_offset; temp_bin_size -= next_search_offset; } while (global && temp_bin_size >= pattern_size); + heap_size += CONS_SIZE + term_sub_binary_heap_size(argv[0], temp_bin_size); + term result_list = term_nil(); if (num_segments == 1) { @@ -3142,7 +3146,7 @@ static term nif_binary_split(Context *ctx, int argc, term argv[]) } // binary:split/2,3 always return sub binaries, except when copied binaries are as small as sub-binaries. - if (UNLIKELY(memory_ensure_free_with_roots(ctx, LIST_SIZE(num_segments, TERM_BOXED_SUB_BINARY_SIZE), 2, argv, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) { + if (UNLIKELY(memory_ensure_free_with_roots(ctx, heap_size, 2, argv, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) { RAISE_ERROR(OUT_OF_MEMORY_ATOM); } diff --git a/src/libAtomVM/term.h b/src/libAtomVM/term.h index cecad50c5..283e14e0a 100644 --- a/src/libAtomVM/term.h +++ b/src/libAtomVM/term.h @@ -1068,7 +1068,7 @@ static inline term term_from_literal_binary(const void *data, size_t size, Heap */ static inline size_t term_sub_binary_heap_size(term binary, size_t len) { - if (term_is_refc_binary(binary) && len >= SUB_BINARY_MIN) { + if ((term_is_refc_binary(binary) || term_is_sub_binary(binary)) && len >= SUB_BINARY_MIN) { return TERM_BOXED_SUB_BINARY_SIZE; } else { return term_binary_heap_size(len); diff --git a/tests/erlang_tests/test_binary_split.erl b/tests/erlang_tests/test_binary_split.erl index 079ff5046..0bb9eab73 100644 --- a/tests/erlang_tests/test_binary_split.erl +++ b/tests/erlang_tests/test_binary_split.erl @@ -20,20 +20,25 @@ -module(test_binary_split). --export([start/0, split_compare/3, split_compare/2, compare_bin/2, fail_split/1]). +-export([start/0, split_compare/3, split_compare/2, compare_bin/2, fail_split/1, id/1]). start() -> - split_compare(<<"Hello:World">>, <<"Hello">>, <<"World">>) + - split_compare(<<"Hello:::World:">>, <<"Hello">>, <<"::World:">>) + - split_compare(<<"Test:">>, <<"Test">>, <<>>) + - split_compare(<<":">>, <<>>, <<>>) + - split_compare(<<>>, <<>>) + - split_compare(<<"Test">>, <<>>) + - split_compare2(<<"Test">>, <<>>) + - split_compare2(<<"helloSEPARATORworld">>, <<"hello">>, <<"world">>) + - fail_split(<<>>) + - fail_split({1, 2}) + - fail_split2({1, 2}). + ok = split_compare(<<"Hello:World">>, <<"Hello">>, <<"World">>), + ok = split_compare(<<"Hello:::World:">>, <<"Hello">>, <<"::World:">>), + ok = split_compare(<<"Test:">>, <<"Test">>, <<>>), + ok = split_compare(<<":">>, <<>>, <<>>), + ok = split_compare(<<>>, <<>>), + ok = split_compare(<<"Test">>, <<>>), + ok = split_compare2(<<"Test">>, <<>>), + ok = split_compare2(<<"helloSEPARATORworld">>, <<"hello">>, <<"world">>), + ok = fail_split(<<>>), + ok = fail_split({1, 2}), + ok = fail_split2({1, 2}), + case erlang:system_info(machine) of + "BEAM" -> ok; + "ATOM" -> ok = memory_allocation_split() + end, + 0. split_compare(Bin, Part1) -> [A] = binary:split(Bin, <<":">>), @@ -41,7 +46,8 @@ split_compare(Bin, Part1) -> split_compare(Bin, Part1, Part2) -> [A, B] = binary:split(Bin, <<":">>), - compare_bin(Part1, A) + compare_bin(B, Part2). + ok = compare_bin(Part1, A), + ok = compare_bin(B, Part2). split_compare2(Bin, Part1) -> [A] = binary:split(Bin, <<"SEPARATOR">>), @@ -49,34 +55,64 @@ split_compare2(Bin, Part1) -> split_compare2(Bin, Part1, Part2) -> [A, B] = binary:split(Bin, <<"SEPARATOR">>), - compare_bin(Part1, A) + compare_bin(B, Part2). + ok = compare_bin(Part1, A), + ok = compare_bin(B, Part2). compare_bin(Bin1, Bin2) -> compare_bin(Bin1, Bin2, byte_size(Bin1) - 1). compare_bin(_Bin1, _Bin2, -1) -> - 1; + ok; compare_bin(Bin1, Bin2, Index) -> B1 = binary:at(Bin1, Index), - case binary:at(Bin2, Index) of - B1 -> - compare_bin(Bin1, Bin2, Index - 1); - _Any -> - 0 - end. + B1 = binary:at(Bin2, Index), + compare_bin(Bin1, Bin2, Index - 1). fail_split(Separator) -> try binary:split(<<"TESTBIN">>, Separator) of - _Any -> 2000 + _Any -> {unexpected, _Any} catch - error:badarg -> 1; - _:_ -> 4000 + error:badarg -> ok; + T:V -> {unexpected, {T, V}} end. fail_split2(Bin) -> try binary:split(Bin, <<"TESTSEPARATOR">>) of - _Any -> 2000 + _Any -> {unxpected, _Any} catch - error:badarg -> 1; - _:_ -> 4000 + error:badarg -> ok; + T:V -> {unxpected, {T, V}} end. + +memory_allocation_split() -> + Parent = self(), + Hostname = <<"atomvm">>, + {Pid, MonitorRef} = spawn_opt( + fun() -> + % Carefully designed lists to generate a crash on unix 64 bits + % This binary is 63 bytes long, so it's stored on heap on 64 bits + % binary:split should allocate sufficient bytes as subbinaries + % have to be on heap as well + HeapBin = list_to_binary([ + id(Hostname), <<"@atomvms3.object.stream.atomvms3.object.stream.atomvms3.o">> + ]), + List1 = binary:split(HeapBin, <<"@">>, [global]), + Parent ! {self(), List1} + end, + [link, monitor, {atomvm_heap_growth, minimum}] + ), + ok = + receive + {Pid, List1} -> + 2 = length(List1), + ok + after 5000 -> timeout + end, + normal = + receive + {'DOWN', MonitorRef, process, Pid, Reason} -> Reason + after 5000 -> timeout + end, + ok. + +id(X) -> X. diff --git a/tests/test.c b/tests/test.c index 0a3f372d8..dd1bf24a0 100644 --- a/tests/test.c +++ b/tests/test.c @@ -318,7 +318,7 @@ struct Test tests[] = { TEST_CASE(test_unicode), TEST_CASE_EXPECTED(test_binary_part, 12), - TEST_CASE_EXPECTED(test_binary_split, 16), + TEST_CASE(test_binary_split), TEST_CASE_COND(plusone, 134217728, LONG_MAX != 9223372036854775807),