From 43693b8052ebfc707a4a6258c2057cb32544fce5 Mon Sep 17 00:00:00 2001 From: Edward Nolan Date: Sat, 21 Sep 2024 17:29:36 -0400 Subject: [PATCH] utfview conditional borrowing --- include/UtfView/to_utf_view.hpp | 25 +++++++++++ paper/P2728.md | 64 +++++++++++++++++++---------- src/UtfView/tests/to_utf_view.t.cpp | 4 ++ 3 files changed, 71 insertions(+), 22 deletions(-) diff --git a/include/UtfView/to_utf_view.hpp b/include/UtfView/to_utf_view.hpp index a6a0add..05f33e9 100644 --- a/include/UtfView/to_utf_view.hpp +++ b/include/UtfView/to_utf_view.hpp @@ -1162,4 +1162,29 @@ inline constexpr detail::to_utf_impl to_utf32; /* PAPER: } */ +template +inline constexpr bool std::ranges::enable_borrowed_range> = + std::ranges::enable_borrowed_range; + +template +inline constexpr bool std::ranges::enable_borrowed_range> = + std::ranges::enable_borrowed_range; + +template +inline constexpr bool std::ranges::enable_borrowed_range> = + std::ranges::enable_borrowed_range; + +/* PAPER: namespace std::ranges { */ +/* PAPER: */ +/* PAPER: template */ +/* PAPER: inline constexpr bool enable_borrowed_range> = enable_borrowed_range; */ +/* PAPER: */ +/* PAPER: template */ +/* PAPER: inline constexpr bool enable_borrowed_range> = enable_borrowed_range; */ +/* PAPER: */ +/* PAPER: template */ +/* PAPER: inline constexpr bool enable_borrowed_range> = enable_borrowed_range; */ +/* PAPER: */ +/* PAPER: } */ + #endif // UTFVIEW_TO_UTF_VIEW_HPP diff --git a/paper/P2728.md b/paper/P2728.md index 3fab3f5..c139f70 100644 --- a/paper/P2728.md +++ b/paper/P2728.md @@ -278,25 +278,8 @@ In short, rejecting `char` and `wchar_t` forces you to write "`| as_char8_t`" everywhere you want to use a `std::string` with the interfaces proposed in this paper. -SG-16 has previously expressed support for rejecting `char` and `wchar_t`. -[Here](https://github.com/sg16-unicode/sg16-meetings/blob/master/README-2023.md#april-12th-2023) -is the relevant SG-16 poll: - -*UTF transcoding interfaces provided by the C++ standard library should -operate on charN_t types, with support for other types provided by adapters, -possibly with a special case for char and wchar_t when their associated -literal encodings are UTF.* - -+----+---+---+---+----+ -| SF | F | N | A | SA | -+====+===+===+===+====+ -| 6 |1 |0 |0 | 1 | -+----+---+---+---+----+ - -(This paper ignores the "possibly with a special case for char and -wchar_t when their associated literal encodings are UTF" part. Making the -evaluation of a concept change based on the literal encoding seems like a -flaky move; the literal encoding can change TU to TU.) +SG-16 has previously expressed strong support for rejecting `char` and +`wchar_t`, as can be observed in the polling history section. The feeling in SG-16 was that the `charN_t` types are designed to represent UTF encodings, and `char` is not. A `char const *` string could be in any one @@ -5241,6 +5224,19 @@ namespace std::uc { inline constexpr @*unspecified*@ to_utf32; } + +namespace std::ranges { + + template + inline constexpr bool enable_borrowed_range> = enable_borrowed_range; + + template + inline constexpr bool enable_borrowed_range> = enable_borrowed_range; + + template + inline constexpr bool enable_borrowed_range> = enable_borrowed_range; + +} ``` The exposition-only concept `@*to-utf-view-iterator-optimizable*@` is true if @@ -5255,7 +5251,7 @@ down. The iterator type of `@*to-utf-view-impl*@` is `@*utf-iterator*@`. `@*utf-iterator*@` is an iterator that transcodes from UTF-N to UTF-M, where N and M are each one of 8, 16, or 32. N may equal -M. +M. `@*utf-iterator*@` uses a mapping between character types and UTF encodings, which is that that `char` and `char8_t` correspond to UTF-8, `char16_t` @@ -5393,10 +5389,10 @@ expression-equivalent to: - Otherwise, if `T` is an array type of known bound, then: - - If the array extent is nonzero and the last element of the array is zero, + - If the array extent is nonzero and the last element of the array is zero, then `V(std::ranges::subrange(std::ranges::begin(E), --std::ranges::end(E)))` - - Otherwise, + - Otherwise, `V(std::ranges::subrange(std::ranges::begin(E), std::ranges::end(E)))` - Otherwise, `V(std::views::all(E))` @@ -5915,6 +5911,30 @@ exception of `empty_view{} | to_utfN`, the following are always true: Add the feature test macro `__cpp_lib_unicode_transcoding`. +## Relevant Polls/Minutes + +TODO + +[Here](https://github.com/sg16-unicode/sg16-meetings/blob/master/README-2023.md#april-12th-2023) +is the relevant SG-16 poll: + +*UTF transcoding interfaces provided by the C++ standard library should +operate on charN_t types, with support for other types provided by adapters, +possibly with a special case for char and wchar_t when their associated +literal encodings are UTF.* + ++----+---+---+---+----+ +| SF | F | N | A | SA | ++====+===+===+===+====+ +| 6 |1 |0 |0 | 1 | ++----+---+---+---+----+ + +(This paper ignores the "possibly with a special case for char and +wchar_t when their associated literal encodings are UTF" part. Making the +evaluation of a concept change based on the literal encoding seems like a +flaky move; the literal encoding can change TU to TU.) + + ## Design notes None of the proposed interfaces is subject to change in future versions of diff --git a/src/UtfView/tests/to_utf_view.t.cpp b/src/UtfView/tests/to_utf_view.t.cpp index 7bbf1c9..d998066 100644 --- a/src/UtfView/tests/to_utf_view.t.cpp +++ b/src/UtfView/tests/to_utf_view.t.cpp @@ -1140,6 +1140,10 @@ constexpr bool empty_test() { return true; } +static_assert(std::ranges::borrowed_range>); +static_assert(std::ranges::borrowed_range>); +static_assert(std::ranges::borrowed_range>); + CONSTEXPR_UNLESS_MSVC bool utf_view_test() { if (!input_iterator_test(std::initializer_list{u8'x'})) { return false;