diff --git a/include/UtfView/to_utf_view.hpp b/include/UtfView/to_utf_view.hpp index 05f33e9..f48188c 100644 --- a/include/UtfView/to_utf_view.hpp +++ b/include/UtfView/to_utf_view.hpp @@ -1102,19 +1102,15 @@ to_utf32_view(R&&) -> to_utf32_view>; /* !PAPER */ -template -using to_utf_view = std::conditional_t< - std::is_same_v, to_utf8_view, - std::conditional_t< - std::is_same_v, to_utf16_view, - std::conditional_t, to_utf32_view, void>>>; - -/* PAPER: template<@*code-unit-to*@ ToType, @*utf-range*@ V> */ -/* PAPER: using to_utf_view = @*unspecified*@ */ -/* PAPER: */ - namespace detail { + template + using to_utf_view = std::conditional_t< + std::is_same_v, to_utf8_view, + std::conditional_t< + std::is_same_v, to_utf16_view, + std::conditional_t, to_utf32_view, void>>>; + template struct to_utf_impl : std::ranges::range_adaptor_closure> { template diff --git a/paper/P2728.md b/paper/P2728.md index b8d2799..e2a252c 100644 --- a/paper/P2728.md +++ b/paper/P2728.md @@ -87,9 +87,9 @@ monofont: "DejaVu Sans Mono" as an implementation detail of separate `to_utf8_view`, `to_utf16_view`, and `to_utf32_view` classes, addressing broken deduction guides in the previous revision. -- Remove `project_view` and copy most of its implementation into separate `char8_view`, - `char16_view`, and `char32_view` classes, addressing broken deduction guides in the - previous revision. +- Remove `project_view` and copy most of its implementation into separate + `char8_view`, `char16_view`, and `char32_view` classes, addressing broken + deduction guides in the previous revision. - Change `utf_iterator` to an exposition-only member class of `@*utf-view-impl*@`. - Eliminate iterator unpacking mechanism and replace it with an alternative @@ -3236,18 +3236,18 @@ subsequent operations on it and its iterators are memory safe. ## Optimization for transcoding views wrapping other transcoding views -In generic contexts, users will create `to_utf_view`s wrapping iterators of -other `to_utf_view`s. 
This presents a problem for a naive implementation -because when `to_utf_view` is wrapping a bidirectional range, the number of -iterators in each successive `to_utf_view` wrapper increases geometrically +In generic contexts, users will create `to_utfN_view`s wrapping iterators of +other `to_utfN_view`s. This presents a problem for a naive implementation +because when `to_utfN_view` is wrapping a bidirectional range, the number of +iterators in each successive `to_utfN_view` wrapper increases geometrically unless we use workarounds. -The workaround makes it so that when a `to_utf_view` is constructed from -another `to_utf_view`'s iterators, instead of storing those iterators in the -iterators of the outer `to_utf_view`, the outer `to_utf_view`'s iterators have -identical contents to the inner `to_utf_view`'s iterators, the only difference -being the output encoding. This also allows the outer `to_utf_view`'s -iterators to reconstruct the inner `to_utf_view` iterator when its `base()` +The workaround makes it so that when a `to_utfN_view` is constructed from +another `to_utfN_view`'s iterators, instead of storing those iterators in the +iterators of the outer `to_utfN_view`, the outer `to_utfN_view`'s iterators have +identical contents to the inner `to_utfN_view`'s iterators, the only difference +being the output encoding. This also allows the outer `to_utfN_view`'s +iterators to reconstruct the inner `to_utfN_view` iterator when its `base()` member function is invoked, without actually storing it. This optimization is only needed when the underlying range is bidirectional @@ -5211,9 +5211,6 @@ namespace std::uc { template to_utf32_view(R&&) -> to_utf32_view>; - template<@*code-unit-to*@ ToType, @*utf-range*@ V> - using to_utf_view = @*unspecified*@ - template<@*code-unit-to*@ ToType> inline constexpr @*unspecified*@ to_utf; @@ -5367,9 +5364,7 @@ units below are inclusive.) `to_utf8_view` produces a UTF-8 view of the elements from a `@*utf-range*@`. 
`to_utf16_view` produces a UTF-16 view of the elements from a `@*utf-range*@`. `to_utf32_view` produces a UTF-32 view of the elements from a -`@*utf-range*@`. `to_utf_view` is an alias template that yields -`to_utf8_view` if `ToType` is `char8_t`, `to_utf16_view` if `ToType` is -`char16_t`, or `to_utf32_view` if `ToType` is `char32_t`. +`@*utf-range*@`. The names `to_utf8`, `to_utf16`, and `to_utf32` denote range adaptor objects ([range.adaptor.object]). `to_utf` denotes a range adaptor object @@ -5816,129 +5811,32 @@ ill-formed. The expression `as_charN_t(E)` is expression-equivalent to: \[Example 1: ```c++ -// todo +std::vector path_as_ints = {U'C', U':', U'\x00010000'}; +std::filesystem::path path = path_as_ints | as_char32_t | std::ranges::to(); +auto const& native_path = path.native(); +if (native_path != std::wstring{L'C', L':', L'\xD800', L'\xDC00'}) { + return false; +} ``` — end example\] -### Why there are three `to_utfN_view`s views plus `utf_view` +### Why there are three `to_utfN_view`s plus `utf_view`, and three `as_charN_t_view`s The views in `std::ranges` are constrained to accept only `std::ranges::view` template parameters. However, they accept `std::ranges::viewable_range`s in -practice, because they each have a deduction guide that likes like this: +practice, because they each have a deduction guide that looks like this: ```c++ template to_utf8_view(R &&) -> to_utf8_view>; ``` -It's not possible to make this work for `utf_view`, since to use it you must -supply a `format` NTTP. So, we need the `to_utfN_view`s. It might be possible -to make `utf_view` an exposition-only implementation detail, but I think some -users might find use for it, especially in generic contexts. For instance: - -```c++ -template -auto f(std::uc::utf_view const & view) { - // Use F, V, and view here.... 
-} -``` - -### `unpacking_view` - -For a particular `V` being adapted by `to_utfN`, there are two cases: 1) `V` -is unpackable (taht is, unpacking produces different iterator/sentinel types -than what you had before unpacking), and 2) `V` is not unpackable. The second -case is easy; since `V` is already unpacked, you just construct a `to_utfN_view` -from `V`, and you're done. The first case is a little harder. For that case, -we either need to let `to_utfN_view` know statically that it must do some -unpacking, or we must introduce yet another view that does it for us. -Introducing a view is the right answer, because introducing an NTTP to -`to_utfN_view` would be unergonomic. For instance: - -```c++ -template -void f(std::uc::utf32_view const & utf32) { - // ... -} -``` - -What do we write for the `/* ??? */` -- the NTTP that indicates whether `V` is -already unpacked or not? We have to do a nontrivial amount of work involving -`V` to know what to write there. - -So, we have `unpacking_view` instead. When `V` is unpackable, `to_utfN` -returns a `to_utfN_view>`. - -In the previous revision of this paper, the `to_utfN` adaptor unpacked the -adapted range most of the time, except for the one case where it could not. -That case was when `r` in `r | to_utfN` is an rvalue whose `begin()` and -`end()` are unpackable. In that case, we needed a special-case type called -`unpacking_owning_view` that would store `r` and unpack `r.begin()` and -`r.end()`. This is not ideal, because doing the unpacking in the adaptor -loses information. It loses information because the unpacked view used to -construct `to_utfN_view` is a `ranges::subrange`, not the original range. For -example, if you start with an lvalue `vector`, then keeping -`unpacking_view>>` means that you can get access to the -`vector` itself with a chain of `base()` calls. You lose that if it's a -`subrange::iterator, typename vector::iterator>`. 
- -### Adaptor examples - -```c++ -struct my_text_type -{ - my_text_type() = default; - my_text_type(std::u8string utf8) : utf8_(std::move(utf8)) {} - - auto begin() const { - return std::uc::utf_8_to_32_iterator( - utf8_.begin(), utf8_.begin(), utf8_.end()); - } - auto end() const { - return std::uc::utf_8_to_32_iterator( - utf8_.begin(), utf8_.end(), utf8_.end()); - } - -private: - std::u8string utf8_; -}; - -static_assert(std::is_same_v< - decltype(my_text_type(u8"text") | std::uc::to_utf16), - std::uc::utf16_view>>>); - -static_assert(std::is_same_v< - decltype(u8"text" | std::uc::to_utf16), - std::uc::utf16_view>>); - -static_assert(std::is_same_v< - decltype(std::u8string(u8"text") | std::uc::to_utf16), - std::uc::utf16_view>>); - -std::u8string const str = u8"text"; - -static_assert(std::is_same_v< - decltype(str | std::uc::to_utf16), - std::uc::utf16_view>>); - -static_assert(std::is_same_v< - decltype(str.c_str() | std::uc::to_utf16), - std::uc::utf16_view>>); - -static_assert(std::is_same_v< - decltype(std::ranges::empty_view{} | std::uc::as_char16_t), - std::ranges::empty_view>); - -std::u16string str2 = u"text"; - -static_assert(std::is_same_v< - decltype(str2 | std::uc::to_utf16), - std::uc::utf16_view>>); - -static_assert(std::is_same_v< - decltype(str2.c_str() | std::uc::to_utf16), - std::uc::utf16_view>>); -``` +It's not possible to make this work for any view that's a class template that +accepts a template parameter other than the underlying view, because of the +all-or-nothing nature of deduction guides. So we need separate `to_utfN_view`s +and separate `as_charN_t_view`s instead of having them simply be alias +templates for a hypothetical generic `to_utf_view` or +`as_charN_t_view`, respectively. ### Why `utf_view` always uses `utf_iterator`, even in UTF-N to UTF-N cases