diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000..e0b0428
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,22 @@
+BasedOnStyle: WebKit # start from the WebKit preset; every key below overrides it
+ColumnLimit: 90
+AlwaysBreakTemplateDeclarations: Yes
+AlignAfterOpenBracket: Align
+AllowShortFunctionsOnASingleLine: Empty
+BreakConstructorInitializers: BeforeColon
+PackConstructorInitializers: Never
+Cpp11BracedListStyle: true
+SpaceBeforeCpp11BracedList: false
+BreakBeforeBraces: Custom # brace placement comes from BraceWrapping below
+BraceWrapping:
+  AfterFunction: false # no line break before a function's opening brace
+AllowShortEnumsOnASingleLine: false
+BreakStringLiterals: false
+AlwaysBreakAfterReturnType: None
+PenaltyReturnTypeOnItsOwnLine: 200
+ContinuationIndentWidth: 4
+IndentWidth: 2
+BreakBeforeBinaryOperators: None
+IndentAccessModifiers: false
+AccessModifierOffset: -2 # with IndentWidth 2 this cancels the indent of access specifiers
+ReflowComments: false
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..301fc68
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,58 @@
+name: CI
+
+on: # runs for pushes to main and for pull requests that target main
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  gcc-linux: # GCC container job: sanitizer + coverage build, paper build, coverage upload
+    runs-on: ubuntu-latest
+    container:
+      image: ghcr.io/ednolan/ubuntu24.10_gcc14.2.0
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          submodules: recursive # the dependencies under deps/ are git submodules
+      - name: Run
+        run: |
+          ./ci.sh -DCMAKE_CXX_FLAGS='-fsanitize=address -fsanitize=undefined -fsanitize-undefined-trap-on-error -coverage'
+      - name: Paper
+        run: |
+          cmake --build ./build -t p2728
+      - name: Coverage # capture everything, then drop system headers, tests and deps from the report
+        run: |
+          lcov --directory ./build --capture --output-file ./build/coverage_all.info
+          lcov --remove ./build/coverage_all.info -o ./build/coverage.info '/usr/include/*' "$PWD/src/UtfView/tests/*" "$PWD/deps/*"
+      - name: Coveralls
+        uses: coverallsapp/github-action@master # NOTE(review): follows a moving branch; consider pinning to a release tag
+        with:
+          path-to-lcov: ${{runner.workspace}}/UtfView/build/coverage.info
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+  clang-linux: # Clang container job: sanitizer build against libc++, no coverage step
+    runs-on: ubuntu-latest
+    container:
+      image: ghcr.io/ednolan/ubuntu24.10_clang19.1.0rc3
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          submodules: recursive
+      - name: Run
+        run: |
+          ./ci.sh -DCMAKE_CXX_FLAGS='-fsanitize=address -fsanitize=undefined -fsanitize-undefined-trap-on-error -stdlib=libc++'
+  msvc-windows: # hosted Windows runner: ci.sh is invoked with no extra flags
+    runs-on: windows-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          submodules: recursive
+      - name: Run
+        shell: bash # ci.sh is a shell script, so bash is requested explicitly on Windows
+        run: |
+          ./ci.sh
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..567609b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+build/
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..0da792a
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,12 @@
+[submodule "deps/stl_interfaces"]
+ path = deps/stl_interfaces
+ url = https://github.com/ednolan/stl_interfaces.git
+[submodule "deps/assert"]
+ path = deps/assert
+ url = https://github.com/boostorg/assert.git
+[submodule "deps/config"]
+ path = deps/config
+ url = https://github.com/boostorg/config.git
+[submodule "deps/wg21"]
+ path = deps/wg21
+ url = https://github.com/mpark/wg21.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..42fa7cb
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,27 @@
+# SPDX-License-Identifier: GPL-3.0-only
+# Top-level build script: wires up the dependencies under deps/, then the library and the paper.
+cmake_minimum_required(VERSION 3.27)
+project(utfview CXX)
+
+if (BUILD_TESTING)  # CTest is only loaded when BUILD_TESTING is already set by the caller
+  include(CTest)
+endif()
+
+if (NOT TARGET boost_config)  # skip our copy when the target already exists
+  add_subdirectory(deps/config)
+endif()
+
+if (NOT TARGET boost_assert)  # same rule as boost_config
+  add_subdirectory(deps/assert)
+endif()
+
+if (NOT TARGET boost_stl_interfaces)
+  # Directory-scoped definition: seen by targets in subdirectories added after this point.
+  add_compile_definitions(BOOST_STL_INTERFACES_ENABLE_DEDUCED_THIS)
+  add_subdirectory(deps/stl_interfaces)
+else()  # unlike the two dependencies above, an existing target is rejected, not reused
+  message(FATAL_ERROR "Conflicting dependency: boost_stl_interfaces already exists, but we need our own fork")
+endif()
+
+add_subdirectory(src/UtfView)
+add_subdirectory(paper)
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..94a9ed0
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,674 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc.
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+ The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works. By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users. We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors. You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+ To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights. Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received. You must make sure that they, too, receive
+or can get the source code. And you must show them these terms so they
+know their rights.
+
+ Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+ For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software. For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+ Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so. This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software. The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable. Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products. If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+ Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary. To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ TERMS AND CONDITIONS
+
+ 0. Definitions.
+
+ "This License" refers to version 3 of the GNU General Public License.
+
+ "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+ "The Program" refers to any copyrightable work licensed under this
+License. Each licensee is addressed as "you". "Licensees" and
+"recipients" may be individuals or organizations.
+
+ To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy. The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+ A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+ To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy. Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+ To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies. Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+ An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License. If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+ 1. Source Code.
+
+ The "source code" for a work means the preferred form of the work
+for making modifications to it. "Object code" means any non-source
+form of a work.
+
+ A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+ The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form. A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+ The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities. However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work. For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+ The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+ The Corresponding Source for a work in source code form is that
+same work.
+
+ 2. Basic Permissions.
+
+ All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met. This License explicitly affirms your unlimited
+permission to run the unmodified Program. The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work. This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+ You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force. You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright. Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+ Conveying under any other circumstances is permitted solely under
+the conditions stated below. Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+ 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+ No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+ When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+ 4. Conveying Verbatim Copies.
+
+ You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+ You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+ 5. Conveying Modified Source Versions.
+
+ You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+ a) The work must carry prominent notices stating that you modified
+ it, and giving a relevant date.
+
+ b) The work must carry prominent notices stating that it is
+ released under this License and any conditions added under section
+ 7. This requirement modifies the requirement in section 4 to
+ "keep intact all notices".
+
+ c) You must license the entire work, as a whole, under this
+ License to anyone who comes into possession of a copy. This
+ License will therefore apply, along with any applicable section 7
+ additional terms, to the whole of the work, and all its parts,
+ regardless of how they are packaged. This License gives no
+ permission to license the work in any other way, but it does not
+ invalidate such permission if you have separately received it.
+
+ d) If the work has interactive user interfaces, each must display
+ Appropriate Legal Notices; however, if the Program has interactive
+ interfaces that do not display Appropriate Legal Notices, your
+ work need not make them do so.
+
+ A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit. Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+ 6. Conveying Non-Source Forms.
+
+ You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+ a) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by the
+ Corresponding Source fixed on a durable physical medium
+ customarily used for software interchange.
+
+ b) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by a
+ written offer, valid for at least three years and valid for as
+ long as you offer spare parts or customer support for that product
+ model, to give anyone who possesses the object code either (1) a
+ copy of the Corresponding Source for all the software in the
+ product that is covered by this License, on a durable physical
+ medium customarily used for software interchange, for a price no
+ more than your reasonable cost of physically performing this
+ conveying of source, or (2) access to copy the
+ Corresponding Source from a network server at no charge.
+
+ c) Convey individual copies of the object code with a copy of the
+ written offer to provide the Corresponding Source. This
+ alternative is allowed only occasionally and noncommercially, and
+ only if you received the object code with such an offer, in accord
+ with subsection 6b.
+
+ d) Convey the object code by offering access from a designated
+ place (gratis or for a charge), and offer equivalent access to the
+ Corresponding Source in the same way through the same place at no
+ further charge. You need not require recipients to copy the
+ Corresponding Source along with the object code. If the place to
+ copy the object code is a network server, the Corresponding Source
+ may be on a different server (operated by you or a third party)
+ that supports equivalent copying facilities, provided you maintain
+ clear directions next to the object code saying where to find the
+ Corresponding Source. Regardless of what server hosts the
+ Corresponding Source, you remain obligated to ensure that it is
+ available for as long as needed to satisfy these requirements.
+
+ e) Convey the object code using peer-to-peer transmission, provided
+ you inform other peers where the object code and Corresponding
+ Source of the work are being offered to the general public at no
+ charge under subsection 6d.
+
+ A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+ A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling. In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage. For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product. A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+ "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source. The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+ If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information. But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+ The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed. Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+ Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+ 7. Additional Terms.
+
+ "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law. If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+ When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it. (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.) You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+ Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+ a) Disclaiming warranty or limiting liability differently from the
+ terms of sections 15 and 16 of this License; or
+
+ b) Requiring preservation of specified reasonable legal notices or
+ author attributions in that material or in the Appropriate Legal
+ Notices displayed by works containing it; or
+
+ c) Prohibiting misrepresentation of the origin of that material, or
+ requiring that modified versions of such material be marked in
+ reasonable ways as different from the original version; or
+
+ d) Limiting the use for publicity purposes of names of licensors or
+ authors of the material; or
+
+ e) Declining to grant rights under trademark law for use of some
+ trade names, trademarks, or service marks; or
+
+ f) Requiring indemnification of licensors and authors of that
+ material by anyone who conveys the material (or modified versions of
+ it) with contractual assumptions of liability to the recipient, for
+ any liability that these contractual assumptions directly impose on
+ those licensors and authors.
+
+ All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10. If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term. If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+ If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+ Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+ 8. Termination.
+
+ You may not propagate or modify a covered work except as expressly
+provided under this License. Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+ However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+ Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+ Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License. If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+ 9. Acceptance Not Required for Having Copies.
+
+ You are not required to accept this License in order to receive or
+run a copy of the Program. Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance. However,
+nothing other than this License grants you permission to propagate or
+modify any covered work. These actions infringe copyright if you do
+not accept this License. Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+ 10. Automatic Licensing of Downstream Recipients.
+
+ Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License. You are not responsible
+for enforcing compliance by third parties with this License.
+
+ An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations. If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+ You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License. For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+ 11. Patents.
+
+ A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based. The
+work thus licensed is called the contributor's "contributor version".
+
+ A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version. For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+ Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+ In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement). To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+ If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients. "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+ If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+ A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License. You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+ Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+ 12. No Surrender of Others' Freedom.
+
+ If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all. For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+ 13. Use with the GNU Affero General Public License.
+
+ Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work. The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+ 14. Revised Versions of this License.
+
+ The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation. If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+ If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+ Later license versions may give you additional or different
+permissions. However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+ 15. Disclaimer of Warranty.
+
+ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. Limitation of Liability.
+
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+ 17. Interpretation of Sections 15 and 16.
+
+ If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+ If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+ <program> Copyright (C) <year> <name of author>
+ This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+ You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<https://www.gnu.org/licenses/>.
+
+ The GNU General Public License does not permit incorporating your program
+into proprietary programs. If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License. But first, please read
+<https://www.gnu.org/licenses/why-not-lgpl.html>.
diff --git a/P2728_unicode_1_transcoding.md b/P2728_unicode_1_transcoding.md
deleted file mode 100644
index 2d53194..0000000
--- a/P2728_unicode_1_transcoding.md
+++ /dev/null
@@ -1,1873 +0,0 @@
----
-title: "Unicode in the Library, Part 1: UTF Transcoding"
-document: P2728R6
-date: 2023-07-11
-audience:
- - SG-16 Unicode
- - LEWG
-author:
- - name: Zach Laine
- email:
-toc: true
-monofont: "DejaVu Sans Mono"
-
----
-
-# Changelog
-
-## Changes since R0
-
-- When naming code points in interfaces, use `char32_t`.
-- When naming code units in interfaces, use `charN_t`.
-- Remove each eager algorithm, leaving in its corresponding view.
-- Remove all the output iterators.
-- Change the template parameters of `utfN_view` to the types of the from-range,
- instead of the types of the transcoding iterators used to implement the view.
-- Remove all make-functions.
-- Replace the misbegotten `as_utfN()` functions with the `as_utfN` view
- adaptors that should have been there all along.
-- Add missing `transcoding_error_handler` concept.
-- Turn `unpack_iterator_and_sentinel` into a CPO.
-- Lower the UTF iterator concepts from bidirectional to input.
-
-## Changes since R1
-
-- Reintroduce the transcoding-from-a-buffer example.
-- Generalize `null_sentinel_t` to a non-Unicode-specific facility.
-- In utility functions that search for ill-formed encoding, take a range
- argument instead of a pair of iterator arguments.
-- Replace `utf{8,16,32}_view` with a single `utf_view`.
-
-## Changes since R2
-
-- Add `noexcept` where appropriate.
-- Remove non-essential constants and utility functions, and elaborate on the
- usage of the ones that remain.
-- Note differences from similar elements proposed in [@P1629R1].
-- Extend the examples slightly.
-- Correct an error in the description of the view adaptors' semantics, and
- provide several examples of their use.
-
-## Changes since R3
-
-- Changed the definition of the `code_unit` concept, and added `as_charN_t`
- adaptors.
-- Removed the utility functions and Unicode-related constants, except
- `replacement_character`.
-- Changed the constraint on `utf_iterator` slightly.
-- Change `null_sentinel_t` back to being Unicode-specific.
-
-## Changes since R4
-
-- Replace `unpacking_owning_view` with `unpacking_view`, and use it to do
- unpacking, rather than sometimes doing the unpacking in the adaptor.
-- Ensure `const` and non-`const` overloads for `begin` and `end` in all views.
-- Move `null_sentinel_t` to `std`, remove its `base` member function, and make
- it useful for more than just pointers, based on SG-9 guidance.
-
-## Changes since R5
-
-- Simplify the complicated constraint on the comparison operator for
- `null_sentinel_t`.
-- Introduce `ranges::project_view`, and implement `charN_view`s in terms of
- that.
-- Convert the `utfN_view`s to aliases, rather than individual classes.
-
-# Motivation
-
-Unicode is important to many, many users in everyday software. It is not
-exotic or weird. Well, it's weird, but it's not weird to see it used. C and
-C++ are the only major production languages with essentially no support for
-Unicode.
-
-Let's fix.
-
-To fix, first we start with the most basic representations of strings in
-Unicode: UTF. You might get a UTF string from anywhere; on Windows you often
-get them from the OS, in UTF-16. In web-adjacent applications, strings are
-most commonly in UTF-8. In ASCII-only applications, everything is in UTF-8,
-by its definition as a superset of ASCII.
-
-Often, an application needs to switch between UTFs: 8 -> 16, 32 -> 16, etc.
-In SG-16 we've taken to calling such UTF-N -> UTF-M operations "transcoding".
-
-I'm proposing interfaces to do transcoding that meet certain design
-requirements that I think are important; I hope you'll agree:
-
-- Ranges are the future. We should have range-friendly ways of doing
- transcoding. This includes support for sentinels and lazy views.
-- Iterators are the present. We should support generic programming, whether
- it is done in terms of pointers, a particular iterator, or an iterator type
- specified as a template parameter.
-- A null-terminated string should not be treated as a special case. The
- ubiquity of such strings means that they should be treated as first-class
- strings.
-- It is common to want to view the same text as code points and code units at
- different times. It is therefore important that transcoding iterators have
- a convenient way to access the underlying sequence of code units being
- transcoded.
-- Memory safety is important. Ensuring that the Unicode part of the standard
- library is as memory safe as possible should be a priority.
-
-## A note about P1629
-
-[@P1629R1] from JeanHeyd Meneide is a much more ambitious proposal that aims
-to standardize a general-purpose text encoding conversion mechanism. This
-proposal is not at odds with P1629; the two proposals have largely orthogonal
-aims. This proposal only concerns itself with UTF interconversions, which is
-all that is required for Unicode support. P1629 is concerned with those
-conversions, plus a lot more. Accepting both proposals would not cause
-problems; in fact, the APIs proposed here could be used to implement parts of
-the P1629 design.
-
-There are some differences between the way that the transcode views and
-iterators from [@P1629R1] work and the transcoding view and iterators from
-this paper work. First, `std::text::transcode_view` has no direct support for
-null-terminated strings. Second, it does not do the unpacking described in
-this paper. Third, it is not printable and streamable.
-
-# The shortest Unicode primer imaginable
-
-There are multiple encoding types defined in Unicode: UTF-8, UTF-16, and
-UTF-32.
-
-A *code unit* is the lowest-level datum-type in your Unicode data. Examples
-are a `char8_t` in UTF-8 and a `char32_t` in UTF-32.
-
-A *code point* is a 32-bit integral value that represents a single Unicode
-value. Examples are U+0041 "A" "LATIN CAPITAL LETTER A" and U+0308 "¨"
-"COMBINING DIAERESIS".
-
-A code point may consist of multiple code units. For instance, 3 UTF-8
-code units in sequence may encode a particular code point.
-
-# A few examples
-
-## Case 1: Adapt to an existing range interface taking a different UTF
-
-In this case, we have a generic range interface to transcode into, so we use a
-transcoding view.
-
-```cpp
-// A generic function that accepts sequences of UTF-16.
-template
-void process_input(R r);
-void process_input_again(std::uc::utf_view> r);
-
-std::u8string input = get_utf8_input();
-auto input_utf16 = input | std::uc::as_utf16;
-
-process_input(input_utf16);
-process_input_again(input_utf16);
-```
-
-## Case 2: Adapt to an existing iterator interface taking a different UTF
-
-This time, we have a generic iterator interface we want to transcode into, so
-we want to use the transcoding iterators.
-
-```cpp
-// A generic function that accepts sequences of UTF-16.
-template
-void process_input(I first, I last);
-
-std::u8string input = get_utf8_input();
-
-process_input(
- std::uc::utf_iterator(
- input.begin(), input.begin(), input.end()),
- std::uc::utf_iterator(
- input.begin(), input.end(), input.end()));
-
-// Even more conveniently:
-auto const utf16_view = input | std::uc::as_utf16;
-process_input(utf16_view.begin(), utf16.end());
-```
-
-## Case 3: Adapt a range of non-character-type values
-
-Let's say that we want to take code points that we got from ICU, and transcode
-them to UTF-8. The problem is that ICU's code point type is `int`. Since
-`int` is not a character type, it's not deduced by `as_utf8` to be UTF-32
-data.
-
-```cpp
-// A generic function that accepts sequences of UTF-8.
-template
-void process_input(R r);
-
-std::vector input = get_icu_code_points();
-// This is ill-formed without the as_char32_t adaptation.
-auto input_utf8 = input | std::uc::as_char32_t | std::uc::as_utf8;
-
-process_input(input_utf8);
-```
-
-## Case 4: Print the results of transcoding
-
-Text processing is pretty useless without I/O. All of the Unicode algorithms
-operate on code points, and so the output of any of those algorithms will be
-in code points/UTF-32. It should be easy to print the results to a
-`std::ostream`, to a `std::wostream` on Windows, or using `std::format` and
-`std::print`. `utf_view` is therefore printable and streamable.
-
-```c++
-void double_print(char32_t const * str)
-{
- auto utf8 = str | std::uc::as_utf8;
- std::print("{}", utf8);
- std::cerr << utf8;
-}
-```
-
-# Proposed design
-
-## Dependencies
-
-This proposal depends on the existence of
-[P2727](https://isocpp.org/files/papers/P2727R0.html)
-"std::iterator_interface".
-
-## Add concepts that describe parameters to transcoding APIs
-
-The macro `CODE_UNIT_CONCEPT_OPTION_2` is used below to indicate the two
-options for how to define `code_unit`. See below for a description of the two
-options.
-
-```cpp
-namespace std::uc {
-
- enum class format { utf8 = 1, utf16 = 2, utf32 = 4 };
-
- inline constexpr format @*wchar-t-format*@ = @*see below*@; // @*exposition only*@
-
- template
- concept code_unit = (same_as && F == format::utf8) ||
- (same_as && F == format::utf16) ||
- (same_as && F == format::utf32)
-#if CODE_UNIT_CONCEPT_OPTION_2
- || (same_as && F == format::utf8)
- || (same_as && F == @*wchar-t-format*@)
-#endif
- ;
-
- template
- concept utf8_code_unit = code_unit;
-
- template
- concept utf16_code_unit = code_unit;
-
- template
- concept utf32_code_unit = code_unit;
-
- template
- concept utf_code_unit = utf8_code_unit || utf16_code_unit || utf32_code_unit;
-
- template
- concept code_unit_iter =
- input_iterator && code_unit, F>;
- template
- concept code_unit_pointer =
- is_pointer_v && code_unit, F>;
- template
- concept code_unit_range = ranges::input_range &&
- code_unit, F>;
-
- template
- concept utf8_iter = code_unit_iter;
- template
- concept utf8_pointer = code_unit_pointer;
- template
- concept utf8_range = code_unit_range;
-
- template
- concept utf16_iter = code_unit_iter;
- template
- concept utf16_pointer = code_unit_pointer;
- template
- concept utf16_range = code_unit_range;
-
- template
- concept utf32_iter = code_unit_iter;
- template
- concept utf32_pointer = code_unit_pointer;
- template
- concept utf32_range = code_unit_range;
-
- template
- concept utf_iter = utf8_iter || utf16_iter || utf32_iter;
- template
- concept utf_pointer = utf8_pointer || utf16_pointer || utf32_pointer;
- template
- concept utf_range = utf8_range || utf16_range || utf32_range;
-
- template
- concept utf_range_like =
- utf_range> || utf_pointer>;
-
- template
- concept utf8_input_range_like =
- (ranges::input_range> && utf8_code_unit>) ||
- utf8_pointer>;
- template
- concept utf16_input_range_like =
- (ranges::input_range> && utf16_code_unit>) ||
- utf16_pointer>;
- template
- concept utf32_input_range_like =
- (ranges::input_range> && utf32_code_unit>) ||
- utf32_pointer>;
-
- template
- concept utf_input_range_like =
- utf8_input_range_like || utf16_input_range_like || utf32_input_range_like;
-
- template
- concept transcoding_error_handler =
- requires (T t, string_view msg) { { t(msg) } -> same_as; };
-
-}
-```
-
-There are two options for how the `code_unit` concept is defined.
-
-### Code unit option 1
-
-This is represented by `CODE_UNIT_CONCEPT_OPTION_2 == 0` in the code above.
-In this option, a code unit must be one of `char8_t`, `char16_t`, and
-`char32_t`.
-
-### Code unit option 2
-
-This is represented by `CODE_UNIT_CONCEPT_OPTION_2 == 1` in the code above.
-In this option, a code unit must be a character type. This includes the
-`charN_t` character types from Option 1, plus `char` and `wchar_t`. The value
-of `@*wchar-t-format*@` is implementation defined, but must be
-`uc::format::utf16` or `uc::format::utf32`.
-
-### The impact of options 1 and 2
-
-Here are some examples of the differences between Options 1 and 2. The
-`as_utfN` and `as_charN_t` adaptors are discussed later in this paper.
-
-The `as_utfN` adaptors produce `utfN_view`s, which do transcoding.
-
-The `as_charN_t` adaptors produce `charN_view`s that are each very similar to a
-`transform_view` that casts each element of the adapted range to a `charN_t`
-value. A `charN_view` differs from the equivalent transform in that it may be
-a borrowed range, and that the `utfN_view` views know about the `charN_view`s,
-and can optimize away the work that would be done by the `charN_view`. This
-turns `charN_view` into a no-op when nested within a `utfN_view`.
-
-Note the use of `charN_t` below with `std::wstring`. That's there because
-whether you write `as_char16_t` or `as_char32_t` is implementation-dependent.
-
-::: tonytable
-
-### Option 1
-```c++
-using namespace std::uc;
-
-auto v1 = u8"text" | as_utf32; // Ok.
-auto v2 = u"text" | as_utf8; // Ok.
-auto v3 = U"text" | as_utf16; // Ok.
-
-auto v4 = std::u8string(u8"text") | as_utf32; // Ok.
-auto v5 = std::u16string(u"text") | as_utf8; // Ok.
-auto v6 = std::u32string(U"text") | as_utf16; // Ok.
-
-auto v7 = std::string | as_utf32; // Error; ill-formed.
-auto v8 = std::wstring | as_utf8; // Error; ill-formed.
-
-auto v9 = std::string | as_char8_t | as_utf32; // Ok.
-auto v10 = std::wstring | as_charN_t | as_utf8; // Ok.
-```
-
-### Option 2
-```c++
-using namespace std::uc;
-
-auto v1 = u8"text" | as_utf32; // Ok.
-auto v2 = u"text" | as_utf8; // Ok.
-auto v3 = U"text" | as_utf16; // Ok.
-
-auto v4 = std::u8string(u8"text") | as_utf32; // Ok.
-auto v5 = std::u16string(u"text") | as_utf8; // Ok.
-auto v6 = std::u32string(U"text") | as_utf16; // Ok.
-
-auto v7 = std::string | as_utf32; // Ok.
-auto v8 = std::wstring | as_utf8; // Ok.
-
-auto v9 = std::string | as_char8_t | as_utf32; // Ok.
-auto v10 = std::wstring | as_charN_t | as_utf8; // Ok.
-```
-
-:::
-
-In short, Option 1 forces you to write "`| as_char8_t`" everywhere you want to
-use a `std::string` with the interfaces proposed in this paper.
-
-Option 1 is supported by most of SG-16. Here is the relevant SG-16 poll:
-
-*UTF transcoding interfaces provided by the C++ standard library should operate
-on charN_t types, with support for other types provided by adapters, possibly
-with a special case for char and wchar_t when their associated literal
-encodings are UTF.*
-
-+----+---+---+---+----+
-| SF | F | N | A | SA |
-+====+===+===+===+====+
-| 6 |1 |0 |0 | 1 |
-+----+---+---+---+----+
-
-(I have chosen to ignore the "possibly with a special case for char and
-wchar_t when their associated literal encodings are UTF" part. Making the
-evaluation of a concept change based on the literal encoding seems like a
-flaky move to me; the literal encoding can change TU to TU.)
-
-The feeling in SG-16 is that the `charN_t` types are designed to represent UTF
-encodings, and `char` is not. A `char const *` string could be in any one of
-dozens (hundreds?) of encodings. The addition of "`| as_char8_t`" to adapt
-ranges of `char` is meant to act as a lexical indicator of user intent.
-
-I believe this decision is a mistake. I would very, very much *not* like to
-standardize Unicode interfaces that do not easily interoperate with
-`std::string`. This is my reasoning:
-
-First, `char` and `char8_t` maintain exactly the same set of invariants -- the
-empty set. Note that this is true even for string literals. The encoding of
-`u8"text"` is not necessarily UTF-8! It depends on the flags you pass to your
-compiler. Those flags are allowed to vary TU by TU. I have been bitten by
-the "`u8` does not necessarily mean UTF-8" oddity of MSVC before.
-
-Second, "`| as_char8_t`" is a no-op when used with `utfN_view`/`utf_view`. It
-does not actually do anything to help you get your program's text into UTF-8
-encoding, nor to detect that you have non-UTF-8 encoded text in your program.
-
-Third, people use `std::string` a lot. They use `char` string literals a lot.
-They use `std::u8string` and `char8_t` string literals almost not at all.
-Using Github Code Search, I found 15.3M references to `std::string` and 6.7k
-references to `std::u8string`. Even were everyone to switch from
-`std::string` to `std::u8string` today, we should still have to deal with lots
-and lots of `char const *` strings for C API compatibility.
-
-Finally, whether a given range of code units is properly UTF encoded may be a
-precondition of a given API that the user writes, but it is not a precondition
-of *any* API proposed in this paper, nor is it a precondition of any API I'm
-proposing in the papers that will follow this one.
-
-In short, I think `"text" | std::uc::as_utf32` should "just work". Making
-users write `"text" | std::uc::as_char8_t | std::uc::as_utf32`, when that does
-not increase correctness or efficiency -- and produces no different object
-code -- seems wrongheaded to me. Users that want the extra explicitness can
-still write the longer version under both options. Users that do not want
-this explicitness should not be forced to write it.
-
-## Add a null-terminated sequence sentinel
-
-```cpp
-namespace std {
- struct null_sentinel_t {
- template
- requires default_initializable> &&
- equality_comparable, iter_value_t>
- friend constexpr auto operator==(I it, null_sentinel_t) { return *it == iter_value_t{}; }
- };
-
- inline constexpr null_sentinel_t null_sentinel;
-}
-```
-
-This sentinel type matches any iterator position `it` at which `*it` is equal
-to a default-constructed object of type `iter_value_t`. This works for
-null-terminated strings, but can also serve as the sentinel for any forward
-range terminated by a default-constructed value.
-
-Because this type is potentially useful for lots of ranges unrelated to
-Unicode or text, it is in the `std` namespace, not `std::uc`.
-
-If you're wondering why `@*ITER_CONCEPT*@` is used instead of directly
-requiring `forward_iterator`, it's because the latter causes recursion in a
-check of `equality_comparable` within `forward_iterator`.
-
-
-## Add the transcoding iterator template
-
-I'm using [P2727](https://isocpp.org/files/papers/P2727R0.html)'s
-`iterator_interface` here for simplicity.
-
-First, the synopsis:
-
-```c++
-namespace std::uc {
- inline constexpr char32_t replacement_character = 0xfffd;
-
- struct use_replacement_character {
- constexpr char32_t operator()(string_view error_msg) const noexcept;
- };
-
- template
- constexpr auto @*format-to-type*@() { // @*exposition only*@
- if constexpr (Format == format::utf8) {
- return char8_t{};
- } else if constexpr (Format == format::utf16) {
- return char16_t{};
- } else {
- return char32_t{};
- }
- }
-
- template
- using @*format-to-type-t*@ = decltype(@*format-to-type*@()); // @*exposition only*@
-
- template<
- format FromFormat,
- format ToFormat,
- input_iterator I,
- sentinel_for S = I,
- transcoding_error_handler ErrorHandler = use_replacement_character>
- requires convertible_to, @*format-to-type-t*@>
- class utf_iterator;
-}
-```
-
-Then the definitions:
-
-```c++
-namespace std::uc {
- template
- constexpr auto @*bidirectional-at-most*@() { // @*exposition only*@
- if constexpr (bidirectional_iterator) {
- return bidirectional_iterator_tag{};
- } else if constexpr (forward_iterator) {
- return forward_iterator_tag{};
- } else if constexpr (input_iterator) {
- return input_iterator_tag{};
- }
- }
-
- template
- using @*bidirectional-at-most-t*@ = decltype(@*bidirectional-at-most*@()); // @*exposition only*@
-
- template>
- struct @*first-and-curr*@ { // @*exposition only*@
- @*first-and-curr*@() = default;
- @*first-and-curr*@(I curr) : curr{curr} {}
- template
- requires convertible_to
- @*first-and-curr*@(const @*first-and-curr*@& other) : curr{other.curr} {}
-
- I curr;
- };
- template
- struct @*first-and-curr*@ { // @*exposition only*@
- @*first-and-curr*@() = default;
- @*first-and-curr*@(I first, I curr) : first{first}, curr{curr} {}
- template
- requires convertible_to
- @*first-and-curr*@(const @*first-and-curr*@& other) : first{other.first}, curr{other.curr} {}
-
- I first;
- I curr;
- };
-
- struct use_replacement_character {
- constexpr char32_t operator()(string_view) const noexcept { return replacement_character; }
- };
-
- template<
- format FromFormat,
- format ToFormat,
- input_iterator I,
- sentinel_for S,
- transcoding_error_handler ErrorHandler>
- requires convertible_to, @*format-to-type-t*@>
- class utf_iterator : public iterator_interface<
- @*bidirectional-at-most*@,
- @*format-to-type-t*@,
- @*format-to-type-t*@> {
- public:
- using value_type = @*format-to-type-t*@;
-
- constexpr utf_iterator() = default;
-
- constexpr utf_iterator(I first, I it, S last) requires bidirectional_iterator
- : first_and_curr_{first, it}, last_(last) {
- if (curr() != last_)
- read();
- }
- constexpr utf_iterator(I it, S last) requires (!bidirectional_iterator)
- : first_and_curr_{it}, last_(last) {
- if (curr() != last_)
- read();
- }
-
- template
- requires convertible_to && convertible_to
- constexpr utf_iterator(const utf_iterator& other) :
- buf_(other.buf_),
- first_and_curr_(other.first_and_curr_),
- buf_index_(other.buf_index_),
- buf_last_(other.buf_last_),
- last_(other.last_)
- {}
-
- constexpr I begin() const requires bidirectional_iterator { return first(); }
- constexpr S end() const { return last_; }
-
- constexpr I base() const requires forward_iterator { return curr(); }
-
- constexpr value_type operator*() const { return buf_[buf_index_]; }
-
- constexpr utf_iterator& operator++() {
- if (buf_index_ + 1 == buf_last_ && curr() != last_) {
- if constexpr (forward_iterator) {
- advance(curr(), to_increment_);
- }
- if (curr() == last_)
- buf_index_ = 0;
- else
- read();
- } else if (buf_index_ + 1 <= buf_last_) {
- ++buf_index_;
- }
- return *this;
- }
-
- constexpr utf_iterator& operator--() requires bidirectional_iterator {
- if (!buf_index_ && curr() != first())
- read_reverse();
- else if (buf_index_)
- --buf_index_;
- return *this;
- }
-
- friend constexpr bool operator==(utf_iterator lhs, utf_iterator rhs)
- requires forward_iterator || requires (I i) { i != i; } {
- if constexpr (forward_iterator) {
- return lhs.curr() == rhs.curr() && lhs.buf_index_ == rhs.buf_index_;
- } else {
- if (lhs.curr() != rhs.curr())
- return false;
-
- if (lhs.buf_index_ == rhs.buf_index_ &&
- lhs.buf_last_ == rhs.buf_last_) {
- return true;
- }
-
- return lhs.buf_index_ == lhs.buf_last_ &&
- rhs.buf_index_ == rhs.buf_last_;
- }
- }
-
- friend constexpr bool operator==(utf_iterator lhs, S rhs)
- if constexpr (forward_iterator) {
- return lhs.curr() == rhs;
- } else {
- return lhs.curr() == rhs && lhs.buf_index_ == lhs.buf_last_;
- }
- }
-
- using base_type = // @*exposition only*@
- iterator_interface<@*bidirectional-at-most-t*@, value_type, value_type>;
- using base_type::operator++;
- using base_type::operator--;
-
- private:
- constexpr void read(); // @*exposition only*@
- constexpr void read_reverse(); // @*exposition only*@
-
- constexpr I first() const requires bidirectional_iterator // @*exposition only*@
- { return first_and_curr_.first; }
- constexpr I& curr() { return first_and_curr_.curr; } // @*exposition only*@
- constexpr I curr() const { return first_and_curr_.curr; } // @*exposition only*@
-
- array(ToFormat)> buf_; // @*exposition only*@
-
- @*first-and-curr*@ first_and_curr_; // @*exposition only*@
-
- uint8_t buf_index_ = 0; // @*exposition only*@
- uint8_t buf_last_ = 0; // @*exposition only*@
- uint8_t to_increment_ = 0; // @*exposition only*@
-
- [[no_unique_address]] S last_; // @*exposition only*@
-
- template<
- format FromFormat2,
- format ToFormat2,
- code_unit_iter I2,
- sentinel_for S2,
- transcoding_error_handler ErrorHandler2>
- friend class utf_iterator;
- };
-}
-```
-
-`use_replacement_character` is an error handler type that can be used with
-`utf_iterator`. It accepts a `string_view` error message, and returns the
-replacement character. The user can substitute their own type here, which may
-throw, abort, log, etc.
-
-`utf_iterator` is an iterator that transcodes from UTF-N to UTF-M, where N and
-M are each one of 8, 16, or 32. N may equal M. UTF-N to UTF-N operation
-invokes the error handler as appropriate, but does not change format.
-`utf_iterator` does its work by adapting an underlying range of code units.
-Each code point `c` to be transcoded is decoded from `FromFormat` in the
-underlying range. `c` is then encoded to `ToFormat` into an internal buffer.
-If ill-formed UTF is encountered during the decoding step, `c` is whatever
-invoking the error handler returns; using the default error handler, this is
-`replacement_character`.
-
-`utf_iterator` maintains certain invariants; the invariants differ based on
-whether `utf_iterator` is an input iterator.
-
-For input iterators the invariant is: if `*this` is at the end of the range
-being adapted, then `curr()` == `last_`; otherwise, the position of `curr()`
-is always at the end of the current code point `c` within the range being
-adapted, and `buf_` contains the code units in `ToFormat` that comprise `c`.
-
-For forward and bidirectional iterators, the invariant is: if `*this` is at
-the end of the range being adapted, then `curr()` == `last_`; otherwise, the
-position of `curr()` is always at the beginning of the current code point `c`
-within the range being adapted, and `buf_` contains the code units in
-`ToFormat` that comprise `c`.
-
-When ill-formed UTF is encountered in the range being adapted, `utf_iterator`
-calls `ErrorHandler{}.operator()` to produce a character to represent the
-ill-formed sequence. The number and position of error handler invocations
-within the transcoded output is the same, whether the range being adapted is
-traversed forward or backward. The number and position of the error handler
-invocations should use the "substitution of maximal subparts" approach
-described in Chapter 3 of the Unicode standard.
-
-Besides the constructors, no member function of `utf_iterator` has
-preconditions. As long as a `utf_iterator` `i` is constructed with proper
-arguments, all subsequent operations on `i` are memory safe. This includes
-decrementing a `utf_iterator` at the beginning of the range being adapted, and
-incrementing or dereferencing a `utf_iterator` at the end of the range being
-adapted.
-
-If `FromFormat` and `ToFormat` are not each one of `format::utf8`,
-`format::utf16`, or `format::utf32`, the program is ill-formed.
-
-If `input_iterator<I>` is `true`, `noexcept(ErrorHandler{}(""))` must be
-`true` as well; otherwise, the program is ill-formed.
-
-The exposition-only member function `read` decodes the code point `c` as
-`FromFormat` starting from position `curr()` in the range being adapted (`c`
-may be `replacement_character`); sets `to_increment_` to the number of code
-units read while decoding `c`; encodes `c` as `ToFormat` into `buf_`; sets
-`buf_index_` to `0`; and sets `buf_last_` to the number of code units encoded
-into `buf_`. If `forward_iterator<I>` is `true`, `curr()` is set to the
-position it had before `read` was called. If an exception is thrown during a
-call to `read`, the call to `read` has no effect.
-
-The exposition-only member function `read_reverse` decodes the code point `c`
-as `FromFormat` ending at position `curr()` in the range being adapted (`c`
-may be `replacement_character`); sets `to_increment_` to the number of code
-units read while decoding `c`; encodes `c` as `ToFormat` into `buf_`; sets
-`buf_last_` to the number of code units encoded into `buf_`; and sets
-`buf_index_` to `buf_last_ - 1`. If an exception is thrown during a call to
-`read_reverse`, the call to `read_reverse` has no effect.
-
-### Why `utf_iterator` is constrained the way it is
-
-The template parameter `I` to `utf_iterator` is not constrained with
-`code_unit_iter<FromFormat>` as it was in earlier revisions of this paper.
-Instead, `I` must be an `input_iterator` whose value type is convertible to
-`@*format-to-type-t*@<FromFormat>`. This allows two uses of `utf_iterator`
-that the previous constraint would not.
-
-First, `utf_iterator` can be used to adapt an iterator whose value type is
-some non-character type. This is useful in general, since lots of existing
-Unicode-aware user code uses `uint32_t` for UTF-32, or `short` for UTF-16 or
-whatever. It is useful in particular because ICU uses `int` for its
-UTF-32/code point type.
-
-Second, because of the first point, adaptations of ranges of non-character
-types can be made more efficient. Consider:
-
-```c++
-std::vector<int> code_points_from_icu = /* ... */;
-auto v = code_points_from_icu | std::uc::as_char32_t | std::uc::as_utf8;
-auto first = v.begin();
-```
-
-The type of `first` is:
-
-```c++
-std::uc::utf_iterator<std::uc::format::utf32, std::uc::format::utf8, std::vector<int>::iterator>
-```
-
-That is, the adapting iterator that `as_char32_t` uses is gone. This makes
-using `as_char32_t` more efficient, when used in conjunction with `as_utfN`.
-If `utf_iterator`'s `I` were required to be a `utf_iter`, this optimization
-would not work.
-
-### Why `utf_iterator` is not a nested type within `utf_view`
-
-Most users will use views most of the time. However, it can be useful to use
-iterators some of the time. For example, say I wanted to track some
-user-visible cursor within some bit of text. If I wanted to represent that
-cursor independently from the view within which it is found, it can be awkward
-to do so without an independent iterator template.
-
-```c++
-// This is the easy case. We have the View right there, and can use
-// ranges::iterator_t to get its iterator type.
-
-template
-struct my_state_type
-{
- View all_text_;
- std::ranges::iterator_t> current_position_;
- // other state ...
-};
-
-// This one, not so much. Since we don't have the View type, we have to make
-// the type of current_position_ a template parameter, even if there's only one
-// type ever in use for a given view.
-
-template
-struct my_other_state_type
-{
- Iterator current_position_;
- // other state ...
-};
-```
-
-Using `utf_iterator` allows us to write more specific code. Sometimes,
-generic code is more desirable; sometimes nongeneric code is more desirable.
-
-```c++
-struct my_other_state_type
-{
- std::uc::utf_iterator current_position_;
- // other state ...
-};
-```
-
-Further, `utf_iterator` has configurability options that do not work for
-`utfN_view`, like the `ErrorHandler` template parameter. This will not be
-used often, but some users will want it sometimes. I don't think such
-alternate uses are going to be common enough to justify complicating
-`utfN_view`; those uses belong in a lower-level interface like `utf_iterator`.
-
-### Optional: Add aliases for common `utf_iterator` specializations
-
-```c++
-namespace std::uc {
- template<
- utf8_iter I,
- std::sentinel_for S = I,
- transcoding_error_handler ErrorHandler = use_replacement_character>
- using utf_8_to_16_iterator =
- utf_iterator;
- template<
- utf16_iter I,
- std::sentinel_for S = I,
- transcoding_error_handler ErrorHandler = use_replacement_character>
- using utf_16_to_8_iterator =
- utf_iterator;
-
- template<
- utf8_iter I,
- std::sentinel_for S = I,
- transcoding_error_handler ErrorHandler = use_replacement_character>
- using utf_8_to_32_iterator =
- utf_iterator;
- template<
- utf32_iter I,
- std::sentinel_for S = I,
- transcoding_error_handler ErrorHandler = use_replacement_character>
- using utf_32_to_8_iterator =
- utf_iterator;
-
- template<
- utf16_iter I,
- std::sentinel_for S = I,
- transcoding_error_handler ErrorHandler = use_replacement_character>
- using utf_16_to_32_iterator =
- utf_iterator;
- template<
- utf32_iter I,
- std::sentinel_for S = I,
- transcoding_error_handler ErrorHandler = use_replacement_character>
- using utf_32_to_16_iterator =
- utf_iterator;
-}
-```
-
-These aliases make it easier to spell `utf_iterator`s. Consider
-`utf_8_to_32_iterator<char const *>` versus `utf_iterator<format::utf8,
-format::utf32, char const *>`. More importantly, they allow CTAD to work, as
-in `utf_8_to_32_iterator(first, it, last)`. These aliases are completely
-optional, of course. Let us poll.
-
-### Add `unpack_iterator_and_sentinel` CPO for iterator "unpacking"
-
-```cpp
-struct no_op_repacker {
- template T operator()(T x) const { return x; }
-};
-
-template S, class Repack>
-struct unpack_result {
- static constexpr format format_tag = FormatTag;
-
- I first;
- [[no_unique_address]] S last;
- [[no_unique_address]] Repack repack;
-};
-
-// CPO equivalent to:
-template S, class Repack = no_op_repacker>
- constexpr auto unpack_iterator_and_sentinel(I first, S last, Repack repack = Repack());
-```
-
-Any `utf_iterator` `ti` contains two iterators and a sentinel. If one were to
-adapt `ti` in another transcoding iterator `ti2`, one quickly encounters a
-problem -- since for example `utf_iterator<format::utf16, format::utf32, utf_iterator<format::utf8, format::utf16, char const *>>` would be the size of
-9 pointers! Further, such an iterator would do a UTF-8 to UTF-16 to UTF-32
-conversion, when it could have done a direct UTF-8 to UTF-32 conversion
-instead.
-
-One would obviously never write a type like the monstrosity above. However,
-it is quite possible to accidentally construct one in generic code. Consider:
-
-```c++
-using namespace std::uc;
-
-template
-void f(Iter it, null_sentinel_t) {
-#if _MSC_VER
- // On Windows, do something with 'it' that requires UTF-16.
- utf_iterator it16;
- windows_function(it16, null_sentinel);
-#endif
-
- // ... etc.
-}
-
-int main(int argc, char const * argv[]) {
- utf_iterator it(argv[1], null_sentinel);
-
- f(it, null_sentinel);
-
- // ... etc.
-}
-```
-
-This example is a bit contrived, since users will not create iterators
-directly like this very often. Users are much more likely to use the
-`utfN_view` views and `as_utfN` view adaptors being proposed below. The view
-adaptors are defined in such a way that they avoid this problem altogether.
-They do this by unpacking the view they are adapting before adapting it. For
-instance:
-
-```cpp
-std::u8string str = u8"some text";
-
-auto utf16_str = str | std::uc::as_utf16;
-
-static_assert(std::same_as<
- decltype(utf16_str.begin()),
- std::uc::utf_iterator<std::uc::format::utf8, std::uc::format::utf16, std::u8string::iterator>
->);
-
-auto utf32_str = utf16_str | std::uc::as_utf32;
-
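-// Poof! The utf_iterator<format::utf8, format::utf16, ...> iterator disappeared!
-static_assert(std::same_as<
- decltype(utf32_str.begin()),
- std::uc::utf_iterator<std::uc::format::utf8, std::uc::format::utf32, std::u8string::iterator>
->);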
-```
-
-The unpacking logic is used in the view adaptors, as shown above. This allows
-you to write `r | std::uc::as_utf32` in a generic context, without caring
-whether `r` is a range of UTF-8, UTF-16, or UTF-32. You also do not need to
-care about whether `r` is a common range or not. You also can ignore whether
-`r` is composed of raw pointers, some other kind of iterator, or transcoding
-iterators.
-
-This becomes especially useful in the APIs proposed in later papers that
-depend on this paper. In particular, APIs in subsequent papers accept any
-UTF-N iterator, and then transcode internally to UTF-32. However, this
-creates a minor problem for some algorithms. Consider this algorithm (not
-proposed) as an example.
-
-```c++
-template S, output_iterator O>
- requires (utf8_code_unit> || utf16_code_unit>)
-transcode_result transcode_to_utf32(I first, S last, O out);
-```
-
-Such a transcoding algorithm is pretty similar to `std::ranges::copy`, in that
-you should return both the output iterator *and* the final position of the
-input iterator (`transcode_result` is an alias for `in_out_result`). For such
-interfaces, it can be difficult in the general case to form an iterator of
-type `I` to return to the user:
-
-```c++
-template S, output_iterator O>
- requires (utf8_code_unit> || utf16_code_unit>)
-transcode_result transcode_to_utf32(I first, S last, O out) {
- // Get the input as UTF-32. This may involve unpacking, so possibly decltype(r.begin()) != I.
- auto r = ranges::subrange(first, last) | uc::as_utf32;
-
- // Do transcoding.
- auto copy_result = ranges::copy(r, out);
-
- // Return an in_out_result.
- return result{/* ??? */, copy_result.out};
-}
-```
-
-What should we write for `/* ??? */`? That is, how do we get back from the
-UTF-32 iterator `r.begin()` to an `I` iterator? It's harder than it first
-seems; consider the case where `I` is
-`std::uc::utf_16_to_32_iterator>`.
-The solution is for the unpacking algorithm to remember the structure of
-whatever iterator it unpacks, and then rebuild the structure when returning
-the result. To demonstrate, here is the implementation of
-`transcode_to_utf32` from Boost.Text:
-
-```c++
-template S, std::output_iterator O>
- requires (utf8_code_unit> || utf16_code_unit>)
-transcode_result transcode_to_utf32(I first, S last, O out)
-{
- auto const r = boost::text::unpack_iterator_and_sentinel(first, last);
- auto unpacked = detail::transcode_to_32(
- detail::tag_t, r.first, r.last, -1, out);
- return {r.repack(unpacked.in), unpacked.out};
-}
-```
-
-Note the call to `r.repack`. This is an invocable created by the unpacking
-process itself.
-
-This may all sound complicated, but the implementation is not bad at all. Here's the
-unpacking/repacking implementation from Boost.Text:
-[unpack.hpp](https://github.com/tzlaine/text/blob/develop/include/boost/text/unpack.hpp).
-
-`unpack_iterator_and_sentinel` is a CPO. It is intended to work with UDTs
-that provide their own unpacking implementation. It returns an
-`unpack_result`.
-
-In telecon review, some concerns were voiced about the name
-`uc::unpack_iterator_and_sentinel`. Some people felt that the name should
-include some mention of "UTF" or "transcoding" or "Unicode". I think that
-it's fine as-is, since it's in namespace `std::uc`, but a poll on renaming
-might be in order. I suggest `uc::unpack_utf_iterator_and_sentinel` as a
-possible alternative.
-
-### Why input iterators are not unpackable
-
-Input iterators are messed up. They barely resemble the other iterators. For
-one thing, they are single-pass. This means that when a `utf_iterator`
-adapting an input iterator reads the next code point from the range it is
-adapting, it must leave the iterator at a location that is just after the
-current code point. It has no choice, since it cannot backtrack.
-
-It is possible to unpack an input iterator in an entirely different way than
-other iterators. The unpack operation for input iterators could be to produce
-the underlying code unit iterator (the adapted input iterator itself), *plus*
-the current code point that the input iterator was just used to read.
-
-However, this is not very much help. Consider a case in which we need to
-unpack a UTF-8 to UTF-32 transcoding iterator so we can form a UTF-8 to UTF-16
-iterator instead. The unpack operation will produce an unpacked input
-transcoding iterator -- the moral equivalent of `std::pair<I, char32_t>`.
-
-What can you do with this? Well, you can try to construct a
-`utf_iterator<format::utf8, format::utf16, I>` from it. That would mean
-adding a constructor that takes an input iterator and a `char32_t`. This
-would also mean that any user transcoding iterator types that are usable with
-the `unpack_iterator_and_sentinel` CPO would also need to unpack their input
-iterator into an iterator/code point pair, and that those user types would
-also need to add this odd constructor.
-
-This is all weird. It's also a pretty small use case. People don't use input
-iterators that often. Since this can always be added later, it is not being
-proposed right now.
-
-## Add `project_view`
-
-This template is a `std::ranges` view and adaptor that makes the
-implementation of the code unit views and adaptors nearly trivial. It is
-being added based on input from SG-9. No one in the SG-9 telecon could think
-of a name everyone liked; suggestions are welcome.
-
-```c++
-namespace std::ranges {
- template
- requires view &&
- regular_invocable> &&
- @*can-reference*@>>
- class project_view : public view_interface>
- {
- V @*base_*@ = V(); // @*exposition only*@
-
- template
- class @*iterator*@; // @*exposition only*@
- template
- class @*sentinel*@; // @*exposition only*@
-
- public:
- constexpr project_view() requires default_initializable = default;
- constexpr explicit project_view(V base) : @*base_*@(std::move(base)) {}
-
- constexpr V& base() & { return @*base_*@; }
- constexpr const V& base() const& { return @*base_*@; }
- constexpr V base() && { return std::move(@*base_*@); }
-
- constexpr @*iterator*@ begin() { return @*iterator*@{ranges::begin(@*base_*@)}; }
- constexpr @*iterator*@ begin() const requires range
- { return @*iterator*@{ranges::begin(@*base_*@)}; }
-
- constexpr @*sentinel*@ end() { return @*sentinel*@{ranges::end(@*base_*@)}; }
- constexpr @*iterator*@ end() requires common_range { return @*iterator*@{ranges::end(@*base_*@)}; }
- constexpr @*sentinel*@ end() const requires range { return @*sentinel*@{ranges::end(@*base_*@)}; }
- constexpr @*iterator*@ end() const requires common_range
- { return @*iterator*@{ranges::end(@*base_*@)}; }
-
- constexpr auto size() requires sized_range { return ranges::size(@*base_*@); }
- constexpr auto size() const requires sized_range { return ranges::size(@*base_*@); }
- };
-
- template
- requires view &&
- regular_invocable> &&
- @*can-reference*@>>
- template
- class project_view::@*iterator*@
- : public std::proxy_iterator_interface<
- iterator_to_tag_t>>,
- invoke_result_t>>
- {
- public:
- using reference_type = invoke_result_t>;
-
- private:
- using @*iterator_type*@ = iterator_t<@*maybe-const*@>; // @*exposition only*@
-
- friend std::iterator_interface_access;
- @*iterator_type*@ & base_reference() noexcept { return @*it_*@; } // @*exposition only*@
- @*iterator_type*@ base_reference() const { return @*it_*@; } // @*exposition only*@
-
- @*iterator_type*@ @*it_*@ = @*iterator_type*@(); // @*exposition only*@
-
- friend project_view::@*sentinel*@;
-
- public:
- constexpr @*iterator*@() = default;
- constexpr @*iterator*@(@*iterator_type*@ it) : @*it_*@(std::move(it)) {}
-
- constexpr reference_type operator*() const { return F(*@*it_*@); }
- };
-
- template
- requires view &&
- regular_invocable> &&
- @*can-reference*@>>
- template
- class project_view::@*sentinel*@
- {
- using @*Base*@ = @*maybe-const*@; // @*exposition only*@
- using @*sentinel_type*@ = sentinel_t<@*Base*@>; // @*exposition only*@
-
- @*sentinel_type*@ @*end_*@ = @*sentinel_type*@(); // @*exposition only*@
-
- public:
- constexpr @*sentinel*@() = default;
- constexpr explicit @*sentinel*@(@*sentinel_type*@ end) : @*end_*@(std::move(end)) {}
- constexpr @*sentinel*@(@*sentinel*@ i) requires Const
- && convertible_to, sentinel_t<@*Base*@>>;
-
- constexpr @*sentinel_type*@ base() const { return @*end_*@; }
-
- template
- requires sentinel_for<@*sentinel_type*@, iterator_t<@*maybe-const*@>>
- friend constexpr bool operator==(const @*iterator*@ & x, const @*sentinel*@ & y)
- { return x.@*it_*@ == y.@*end_*@; }
-
- template
- requires sized_sentinel_for<@*sentinel_type*@, iterator_t<@*maybe-const*@>>
- friend constexpr range_difference_t<@*maybe-const*@>
- operator-(const @*iterator*@ & x, const @*sentinel*@ & y)
- { return x.@*it_*@ - y.@*end_*@; }
-
- template
- requires sized_sentinel_for<@*sentinel_type*@, iterator_t<@*maybe-const*@