diff --git a/.clang-tidy b/.clang-tidy new file mode 100644 index 0000000000..57b9c31f08 --- /dev/null +++ b/.clang-tidy @@ -0,0 +1 @@ +Checks: 'performance-*, modernize-*, misc-*, -misc-definitions-in-headers, -misc-non-private-member-variables-in-classes, -misc-no-recursion, -misc-include-cleaner, -misc-use-anonymous-namespace, -modernize-use-trailing-return-type, -modernize-use-nodiscard, -modernize-use-emplace, -modernize-loop-convert, -modernize-pass-by-value, -modernize-use-equals-delete, -modernize-use-equals-default, -modernize-use-override, -modernize-avoid-c-arrays, -modernize-raw-string-literal, -modernize-concat-nested-namespaces, -performance-inefficient-string-concatenation' diff --git a/.github/workflows/build-cppfront.yaml b/.github/workflows/build-cppfront.yaml index 0bb92a20d8..6d62fcb713 100644 --- a/.github/workflows/build-cppfront.yaml +++ b/.github/workflows/build-cppfront.yaml @@ -1,26 +1,28 @@ name: Multi-platform Build of cppfront on: - push: - branches: - - main pull_request: - branches: - - main + paths-ignore: + - 'docs/**' + push: + paths-ignore: + - 'docs/**' + workflow_dispatch: + jobs: build-windows: runs-on: windows-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: ilammy/msvc-dev-cmd@v1 - name: Compiler name & version run: cl.exe - name: Build - run: cl.exe source/cppfront.cpp -std:c++latest -MD -EHsc -experimental:module -W4 -WX + run: cl.exe source/cppfront.cpp -std:c++latest -MD -EHsc -W4 -WX build-unix-like: strategy: fail-fast: false matrix: - runs-on: [ubuntu-latest] + runs-on: [ubuntu-22.04] compiler: [g++-10, g++-11, g++-12, clang++-12, clang++-14] cxx-std: ['c++20', 'c++2b'] exclude: @@ -33,18 +35,33 @@ jobs: - compiler: clang++-14 cxx-std: 'c++2b' include: - - runs-on: macos-11 - compiler: clang++ - cxx-std: 'c++20' - runs-on: macos-latest compiler: clang++ cxx-std: 'c++20' + - runs-on: ubuntu-22.04 + compiler: clang++-15 + cxx-std: 'c++20' + - runs-on: ubuntu-24.04 + compiler: clang++-16 + 
cxx-std: 'c++20' + - runs-on: ubuntu-24.04 + compiler: clang++-17 + cxx-std: 'c++20' + - runs-on: ubuntu-24.04 + compiler: clang++-18 + cxx-std: 'c++20' + - runs-on: ubuntu-24.04 + compiler: clang++-18 + cxx-std: 'c++23' + - runs-on: ubuntu-24.04 + compiler: g++-14 + cxx-std: 'c++2b' runs-on: ${{ matrix.runs-on }} env: CXX: ${{ matrix.compiler }} CXXFLAGS: -std=${{ matrix.cxx-std }} -Wall -Wextra -Wold-style-cast -Wunused-parameter -Wpedantic -Werror -pthread -Wno-unknown-warning -Wno-unknown-warning-option steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Install compiler if: startsWith(matrix.runs-on, 'ubuntu') run: sudo apt-get install -y $CXX diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000000..7dd2f48a94 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,21 @@ +name: docs +on: + push: + branches: + - main +permissions: + contents: write +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: 3.x + - uses: actions/cache@v4 + with: + key: ${{ github.ref }} + path: .cache + - run: pip install mkdocs-material + - run: mkdocs gh-deploy --force \ No newline at end of file diff --git a/.github/workflows/regression-tests.yml b/.github/workflows/regression-tests.yml index 3af5455949..23bafb29ac 100644 --- a/.github/workflows/regression-tests.yml +++ b/.github/workflows/regression-tests.yml @@ -2,13 +2,16 @@ name: Regression tests on: pull_request: - types: [opened, synchronize, reopened] + paths-ignore: + - 'docs/**' push: + paths-ignore: + - 'docs/**' workflow_dispatch: jobs: regression-tests: - name: Run on ${{ matrix.os }} using ${{ matrix.compiler }} + name: ${{ matrix.shortosname }} | ${{ matrix.compiler }} | ${{ matrix.cxx_std }} | ${{ matrix.stdlib }} | ${{ matrix.os }} runs-on: ${{ matrix.os }} env: CXX: ${{ matrix.compiler }} @@ -16,38 +19,118 @@ jobs: strategy: fail-fast: false matrix: - 
os: [ubuntu-latest] - compiler: [g++-10, g++-13] + os: [ubuntu-24.04] + shortosname: [ubu-24] + compiler: [g++-14, g++-13] + cxx_std: [c++2b] + stdlib: [libstdc++] include: - - os: ubuntu-20.04 - compiler: clang++-12 + - os: ubuntu-24.04 + shortosname: ubu-24 + compiler: clang++-19 + cxx_std: c++20 + stdlib: libstdc++ + - os: ubuntu-24.04 + shortosname: ubu-24 + compiler: clang++-19 + cxx_std: c++23 + stdlib: libc++-18-dev + - os: ubuntu-22.04 + shortosname: ubu-22 + compiler: clang++-15 + cxx_std: c++20 + stdlib: libstdc++ + - os: ubuntu-22.04 + shortosname: ubu-22 + compiler: clang++-15 + cxx_std: c++20 + stdlib: libc++-15-dev + - os: macos-14 + shortosname: mac-14 + compiler: clang++ + cxx_std: c++2b + stdlib: default - os: macos-13 + shortosname: mac-13 compiler: clang++ - - os: windows-latest + cxx_std: c++2b + stdlib: default + - os: macos-13 + shortosname: mac-13 + compiler: clang++-15 + cxx_std: c++2b + stdlib: default + - os: windows-2022 + shortosname: win-22 + compiler: cl.exe + cxx_std: c++latest + stdlib: default + - os: windows-2022 + shortosname: win-22 compiler: cl.exe + cxx_std: c++20 + stdlib: default steps: - name: Checkout repo - uses: actions/checkout@v3 + uses: actions/checkout@v4 + + - name: Prepare compilers - macOS + if: matrix.os == 'macos-13' + run: | + sudo xcode-select --switch /Applications/Xcode_14.3.1.app + sudo ln -s "$(brew --prefix llvm@15)/bin/clang" /usr/local/bin/clang++-15 + + - name: Prepare compilers - Ubuntu 24.04 + if: matrix.os == 'ubuntu-24.04' + run: | + sudo apt-get install clang-19 - name: Run regression tests - Linux and macOS version - if: startsWith(matrix.os, 'ubuntu') || matrix.os == 'macos-13' + if: startsWith(matrix.os, 'ubuntu') || startsWith(matrix.os, 'macos') run: | cd regression-tests - bash run-tests.sh -c ${{ matrix.compiler }} + bash run-tests.sh -c ${{ matrix.compiler }} -s ${{ matrix.cxx_std }} -d ${{ matrix.stdlib }} -l ${{ matrix.os }} - name: Run regression tests - Windows version - if: 
matrix.os == 'windows-latest' + if: startsWith(matrix.os, 'windows') run: | "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat" && ^ git config --local core.autocrlf false && ^ cd regression-tests && ^ - bash run-tests.sh -c ${{ matrix.compiler }} + bash run-tests.sh -c ${{ matrix.compiler }} -s ${{ matrix.cxx_std }} -d ${{ matrix.stdlib }} -l ${{ matrix.os }} shell: cmd - name: Upload patch - if: ${{ !cancelled() }} + if: success() || failure() uses: actions/upload-artifact@v4 with: - name: ${{ matrix.compiler }}-patch.diff - path: regression-tests/${{ matrix.compiler }}-patch.diff + name: ${{ matrix.os }}-${{ matrix.compiler }}-${{ matrix.cxx_std }}-${{ matrix.stdlib }}.patch + path: regression-tests/${{ matrix.os }}-${{ matrix.compiler }}-${{ matrix.cxx_std }}-${{ matrix.stdlib }}.patch if-no-files-found: ignore + + aggregate-results: + needs: regression-tests + if: success() || failure() + runs-on: ubuntu-latest + steps: + - name: Download all patches + uses: actions/download-artifact@v4 + with: + path: downloaded-results + + - name: Prepare result files + id: prepare_files + run: | + mkdir aggregated-results + echo "Flattening file hierarchy" + find . 
-type f -wholename "./downloaded-results*" -exec mv {} aggregated-results \; + patch_count=$(ls aggregated-results 2>/dev/null | wc -l) + echo "patch_count=${patch_count}" >> $GITHUB_OUTPUT + + - name: Upload aggregated results + if: steps.prepare_files.outputs.patch_count != '0' + uses: actions/upload-artifact@v4 + with: + name: aggregated-results + path: aggregated-results + if-no-files-found: ignore diff --git a/.gitignore b/.gitignore index b247266812..4ed68fa538 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ *.tlog *.idb *.obj +*.ifc *.pdb *.suo *.db @@ -27,9 +28,21 @@ *.exe source/gen_version.bat build*/ +venv/* +*.ifc # Visual Studio cache directory .vs/ # VSCode workspace directory .vscode/ +buildh2.bat +gen_version.bat +mkdocs_serve.sh +experimental/a.out +cppfront/x64/Debug/microsoft/STL/std.compat.ixx.ifc.dt.d.json +cppfront/x64/Debug/microsoft/STL/std.compat.ixx.ifc.dt.module.json +cppfront/x64/Debug/microsoft/STL/std.compat.ixx.ifc.dt.module.json.command +cppfront/x64/Debug/microsoft/STL/std.ixx.ifc.dt.d.json +cppfront/x64/Debug/microsoft/STL/std.ixx.ifc.dt.module.json +cppfront/x64/Debug/microsoft/STL/std.ixx.ifc.dt.module.json.command diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 0000000000..9d8190f124 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,12 @@ +cff-version: 1.2.0 +message: >- + If you use this software, please cite it using the + metadata from this file. 
+type: software +title: cppfront +authors: + - given-names: Herb + family-names: Sutter +repository-code: 'https://github.com/hsutter/cppfront' +abstract: A personal experimental C++ Syntax 2 -> Syntax 1 compiler +license: Apache-2.0 WITH LLVM-exception diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 7fe8d59c4e..775b7b1518 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,10 +1,6 @@ # cppfront -An experimental C++ Syntax 2 -> Syntax 1 transpiler - -Copyright (c) Herb Sutter -SPDX-License-Identifier: CC-BY-NC-ND-4.0 -See [License](LICENSE) +This project is an experimental C++ Syntax 2 -> Syntax 1 transpiler, copyright (c) Herb Sutter • See [License](LICENSE) [![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-2.1-4baaaa.svg)](CODE_OF_CONDUCT.md) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d27b74165e..386cba3c5a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,9 +1,10 @@ -This is the contributor license agreement. If you are looking for the usage license, see here: [usage license](https://github.com/hsutter/cppfront/blob/main/LICENSE) +## Contributing -## cppfront Contributor License Agreement +This project welcomes contributions and suggestions. Most contributions require you to agree to a Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant the rights to use your contribution. A sample of the CLA is below. -By contributing content to cppfront (i.e., submitting a pull request for inclusion in this repository): -- You warrant that your material is original, or you have the right to contribute it. -- With respect to the material that you own, you grant a worldwide, non-exclusive, irrevocable, transferable, and royalty-free license to your contributed material to Herb Sutter to display, reproduce, perform, distribute, and create derivative works of that material for commercial or non-commercial use. 
-- With respect to any other material you contribute, such material must be under a worldwide, non-exclusive, irrevocable, transferable, and royalty-free license sufficient to allow Herb Sutter to display, reproduce, perform, distribute, and create derivative works of that material for commercial or non-commercial use. +When you submit a pull request, a CLA bot or human will automatically determine whether you need to provide a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions provided. You will only need to do this once for all your contributions to cppfront. + +## Cppfront Contribution License Agreement (Sample) + +A sample can be found here: [Cppfront CLA 2024-10 - sample.pdf](docs/Cppfront%20CLA%202024-10%20-%20sample.pdf) diff --git a/LICENSE b/LICENSE index 1689a5eac9..e582264d18 100644 --- a/LICENSE +++ b/LICENSE @@ -1,11 +1,236 @@ -Copyright (c) Herb Sutter -SPDX-License-Identifier: CC-BY-NC-ND-4.0 - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. +Copyright 2022-2024 Herb Sutter +SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. + + +Cppfront is under the Apache License v2.0 with LLVM Exception: + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +---- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. 
+ +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. diff --git a/README.md b/README.md index e7ced6a1dd..bdeea05642 100644 --- a/README.md +++ b/README.md @@ -1,113 +1,49 @@ # cppfront - Copyright (c) Herb Sutter - -See [License](LICENSE) +Copyright (c) Herb Sutter • See [License](LICENSE) +
[![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-2.1-4baaaa.svg)](CODE_OF_CONDUCT.md) [![Build (clang, gcc, vs)](https://github.com/hsutter/cppfront/actions/workflows/build-cppfront.yaml/badge.svg)](https://github.com/hsutter/cppfront/actions/workflows/build-cppfront.yaml) -Cppfront is an experimental compiler from a potential C++ 'syntax 2' (Cpp2) to today's 'syntax 1' (Cpp1), to learn some things, prove out some concepts, and share some ideas. This compiler is a work in progress and currently hilariously incomplete... basic functions work, classes will be next, then metaclasses and lightweight exceptions. - -- [Goals and history](#goals-and-history) -- [What's different about this experiment?](#whats-different-about-this-experiment) -- [How do I build cppfront?](#how-do-i-build-cppfront) -- [How do I build my `.cpp2` file?](#how-do-i-build-my-cpp2-file) -- [Where's the documentation?](#wheres-the-documentation) -- [Papers and talks derived from this work (presented in today's syntax)](#papers-and-talks-derived-from-this-work-presented-in-current-syntax) -- [Epilog: 2016 roadmap diagram](#epilog-2016-roadmap-diagram) - -## Goals and history - - My goal is to explore whether there's a way we can evolve C++ itself to become 10x simpler, safer, and more toolable. If we had an alternate C++ syntax, it would give us a "bubble of new code that doesn't exist today" where we could make arbitrary improvements (e.g., change defaults, remove unsafe parts, make the language context-free and order-independent, and generally apply 30 years' worth of learnings), free of backward source compatibility constraints. - -In 2015-16 I did most of the 'syntax 2' design work. Since then, my ISO C++ evolution proposals and conference talks have come from this work (see list below) — each presenting one part of the design as a standalone proposal under today's syntax, usually with a standalone prototype implementation, to validate and refine that part. 
Since 2021, I've been writing this cppfront compiler to prototype all the parts together as a whole as originally intended, now including the alternative 'syntax 2' for C++ that enables their full designs including otherwise-breaking changes. - -## What's different about this experiment? - -This is one of many experiments going on across the industry looking at ways to accomplish a major C++ evolution. I look forward to seeing how all the experiments work out. This experiment is different in two main ways. - -### 1) This is about C++20/23/... — not about something else - - For me, ISO C++ is the best tool in the world today to write the programs I want and need. I want to keep writing code in C++... just "nicer": - -- with less complexity to remember; - -- with fewer safety gotchas; and - -- with the same level of tool support other languages enjoy. - -We've been improving C++'s safety and ergonomics with each ISO C++ release, but they have been "10%" improvements. We haven't been able to do a **"10x"** improvement primarily because we have to keep 100% syntax backward compatibility. So, what if we could have our compatibility cake, and eat it too — by having: +Cppfront is a compiler from an experimental C++ 'syntax 2' (Cpp2) to today's 'syntax 1' (Cpp1), to prove out some concepts, share some ideas, and prototype features that can also be proposed for evolving today's C++. -- 100% seamless **link compatibility always** (no marshaling, no thunks, no wrappers, no generated 'compatibility modules' to import/export C++ code from/to a different world); and +## Documentation: [available here](https://hsutter.github.io/cppfront/) -- 100% seamless **backward source compatibility always _available_**, including 100% SFINAE and macro compatibility, but only pay for it when we use it... that is, apply C++'s familiar "zero-overhead principle" also to backward source compatibility? +## What's different about this project? 
-In cppfront this means you have two options always available: (1) _Write mixed Cpp1/Cpp2 in the same source file_ with perfect backward source compatibility via `#include` or `import`. (2) _Write only Cpp2 in a particular source file_ and program in a 10x simpler C++, where code is type-safe and memory-safe by construction, keeps perfect backward link compatibility via `import`, and in the future (if this project succeeds) with faster compilers and better tools tuned for the simpler language. +In short, it aims to help evolve C++ itself, not to be a "C++ successor." -I hope to start a conversation about what could be possible _**within C++**_’s own evolution to rejuvenate C++, now that we have C++20 and soon C++23 to build upon. I want to encourage us to look for ways to push the boundaries to bring C++ itself forward and double down on C++ — not to switch to something else. I want us to aim for major C++ evolution directed toward things that will make us better C++ programmers — not programmers of something else. +**What it isn't.** Cpp2 is not a successor or alternate language with its own divergent or incompatible ecosystem. For example, it does not have its own nonstandard incompatible modules/concepts/etc. that compete with the Standard C++ features; it does not replace your Standard C++ compiler or other tools; and it does not require any changes to your Standard C++ compiler or standard library or other libraries or tools to keep fully using all of them. -### 2) This is about measurable improvements to safety, simplicity, and toolability — not about green-field design or random drive-by changes +**What it is.** Cpp2 aims to be another "skin" for C++ itself, just a simpler and safer way to write ordinary C++ types/functions/objects, and a faster way to experiment with proposals for future new Standard C++ features in a simpler compiler and syntax flavor. It seamlessly uses Standard C++ modules, concept requirements, and other features. 
- My specific goal is to explore the question: Can we make C++ **10x safer, simpler, and more toolable?** I want each proposed improvement to address those known C++ pain points in a measurable way (e.g., reduce a class of CVEs (vulnerabilities) by A%, reduce the guidance we have to teach by B%). +It also works with all existing C++20 (or higher) compilers, libraries, and tools right out of the box with no changes required to use them all seamlessly and directly, with zero overhead. -An alternative syntax would let us do things we can never do within today's syntax without breaking the world: +For more, see [What is Cpp2?](https://hsutter.github.io/cppfront/#what-is-cpp2). -- fix defaults (e.g., make `[[nodiscard]]` the default); -- double down on modern C++ (e.g., make C++20 modules and C++23 `import std;` the default); -- remove unsafe parts that are already superseded (e.g., remove `union` and pointer arithmetic); -- have type and memory safety by default (e.g., make the [C++ Core Guidelines safety profiles](https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines#S-profile) the default and required); -- eliminate 90% of the guidance we have to teach about today's complex language; -- make it easy to write a parser (e.g., have a context-free grammar); and -- make it easy to write refactoring and other tools (e.g., have order-independent semantics). - -### Acknowledgments +## Acknowledgments Scores of people have given valuable feedback and many are listed below, but I especially want to thank Joe Duffy, Anders Hejlsberg, Bjarne Stroustrup, Andrew Sutton, Tim Sweeney, and Mads Torgersen for their insights and valuable feedback on this work over the years — especially when they disagreed with me. 
I'd also like to thank Dave Abrahams, Andrei Alexandrescu, Walter Bright, Lee Howes, Chris McKinsey, Scott Meyers, Gor Nishanov, Andrew Pardoe, Sean Parent, Jared Parsons, David Sankel, Nathan Sidwell, JC van Winkel, and Ville Voutilainen for broad feedback on the design. Many more people are listed below for their help with specific parts of the design and those proposals/prototypes. I apologize for the names I have forgotten. -## How do I build cppfront? - - -Cppfront builds with any major C++20 compiler. - -#### MSVC build instructions - - cl cppfront.cpp -std:c++20 -EHsc - -#### GCC build instructions - - g++-10 cppfront.cpp -std=c++20 -o cppfront - -#### Clang build instructions - - clang++-12 cppfront.cpp -std=c++20 -o cppfront - -## How do I build my `.cpp2` file? - -Run `cppfront your.cpp2`, then run the generated `your.cpp` through any major C++20 compiler after putting `/cppfront/include` in the path so it can find `cpp2util.h`. - -- MSVC would be: `cl your.cpp -std:c++20 -EHsc` -- GCC would be: `g++-10 your.cpp -std=c++20` -- Clang would be: `clang++-12 your.cpp -std=c++20` - -## Where's the documentation? - -I'm not posting much documentation because that would imply this project is intended for others to use — if it someday becomes ready for that, I'll post more docs. +## Further information To learn more, please see: -- [**My CppCon 2022 talk, "Can C++ be 10x simpler and safer ...?"**](https://www.youtube.com/watch?v=ELeZAKCN4tY) -- [**My CppCon 2023 talk, "Cooperative C++ Evolution: Toward a TypeScript for C++"**](https://www.youtube.com/watch?v=8U3hl8XMm8c) -- **The [cppfront regression tests](https://github.com/hsutter/cppfront/tree/main/regression-tests)** which show dozens of working examples, each with a`.cpp2` file. In the `/test-results/` subdirectory you can see the `.cpp` file that each is translated to. 
Each filename briefly describes the language features the test demonstrates (e.g., contracts, parameter passing, bounds safety, type-safe `is` queries and `as` casts, initialization safety, and generalized value capture including in function expressions ('lambdas'), postconditions, and string interpolation). -- The list of papers and talks below. +- [**👀 The documentation 👀**](https://hsutter.github.io/cppfront/) +- [My CppCon 2022 talk, "Can C++ be 10x simpler and safer ...?"](https://www.youtube.com/watch?v=ELeZAKCN4tY) +- [My CppCon 2023 talk, "Cooperative C++ Evolution: Toward a TypeScript for C++"](https://www.youtube.com/watch?v=8U3hl8XMm8c) +- [Update on cppfront at ACCU 2024](https://www.youtube.com/watch?v=EB7yR-1317k&t=3866s) +- [This repo's wiki](https://github.com/hsutter/cppfront/wiki) +- The list of papers and talks below -## Papers and talks derived from this work (presented in current syntax) +### Papers and talks derived from this work (presented in current syntax as contributions toward ISO C++'s evolution itself) Here are the ISO C++ papers and CppCon conference talks I've given since 2015 that have been derived from this work, in the order that I brought each piece forward. Most of the details in the materials below are still current with only incremental updates, apart from the specific syntax of course. -### 2015: Lifetime safety +#### 2015: Lifetime safety - [**CppCon 2015**: "Writing good C++14... _by default_"](https://youtu.be/hEx5DNLWGgA) particularly [from 29:00 onward](https://youtu.be/hEx5DNLWGgA?t=1757) shows the Lifetime analysis with live demos in a Visual Studio prototype. - [**CppCon 2018**: "Thoughts on a more powerful _and_ simpler C++ (#5 of N)](https://youtu.be/80BZxujhY38): @@ -118,7 +54,7 @@ Here are the ISO C++ papers and CppCon conference talks I've given since 2015 th This is not yet implemented in cppfront. Implementations are shipping in Visual Studio and in CLion, and initial parts have been upstreamed in Clang. 
I want to especially thank Matthias Gehre, Gabor Horvath, Neil MacIntosh, and Kyle Reed for their help in implementing the Lifetime static analysis design in Visual Studio and a Clang fork. Thanks also to the following for their input and feedback on the specification: Andrei Alexandrescu, Steve Carroll, Pavel Curtis, Gabriel Dos Reis, Joe Duffy, Daniel Frampton, Anna Gringauze, Chris Hawblitzel, Nicolai Josuttis, Ellie Kornstaedt, Aaron Lahman, Ryan McDougall, Nathan Myers, Gor Nishanov, Andrew Pardoe, Jared Parsons, Dave Sielaff, Richard Smith, Jim Springfield, and Bjarne Stroustrup. -### 2016: Garbage-collected memory arena +#### 2016: Garbage-collected memory arena - [**CppCon 2016**: "Leak-freedom in C++... _by default_"](https://www.youtube.com/watch?v=JfmTagWcqoE) particularly [from 59:00 onward](https://youtu.be/JfmTagWcqoE?t=3558) where I show the strawman prototype I wrote of a tracing garbage-collection memory arena. - [**Github.com/hsutter/gcpp**: "gcpp: Deferred and unordered destruction"](https://github.com/hsutter/gcpp) is the GitHub prototype I wrote. @@ -130,9 +66,9 @@ This is not yet implemented in cppfront. I welcome a real GC expert to collabora - [**CppCon 2017 (just the intro, first 6 minutes)**: "Meta: Thoughts on generative C++"](https://www.youtube.com/watch?v=4AfRAVcThyA). - [**P0515**: Consistent comparison](https://wg21.link/p0515) is the proposal in today's syntax that I proposed, and was adopted, for C++20. -This is part of ISO C++20 and C++23. Thank you again to everyone who helped land this in the Standard in C++20 and improve it in C++23, including especially Walter Brown, Lawrence Crowl, Cameron DaCamara, Gabriel Dos Reis, Jens Maurer, Barry Revzin, Richard Smith, and David Stone. +This is part of ISO C++20 and C++23, except only for chained comparisons which is implemented in cppfront. 
Thank you again to everyone who helped land this in the Standard in C++20 and improve it in C++23, including especially Walter Brown, Lawrence Crowl, Cameron DaCamara, Gabriel Dos Reis, Jens Maurer, Barry Revzin, Richard Smith, and David Stone. -### 2017: Reflection, generation, and metaclasses +#### 2017: Reflection, generation, and metaclasses - [**ACCU 2017**: "Thoughts on metaclasses"](https://www.youtube.com/watch?v=6nsyX37nsRs) is the first talk I gave about this. - [**CppCon 2017**: "Meta: Thoughts on generative C++"](https://www.youtube.com/watch?v=4AfRAVcThyA) from after the intro, [from 6:00 onward](https://youtu.be/4AfRAVcThyA?t=393). @@ -141,11 +77,11 @@ This is part of ISO C++20 and C++23. Thank you again to everyone who helped land - (repeating the Lifetime section bullet above) [The final part starting at 1:28](https://youtu.be/80BZxujhY38?t=5307) shows the Lifetime and Metaclasses proposals working hand-in-hand. This is one of the few places before cppfront where the same compiler has contained prototypes of multiple 'syntax 2'-derived features so I could show how they build on each other when used together. - [**P0707**: Metaclass functions: Generative C++](https://wg21.link/p0707) is the paper I brought to the ISO C++ committee. -This is not yet implemented in cppfront. Thanks again to Andrew Sutton and his colleagues Wyatt Childers and Jennifer Yao for their help in implementing the Clang-based prototypes of this proposal, and everyone else who contributed feedback on the design including Louis Brandy, Chandler Carruth, Casey Carter, Matúš Chochlík, Lawrence Crowl, Pavel Curtis, Louis Dionne, Gabriel Dos Reis, Joe Duffy, Kenny Kerr, Nicolai Josuttis, Aaron Lahman, Scott Meyers, Axel Naumann, Gor Nishanov, Stephan T. 
Lavavej, Andrew Pardoe, Sean Parent, Jared Parsons, David Sankel, Richard Smith, Jeff Snyder, Mike Spertus, Mads Torgersen, Daveed Vandevoorde, Tony Van Eerd, JC van Winkel, Ville Voutilainen, and Titus Winters, and many more WG 21 / SG 7 participants. +This is mostly implemented in cppfront, except for the ability to write your own metafunctions (that's coming). Thanks again to Andrew Sutton and his colleagues Wyatt Childers and Jennifer Yao for their help in implementing the Clang-based prototypes of this proposal, and everyone else who contributed feedback on the design including Louis Brandy, Chandler Carruth, Casey Carter, Matúš Chochlík, Lawrence Crowl, Pavel Curtis, Louis Dionne, Gabriel Dos Reis, Joe Duffy, Kenny Kerr, Nicolai Josuttis, Aaron Lahman, Scott Meyers, Axel Naumann, Gor Nishanov, Stephan T. Lavavej, Andrew Pardoe, Sean Parent, Jared Parsons, David Sankel, Richard Smith, Jeff Snyder, Mike Spertus, Mads Torgersen, Daveed Vandevoorde, Tony Van Eerd, JC van Winkel, Ville Voutilainen, and Titus Winters, and many more WG 21 / SG 7 participants. -### 2018: Updates to Lifetime and Metaclasses (see above) +#### 2018: Updates to Lifetime and Metaclasses (see above) -### 2019: Zero-overhead deterministic exceptions: Throwing values +#### 2019: Zero-overhead deterministic exceptions: Throwing values - [**ACCU 2019**: "De-fragmenting C++: Making exceptions more affordable and usable](https://www.youtube.com/watch?v=os7cqJ5qlzo). - [**CppCon 2019**: "De-fragmenting C++: Making exceptions and RTTI more affordable and usable ("Simplifying C++" #6 of N)](https://www.youtube.com/watch?v=ARYP83yNAWk). @@ -153,7 +89,7 @@ This is not yet implemented in cppfront. Thanks again to Andrew Sutton and his c This is not yet implemented in cppfront. 
-### 2020: Parameter passing +#### 2020: Parameter passing - **ACCU autumn 2019**: "Quantifying accidental complexity: An empirical look at teaching and using C++" was my first public talk about this, but a "beta" version that was not recorded; you can find the description [here](https://accu.org/conf-previous/2019_autumn/sessions/#XQuantifyingAccidentalComplexityAnEmpiricalLookatTeachingandUsingC). - [**CppCon 2020**: "Quantifying accidental complexity: An empirical look at teaching and using C++"](https://www.youtube.com/watch?v=6lurOCdaj0Y): @@ -163,9 +99,9 @@ This is not yet implemented in cppfront. - [**Github.com/hsutter/708**](https://github.com/hsutter/708) is a repo with the paper and demo examples as used in the talk. - [**P2064**: "Assumptions"](https://wg21.link/p2064) is also related to this 'syntax 2' work, because this work includes a contracts design, and assumptions ought to be separate from that. This paper was making the technical argument why assumptions and assertions (contracts) are different things. -This is implemented in cppfront, except not the unified `operator=` experiment which will come with classes. Thanks to Andrew Sutton for an initial Clang-based implementation. +This is implemented in cppfront, including the unified `operator=` for user-defined types. Thanks to Andrew Sutton for an initial Clang-based implementation. -### 2020: "Bridge to NewThingia" +#### 2020: "Bridge to NewThingia" In 2020 I also started socializing the ideas of: @@ -177,14 +113,14 @@ I had specifically in mind a major C++ evolution's success when many attempts to - [**DevAroundTheSun**: "Bridge to Newthingia"](https://herbsutter.com/2020/06/14/talk-video-available-bridge-to-newthingia-devaroundthesun/), an initial 26-minute version. 
- [**C++ on Sea**: "Bridge to NewThingia"](https://www.youtube.com/watch?v=BF3qw1ObUyo) which especially [at the end starting near 48:00](https://youtu.be/BF3qw1ObUyo?t=2883) had a slide that directly tackled the "C++ major evolution" scenario, and laid out what I think it would take to have credible answers to the key questions. -### 2021: `is`, `as`, and pattern matching +#### 2021: `is`, `as`, and pattern matching - [**CppCon 2021**: "Extending and simplifying C++: Thoughts on pattern matching using `is` and `as`"](https://www.youtube.com/watch?v=raB_289NxBk). - [**P2392**: Pattern matching using `is` and `as`](https://wg21.link/p2392) is the ISO C++ committee paper. -This is partly implemented in cppfront. There is basic support for `is` and `as`, and very basic `inspect` expressions. +This is mostly implemented in cppfront. There is support for `is`, `as`, and basic `inspect` expressions. -### 2022: CppCon 2022 talk and cppfront +#### 2022: CppCon 2022 talk and cppfront - [**CppCon 2022: "Can C++ be 10x simpler and safer ...?"**](https://www.youtube.com/watch?v=ELeZAKCN4tY) - This repo. diff --git a/build_h2.bat b/build_h2.bat new file mode 100644 index 0000000000..080a89ff6d --- /dev/null +++ b/build_h2.bat @@ -0,0 +1,6 @@ +@echo off +cd source +cppfront reflect.h2 -verb %1 +cd ..\include +cppfront cpp2regex.h2 -verb %1 +cd.. diff --git a/docs/Cppfront CLA 2024-10 - sample.pdf b/docs/Cppfront CLA 2024-10 - sample.pdf new file mode 100644 index 0000000000..85f8a4cb60 Binary files /dev/null and b/docs/Cppfront CLA 2024-10 - sample.pdf differ diff --git a/docs/cpp2/common.md b/docs/cpp2/common.md new file mode 100644 index 0000000000..4d82c8e1a3 --- /dev/null +++ b/docs/cpp2/common.md @@ -0,0 +1,250 @@ +# Common programming concepts + +## `main` + +As always, `main` is the entry point of the program. 
For example: + +`main` can have either: + +- No parameters:   **`#!cpp main: () /*etc.*/`** + +- One parameter of implicit type named `args`:   **`#!cpp main: (args) /*etc.*/`** + + - The type of `args` cannot be explicitly specified. It is always `cpp2::args_t`, which behaves similarly to a `#!cpp const std::array`. + + - Using `args` performs zero heap allocations. Every `string_view` is directly bound to the string storage provided by host environment. + + - `args.argc` and `args.argv` additionally provide access to the raw C/C++ `main` parameters. + +``` cpp title="main with (args)" hl_lines="5 9" +// Print out command line arguments, then invoke +// a Qt event loop for a non-UI Qt application +main: (args) -> int += { + for args do (arg) { + std::cout << arg << "\n"; + } + + app: QCoreApplication = (args.argc, args.argv); + return app.exec(); +} +``` + +`main` can return: + +- `#!cpp void`, the default return value for functions. No `#!cpp return` statement is allowed in the body. In this case, the compiled Cpp1 code behaves as if `main` returned `#!cpp int`. + +- `#!cpp int`. If the body has no `#!cpp return` statement, the default is to `#!cpp return 0;` at the end of the function body. + +- Some other type that your Cpp1 compiler(s) supports as a nonstandard extension. + + +## Comments + +The usual `#!cpp // line comments` and `#!cpp /* stream comments */` are supported. For example: + +``` cpp title="Writing comments" +// A line comment: After //, the entire +// rest of the line is part of the comment + +/* + A stream comment: After /*, everything until the + next * / (without a space between) is part of the + comment. Note that stream comments do not nest. 
+ */ +``` + + +## Lists and commas + +All lists use `,` commas between list items, and may be enclosed by + +- `(` `)` parentheses, for most lists + +- `[` `]` brackets, for calling the subscript operator + +- `<` `>` angle brackets, for template parameter/argument lists + +For example: + +``` cpp title="Lists" hl_lines="1 4 6 7" +print: (t: T, u: U) = { std::cout << t << u << "\n"; } + +main: () = { + array: std::array = ('A', 'B', 'C'); + + for (0, 1, 2) do (e) { + print( e, array[e] ); + } + // Prints: + // 0A + // 1B + // 2C +} +``` + + +An extra comma at the end of the list, before the closing `)` or `>`, is always allowed but ignored if present (for details, see [Design note: Commas](https://github.com/hsutter/cppfront/wiki/Design-note%3A-Commas)). + +For example: + +``` cpp title="Lists, using optional trailing commas just because we can" hl_lines="1 4 6 7" +print: (t: T, u: U,) = { std::cout << t << u << "\n"; } + +main: () = { + array: std::array = ('A', 'B', 'C',); + + for (0, 1, 2,) do (e) { + print( e, array[e,], ); + } + // Prints: + // 0A + // 1B + // 2C +} +``` + + +## Contextual keywords + +Cpp2 has very few globally reserved keywords; nearly all keywords are contextual, where they have their special meaning when they appear in a particular place in the grammar. For example: + +- `new` is used as an ordinary function to do allocation (e.g., `shared.new(1, 2, 3)`). + +- `struct` and `enum` are used as function names in the metafunctions library. + +- `type` can be used as an ordinary name (e.g., `std::common_type::type`). + +- Unqualified `type_of(x)` is a synonym for Cpp1 `std::remove_cvref_t`. + +In rare cases, usually when consuming code written in other languages, you may need to write a name that is a reserved keyword. The way to do that is to prefix it with `__identifier__`, which treats it as an ordinary identifier (without the prefix). 
+ + +## Fundamental data types + +Cpp2 supports the same fundamental types as today's Cpp1, but additionally provides the following aliases in namespace `cpp2`: + +| Fixed-width types | Synonym for | +|---|---| +| `i8` | `std::int8_t` | +| `i16` | `std::int16_t` | +| `i32` | `std::int32_t` | +| `i64` | `std::int64_t` | +| `u8` | `std::uint8_t` | +| `u16` | `std::uint16_t` | +| `u32` | `std::uint32_t` | +| `u64` | `std::uint64_t` | + +| Variable-width types
(Cpp2-compatible single-word names) | Synonym for (these multi-word
names are not allowed in Cpp2) | +|---|---| +| `ushort` | `#!cpp unsigned short` | +| `uint` | `#!cpp unsigned int` | +| `ulong` | `#!cpp unsigned long` | +| `longlong` | `#!cpp long long` | +| `ulonglong` | `#!cpp unsigned long long` | +| `longdouble` | `#!cpp long double` | + +| For compatibility/interop only,
so deliberately ugly names | Synonym for (these multi-word
names are not allowed in Cpp2) | Notes | +|---|---|---| +| `_schar` | `#!cpp signed char` | Normally, prefer `i8` instead | +| `_uchar` | `#!cpp unsigned char` | Normally, prefer `u8` instead | + +## Type qualifiers + +Types can be qualified with `#!cpp const` and `#!cpp *`. Types are written left-to-right, so a qualifier always applies to what immediately follows it. For example, to declare a `#!cpp const` pointer to a non-`#!cpp const` pointer to a `#!cpp const i32` object, write: + +``` cpp title="Using type qualifiers" +// A const pointer to a non-const pointer to a const i32 object +p: const * * const i32; +``` + +## Literals + +Cpp2 supports the same `#!cpp 'c'`haracter, `#!cpp "string"`, binary, integer, and floating point literals as Cpp1, including most Unicode encoding prefixes and raw string literals. + +Cpp2 supports using Cpp1 user-defined literals for compatibility, to support seamlessly using existing libraries. However, because Cpp2 has [unified function call syntax (UFCS)](expressions.md#ufcs), the preferred way to author the equivalent in Cpp2 is to just write a function or type name as a `.` call suffix. For example: + +- You can create a `u8` value by writing either `u8(123)` or **`123.u8()`**. [^u8using] + +- You can write a 'constexpr' function like `#!cpp nm: (value: i64) -> my_nanometer_type == { /*...*/ }` that takes an integer and returns a value of a strongly typed "nanometer" type, and then create a `nm` value by writing either `nm(123)` or **`123.nm()`**. + +Both **`123.nm()`** and **`123.u8()`** are very similar to user-defined literal syntax, and more general. + + +## Operators + +Operators have the same precedence and associativity as in Cpp1, but some unary operators that are prefix (always or sometimes) in Cpp1 are postfix (always) in Cpp2. + +### Unary operators + +The operators `!`, `+`, and `-` are prefix, as in Cpp1. 
For example: + +``` cpp title="Using prefix operators" +if !vec.empty() { + vec.emplace_back( -123.45 ); +} +``` + +| Unary operator | Cpp2 example | Cpp1 equivalent | +|---|---|---| +| `!` | `!vec.empty()` | `!vec.empty()` | +| `+` | `#!cpp +100` | `#!cpp +100` | +| `-` | `#!cpp -100` | `#!cpp -100` | + +The operators `.`, `..`, `*`, `&`, `~`, `++`, `--`, `()`, `[]`, `..<`, `..=`, and `$` are postfix. For example: + +``` cpp title="Using postfix operators" +// Cpp1 examples, from cppfront's own source code: +// address = &(*tokens)[pos + num]; +// is_void = *(*u)->identifier == "void"; +// Cpp2 equivalents: + address = tokens*[pos + num]&; + is_void = u**.identifier* == "void"; +``` + +Postfix notation lets the code read fluidly left-to-right, in the same order in which the operators will be applied, and lets declaration syntax be consistent with usage syntax. For more details, see [Design note: Postfix operators](https://github.com/hsutter/cppfront/wiki/Design-note%3A-Postfix-operators). + +> Note: The function call syntax `f(x)` calls a namespace-scope function only. The function call syntax `x.f()` is a unified function call syntax (aka UFCS) that calls a type-scope function in the type of `x` if available, otherwise calls the same as `f(x)`. The function call syntax `x..f()` calls a type-scope function only. For details, see [Design note: UFCS](https://github.com/hsutter/cppfront/wiki/Design-note%3A-UFCS). + +| Unary operator | Cpp2 example | Cpp1 equivalent | +|---|---|---| +| `#!cpp .` | `#!cpp obj.f()` | `#!cpp obj.f()` | +| `#!cpp *` | `#!cpp pobj*.f()` | `#!cpp (*pobj).f()` or `#!cpp pobj->f()` | +| `#!cpp &` | `#!cpp obj&` | `#!cpp &obj` | +| `#!cpp ~` | `#!cpp val~` | `#!cpp ~val` | +| `#!cpp ++` | `#!cpp iter++` | `#!cpp ++iter` | +| `#!cpp --` | `#!cpp iter--` | `#!cpp --iter` | +| `(` `)` | `#!cpp f( 1, 2, 3)` | `#!cpp f( 1, 2, 3)` | +| `[` `]` | `#!cpp vec[123]` | `#!cpp vec[123]` | +| `..<` | `#!cpp v.begin().. 
Note: The `...` pack expansion syntax is also supported. + +> Note: The `(` `)`, `[` `]`, `..<`, and `..=` operators are treated as postfix unary operators, though they can take additional arguments. + +> Note: Because `++` and `--` always have in-place update semantics, we never need to remember "use prefix `++`/`--` unless you need a copy of the old value." If you do need a copy of the old value, just take the copy before calling `++`/`--`. When you write a copyable type that overloads `operator++` or `operator--`, cppfront also generates the copy-old-value overload of that function to support natural use of the type from Cpp1 code. + + +### Binary operators + +Binary operators are the same as in Cpp1. From highest to lowest precedence: + +| Binary operators grouped by precedence | +|---| +| `*`, `/`, `%` | +| `+`, `-` | +| `<<`, `>>` | +| `<=>` | +| `<`, `>`, `<=`, `>=` | +| `==`, `!=` | +| `&` | +| `^` | +| `|` | +| `&&` | +| `||` | +| `=` and compound assignment | + + +[^u8using]: Or `123.cpp2::u8()` if you aren't `using` the namespace or that specific name. diff --git a/docs/cpp2/contracts.md b/docs/cpp2/contracts.md new file mode 100644 index 0000000000..b197073adb --- /dev/null +++ b/docs/cpp2/contracts.md @@ -0,0 +1,174 @@ + +# Contracts + +## Overview + +Cpp2 currently supports three kinds of contracts: + +- **Preconditions and postconditions.** A function declaration can include `pre(condition)` and `post(condition)` before the `= /* function body */`. Before entering the function body, preconditions are fully evaluated and postconditions are captured as function expressions to be evaluated later (and perform their captures of values on entry, if any). Immediately before exiting the function body via a normal return, postconditions are evaluated. If the function exits via an exception, postconditions are not evaluated. + +- **Assertions.** A function body can write `assert(condition)` assertion statements. 
Assertions are evaluated when control flow passes through them. + +Notes: + +- `condition` is an expression that evaluates to `#!cpp true` or `#!cpp false`. It will not be evaluated unless checking for this contract group is enabled (`group.is_active()` is `true`). + +- Optionally, `condition` may be followed by `, "message"`, a message to include if a violation occurs. For example, `pre(condition, "message")`. + +- Optionally, a `<group, pred1, pred2>` can be written inside `<` `>` angle brackets immediately before the `(`, to designate that this test is part of the [contract group](#groups) named `group` and (also optionally) [contract predicates](#predicates) `pred1` and `pred2`. If a violation occurs, `group.report_violation()` will be called. For example, `pre<group, pred1, pred2>(condition)`. If no contract group is specified, the contract defaults to being part of the `default` group (spelled `cpp2_default` when used from Cpp1 code). + +The order of evaluation is: + +- First, if the contract group is `unevaluated` then the contract is ignored; `condition` is never evaluated. This special group designates conditions intended for use by static analyzers only, and the only requirement is that the condition be grammatically valid. + +- Next, predicates are evaluated in order. If any predicate evaluates to `#!cpp false`, stop. + +- Next, `group.is_active()` is evaluated. If that evaluates to `#!cpp false`, stop. + +- Next, `condition` is evaluated. If that evaluates to `#!cpp true`, stop. + +- Finally, if all the predicates were true and the group is active and the condition was false, `group.report_violation()` is called. 
+ +For example: + +``` cpp title="Precondition and postcondition examples" hl_lines="2 3" +insert_at: (container, where: int, val: int) + pre<bounds_safety>( 0 <= where <= container.ssize(), "position (where)$ is outside 'container'" ) + post ( container.ssize() == container.ssize()$ + 1 ) += { + _ = container.insert( container.begin()+where, val ); +} +``` + +In this example: + +- The `$` captures are performed before entering the function. + +- The precondition is part of the `bounds_safety` contract group and is checked before entering the function. If the check fails, say because `where` is `#!cpp -1`, then `#!cpp cpp2::bounds_safety.report_violation("position -1 is outside 'container'")` is called. + +- The postcondition is part of the `default` safety contract group. If the check fails, then `#!cpp cpp2::default.report_violation()` is called. + + +## Contract groups + +Contract groups are useful to enable or disable or [set custom handlers](#violation-handlers) independently for different groups of contracts. A contract group `grp` is just the name of an object that can be called with: + +- `grp.report_violation()` and `grp.report_violation(message)`, where `message` is a `* const char` C-style text string + +- `grp.is_active()`, which returns `#!cpp true` if and only if the group is enabled + +You can create new contract groups just by creating new objects that have a `.report_violation` function. The object's name is the contract group's name. The object can be at any scope: local, global, or heap. + +For example, here are some ways to use contract groups of type [`cpp2::contract_group`](#violation-handlers), which is a convenient group type: + +``` cpp title="Using contract groups" hl_lines="1 4 6 10-12" +group_a: cpp2::contract_group = (); // a global group + +func: () = { + group_b: cpp2::contract_group = (); // a local group + + group_c := new<cpp2::contract_group>(); // a dynamically allocated group + + // ... 
+ + assert( some && condition ); + assert( another || condition ); + assert( another && condition ); +} +``` + +You can make all the objects in a class hierarchy into a contract group by having a `.report_violation` function in a base class, and then writing contracts in that hierarchy using `` as desired. This technique is used in cppfront's own reflection API: + +``` cpp title="Example of using 'this' as a contract group, from cppfront 'reflect.h2'" hl_lines="8 9" +function_declaration: @copyable type = +{ + // inherits from a base class that provides '.report_violation' + + // ... + + add_initializer: (inout this, source: std::string_view) + pre (!has_initializer(), "cannot add an initializer to a function that already has one") + pre (parent_is_type(), "cannot add an initializer to a function that isn't in a type scope") + = { /*...*/ } + + // ... + +} +``` + + +## Contract predicates + +Contract predicates are useful to conditionally check specific contracts as a static or dynamic property. Importantly, if any predicate is `#!cpp false`, the check's conditional expression will not be evaluated. + +For example: + +``` cpp title="Using contract predicates" hl_lines="1 3 4 7" +is_checked_build: bool == SEE_BUILD_FLAG; // a static (compile-time) predicate + +checking_enabled: bool = /*...*/ ; // a dynamic (run-time) predicate, + // could change as the program runs + +func: () = { + assert( condition ); +} +``` + +In this example, the order of evaluation is: + +- `is_checked_build` is evaluated. Since it is a compile-time value, the evaluation can happen at compile time. If it evaluates to `#!cpp false`, then stop; the entire contract could be optimized away by the compiler. + +- Otherwise, next `checking_enabled` is evaluated at run time. If it evaluates to `#!cpp false`, then stop. + +- Otherwise, next `audit.is_active()` is evaluated. If it evaluates to `#!cpp false`, then stop. + +- Otherwise, next `condition` is evaluated. 
If it evaluates to `#!cpp true`, then stop. + +- Otherwise, `audit.report_violation()` is called. + + +## `cpp2::contract_group`, and customizable violation handling + +The contract group object could also provide additional functionality. For example, Cpp2 comes with the `cpp2::contract_group` type which allows installing a customizable handler for each object. Each object can only have one handler at a time, but the handler can change during the course of the program. `contract_group` supports: + +- `.set_handler(pfunc)` accepts a pointer to a handler function with signature `#!cpp * (* const char)`. + +- `.get_handler()` returns the current handler function pointer, or null if none is installed. + +- `.is_active()` returns whether there is a current handler installed. + +- `.enforce(condition, message)` evaluates `condition`, and if it is `false` then calls `.report_violation(message)`. + +Cpp2 comes with five predefined `contract_group` global objects in namespace `cpp2`: + +- `default`, which is used as the default contract group for contracts that don't specify a group. + +- `type_safety` for type safety checks. + +- `bounds_safety` for bounds safety checks. + +- `null_safety` for null safety checks. + +- `testing` for general test checks. + +For these groups, the default handler is `cpp2::report_and_terminate`, which prints information about the violation to `std::cerr` and then calls `std::terminate()`. But you can customize it to do anything you want, including to integrate with any third-party or in-house error reporting system your project is already using. 
For example: + +``` cpp title="Example of customized contract violation handler" hl_lines="2 8-10 17" +main: () -> int = { + cpp2::default.set_handler(call_my_framework&); + assert(false, "this is a test, this is only a test"); + std::cout << "done\n"; +} + +call_my_framework: (msg: * const char) = { + // You can do anything you like here, including arbitrary work + // and integration with your current error reporting libraries, + // log-and-continue, throw an exception, whatever is wanted... + std::cout + << "sending error to my framework... [" + << msg << "]\n"; + exit(0); +} +// Prints: +// sending error to my framework... [this is a test, this is only a test] +``` diff --git a/docs/cpp2/declarations.md b/docs/cpp2/declarations.md new file mode 100644 index 0000000000..c62db838ea --- /dev/null +++ b/docs/cpp2/declarations.md @@ -0,0 +1,282 @@ +# Declarations and aliases + +## Unified declarations + +All Cpp2 declarations are written as **"_name_ `:` _kind_ `=` _statement_"**. + +- The _name_ must be a valid identifier (start with a letter, and consist of letters, digits, or `_`). The name can be variadic (be a name for a list of zero or more things) by writing a `...` suffix at the end of the name. + +- The `:` is pronounced **"is a."** + +- The _kind_ can start with [template parameters](#template-parameters) and end with [`#!cpp requires` constraints](#requires). + +- The `=` is pronounced **"defined as."** For the definition of something that will always have the same value, write `==`, pronounced **"defined as a synonym for"**. + +- The _statement_ is typically an expression statement (e.g., `#!cpp a + b();`) or a compound statement (e.g., `#!cpp { /*...*/ return c(d) / e; }`). + +Various parts of the syntax allow a `_` "don't care" wildcard or can be omitted entirely to accept a default (e.g., `#!cpp x: int = 0;` can be equivalently written `#!cpp x: _ = 0;` or `#!cpp x := 0;` both of which deduce the type). 
+ +> Notes: +> +> - When the type is omitted, whitespace does not matter, and writing `#!cpp x: = 0;` or `#!cpp x : = 0;` or `#!cpp x := 0;` or other whitespace is just a stylistic choice. This documentation's style uses the last one, except that when there are multiple adjacent declaration lines, this style lines up their `:` and `=`. +> +> - `==` stresses that this name will always have the given value, to express [aliases](#aliases) and side-effect-free 'constexpr' [function aliases](#function-aliases). + + +### Unnamed declaration expressions + +In an expression, most declarations can be written without a name (just starting with `:`). Such unnamed declaration expressions are useful for single-use temporary variables or 'lambda' functions that don't need a name to be reused elsewhere. For example: + +- `#!cpp :widget = 42` is an unnamed expression-local (aka temporary) object of type `widget` defined as having the initial value `#!cpp 42`. It uses the same general syntax, just without declaring a name. + +- `#!cpp :(x) = std::cout << x` is an unnamed expression-local generic function expression (aka lambda) defined as having the given one-statement body. The body can include [captures](expressions.md#captures). + +Both just omit the name and make the final `;` optional. Otherwise, they have the identical syntax and meaning as if you declared the same thing with a name outside expression scope (e.g., `w: widget = 42;` or `f: (x) = std::cout << x;`) and then used the name in the expression. + +> Note: Throughout Cpp2, every declaration is written with `:`, and every use of `:` is a declaration. + + + +### From functions to local scopes, and back again + +The function syntax is deliberately designed to be general, so you can omit parts. This means Cpp2 has no special "lambda function" syntax for unnamed functions; an unnamed function is really an unnamed function, written using the ordinary function syntax just without a name. 
This scales all the way down to ordinary blocks and statements, which are written the same as functions that have no name or parameters. + +We can illustrate this in two directions. First, let's start with a full function, and successively omit optional parts that we aren't currently using: + +``` cpp title="Start with a full function, and successively omit optional parts if unused" hl_lines="1 5 9 13" +// Full named function +f:(x: int = init) = { /*...*/ } // x is a parameter to the function +f:(x: int = init) = statement; // same, except return type is deduced + +// Omit name => anonymous function (aka 'lambda') + :(x: int = init) = { /*...*/ } // x is a parameter to the function + :(x: int = init) = statement; // same, except return type is deduced + +// Omit declaration => local and immediate (aka 'let' in other languages) + (x: int = init) { /*...*/ } // x is a parameter to this + (x: int = init) statement; // compound or single-statement + +// Omit parameters => ordinary block or statement + { /*...*/ } // ordinary compound statement + statement; // ordinary statement +``` + +Conversely, we can start with an ordinary block or statement, and successively build it up to make it more powerful: + +``` cpp title="Start with an ordinary block or statement, and successively add parts" hl_lines="1 5 9 13" +// Ordinary block or statement + { /*...*/ } // ordinary compound statement + statement; // ordinary statement + +// Add parameters => more RAII locally-scoped variables + (x: int = init) { /*...*/ } // x is destroyed after this + (x: int = init) statement; // compound or single-statement + +// Add declaration => treat the code as a callable object + :(x: int = init) = { /*...*/ } // x is a parameter to the function + :(x: int = init) = statement; // same, except return type is deduced + +// Add name => full named function +f:(x: int = init) = { /*...*/ } // x is a parameter to the function +f:(x: int = init) = statement; // same, except return type is deduced + 
+``` + + +### Template parameters + +A template parameter list is a [list](common.md#lists) enclosed by `<` `>` angle brackets, with the parameters separated by commas. Each parameter is declared using the [same syntax as any type or object](declarations.md). If a parameter's **`:`** ***kind*** is not specified, the default is `: type`. + +For example: + +``` cpp title="Declaring template parameters" hl_lines="1-3 8-9" +array: <T: type, size: i32> type + // parameter T is a type + // parameter size is a 32-bit int += { + // ... +} + +tuple: <Ts...: type> type + // parameter Ts is a variadic list of zero or more types += { + // ... +} +``` + + +### `#!cpp requires` constraints + +A `#!cpp requires` ***condition*** constraint appears at the end of the ***kind*** of a templated declaration. If the condition evaluates to `#!cpp false`, that specialization of the template is ignored as if not declared. + +For example: + +``` cpp title="A requires constraint on a variadic function" hl_lines="3" +print: <Args...: type> + (inout out: std::ostream, args...: Args) + requires sizeof...(Args) >= 1u += { + (out << ... 
<< args); +} +``` + + +### Examples + +``` cpp title="Consistent declarations — name : kind = statement" linenums="1" hl_lines="2 6 10 15 24 28 32 43 49 53" +// n is a namespace defined as the following scope +n: namespace += { + // shape is a templated type with one type parameter T + // (equivalent to '') defined as the following scope + shape: type + = { + // point is a type defined as being always the same as + // (i.e., an alias for) T + point_type: type == T; + + // points is an object of type std::vector, + // defined as having an empty default value + // (type-scope objects are private by default) + points: std::vector = (); + + // draw is a function taking 'this' and 'canvas' parameters + // and returning bool, defined as the following body + // (type-scope functions are public by default) + // + // this is an object of type shape (as if written 'this: shape') + // + // where is an object of type canvas + draw: (this, where: canvas) -> bool + = { + // pen is an object of deduced (omitted) type 'color', + // defined as having initial value 'color::red' + pen := color::red; + + // success is an object of deduced (omitted) type bool, + // defined as having initial value 'false' + success := false; + + // ... + + return success; + } + + // count is a function taking 'this' and returning a type + // deduced from its body, defined as a single-expression body + // (equivalent to '= { return points.ssize(); }' but omitting + // syntax where we're using the language defaults) + count: (this) -> _ = points.ssize(); + + // ... 
+ } + + // color is an @enum type (see Note) defined as having these enumerators + color: @enum type = { red; green; blue; } + + // calc_next_year is a function defined as always returning the same + // value for the same input (i.e., 'constexpr', side effect-free) + calc_next_year: (year: i32) -> i32 == year + 1; +} +``` + +> Note: `@enum` is a metafunction, which provides an easy way to opt into a group of defaults, constraints, and generated functions. For details, see [`@enum`](metafunctions.md#enum). + + +## Aliases + +Aliases are pronounced **"synonym for"**, and written using the same **name `:` kind `=` value** [declaration syntax](../cpp2/declarations.md) as everything in Cpp2: + +- **name** is declared to be a synonym for **value**. + +- **kind** can be any of the kinds: `namespace`, `type`, a function signature, or a type. + +- **`==`**, pronounced **"defined as a synonym for"**, always precedes the value. The `==` syntax stresses that during compilation every use of the name could be equivalently replaced with the value. + +- **value** is the expression that the **name** is a synonym for. + + +### Namespace aliases + +A namespace alias is written the same way as a [namespace](namespaces.md), but using `==` and with the name of another namespace as its value. 
For example: + +``` cpp title="Namespace aliases" hl_lines="1 2 4 5 8 12 16" +// 'chr' is a namespace defined as a synonym for 'std::chrono' +chr : namespace == std::chrono; + +// 'chrlit' is a namespace defined as a synonym for 'std::chrono_literals' +chrlit : namespace == std::chrono_literals; + +main: () = { + using chrlit::_ ; + + // The next two lines are equivalent + std::cout << "1s is (std::chrono::nanoseconds(1s).count())$ns\n"; + std::cout << "1s is (chr::nanoseconds(1s).count())$ns\n"; +} +// Prints: +// 1s is 1000000000ns +// 1s is 1000000000ns +``` + + +### Type aliases + +A type alias is written the same way as a [type](types.md), but using `==` and with the name of another type as its value. For example: + +``` cpp title="Type aliases" hl_lines="1 2 7 10" +// 'imap' is a type defined as a synonym for 'std::map' +imap : type == std::map; + +main: () = { + // The next two lines declare two objects with identical type + map1: std::map = (); + map2: imap = (); + + // Assertion they are the same type, using the same_as concept + static_assert( std::same_as< decltype(map1), decltype(map2) > ); +} +``` + + +### Function aliases + +A function alias is written the same way as a [function](functions.md), but using `==` and with a side-effect-free body as its value; the body must always return the same value for the same input arguments. For example: + +``` cpp title="Function aliases" hl_lines="1 2 6 9 12 15" +// 'square' is a function defined as a synonym for the value of 'i * i' +square: (i: i32) -> _ == i * i; + +main: () = { + // It can be used at compile time, with compile time values + ints: std::array = (); + + // Assertion that the size is the square of 4 + static_assert( ints.size() == 16 ); + + // And it can be used at run time, with run time values + std::cout << "the square of 4 is (square(4))$\n"; +} +// Prints: +// the square of 4 is 16 +``` + +> Note: A function alias is compiled to a Cpp1 `#!cpp constexpr` function. 
+ + +### Object aliases + +An object alias is written the same way as an [object](objects.md), but using `==` and with a side-effect-free value. For example: + +``` cpp title="Object aliases" hl_lines="1 2 5 6" +// 'BufferSize' is an object defined as a synonym for the value 1'000'000 +BufferSize: i32 == 1'000'000; + +main: () = { + buf: std::array<std::byte, BufferSize> = (); + static_assert( buf.size() == BufferSize ); +} +``` + +> Note: An object alias is compiled to a Cpp1 `#!cpp constexpr` object. + diff --git a/docs/cpp2/expressions.md b/docs/cpp2/expressions.md new file mode 100644 index 0000000000..194e71b157 --- /dev/null +++ b/docs/cpp2/expressions.md @@ -0,0 +1,385 @@ + +# Common expressions + +## Calling functions: `f(x)` syntax, `x.f()` UFCS syntax, and `x..f()` members-only syntax + +A function argument list is a [list](common.md#lists) of arguments enclosed by `(` `)` parentheses. + +A function call like `f(x)` is a normal function call that will call non-member functions only, as usual in C++. + +A function call like `x.f()` is a unified function call syntax (aka UFCS) call. It will call a member function if one is available, and otherwise will call `f(x)`. Having UFCS is important for generic code that may want to call a member or a non-member function, whichever is available. It's also important to enable fluid programming styles and natural IDE autocompletion support. + +An operator notation call like `#!cpp a + b` will call an overloaded operator function if one is available, as usual in C++. + +A function call like `x..f()` will consider member functions only. 
+ +For example: + +``` cpp title="Function calls" hl_lines="3 7 11 16 19 20" +// Generic function to log something +// This calls operator<< using operator notation +log: (x) = { clog << x; } + +f: ( v : std::vector ) = { + // This calls log() with the result of std::vector::size() + log( v.size() ); + + // This calls log() with the result of std::ssize(v), because + // v doesn't have a .ssize member function + log( v.ssize() ); +} + +// Generic function to use standard I/O to print any printable types +// safely using string interpolation (instead of type-unsafe format strings) +hello: (name, height: float) = { + // Using UFCS to make direct calls to C functions as if they were members + stdout.fprintf("%s", ("Hi (name)$, your height is (height:.1f)$\"!\n").c_str()); + // Equivalent using iostreams: + // std::cout << "Hello (name)$, your height is (height:.1f)$ inches!\n"; + + // The C and C++ standard libraries are not only fully available, + // but safer (and arguably nicer) when used from Cpp2 syntax code +} + +main: () = { + hello("Flimnap", 6.5); + hello("Goliath", 115); + hello("Polyphemus", 180); +} +// Sample output: +// Hi Flimnap, your height is 6.5"! +// Hi Goliath, your height is 115.0"! +// Hi Polyphemus, your height is 180.0"! +``` + +To explicitly treat an object name passed as an argument as `move` or `out`, write that keyword before the variable name. + +- Explicit `move` is rarely needed. Every definite last use of a local variable will apply `move` by default. Writing `move` from an object before its definite last use means that later uses may see a moved-from state. + +- Explicit `out` is needed only when initializing a local variable separately from its declaration using a call to a function with an `out` parameter. For details, see [Guaranteed initialization](../cpp2/objects.md#init). 
+ +For example: + + + +## `_` — the "don't care" wildcard, including explicit discard + +`_` is pronounced **"don't care"** and allowed as a wildcard in most contexts. For example: + +``` cpp title="Using the _ wildcard" hl_lines="2 5 11" +// We don't care about the guard variable's name +_ : std::lock_guard = mut; + +// If we don't care to write the variable's type, deduce it +x : _ = 42; + // in cases like this, _ can be omitted... + // this is equivalent to "x := 42;" + +return inspect v -> std::string { + is std::vector = "v is a std::vector"; + is _ = "unknown"; // don't care what else, match anything +}; +``` + +Cpp2 treats all function outputs (return values, and results produced via `inout` and `out` parameters) as important, and does not let them be silently discarded by default. To explicitly discard such a value, assign it to `_`. For example: + +``` cpp title="Using _ for explicit discard" hl_lines="1 8" +_ = vec.emplace_back(1,2,3); + // "_ =" is required to explicitly discard emplace_back's + // return value (which is non-void since C++17) + +{ + x := my_vector.begin(); + std::advance(x, 2); + _ = x; // required to explicitly discard x's new value, + // because std::advance modifies x's value +} +``` + +For details, see [Design note: Explicit discard](https://github.com/hsutter/cppfront/wiki/Design-note%3A-Explicit-discard). In Cpp2, data is always initialized, data is never silently lost, data flow is always visible. Data is precious, and it's always safe. + + +## Type/value queries and casts + +### `is` — safe type/value queries + +An `x is C` expression allows safe type and value queries, and evaluates to `#!cpp true` if `x` matches constraint `C`. It supports both static and dynamic queries, including customization, with support for standard library dynamic types like `std::variant`, `std::optional`, `std::expected`, and `std::any` provided out of the box. 
+ +There are two kinds of `is`: + +- A **type query**, where `C` is a type constraint: a type, a template name, a concept, or a type predicate. Here `x` may be a type, or an object or expression; if it is an object or expression, the query refers to `x`'s type. + +| Type constraint kind | Example | +|---|---| +| Static type query | `x is int` | +| Dynamic type query | `ptr* is Shape` | +| Static template type query | `x is std::vector` | +| Static concept query | `x is std::integral` | + +- A **value query**, where `C` is a value constraint: a value, or a value predicate. Here `x` must be an object or expression. + +| Value constraint kind | Example | +|---|---| +| Value | `#!cpp x is 0` | +| Value predicate | `#!cpp x is (in(10, 20))` | + +`is` is useful throughout the language, including in `inspect` pattern matching alternatives. `is` is extensible, and works out of the box with `std::variant`, `std::optional`, `std::expected`, and `std::any`. For examples, see: + +- [`mixed-inspect-templates.cpp2`](https://github.com/hsutter/cppfront/tree/main/regression-tests/mixed-inspect-templates.cpp2) +- [`mixed-inspect-values.cpp2`](https://github.com/hsutter/cppfront/tree/main/regression-tests/mixed-inspect-values.cpp2) +- [`mixed-inspect-values-2.cpp2`](https://github.com/hsutter/cppfront/tree/main/regression-tests/mixed-inspect-values-2.cpp2) +- [`mixed-type-safety-1.cpp2`](https://github.com/hsutter/cppfront/tree/main/regression-tests/mixed-type-safety-1.cpp2) +- [`pure2-enum.cpp2`](https://github.com/hsutter/cppfront/tree/main/regression-tests/pure2-enum.cpp2) +- [`pure2-inspect-expression-in-generic-function-multiple-types.cpp2`](https://github.com/hsutter/cppfront/tree/main/regression-tests/pure2-inspect-expression-in-generic-function-multiple-types.cpp2) +- [`pure2-inspect-fallback-with-variant-any-optional.cpp2`](https://github.com/hsutter/cppfront/tree/main/regression-tests/pure2-inspect-fallback-with-variant-any-optional.cpp2) +- 
[`pure2-type-safety-1.cpp2`](https://github.com/hsutter/cppfront/tree/main/regression-tests/pure2-type-safety-1.cpp2) +- [`pure2-type-safety-2-with-inspect-expression.cpp2`](https://github.com/hsutter/cppfront/tree/main/regression-tests/pure2-type-safety-2-with-inspect-expression.cpp2) + +Here are some `is` queries with their Cpp1 equivalents. In this table, uppercase names are type names, lowercase names are objects, `v` is a `std::variant` where one alternative is `T`, `o` is a `std::optional`, and `a` is a `std::any`: + +| Some sample `is` queries | Cpp1 equivalent +|---|---| +| `X is Y && Y is X` | `std::is_same_v` | +| `D is B` | `std::is_base_of` | +| `#!cpp pb is *D` | `#!cpp dynamic_cast(pb) != nullptr` | +| `v is T` | `std::holds_alternative(v)` | +| `a is T` | `#!cpp a.type() == typeid(T)` | +| `o is T` | `o.has_value()` | + +> Note: `is` unifies a variety of differently-named Cpp1 language and library queries under one syntax, and supports only the type-safe ones. + + +### `as` — safe casts and conversions + +An `x as T` expression allows safe type casts. `x` must be an object or expression, and `T` must be a type. Like `is`, `as` supports both static and dynamic typing, including customization, with support for standard library dynamic types like `std::variant`, `std::optional`, `std::expected`, and `std::any` provided out of the box. 
For example: + +``` cpp title="Using as" hl_lines="4 6 14" +main: () = { + a: std::any = 0; // a's type is now int, value 0 + test(a); // prints "zero" + a = "plugh" as std::string; // a's type is now std::string, value "plugh" + test(a); // prints "plugh" + test("xyzzy" as std::string); // prints "xyzzy" +} + +// A generic function that takes an argument 'x' of any type, +// same as "void test( auto const& x )" in C++20 syntax +test: (x) = { + std::cout << inspect x -> std::string { + is 0 = "zero"; + is std::string = x as std::string; + is _ = "(no match)"; + } << "\n"; +} +``` + +Here are some `as` casts with their Cpp1 equivalents. In this table, uppercase names are type names, lowercase names are objects, `v` is a `std::variant` where one alternative is `T`, `o` is a `std::optional`, and `a` is a `std::any`: + +| Some sample `as` casts | Cpp1 equivalent +|---|---| +| `x as Y` | `Y{x}` | +| `#!cpp pb as *D` | `#!cpp dynamic_cast(pb)` | +| `v as T` | `std::get(v)` | +| `a as T` | `std::any_cast(a)` | +| `o as T` | `o.value()` | + +> Note: `as` unifies a variety of differently-named Cpp1 language and library casts and conversions under one syntax, and supports only the type-safe ones. + + +### Unchecked (explicitly type-unsafe) casts + +Casts that are not known to be type-safe at compile time must always be explicit. + +To perform a numeric narrowing cast, such as `i32` to `i16` or `u32`, use `unchecked_narrow(from)`. Otherwise, if you must perform any other type-unsafe cast, use `unchecked_cast(from)`. 
For example: + +``` cpp title="Type-unsafe narrowing and casts must be explicit" hl_lines="2 3 6 7" +f: (i: i32, inout s: std::string) = { + // j := i as i16; // error, maybe-lossy narrowing + j := unchecked_narrow<i16>(i); // ok, 'unchecked' is explicit + + pv: *void = s&; + // pi := pv as *std::string; // error, type-unsafe cast + pi := unchecked_cast<*std::string>(pv); // ok, 'unchecked' is explicit +} +``` + + +## `inspect` — pattern matching + +An `inspect expr -> Type { /* alternatives */ }` expression allows pattern matching using `is`. + +- `expr` is evaluated once. + +- Each alternative is spelled `is C = alternative;`, and the alternatives are evaluated in order. Each `is C` is evaluated as if called with `expr is C`, and if it evaluates to `#!cpp true`, then its `#!cpp = alternative;` body is used as the value of the entire `inspect` expression, and the meaning is the same as if the entire `inspect` expression had been written as just `#!cpp :Type = alternative;` — i.e., an unnamed object expression (aka 'temporary object') of type `Type` initialized with `alternative`. + +- A catchall `is _` is required. 
+ +For example: + +``` cpp title="Using inspect" hl_lines="6-13" +// A generic function that takes an argument 'x' of any type +// and inspects various things about `x` +test: (x) = { + forty_two := 42; + std::cout + << inspect x -> std::string { + is 0 = "zero"; // == 0 + is (forty_two) = "the answer"; // == 42 + is int = "integer"; // is type int (and not 0 or 42) + is std::string = x as std::string; // is type std::string + is std::vector = "a std::vector"; // is a vector + is _ = "(no match)"; // is something else + } + << "\n"; +} + +// Sample call site +test(42); + // Behaves as if the following function were called: + // test: (x) = { std::cout << (:std::string = "the answer") << "\n"; } + // (and that's why inspect alternatives are introduced with '=') +``` + +For more examples, see also the examples in the previous two sections on `is` and `as`, many of which use `inspect`. + + +## `..<` and `..=` — range operators + +`..<` and `..=` designate a range of things. Use `begin ..< end` for a half-open range (that does not include `end`) and `first ..= last` for a closed range (that does include `last`, and `last` must be a valid value and must be valid to increment once). These operators work for any type that supports `++`; they start with the `first` value, and use `++` to increment until they reach the `last` value (which is included by `..=`, and not included by `..<`). + +> Note: For all numeric ranges, `last`'s value must be reachable by incrementing `first` a finite number of times. For `..=` closed numeric ranges, `last` must not be `std::numeric_limits::max()` or `std::numeric_limits::max()`. 
+ +For example: + +``` cpp title="Using ..< and ..= for ranges" hl_lines="5 11 16-18" +test: (v: std::vector) = +{ + // Print strings from "Nonesuch" (if present) onward + i1 := v.std::ranges::find("Nonesuch"); + for i1 ..< v.end() do (e) { + std::cout << " (e*)$\n"; + } + + if v.ssize() > 2 { + // Print indexes 1 and 2 of v + for 1 ..= 2 do (e) { + std::cout << " (e)$ (v[e])$\n"; + } + } + + std::cout << " ((1 ..= 100).sum())$ \n"; + std::cout << " ((1 ..< 100).contains(99))$ \n"; + std::cout << " ((1 ..< 100).contains(100))$ \n"; +} + +main: () = { + vec: std::vector = ("Beholder", "Grue", "Nonesuch", "Wumpus"); + test( vec ); +} +// Prints: +// Nonesuch +// Wumpus +// 1 Grue +// 2 Nonesuch +// 5050 +// true +// false +``` + + +## `$` — captures, including interpolations + +Suffix `$` is pronounced **"paste the value of"** and captures the value of an expression at the point when the expression where the capture is written is evaluated. Depending on the complexity of the capture expression `expr$` and where it is used, parentheses `(expr)$` may be required for precedence or to show the boundaries of the expression. + +`x$` always captures `x` by value. To capture by reference, take the address and capture a pointer using `x&$`. If the value is immediately used, dereference again; for example `:(val) total&$* += val` adds to the `total` local variable itself, not a copy. + +Captures are evaluated at the point where they are written in function expressions, contract postconditions, and string literals. The stored captured value can then be used later when evaluating its context, such as when the function expression body containing the captured value is actually called later (one or more times), when the postcondition containing the captured value is evaluated later when the function returns, or when the string literal containing the captured value is read later. + +The design and syntax are selected so that capture is spelled the same way in all contexts. 
For details, see [Design note: Capture](https://github.com/hsutter/cppfront/wiki/Design-note%3A-Capture). + + +### Capture in function expressions (aka lambdas) + +Any capture in a function expression body is evaluated at the point where the function expression is written, at the declaration of the function expression. The function expression itself is then evaluated each time the function is invoked, and can reference the captured value. + +For example: + +``` cpp title="Capture in an unnamed function expression (aka lambda)" hl_lines="7 8 13-18" +main: () = { + s := "-ish\n"; + vec: std::vector = (1, 2, 3, 5, 8, 13 ); + + std::ranges::for_each( + vec, + :(i) = std::cout << i << s$ + // Function capture: Paste the value of 's' + ); +} + +// prints: +// 1-ish +// 2-ish +// 3-ish +// 5-ish +// 8-ish +// 13-ish +``` + +Another example: + +``` cpp title="Capture in a named function expression (aka lambda)" hl_lines="2 4 9 14 15" +main: () = { + price := 100; + func := // Note: 'func' is a named variable of deduced type initialized to ... + :() = { std::cout << "Price = " << price$ << "\n"; } // ... this lambda + ; // This is a way to declare 'local functions', which can also capture + // (In the future, Cpp2 may directly support local functions) + func(); + price = 200; + func(); +} + +// prints: +// Price = 100 +// Price = 100 +``` + + +### Capture in contract postconditions + +Any capture in a postcondition is evaluated at the point where the postcondition is written, at the beginning (entry) of the function. The postcondition itself is then evaluated when the function returns, and can reference the captured value. + +For example: + +``` cpp title="Capture in contract postconditions" hl_lines="2" +push_back: (coll, value) + post(coll.ssize() == coll.ssize()$ + 1) + // Paste the value of `coll.ssize()` += { + // ... 
+} +``` + + +### Capture in string interpolation + +A string literal can capture the value of an expression `expr` by writing `(expr)$` inside the string literal. The `(` `)` are required, and cannot be nested. A string literal has type `std::string` if it performs any captures, otherwise it is a normal C/C++ string literal (array of characters). + +Any capture in a string literal is evaluated at the point where the string literal is written. The string literal can be used repeatedly later, and includes the captured value. + +For example: + +``` cpp title="Capture for string interpolation" hl_lines="2 5" +x := 0; +std::cout << "x is (x)$\n"; + // Paste the value of `x` +x = 1; +std::cout << "now x+2 is (x+2)$\n"; + // Paste the value of `x+2` + +// prints: +// x is 0 +// now x+2 is 3 +``` + +A string literal capture can include a `:suffix` where the suffix is a [standard C++ format specification](https://en.cppreference.com/w/cpp/utility/format/spec). For example, `#!cpp (x.price(): <10.2f)$` evaluates `x.price()` and converts the result to a string with 10-character width, 2 digits of precision, and left-justified. diff --git a/docs/cpp2/functions.md b/docs/cpp2/functions.md new file mode 100644 index 0000000000..2b116582d1 --- /dev/null +++ b/docs/cpp2/functions.md @@ -0,0 +1,500 @@ + +# Functions + +## Overview + +A function is defined by writing a function signature after the `:` and a statement (expression or `{` `}` compound statement) after the `=`. After the optional [template parameters](declarations.md#template-parameters) available for all declarations, a function signature consists of a possibly-empty [parameter list](#parameters), and one or more optional [return values](#return-values). 
+ +For example, the minimal function named `func` that takes no parameters and returns nothing (`#!cpp void`) is: + +``` cpp title="A minimal function" +func: ( /* no parameters */ ) = { /* empty body */ } +``` + + +## Function signatures: Parameters, returns, and using function types + +### Overview + +There are six kinds of function parameters, and two of them are also the kinds of function returns: + +| Kind | Parameter | Return | +| -------- | -------- | ------- | +| `in` | ⭐ | | +| `inout` | ✅ | | +| `out` | ✅ | | +| `copy` | ✅ | | +| `move` | ✅ | ✅ | +| `forward` | ✅ | ⭐ | + +The two cases marked ⭐ can automatically pass/return by value or by reference, and so they can be optionally written with `_ref` to require pass/return by reference and not by value (i.e., `in_ref`, `-> forward_ref`). + +That's it. For details, see below. + +### Parameters + +The parameter list is a [list](common.md#lists) enclosed by `(` `)` parentheses. Each parameter is declared using the [same unified syntax](declarations.md) as used for all declarations. For example: + +``` cpp title="Declaring parameters" hl_lines="2-4" +func: ( + x: i32, // parameter x is a 32-bit int + y: std::string, // parameter y is a std::string + z: std::map // parameter z is a std::map + ) += { + // ... +} +``` + +The parameter type can be deduced by writing `_` (the default, so it can be omitted). You can use `is` to declare a type constraint (e.g., a concept) that a deduced type must match, in which case `_` is required. 
For example: + +``` cpp title="Declaring a parameter of constrained deduced type" hl_lines="2 3 6" +// ordinary generic function, x's type is deduced +print: (x: _) = { std::cout << x; } +print: (x) = { std::cout << x; } // same, using the _ default + +// number's type is deduced, but must match the std::integral concept +calc: (number: _ is std::integral) = { /*...*/ } +``` + +There are six ways to pass parameters that cover all use cases, that can be written before the parameter name: + +| Parameter ***kind*** | "Pass an `x` the function ______" | Accepts arguments that are | Special semantics | ***kind*** `x: X` compiles to Cpp1 as | +|---|---|---|---|---| +| **`in`** (default) | can read from | anything | always `#!cpp const`

automatically passes by value if cheaply copyable

to guarantee a by-reference passing, use `in_ref` | `X const x` or
`X const& x` | +| **`copy`** | gets a copy of | anything | acts like a normal local variable initialized with the argument | `X x` | +| **`inout`** | can read from and write to | lvalues | | `X& x` | +| **`out`** | writes to (including construct) | lvalues (including uninitialized) | must `=` assign/construct before other uses | `cpp2::impl::out<X>` | +| **`move`** | moves from (consume the value of) | rvalues | automatically moves from every definite last use | `X&&` | +| **`forward`** | forwards | anything | automatically forwards from every definite last use | `auto&&`, and if a specific type is named also a `requires`-constraint requiring convertibility to that type | + +> Note: All parameters and other objects in Cpp2 are `#!cpp const` by default, except for local variables. For details, see [Design note: `#!cpp const` objects by default](https://github.com/hsutter/cppfront/wiki/Design-note%3A-const-objects-by-default). + +For example: + +``` cpp title="Declaring parameter kinds" hl_lines="2 3 10" +append_x_to_y: ( + x : i32, // an i32 I can read from (i.e., const) + inout y : std::string // a string I can read from and write to + ) += { + y = y + to_string(x); // read x, read and write y +} + +wrap_f: ( + forward x // a generic value of deduced type I can forward +) // (omitting x's type means the same as ': _') += { + global_counter += x; // ok to read x + f(x); // last use: automatically does 'std::forward(x)' +} +``` + + +### Return values + +A function can return either a single anonymous return value, or a return parameter list containing named return value(s). The default is `#!cpp -> void`. + +#### Single anonymous return values + +**`#!cpp ->` _kind_ `X`** to return a single unnamed value of type `X` using the same kinds as in the [parameters](#parameters) syntax, but where the only legal kinds are `move` (the default) or `forward` (with optional `forward_ref`; see below). The type can be `#!cpp -> void` to signify the function has no return value. 
If `X` is not `#!cpp void`, the function body must have a `#!cpp return /*value*/;` statement that returns a value of type `X` on every path that exits the function, or must be a single expression of type `X`. + +To deduce the return type, write `_`: + +- `-> _` deduces by-value return. +- `-> forward _` deduces by-value return (if the function returns a prvalue or type member object) or by-reference return (everything else), based on the `decltype` of the returned expression. +- `-> forward_ref _` deduces by-reference return only. + +A function whose body is a single expression `= expr;` defaults to `-> forward _ = { return expr; }`. + +For example: + +``` cpp title="Functions with an unnamed return value" hl_lines="2 4 7 9 12 14 15 18 20 22" +// A function returning no value (void) +increment_in_place: (inout a: i32) -> void = { a++; } +// Or, using syntactic defaults, the following has identical meaning: +increment_in_place: (inout a: i32) = { a++; } + +// A function returning a single value of type i32 +add_one: (a: i32) -> i32 = { return a+1; } +// Or, using syntactic defaults, the following has identical meaning: +add_one: (a: i32) -> i32 = a+1; + +// A generic function returning a single value of deduced type +add: (a:T, b:U) -> forward _ = { return a+b; } +// Or, using syntactic defaults, the following have identical meaning: +add: (a, b) -> forward _ = a+b; +add: (a, b) a+b; + +// A generic function expression returning a single value of deduced type +vec.std::ranges::sort( :(x:_, y:_) -> forward _ = { return y (x:X, y:Y) -> forward _ = { return y ( /* parameter list */ )`** to return a list of named return parameters using the same [parameters](#parameters) syntax, but where the only needed kinds are `out` (the default, which moves where possible) or `forward`. The function body must [initialize](objects.md#init) the value of each return-parameter `ret` in its body the same way as any other local variable. 
An explicit return statement is written just `#!cpp return;` and returns the named values; the function has an implicit `#!cpp return;` at the end. If only a single return parameter is in the list, it is emitted in the lowered Cpp1 code the same way as a single anonymous return value above, so its name is only available inside the function body. + +For example: + +``` cpp title="Function with multiple/named return values" hl_lines="1 3-4 7-8 14 16-17" +divide: (dividend: int, divisor: int) -> (quotient: int, remainder: int) = { + if divisor == 0 { + quotient = 0; // constructs quotient + remainder = 0; // constructs remainder + } + else { + quotient = dividend / divisor; // constructs quotient + remainder = dividend % divisor; // constructs remainder + } +} + +main: () = { + div := divide(11, 5); + std::cout << "(div.quotient)$, (div.remainder)$\n"; +} +// Prints: +// 2, 1 +``` + +This next example declares a [member function](types.md#this-parameter) with multiple return values in a [type](types.md) named `set`: + +``` cpp title="Member function with multiple/named return values" hl_lines="7 9 10 22-24" +set: type = { + container: std::set; + iterator : type == std::set::iterator; + + // A std::set::insert-like function using named return values + // instead of just a std::pair/tuple + insert: (inout this, value: Key) -> (where: iterator, inserted: bool) = { + set_returned := container.insert(value); + where = set_returned.first; + inserted = set_returned.second; + } + + ssize: (this) -> i64 = std::ssize(container); + + // ... +} + +use_inserted_position: (_) = { } + +main: () = { + m: set = (); + ret := m.insert("xyzzy"); + if ret.inserted { + use_inserted_position( ret.where ); + } + assert( m.ssize() == 1 ); +} +``` + + +#### Function outputs are not implicitly discardable + +A function's outputs are its return values, and the "out" state of any `out` and `inout` parameters. + +Function outputs cannot be silently discarded. 
To explicitly discard a function output, assign it to `_`. For example: + +``` cpp title="No silent discard" hl_lines="9 11 13 17-18 23-24 29-30" +f: () -> void = { } +g: () -> int = { return 10; } +h: (inout x: int) -> void = { x = 20; } + +main: () += { + f(); // ok, no return value + + std::cout << g(); // ok, use return value + + _ = g(); // ok, explicitly discard return value + + g(); // ERROR, return value is ignored + + { + x := 0; + h( x ); // ok, x is referred to again... + std::cout << x; // ... here, so its new value is used + } + + { + x := 0; + h( x ); // ok, x is referred to again... + _ = x; // ... here where its value explicitly discarded + } + + { + x := 0; + h( x ); // ERROR, this is a definite last use of x + } // so x is not referred to again, and its + // 'out' value can't be implicitly discarded +} +``` + +> Cpp2 imbues Cpp1 code with nondiscardable semantics, while staying fully compatible as usual: +> +> - A function written in Cpp2 syntax that returns something other than `#!cpp void` is always compiled to Cpp1 with `[[nodiscard]]`. +> +> - A function call written in Cpp2 `x.f()` member call syntax always treats a non-`#!cpp void` return type as not discardable, even if the function was written in Cpp1 syntax that did not write `[[nodiscard]]`. + +For details and rationale, see [Design note: Explicit discard](https://github.com/hsutter/cppfront/wiki/Design-note%3A-Explicit-discard). + + +### Using function types + +The same function parameter/return syntax can be used as a function type, for example to instantiate `std::function` or to declare a pointer to function variable. 
For example: + +``` cpp title="Using function types with std::function and *pfunc" hl_lines="4 7" +decorate_int: (i: i32) -> std::string = "--> (i)$ <--"; + +main: () = { + pf1: std::function< (i: i32) -> std::string > = decorate_int&; + std::cout << "pf1(123) returned \"(pf1(123))$\"\n"; + + pf2: * (i: i32) -> std::string = decorate_int&; + std::cout << "pf2(456) returned \"(pf2(456))$\"\n"; +} +// Prints: +// pf1(123) returned "--> 123 <--" +// pf2(456) returned "--> 456 <--" +``` + + +## Control flow + +### `#!cpp if`, `#!cpp else` — Branches + +`if` and `else` are like always in C++, except that `(` `)` parentheses around the condition are not required. Instead, `{` `}` braces around a branch body *are* required. For example: + +``` cpp title="Using if and else" hl_lines="1 4" +if vec.ssize() > 100 { + do_general_algorithm( container ); +} +else { + do_linear_scan( vec ); +} +``` + + +### `#!cpp for`, `#!cpp while`, `#!cpp do` — Loops + +**`#!cpp do`** and **`#!cpp while`** are like always in C++, except that `(` `)` parentheses around the condition are not required. Instead, `{` `}` braces around the loop body *are* required. + +**`#!cpp for range do (e)`** ***statement*** says "for each element in `range`, call it `e` and perform the statement." The loop parameter `(e)` is an ordinary parameter that can be passed using any [parameter kinds](#parameters); as always, the default is `in`, which is read-only and expresses a read-only loop. The statement is not required to be enclosed in braces. + +Every loop can have a `next` clause, that is performed at the end of each loop body execution. This makes it easy to have a counter for any loop, including a range `#!cpp for` loop. + +> Note: Whitespace is just a stylistic choice. This documentation's style generally puts each keyword on its own line and lines up what follows. 
+ +For example: + +``` cpp title="Using loops" hl_lines="4 5 13 16 17 22-24" +words: std::vector = ("Adam", "Betty"); +i := 0; + +while i < words.ssize() // while this condition is true +next i++ // and increment i after each loop body is run +{ // do this loop body + std::cout << "word: (words[i])$\n"; +} +// prints: +// word: Adam +// word: Betty + +do { // do this loop body + std::cout << "**\n"; +} +next i-- // and decrement i after each loop body is run +while i > 0; // while this condition is true +// prints: +// ** +// ** + +for words // for each element in 'words' +next i++ // and increment i after each loop body is run +do (inout word) // declare via 'inout' the loop can change the contents +{ // do this loop body + word = "[" + word + "]"; + std::cout << "counter: (i)$, word: (word)$\n"; +} +// prints: +// counter: 0, word: [Adam] +// counter: 1, word: [Betty] +``` + +There is no special "select" or "where" to perform the loop body for only a subset of matches, because this can naturally be expressed with `if`. For example: + +``` cpp title="Using loops + if" hl_lines="7" +// Continuing the previous example +i = 0; + +for words +next i++ +do (word) +if i % 2 == 1 // if i is odd +{ // do this loop body + std::cout << "counter: (i)$, word: (word)$\n"; +} +// prints: +// counter: 1, word: [Betty] +``` + +Here is the equivalent of the Cpp1 code `for ( int i = 0; i < 10; ++i ){ std::cout << i; }`: + +``` cpp title="Equivalent of Cpp1 'for ( int i = 0; i < 10; ++i ){ std::cout << i; }'" +(copy i := 0) +while i < 10 +next i++ { + std::cout << i; +} +``` + +Line by line: + +- `(copy i := 0)`: Any statement can have [statement-local parameters](declarations.md#from-functions-to-local-scopes-and-back-again), and this is declaring `i` as an `int` that's local to the loop. 
Parameters by default are `const`, and for not-cheap-to-copy types they bind to the original value; so because we want to modify `i` we say `copy` to explicitly declare this is the loop's own mutable scratch variable. +- `while i < 10`: The termination condition. +- `next i++`: The end-of-loop-iteration statement. Note `++` is always postfix in Cpp2. + + +#### Loop names, `#!cpp break`, and `#!cpp continue` + +Loops can be named using the usual **name `:`** syntax that introduces all names, and `#!cpp break` and `#!cpp continue` can refer to those names. For example: + +``` cpp title="Using named break and continue" hl_lines="1 3 6 10" +outer: while i Move/forward from definite last use + +In a function body, a **definite last use** of a local name is a single use of that name in a statement that is not in a loop, where no control flow path after that statement mentions the name again. + +For each definite last use: + +- If the name is a `copy` or `move` parameter or is a local object whose name does not start with `guard`, we know the object will not be used again before being destroyed, and so the object is automatically treated as an rvalue (move candidate). If the expression that contains the last use is able to move from the rvalue, the move will happen automatically. + +- If the name is a `forward` parameter, the object is automatically forwarded to preserve its constness and value category (`std::forward`-ed). + +> Note: This gives language meaning to a naming convention of `guard` as a name prefix for "guard" stack objects, such as local `std::scoped_lock` objects, whose destructors are always the object's real last use. 
+ +For example: + +``` cpp title="Definite last uses" linenums="1" hl_lines="13 16 19 21" +f: ( + copy x: some_type, + move y: some_type, + forward z: some_type + ) += { + w: some_type = "y"; + + prepare(x); // NOT a definite last use + + if something() { + process(y); + z.process(x); // definite last uses of x and z + } + else { + cout << z; // definite last use of z + } + + transfer(y); // definite last use of y + + offload(w); // definite last use of w +} +``` + +In this example: + +- `x` has a definite last use on one path, but not another. Line 13 is a definite last use that automatically treats `x` as an rvalue. However, if the `#!cpp else` is taken, `x` gets no special automatic handling. Line 9 is not a definite last use because `x` could be used again where it is mentioned later on line 13. + +- `y` has a definite last use on every path, in this case the same on all executions of the function. Line 19 is a definite last use that automatically treats `y` as an rvalue. + +- `z` has a definite last use on every path, but unlike `y` it can be a different last use on different executions of the function. That's fine, each of lines 13 and 16 is a definite last use that automatically forwards the constness and value category of `z`. + +- `w` has a definite last use on every path, in this case the same on all executions of the function. Line 21 is a definite last use that automatically treats `w` as an rvalue. + + +## Generality note: Summary of function defaults + +There is a single function syntax, designed so we can just omit the parts we're not currently using. 
+ +For example, let's express in full verbose detail that `equals` is a function template that has two type parameters `T` and `U`, two ordinary `in` parameters `a` and `b` of type `T` and `U` respectively, and a deduced return type, and its body returns the result of `a == b`: + +``` cpp title="equals: A generic function written in full detail (using no defaults)" +equals: <T: type, U: type> (in a: T, in b: U) -> _ = { return a == b; } +``` + +We can write all that, but we don't have to. + +First, `: type` is the default for template parameters, so we can omit it since that's what we want: + +``` cpp title="equals: Identical meaning, now using the :type default for template parameters" +equals: <T, U> (in a: T, in b: U) -> _ = { return a == b; } +``` + +So far, the return type is already using one common default available throughout Cpp2: the wildcard `_` (pronounced "don't care"). Since this function's body doesn't actually use the parameter type names `T` and `U`, we can just use wildcards for the parameter types too: + +``` cpp title="equals: Identical meaning, now using the _ wildcard also for the parameter types" +equals: (in a: _, in b: _) -> _ = { return a == b; } +``` + +Next, `: _` is also the default parameter type, so we don't need to write even that: + +``` cpp title="equals: Identical meaning, now using the :_ default parameter type" +equals: (in a, in b) -> _ = { return a == b; } +``` + +Next, `in` is the default [parameter kind](#parameters). So we can use that default too: + +``` cpp title="equals: Identical meaning, now using the 'in' default parameter kind" +equals: (a, b) -> _ = { return a == b; } +``` + +We already saw that `{ return` ... `; }` is the default for a single-expression function body that deduces its return type: + +``` cpp title="equals: Identical meaning, now using the { return ... 
} default body decoration" +equals: (a, b) -> _ = a == b; +``` + +Next, `#!cpp -> forward _` (fully deduced return type) is the default for single-expression functions that return something, and in this case will have the same meaning as `#!cpp -> _` : + +``` cpp title="equals: Identical meaning, now using the -> _ = default for functions that return something" +equals: (a, b) = a == b; +``` + +Finally, at expression scope (aka "lambda/temporary") functions/objects aren't named, and the trailing `;` is optional: + +``` cpp title="(not) 'equals': Identical meaning, but without a name as an unnamed function at expression scope" +:(a, b) = a == b +``` + +Here are some additional examples of unnamed function expressions: + +``` cpp title="Some more examples of unnamed function expressions" +std::ranges::for_each( a, :(x) = std::cout << x ); + +std::ranges::transform( a, std::back_inserter(b), :(x) = x+1 ); + +where_is = std::ranges::find_if( b, :(x) = x == waldo$ ); +``` + +> Note: Cpp2 doesn't have a separate "lambda" syntax; you just use the regular function syntax at expression scope to write an unnamed function, and the syntactic defaults are chosen to make such function expressions convenient to write. And because in Cpp2 every local variable [capture](expressions.md#captures) (for example, `waldo$` above) is written in the body, it doesn't affect the function syntax. 
+ diff --git a/docs/cpp2/generalized-copy-move-construction-assignment.png b/docs/cpp2/generalized-copy-move-construction-assignment.png new file mode 100644 index 0000000000..61713bbb2d Binary files /dev/null and b/docs/cpp2/generalized-copy-move-construction-assignment.png differ diff --git a/docs/cpp2/metafunctions.md b/docs/cpp2/metafunctions.md new file mode 100644 index 0000000000..8a8eaf7516 --- /dev/null +++ b/docs/cpp2/metafunctions.md @@ -0,0 +1,472 @@ + +# Metafunctions + +## Overview + +A metafunction is a compile-time function that can participate in interpreting the meaning of a declaration, and can: + +- apply defaults (e.g., `interface` makes functions virtual by default) + +- enforce constraints (e.g., `value` enforces that the type has no virtual functions) + +- generate additional functions and other code (e.g., `value` generates copy/move/comparison operations for a type if it didn't write them explicitly) + +The most important thing about metafunctions is that they are not hardwired language features — they are compile-time library code that uses the reflection and code generation API, that lets the author of an ordinary type easily opt into a named set of defaults, requirements, and generated contents. This approach is essential to making the language simpler, because it lets us avoid hardwiring special "extra" types into the language and compiler. + +## Applying metafunctions using `@` + +Metafunctions provide an easy way for a type author to opt into a group of defaults, constraints, and generated functions: Just write `@name` after the `:` of a declaration, where `name` is the name of the metafunction. 
This lets the type author declare (and the human reader see) the intent up front: "This isn't just any `type`, this is a `@value type`" which automatically gives the type default/copy/move construction and assignment, `<=>` with `std::strong_ordering` comparisons, and guarantees that it has a public destructor and no protected or virtual functions: + +``` cpp title="Using the value metafunction when writing a type" hl_lines="1" +point2d: @value type = { + x: i32 = 0; + y: i32 = 0; + // @value automatically generates default/copy/move + // construction/assignment and <=> strong_ordering comparison, + // and emits an error if you try to write a non-public + // destructor or any protected or virtual function +} +``` + +## Generating source code at compile time + +A metafunction applied to a definition using `@` gets to participate in interpreting the meaning of the definition by inspecting and manipulating the definition's parse tree. For example: + +``` cpp title="shape.cpp2: Using @interface @print" hl_lines="1" +shape: @interface @print type = { + draw : (this); + move_by: (this, dx: double, dy: double); +} +``` + +The above code: + +- applies `@interface`, which makes functions pure virtual by default and defines a virtual destructor with a do-nothing body if there isn't already a virtual destructor (among other things), and + +- then applies `@print`, which pretty-prints the resulting parse tree as source code to the console so that we can see the results of what the first metafunction did. 
+ +The result of compiling this is the following cppfront output, which is the `@interface`-modified Cpp2 source code as printed by `@print`: + +``` cpp title="'cppfront shape.cpp2' output to the console, from @print" hl_lines="1" +shape:/* @interface @print */ type = +{ + public draw:(virtual in this); + + public move_by:( + virtual in this, + in dx: double, + in dy: double + ); + + operator=:(virtual move this) = + { + } +} +``` + +Finally, cppfront also emits the following in `shape.cpp`: + +``` cpp title="'cppfront shape.cpp2' output to 'shape.cpp'" +class shape { + public: virtual auto draw() const -> void = 0; + public: virtual auto move_by(cpp2::in<double> dx, cpp2::in<double> dy) const -> void = 0; + public: virtual ~shape() noexcept; + + public: shape() = default; + public: shape(shape const&) = delete; /* No 'that' constructor, suppress copy */ + public: auto operator=(shape const&) -> void = delete; + +}; + +shape::~shape() noexcept{} +``` + + +## Built-in metafunctions + +The following metafunctions are provided in the box with cppfront. + + +### For regular value-like types (copyable, comparable) + + +#### `ordered`, `weakly_ordered`, `partially_ordered` + +An `ordered` (or `weakly_ordered` or `partially_ordered`) type has an `#!cpp operator<=>` three-way comparison operator that returns `std::strong_ordering` (or `std::weak_ordering` or `std::partial_ordering`, respectively). This means objects of this type can be used in all binary comparisons: `<`, `<=`, `==`, `!=`, `>=`, and `>`. + +If the user explicitly writes `operator<=>`, its return type must be the same as the one implied by the metafunction they chose. + +If the user doesn't explicitly write `operator<=>`, a default memberwise `operator<=>: (this, that) -> /* appropriate _ordering */;` will be generated for the type. 
+ +These metafunctions will emit a compile-time error if: + +- a user-written `operator<=>` returns a different type than the one implied by the metafunction they chose + +> Note: This feature derived from Cpp2 was already adopted into Standard C++ via paper [P0515](https://wg21.link/p0515), so most of the heavy lifting is done by the Cpp1 C++20/23 compiler, including the memberwise default semantics. In contrast, cppfront has to do the work itself for default memberwise semantics for operator= assignment as those aren't yet part of Standard C++. + + +#### `copyable` + +A `copyable` type has (copy and move) x (construction and assignment). + +If the user explicitly writes any of the copy/move `operator=` functions, they must also write the most general one that takes `(out this, that)`. + +If the user doesn't write any of the copy/move `operator=` functions, a default general memberwise `operator=: (out this, that) = { }` will be generated for the type. + +`copyable` will emit a compile-time error if: + +- there is a user-written `operator=` but no user-written `operator=: (out this, that)` + + +#### `basic_value`, `value`, `weakly_ordered_value`, `partially_ordered_value` + +A `basic_value` type is a regular type: [`copyable`](#copyable), default constructible, and not polymorphic (no protected or virtual functions). + +A `value` (or `weakly_ordered_value` or `partially_ordered_value`) is a `basic_value` that is also [`ordered`](#ordered) (or `weakly_ordered` or `partially_ordered`, respectively). 
+ +```mermaid +graph TD; + value---->basic_value; + weakly_ordered_value---->basic_value; + partially_ordered_value---->basic_value; + basic_value-->copyable; + value-->ordered; + partially_ordered_value-->partially_ordered; + weakly_ordered_value-->weakly_ordered; +``` + +These metafunctions will emit a compile-time error if: + +- any function is protected or virtual + +- the type has a destructor that is not public + + +#### `struct` + +A `struct` is a type with only public bases, objects, and functions, with no virtual functions, and with no user-defined constructors (i.e., no invariants) or assignment or destructors. + +`struct` is implemented in terms of [`cpp1_rule_of_zero`](#cpp1_rule_of_zero). + +`struct` will emit a compile-time error if: + +- any member is non-public + +- any function is virtual + +- there is a user-written `operator=` + + +#### `hashable` + +A `hashable` type provides a `hash: (this) -> size_t` function that performs a memberwise hash of its data members using `std::hash`. + + +### For polymorphic types (interfaces, base classes) + + +#### `interface` + +An `interface` type is an abstract base class having only pure virtual functions. + +Cpp2 has no `interface` feature hardwired into the language, as C# and Java do. Instead you apply the `@interface` metafunction when writing an ordinary `type`. For a detailed example, see [the `shape` example above](#generating-source-code-at-compile-time). + +`interface` will emit a compile-time error if: + +- the type contains a data object + +- the type has a copy or move function (the diagnostic message will suggest a virtual `clone` function instead) + +- any function has a body + +- any function is nonpublic + + +#### `polymorphic_base` + +A `polymorphic_base` type is a pure polymorphic base type that is not copyable, and whose destructor is either public and virtual or protected and nonvirtual. + +Unlike an [interface](#interface), it can have nonpublic and nonvirtual functions. 
+ +`polymorphic_base` will emit a compile-time error if: + +- the type has a copy or move function (the diagnostic message will suggest a virtual `clone` function instead) + +- the type has a destructor that is not public and virtual, and also not protected and nonvirtual + + +### For enumeration types + + +#### `enum` + +Cpp2 has no `enum` feature hardwired into the language. Instead you apply the `@enum` metafunction when writing an ordinary `type`. + +`enum` will emit a compile-time error if: + +- any member has the reserved name `operator=` or `operator<=>`, as these will be generated by the metafunction + +- an enumerator is not public or does not have a deduced type + +For example: + +``` cpp title="Using the @enum metafunction when writing a type" hl_lines="14" +// skat_game is declaratively a safe enumeration type: it has +// default/copy/move construction/assignment and <=> with +// std::strong_ordering, a minimal-size signed underlying type +// by default if the user didn't specify a type, no implicit +// conversion to/from the underlying type, in fact no public +// construction except copy construction so that it can never +// have a value different from its listed enumerators, inline +// constexpr enumerators with values that automatically start +// at 1 and increment by 1 if the user didn't write their own +// value, and conveniences like to_string()... 
the word "enum" +// carries all that meaning as a convenient and readable +// opt-in, without hardwiring "enum" specially into the language +// +skat_game: @enum<i16> type = { + diamonds := 9; + hearts; // 10 + spades; // 11 + clubs; // 12 + grand := 20; + null := 23; +} +``` + +Consider `hearts`: It's a member object declaration, but it doesn't have a type (or a default value) which is normally illegal, but here it's okay because the `@enum` metafunction fills them in: It iterates over all the data members and gives each one the underlying type (here explicitly specified as `i16`, otherwise it would be computed as the smallest signed type that's big enough), and an initializer (by default one higher than the previous enumerator). + +Unlike C `#!cpp enum`, this `@enum` is scoped and strongly typed (does not implicitly convert to the underlying type). + +Unlike C++11 `#!cpp enum class`, it's "just a `type`" which means it can naturally also have member functions and other things that a type can have: + +``` cpp title="An @enum type with a member function" hl_lines="1" +janus: @enum type = { + past; + future; + + flip: (inout this) == { + if this == past { this = future; } + else { this = past; } + } +} +``` + + +#### `flag_enum` + +`flag_enum` is a variation on `enum` that has power-of-two default enumerator values, a default unsigned underlying type that is large enough to hold the values, and supports bitwise operators to combine and test values. 
+ +`flag_enum` will emit a compile-time error if: + +- any member has the reserved name `operator=`, `operator<=>`, `has`, `set`, `clear`, `to_string`, `get_raw_value`, or `none`, as these will be generated by the metafunction + +- an enumerator is not public or does not have a deduced type + +- the values are outside the range that can be represented by the largest default underlying type + +For example: + +``` cpp title="Using the @flag_enum metafunction when writing a type" hl_lines="11" +// file_attributes is declaratively a safe flag enum type: +// same as enum, but with a minimal-size unsigned underlying +// type by default, and values that automatically start at 1 +// and rise by powers of two if the user didn't write their +// own value, and bitwise operations plus .has(flags), +// .set(flags), and .clear(flags)... the word "flag_enum" +// carries all that meaning as a convenient and readable +// opt-in without hardwiring "[Flags]" specially into the +// language +// +file_attributes: @flag_enum type = { + cached; // 1 + current; // 2 + obsolete; // 4 + cached_and_current := cached | current; +} +``` + + +### For dynamic types + + +#### `union` + +`@union` declaratively opts into writing a safe discriminated union/variant dynamic type. + +`union` will emit a compile-time error if: + +- any alternative is not public or has an initializer + +- any member starts with the reserved name prefix `is_` or `set_`, as these will be generated by the metafunction + + +For example: + +``` cpp title="Using the @union metafunction when writing a type" hl_lines="10 18-20 25 26" +// name_or_number is declaratively a safe union/variant type: +// it has a discriminant that enforces only one alternative +// can be active at a time, members always have a name, and +// each member has .is_member(), .set_member(), and .member() +// accessors using the member name... 
the word "union" +// carries all that meaning as a convenient and readable +// opt-in without hardwiring "union" specially into the +// language +// +name_or_number: @union type = { + name: std::string; + num : i32; +} + +main: () = { + x: name_or_number = (); + + x.set_name("xyzzy"); // now x is a string + assert( x.is_name() ); + std::cout << x.name(); // prints the string + + // trying to use x.num() here would cause a Type safety + // contract violation, because x is currently a string + + x.set_num( 120 ); // now x is a number + std::cout << x.num() + 3; // prints 123 +} +``` + +Unlike C `#!cpp union`, this `@union` is safe to use because it always ensures only the active type is accessed. + +Unlike C++17 `std::variant`, this `@union` is easier to use because its alternatives are named, and safer to use because each union type is a distinct type. [^variant] + +Each `@union` type has its own type-safe name, has clear and unambiguous named members, and safely encapsulates a discriminator to rule them all. It uses type-unsafe casts in the implementation, but they are fully encapsulated, where they can be tested once and be safe in all uses. 
+ +Because a `@union type` is still a `type`, it can naturally have other things normal types can have, such as template parameter lists and member functions: + +``` cpp title="A templated custom safe union type" hl_lines="1" +name_or_other: @union type += { + name : std::string; + other : T; + + // a custom member function + to_string: (this) -> std::string = { + if is_name() { return name(); } + else if is_other() { return other() as std::string; } + else { return "invalid value"; } + } +} + +main: () = { + x: name_or_other = (); + x.set_other(42); + std::cout << x.other() * 3.14 << "\n"; + std::cout << x.to_string(); // prints "42" here, but is legal + // whichever alternative is active +} +``` + + +### For computational and functional types + + +#### `regex` + +A `regex` type has data members that are regular expression objects. This metafunction replaces all of the type's data members named `regex` or `regex_*` with regular expression objects of the same type. For example: + +``` cpp title="Regular expression example" hl_lines="1 3 4 16 17 19 27 30 31" +name_matcher: @regex type += { + regex := R"((\w+) (\w+))"; // for example: Margaret Hamilton + regex_no_case := R"(/(ab)+/i)"; // case insensitive match of "ab"+ +} + +main: (args) = { + m: name_matcher = (); + + data: std::string = "Donald Duck"; + if args.ssize() >= 2 { + data = args[1]; + } + + // regex.match requires matches to match the entire string, from start to end + result := m.regex.match(data); + if result.matched { + // We found a match; reverse the order of the substrings + std::cout << "Hello (result.group(2))$, (result.group(1))$!\n"; + } + else { + std::cout << "I only know names of the form: .\n"; + } + + // regex.search finds a match anywhere within the target string + std::cout << "Case insensitive match: " + "(m.regex_no_case.search(\"blubabABblah\").group(0))$\n"; +} +// Prints: +// Hello Duck, Donald! 
+// Case insensitive match: abAB +``` + +The `@regex` metafunction currently supports most of [Perl regex syntax](https://perldoc.perl.org/perlre), except for Unicode characters and the syntax tokens associated with them. See [Supported regular expression features](../notes/regex_status.md) for a list of regex options. + +Each regex object has the type `cpp2::regex::regular_expression`, which is defined in `include/cpp2regex.h2`. The member functions are: + +``` cpp title="Member functions for regular expressions" +// .match() requires matches to match the entire string, from start to end +// .search() finds a match anywhere within the target string + +match : (this, str: std::string_view) -> search_return; +search: (this, str: std::string_view) -> search_return; + +match : (this, str: std::string_view, start) -> search_return; +search: (this, str: std::string_view, start) -> search_return; + +match : (this, str: std::string_view, start, length) -> search_return; +search: (this, str: std::string_view, start, length) -> search_return; + +match : (this, start: Iter, end: Iter) -> search_return; +search: (this, start: Iter, end: Iter) -> search_return; +``` + +The return type `search_return` is defined in `cpp2::regex::regular_expression`. It has these members: + +``` cpp title="Members of a regular expression result" +matched: bool; +pos: int; + +// Functions to access groups by number +group_number: (this) -> size_t; +group: (this, g: int) -> std::string; +group_start: (this, g: int) -> int; +group_end: (this, g: int) -> int; + +// Functions to access groups by name +group: (this, g: bstring) -> std::string; +group_start: (this, g: bstring) -> int; +group_end: (this, g: bstring) -> int; +``` + + + +### Helpers and utilities + + +#### `cpp1_rule_of_zero` + +A `cpp1_rule_of_zero` type is one that has no user-written copy/move/destructor functions, and for which Cpp2 should generate nothing so that the Cpp1 defaults for generated special member functions are accepted. 
+ +> C.20: If you can avoid defining default operations, do. +> Reason: It's the simplest and gives the cleanest semantics. +> This is known as "the rule of zero". +> — Stroustrup, Sutter, et al. (C++ Core Guidelines) + + +#### `print` + +`print` prints a pretty-printed visualization of the type to the console. + +This is most useful for debugging metafunctions, and otherwise seeing the results of applying previous metafunctions. + +For a detailed example, see [the `shape` example above](#generating-source-code-at-compile-time). + + +[^variant]: With `variant`, there's no way to distinguish in the type system between a `variant` that stores either an employee id or employee name, and a `variant` that stores either a lucky number or a pet unicorn's dominant color. diff --git a/docs/cpp2/namespaces.md b/docs/cpp2/namespaces.md new file mode 100644 index 0000000000..7de21d1d84 --- /dev/null +++ b/docs/cpp2/namespaces.md @@ -0,0 +1,60 @@ + +# Namespaces + +## Overview + +A namespace `N` can contain declarations that are then accessed by writing `N::` or [`using`](#using) the namespace or declaration. For example: + +``` cpp title="Declaring some things in a namespace" hl_lines="2 8" +// A namespace to put all the names provided by a widget library +widgetlib: namespace = { + widget: type = { /*...*/ } + // ... more things ... +} + +main: () = { + w: widgetlib::widget = /*...*/; +} +``` + + +## `using` + +A `#!cpp using` statement brings names declared in another namespace into the current scope as if they had been declared in the current scope. + +`#!cpp using a_namespace::a_name ;` brings the single name `a_name` into scope. + +`#!cpp using a_namespace::_ ;` brings all the namespace's names into scope using the `#!cpp _` wildcard. + +For example: + +``` cpp title="using statements" hl_lines="13 14 20 21" +// A namespace to put all the names provided by a widget library +widgetlib: namespace = { + widget: type = { /*...*/ } + // ... more things ... 
+} + +main: () = { + // Explicit name qualification + w: widgetlib::widget = /*...*/; + + { + // Using the specific name, no widgetlib:: qualification needed + using widgetlib::widget; + w2: widget = /*...*/; + // ... + } + + { + // Using the whole namespace, no widgetlib:: qualification needed + using widgetlib::_; + w3: widget = /*...*/; + // ... + } + + // ... +} +``` + + diff --git a/docs/cpp2/objects.md b/docs/cpp2/objects.md new file mode 100644 index 0000000000..5f918ad515 --- /dev/null +++ b/docs/cpp2/objects.md @@ -0,0 +1,139 @@ +## Overview + +An object can be declared at any scope: in a namespace, in a `type`, in a function, in an expression. + +Its declaration is written using the same **name `:` kind `=` value** [declaration syntax](../cpp2/declarations.md) as everything in Cpp2: + +- **name** starts with a letter and is followed by other letters, digits, or `_`. Examples: `count`, `skat_game`, `Point2D` are valid names. + +- **kind** is the object's type. In most places, except type scopes, you can write the `_` wildcard as the type (or omit the type entirely) to ask for the type to be deduced. When the type is a template, the templated arguments can be inferred from the constructor (via [CTAD](../welcome/hello-world.md#ctad)). + +- **value** is the object's initial value. To use the default-constructed value, write `()`. 
+ + +For example: + +``` cpp title="Declaring some objects" hl_lines="3 4 7-9 12 13" +// numbers is an object of type std::vector, +// defined as having the initial contents 1, 2, 3 +numbers: std::vector = (1, 2, 3); +numbers: std::vector = (1, 2, 3); // same, deducing the vector's type + +// count is an object of type int, defined as having initial value -1 +count: int = -1; +count: _ = -1; // same, deducing the object's type with the _ wildcard +count := -1; // same, deducing the object's type by just omitting it + +// pi is a variable template; == signifies the value never changes (constexpr) +pi: T == 3.14159'26535'89793'23846L; +pi: _ == 3.14159'26535'89793'23846L; // same, deducing the object's type +``` + +The object type can be deduced by writing `_` (the default, so it can be omitted). You can use `is` to declare a type constraint (e.g., a concept) that a deduced type must match, in which case `_` is required. For example: + +``` cpp title="Declaring an object of constrained deduced type" hl_lines="2" +// number's type is deduced, but must match the std::regular concept +number: _ is std::regular = some_factory_function(); +``` + + +## Guaranteed initialization + +Every object must be initialized using `=` before it is used. + +An object in any scope can be initialized at its declaration. For example: + +``` cpp title="Initializing objects when they are declared" hl_lines="4 10" +shape: type = { + // An object at type scope (data member) + // initialized with its type's default value + points: std::vector = (); + + draw: (this, where: canvas) -> bool + = { + // An object at function scope (local variable) + // initialized with color::red + pen := color::red; + + // ... + } + + // ... +} +``` + +Additionally, at function local scope an object `obj` can be initialized separately from its declaration. 
This can be useful when the object must be declared before a program-meaningful initial value is known (to avoid a dead write of a wrong 'dummy' value), and/or when the object may be initialized in more than one way depending on other logic (e.g., by using different constructors on different paths). The way to do this is: + +- Declare `obj` without an `= initializer` value, for example: `obj: some_type;`. This allocates stack space for the object, but does not construct it. + +- `obj` must have a definite first use on every `#!cpp if`/`#!cpp else` branch path (and that first use must not be inside any loop), and + +- that definite first use must be of the form `obj = value;` which is a constructor call, or else pass `obj` as an `out` argument to an `out` parameter (which is also effectively a constructor call, and performs the construction in the callee). + +For example: + +``` cpp title="Initializing local objects after they are declared" hl_lines="5 14 17 21" +f: () = { + buf: std::array; // uninitialized + // ... calculate some things ... + // ... no uses of buf here ... + buf = some_calculated_value; // constructs (not assigns) buf + // ... + std::cout << buf[0]; // ok, buf has been initialized +} + +g: () = { + buf: std::array; // uninitialized + if flip_coin_is_heads() { + if heads_default_is_available { + buf = copy_heads_default(); // constructs buf + } + else { + buf = (other, constructor); // constructs buf + } + } + else { + load_from_disk( out buf ); // constructs buf (*) + } + std::cout << buf[0]; // ok, buf has been initialized +} + +load_from_disk: (out x) = { + x = /* data read from disk */ ; // when `x` is uninitialized, +} // constructs it; otherwise, assigns +``` + +In the above example, note the simple rule for branches: The local variable must be initialized on both the `#!cpp if` and `#!cpp else` branches, or neither branch.
+ + +## Heap objects + +Objects can also be allocated on the heap using `#!cpp xxx.new (/*initializer, arguments*/)` where `xxx` is any object that acts as a memory allocator and provides a `#!cpp .new` function template. Two memory allocator objects are provided in namespace `cpp2`: + +- `#!cpp unique.new` calls `std::make_unique` and returns a `std::unique_ptr`. + +- `#!cpp shared.new` calls `std::make_shared` and returns a `std::shared_ptr`. + +The default is `#!cpp unique.new` if you don't specify an allocator object. + +For example (see [types](types.md) for more details about writing types): + +``` cpp title="Heap allocation" hl_lines="3-6 10-11" +f: () -> std::shared_ptr += { + // Dynamically allocate an object owned by a std::unique_ptr + // 'vec' is a unique_ptr> containing three values + vec := new>(1, 2, 3); + // shorthand for 'unique.new<...>(...)' + std::cout << vec*.ssize(); // prints 3 + // note that * dereference is a suffix operator + + // Dynamically allocate an object with shared ownership + wid := cpp2::shared.new(); + store_a_copy( wid ); // store a copy of 'wid' somewhere + return wid; // and move-return a copy too + +} // as always in C++, vec is destroyed here automatically, which + // destroys the heap vector and deallocates its dynamic memory +``` + diff --git a/docs/cpp2/safety.md b/docs/cpp2/safety.md new file mode 100644 index 0000000000..0375b66500 --- /dev/null +++ b/docs/cpp2/safety.md @@ -0,0 +1,129 @@ + +# Safety and unchecked code + +Cpp2 aims to be safe by default, usually entirely at compile time, and when needed at run time.
+ +When Cpp2 rejects unsafe code (e.g., signed/unsigned comparison), or when ensuring safety requires run-time checks (e.g., subscript bounds checks), you can opt out as needed in two ways: + +- at a specific place in your code, using `unchecked_*` functions (these are in namespace `cpp2::`, but can be used unqualified from Cpp2 code) +- for a whole source file, using `-no-*-checks` switches + +Nearly always, you should opt out at a specific place in your code where you are confident the result is okay, and, if there is a run-time check, you have measured that the performance difference matters, such as in a hot loop. + + +### Integer mixed-sign `<`, `<=`, `>`, and `>=` (compile-time enforced) + +Comparing signed and unsigned integer values directly using `<`, `<=`, `>`, or `>=` can give wrong results, and so such comparisons are rejected at compile time. + +To disable this check at a specific place in your code, use the appropriate `unchecked_cmp_*` function instead of the operator notation: `unchecked_cmp_less`, `unchecked_cmp_less_eq`, `unchecked_cmp_greater`, or `unchecked_cmp_greater_eq`. + +For example: + +``` cpp title="Integer comparisons" hl_lines="7" +main: () = { + x: i32 = 42; + y: u32 = 43; + + if x < y { } // unsafe, therefore error by default + + if unchecked_cmp_less(x,y) { } // ok, explicit "trust me" opt-out +} +``` + +To disable these checks for the entire source file, you can use cppfront's `-no-comparison-checks` switch: + +``` bash title="Disable prevention of mixed-sign integer comparisons" hl_lines="3" +cppfront myfile.cpp2 # mixed-sign int comparisons banned + +cppfront myfile.cpp2 -no-comparison-checks # mixed-sign int comparisons allowed +``` + + +### Division by zero (run-time checked) + +Dividing integers by zero is undefined behavior, and is rejected at run time by checking the denominator is nonzero. + +To disable this check at a specific place in your code, use `unchecked_div`.
For example: + +``` cpp title="Division by zero" hl_lines="7" +main: () = { + x := 42; + y := 0; + + z := x/y; // unsafe, therefore run-time checked + + w := unchecked_div(x,y); // ok, explicit "trust me" opt-out +} +``` + +To disable these checks for the entire source file, you can use cppfront's `-no-div-zero-checks` switch: + +``` bash title="Disable prevention of division by zero" hl_lines="3" +cppfront myfile.cpp2 # division by zero checked + +cppfront myfile.cpp2 -no-div-zero-checks # division by zero not checked +``` + + +### Null dereference (run-time checked) + +Dereferencing a null pointer is undefined behavior, and is rejected at run time by checking the pointer is not null. + +To disable this check at a specific place in your code, use `unchecked_dereference`. For example: + +``` cpp title="Null dereference" hl_lines="6" +main: () = { + p: *int = cpp1_func(); // could be initialized to null + + p* = 42; // unsafe, therefore run-time checked + + unchecked_dereference(p) = 42; // ok, explicit "trust me" opt-out +} +``` + +To disable these checks for the entire source file, you can use cppfront's `-no-null-checks` switch: + +``` bash title="Disable prevention of null dereference" hl_lines="3" +cppfront myfile.cpp2 # null dereferences checked + +cppfront myfile.cpp2 -no-null-checks # null dereferences not checked +``` + + +### Subscript bounds (run-time checked) + +Accessing an out of bounds subscript is undefined behavior, and is rejected at run time by checking the subscript is in bounds. For an expression `a[b]` where + +- `a` is contiguous and supports `std::size(a)`, and +- `b` is an integral value + +the cppfront compiler injects a check that **`0 <= b < std::size(a)`** before the call to `a[b]`. + +To disable this check at a specific place in your code, use `unchecked_subscript`.
For example: + +``` cpp title="Subscript bounds" hl_lines="12 13" +main: () = { + v: std::vector = ( 1, 2, 3, 4, 5 ); + s: std::span = v; + + idx := calc_index(s); + + v[idx] += 42; // unsafe, therefore run-time checked + s[idx] += 84; // unsafe, therefore run-time checked + + // manually hoist the check and do it myself + if (0 ..< v.size()).contains(idx) { + unchecked_subscript(v,idx) += 42; // ok, explicit "trust me" opt-out + unchecked_subscript(s,idx) += 84; // ok, explicit "trust me" opt-out + } +} +``` + +To disable these checks for the entire source file, you can use cppfront's `-no-subscript-checks` switch: + +``` bash title="Disable prevention of out-of-bounds subscripts" hl_lines="3" +cppfront myfile.cpp2 # subscript bounds checked + +cppfront myfile.cpp2 -no-subscript-checks # subscript bounds not checked +``` + diff --git a/docs/cpp2/types.md b/docs/cpp2/types.md new file mode 100644 index 0000000000..7ee30bc1be --- /dev/null +++ b/docs/cpp2/types.md @@ -0,0 +1,287 @@ + +# Types + +## Overview + +A user-defined `type` is written using the same **name `:` kind `=` value** [declaration syntax](../cpp2/declarations.md) as everything in Cpp2. The type's "value" is a `{}`-enclosed body containing more declarations. + +In a `type`, data members are private by default, and functions and nested types are public by default. To explicitly declare a type scope declaration `#!cpp public`, `#!cpp protected`, or `#!cpp private`, write that keyword at the beginning of the declaration. + +``` cpp title="Writing a simple type" hl_lines="1" +mytype: type = +{ + // data members are private by default + x: std::string; + + // functions are public by default + protected f: (this) = { do_something_with(x); } + + // ... +} +``` + +## `#!cpp this` — The parameter name + +**`#!cpp this`** is a synonym for the current object. Inside the scope of a type that has a member named `member`, `member` by default means `#!cpp this.member`. 
+ +> Note: In Cpp2, `#!cpp this` is not a pointer. + +The name `#!cpp this` may only be used for the first parameter of a type-scope function (aka member function). It is never declared with an explicit `: its_type` because its type is always the current type. + +`#!cpp this` can be an `in` (default), `inout`, `out`, or `move` parameter. Which you choose naturally determines what kind of member function is being declared: + +- **`#!cpp in this`**: Writing `#!cpp myfunc: (this /*...*/)`, which is shorthand for `#!cpp myfunc: (in this /*...*/)`, defines a Cpp1 `#!cpp const`-qualified member function, because `in` parameters are `#!cpp const`. + +- **`#!cpp inout this`**: Writing `#!cpp myfunc: (inout this /*...*/)` defines a Cpp1 non-`#!cpp const` member function. + +- **`#!cpp out this`**: Writing `#!cpp myfunc: (out this /*...*/)` defines a Cpp1 constructor... and more. (See below.) + +- **`#!cpp move this`**: Writing `#!cpp myfunc: (move this /*...*/)` defines a Cpp1 `#!cpp &&`-qualified member function, or if there are no additional parameters it defines the destructor. + +For example, here is how to write a read-only member function named `print` that takes a read-only string value and prints this object's data value and the string message: + +``` cpp title="The this parameter" hl_lines="4 6" +mytype: type = { + data: i32; // some data member (private by default) + + print: (this, msg: std::string) = { + std::cout << data << msg; + // "data" is shorthand for "this.data" + } + + // ... +} +``` + +## `#!cpp this` — Inheritance + +Base types are written as members named `#!cpp this`. For example, just as a type could write a data member as `#!cpp data: string = "xyzzy";`, which is pronounced "`data` is a `string` defined as having the default value `#!cpp "xyzzy"`," a base type is written as `#!cpp this: Shape = (default, values);`, which is pronounced "`#!cpp this` is a `Shape` defined as having these default values."
+ +> Cpp2 syntax has no separate base list or separate member initializer list. + +Because base and member subobjects are all declared in the same place (the type body) and initialized in the same place (an `#!cpp operator=` function body), they can be written in any order, including interleaved, and are still guaranteed to be safely initialized in declared order. This means that in Cpp2 you can declare a data member object before a base subobject, so that it naturally outlives the base subobject. + +> Cpp2 code doesn't need workarounds like Boost's `base_from_member`, because all of the motivating examples for that can be written directly. See [this explanation](https://github.com/hsutter/cppfront/issues/334#issuecomment-1500984173) for details. + +## `#!cpp virtual`, `#!cpp override`, and `#!cpp final` — Virtual functions + +A `#!cpp this` parameter can additionally be declared as one of the following: + +- **`#!cpp virtual`**: Writing `#!cpp myfunc: (virtual this /*...*/)` defines a new virtual function. + +- **`#!cpp override`**: Writing `#!cpp myfunc: (override this /*...*/)` defines an override of an existing base class virtual function. + +- **`#!cpp final`**: Writing `#!cpp myfunc: (final this /*...*/)` defines a final override of an existing base class virtual function. + +A pure virtual function is a function with a `#!cpp virtual this` or `#!cpp override this` parameter and no body. + +For example: + +``` cpp title="Virtual functions" hl_lines="3 4 14 15" +abstract_base: type += { + // A pure virtual function: virtual + no body + print: (virtual this, msg: std::string); + + // ... +} + +derived: type += { + // 'this' is-an 'abstract_base' + this: abstract_base; + + // Explicit override + print: (override this, msg: std::string) = { /*...*/ } + + // ... 
+} +``` + + +## `implicit` — Controlling conversion functions + +A `#!cpp this` parameter of an `#!cpp operator=` function can additionally be declared as: + +- **`implicit`**: Writing `#!cpp operator=: (implicit out this, /*...*/)` defines a function that will not be marked as "explicit" when lowered to Cpp1 syntax. + +> Note: This reverses the Cpp1 default, where constructors are not "explicit" by default, and you have to write "explicit" to make them explicit. + + +## `#!cpp operator=` — Construction, assignment, and destruction + +All value operations are spelled `#!cpp operator=`, including construction, assignment, and destruction. `#!cpp operator=` sets the value of `#!cpp this` object, so the `#!cpp this` parameter can be passed as anything but `in` (which would imply `#!cpp const`): + +- **`#!cpp out this`:** Writing `#!cpp operator=: (out this /*...*/ )` is naturally both a constructor and an assignment operator, because an `out` parameter can take an uninitialized or initialized argument. If you don't also write a more-specialized `#!cpp inout this` assignment operator, Cpp2 will use the `#!cpp out this` function also for assignment. + +- **`#!cpp inout this`:** Writing `#!cpp operator=: (inout this /*...*/ )` is an assignment operator (only), because an `inout` parameter requires an initialized modifiable argument. + +- **`#!cpp move this`:** Writing `#!cpp operator=: (move this)` is the destructor. No other parameters are allowed, so it connotes "move `#!cpp this` nowhere." + +Unifying `#!cpp operator=` enables usable `out` parameters, which is essential for composable guaranteed initialization. We want the expression syntax `#!cpp x = value` to be able to call a constructor or an assignment operator, so naming them both `#!cpp operator=` is consistent. + +An assignment operator always returns the same type as `#!cpp this` and automatically performs `#!cpp return this;`. 
+ +> Note: Writing `=` always invokes an `#!cpp operator=` (in fact for a Cpp2-authored type, and semantically for a Cpp1-authored type). This avoids the Cpp1 inconsistency that "writing `=` calls `#!cpp operator=`, except when it doesn't" (such as in a Cpp1 variable initialization). Conversely, `#!cpp operator=` is always invoked by `=` in Cpp2. + + +### `that` — A source parameter + +All type-scope functions can have **`that`** as their second parameter, which is a synonym for the object to be copied/moved from. Like `this`, at type scope it is never declared with an explicit `: its_type` because its type is always the current type. + +`that` can be an `in` (default) or `move` parameter. Which you choose naturally determines what kind of member function is being declared: + +- **`in that`**: Writing `#!cpp myfunc: (/*...*/ this, that)`, which is shorthand for `#!cpp myfunc: (/*...*/ this, in that)`, is naturally both a copy and move function, because it can accept an lvalue or an rvalue `that` argument. If you don't write a more-specialized `move that` move function, Cpp2 will automatically use the `in that` function also for move. + +- **`move that`**: Writing `#!cpp myfunc: (/*...*/ this, move that)` defines a move function. + +Putting `this` and `that` together: The most general form of `#!cpp operator=` is **`#!cpp operator=: (out this, that)`**. It works as a unified general { copy, move } x { constructor, assignment } operator, and generates all four of those in the lowered Cpp1 code if you didn't write a more specific one yourself. + + +### `#!cpp operator=` can generalize (A)ssignment from construction, and (M)ove from copy + +As mentioned above: +- If you don't write an `#!cpp inout this` function, Cpp2 will use your `#!cpp out this` function in its place (if you wrote one). +- If you don't write a `move that` function, Cpp2 will use your `in that` function in its place (if you wrote one).
+ +> Note: When lowering to Cpp1, this just means generating the applicable special member functions from the appropriate Cpp2 function. + +This graphic summarizes these generalizations. For convenience I've numbered the (A)ssignment and (M)ove defaults. + +![image](generalized-copy-move-construction-assignment.png) + +In Cpp1 terms, they can be described as follows: + +- **(M)ove, M1, M2:** If you write a copy constructor or assignment operator, but not a corresponding move constructor or assignment operator, the latter is generated. + +- **(A)ssignment, A1, A3:** If you write a generalized constructor, but none of the three more-specific copy/move constructor/assignment functions, the latter three get generated. If you write a converting copy constructor, but no converting assignment operator for the same type and this is not a polymorphic type, the latter is generated. + +- **The arrows are transitive.** For example, if you write a copy constructor and nothing else, the move constructor, copy assignment operator, and move assignment operator are generated. + +The most general `#!cpp operator=` with `that` is `#!cpp (out this, that)`. In Cpp1 terms, it generates all four combinations of { copy, move } x { constructor, assignment }. This is often sufficient, so you can write all these value-setting functions just once. If you do want to write a more specific version that does something else, though, you can always write it too. + +> Note: Generating `#!cpp inout this` (assignment) from `#!cpp out this` also generates **converting assignment** from converting construction, which is a new thing. Today in Cpp1, if you write a converting constructor from another type `X`, you may or may not write the corresponding assignment from `X`; in Cpp2 you will get that by default, and it sets the object to the same state as the converting constructor from `X` does. 
+ + + +### Minimal functions generated by default + +There are only two defaults the language will generate implicitly for a type: + +- The only special function every type must have is the destructor. If you don't write it by hand, a public nonvirtual destructor is generated by default. + +- If no `#!cpp operator=` functions other than the destructor are written by hand, a public default constructor is generated by default. + +All other `#!cpp operator=` functions are explicitly written, either by hand or by opting into applying a metafunction (see below). + +> Note: Because generated functions are always opt-in, you can never get a generated function that's wrong for your type, and so Cpp2 doesn’t need to support "=delete" for the purpose of suppressing unwanted generated functions. + +### Memberwise by default + +All copy/move/conversion `#!cpp operator=` functions are memberwise by default in Cpp2. That includes when you write memberwise construction and assignment yourself. + +In a hand-written `#!cpp operator=`: + +- The body must begin with a series of `member = value;` statements, one for each of the type's data members (including base classes) in declaration order. + +- If the body does not mention a member in the appropriate place in the beginning section, by default the member's default initializer is used. + +- In an assignment operator (`#!cpp inout this`), you can explicitly skip setting a member by writing `member = _;` where it would normally be set if you know you have a reason to set its value later instead or if the existing value needs to be preserved. (This is rare; for an example, see the generated implementation of the [`union` metafunction](metafunctions.md#union).) 
+ +For example: + +``` cpp title="Memberwise operator= semantics" hl_lines="9-11 20-22" +mytype: type += { + // data members (private by default) + name: std::string; + social_handle: std::string = "(unknown)"; + + // conversion from string (construction + assignment) + operator=: (out this, who: std::string) = { + name = who; + // if social_handle is not mentioned, defaults to: + // social_handle = "(unknown)"; + + // now that the members have been set, + // any other code can follow... + print(); + } + + // copy/move constructor/assignment + operator=: (out this, that) = { + // if neither data member is mentioned, defaults to: + // name = that.name; + // social_handle = that.social_handle; + print(); + } + + print: (this) = { std::cout << "value is [(name)$] [(social_handle)$]\n"; } +} + +// The above definition of mytype allows all of the following... +main: () = { + x: mytype = "Jim"; // construct from string + x = "John"; // assign from string + y := x; // copy construct + y = x; // copy assign + z := (move x); // move construct + z = (move y); // move assign + x.print(); // "value is [] []" - moved from + y.print(); // "value is [] []" - moved from +} +``` + +> Note: This makes memberwise semantics symmetric for construction and assignment. In Cpp1, only non-copy/move constructors have a default, which is to initialize a member with its default initializer. In Cpp2, both constructors and assignment operators default to using the default initializer if it's a conversion function (non-`that`, aka non-copy/move), and using memberwise `member = that.member;` for copy/move functions. + + +## `#!cpp operator<=>` — Unified comparisons + +To write comparison functions for your type, usually you just need to write either or both of `operator<=>` and `operator==` with a first parameter of `this` and a second parameter of any type (usually `that` which is of the same type). If you omit the function body, a memberwise comparison will be generated by default. 
+ +`operator<=>` must return one of `std::strong_ordering`, `std::partial_ordering`, or `std::weak_ordering`. It makes `<`, `<=`, `>`, and `>=` comparisons available for your type. Prefer a strong ordering unless you have a reason to use a partial or weak ordering. If you write `operator<=>` without a custom function body, `operator==` is generated for you. + +`operator==` must return `bool`. It makes `==` and `!=` comparisons available for your type. + +For example: + +``` cpp title="Writing the <=> operator" hl_lines="5-7 13" +item: type = { + x: i32 = (); + y: std::string = (); + + operator<=>: (this, that) -> std::strong_ordering; + // memberwise by default: first compares x <=> that.x, + // then if those are equal compares y <=> that.y + + // ... +} + +test: (x: item, y: item) = { + if x != y { // ok + // ... + } +} +``` + +The above is the same as in Cpp1 because most of Cpp2's `#!cpp operator<=>` feature has already been merged into ISO C++ (Cpp1). In addition, in Cpp2 comparisons with the same precedence can be safely chained, and always have the mathematically sound transitive meaning or else are rejected at compile time: + +- **Valid chains: All `<`/`<=`, all `>`/`>=`, or all `==`.** All mathematically sound and safe chains like `a <= b < c` are supported, with efficient single evaluation of each term. They are "sound" because they are transitive; these chains imply a relationship between `a` and `c` (in this case, the chain implies that `a <= c` is also true). + +> Note: These valid chains always give mathematically expected results, even when invoking existing comparison operators authored in Cpp1 syntax. + +- **Invalid chains: Everything else.** Nonsense chains like `a >= b < c` and `a != b != c` are compile time errors. They are "nonsense" because they are non-transitive; these chains do not imply any relationship between `a` and `c`. 
+ +- **Non-chains: Mixed precedence is not a chain.** Expressions like `a // Cpp1 +#include // Cpp1 + +N: namespace = { // Cpp2 + hello: (msg: std::string_view) = // Cpp2 + std::cout << "Hello, (msg)$!\n"; // Cpp2 +} // Cpp2 + +int main() { // Cpp1 + auto words = std::vector{ "Alice", "Bob" }; // Cpp1 + N::hello( words[0] ); // Cpp1 + N::hello( words[1] ); // Cpp1 + std::cout << "... and goodnight\n"; // Cpp1 +} // Cpp1 +``` + +## Not allowed: Nesting Cpp1 inside Cpp2 (and vice versa) + +However, the following source file is not valid, because it tries to nest Cpp2 code inside Cpp1 code, and vice versa: + +``` cpp title="ERROR.cpp2 — this is NOT allowed" linenums="1" hl_lines="5 6 9 14" +#include // Cpp1 +#include // Cpp1 + +namespace N { // Cpp1 + hello: (msg: std::string_view) = // Cpp2 (ERROR here) + std::cout << "Hello, (msg)$!\n"; // Cpp2 (ERROR here) +} // Cpp1 + +main: () = { // Cpp2 + auto words = std::vector{ "Alice", "Bob" }; // Cpp1 (ERROR here) + N::hello( words[0] ); // ? + N::hello( words[1] ); // ? + std::cout << "... and goodnight\n"; // ? +} // Cpp2 +``` + +The above nesting is not supported because it would create not just parsing problems but also semantic ambiguities. For example, lines 11-13 are syntactically valid as Cpp1 or as Cpp2, but if they are treated as Cpp2 then the `#!cpp words[0]` and `#!cpp words[1]` expressions' `#!cpp std::vector::operator[]` calls are bounds-checked and bounds-safe by default, whereas if they are treated as Cpp1 then they are not bounds-checked. And that's a pretty important difference to be sure about! + diff --git a/docs/cppfront/options.md b/docs/cppfront/options.md new file mode 100644 index 0000000000..62cbeb8b1a --- /dev/null +++ b/docs/cppfront/options.md @@ -0,0 +1,126 @@ +# Cppfront command line options + +Cppfront is invoked using + + cppfront [options] file ... 
+ +where + +- **options** is optional, and can include options described on this page + +- **file ...** is a list of one or more `.cpp2` filenames to be compiled + +Command line options are spelled starting with `-` or `/` followed by the option name. For example, `-help` prints help. + +For convenience, you can shorten the name to any unique prefix not shared with another option. For example: + +- `-help` can be equivalently written as `-hel`, `-he`, or `-h`, because no other option starts with `h`. +- `-import-std` and `-include-std` can be shortened to `-im` and `-in` respectively, but not to `-i`, which would be ambiguous between the two. + + +## Basic command line options + +### `-help`, `-h`, `-?` + +Prints an abbreviated version of this documentation page. + +### `-import-std`, `-im` + +Makes the entire C++ standard library (namespace `std::`) available via a module `import std.compat;` (which implies `import std;`). + +> When you use either `-import-std` or `-include-std`, your `.cpp2` program will not need to explicitly `import` any C++ standard library module or `#include` any C++ standard library header (it can still do that, but it would be redundant). + +This option is implicitly set if `-pure-cpp2` is selected. + +This option is ignored if `-include-std` is selected. If your Cpp1 compiler does not yet support standard library modules `std` and `std.compat`, this option automatically uses `-include-std` instead as a fallback. + +### `-include-std`, `-in` + +Makes the entire C++ standard library (namespace `std::`) available via an `#include` of every standard header. + +This option should always work with all standard headers, including draft-standard C++26 headers that are not yet in a published standard, because it tracks new headers as they are added and uses feature tests to not include headers that are not yet available on your Cpp1 implementation. + +### `-pure-cpp2`, `-p` + +Allow Cpp2 syntax only. + +This option also sets `-import-std`.
+ +### `-version`, `-vers` + +Print version, build, copyright, and license information. + + +## Additional dynamic safety check controls + +### `-no-comparison-checks`, `-no-c` + +Disable mixed-sign comparison safety checks. If not disabled, mixed-sign comparisons are diagnosed by default. + +### `-no-div-zero-checks`, `-no-d` + +Disable integer division by zero checks. If not disabled, integer division by zero checks are performed by default when both the numerator and denominator are integer types. + +### `-no-null-checks`, `-no-n` + +Disable null safety checks. If not disabled, null dereference checks are performed by default. + +### `-no-subscript-checks`, `-no-s` + +Disable subscript bounds safety checks. If not disabled, subscript bounds safety checks are performed by default. + + +## Support for constrained target environments + +### `-fno-exceptions`, `-fno-e` + +Disable C++ exception handling. This should be used only if you must run in an environment that bans C++ exception handling, and so you are already using a similar command line option for your Cpp1 compiler. + +If this option is selected, a failed `as` for `std::variant` will assert. + +### `-fno-rtti`, `-fno-r` + +Disable C++ run-time type information (RTTI). This should be used only if you must run in an environment that bans C++ RTTI, and so you are already using a similar command line option for your Cpp1 compiler. + +If this option is selected, trying to use `as` for `*` (raw pointers) or `std::any` will assert. + + +## Cpp1 file content options + +### `-clean-cpp1`, `-cl` + +Emit clean `.cpp` files without `#line` directives and other extra information that cppfront normally emits in the `.cpp` to light up C++ tools (e.g., to let IDEs integrate cppfront error message output, debuggers step to the right lines in Cpp2 source code, and so forth). In normal use, you won't need `-cl`. + +### `-emit-cppfront-info`, `-e` + +Emit cppfront version and build in the `.cpp` file. 
+ +### `-line-paths`, `-l` + +Emit absolute paths in `#line` directives. + +## Cppfront output options + +### `-cwd` _path_, `-cw` _path_ + +Changes the current working directory to 'path'. Can be useful in build scripts to control where generated Cpp1 files are placed; see also `-output`. + +### `-debug`, `-d` + +Emit compiler debug output. This is only useful when debugging cppfront itself. + +### `-format-colon-errors`, `-fo` + +Emit cppfront diagnostics using `:line:col:` format for line and column numbers, if that is the format better recognized by your IDE, so that it will pick up cppfront messages and integrate them in its normal error message output location. If not set, by default cppfront diagnostics use `(line,col)` format. + +### `-output` _filename_, `-o` _filename_ + +Output to 'filename' (can be 'stdout'). If not set, the default output filename is the same as the input filename without the `2` (e.g., compiling `hello.cpp2` by default writes its output to `hello.cpp`, and `header.h2` to `header.h`). + +### `-quiet`, `-q` + +Print no console output unless there are errors to report. + +### `-verbose`, `-verb` + +Print verbose statistics and `-debug` output. diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000000..da2eb60e27 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,31 @@ + +# Cpp2 and cppfront: An experimental 'C++ syntax 2' and its first compiler + +## Welcome & getting started + +- [Overview: What are Cpp2 and cppfront? 
How do I get and build cppfront?](welcome/overview.md) +- [Hello, world!](welcome/hello-world.md) +- [Adding cppfront to your existing C++ project](welcome/integration.md) + +## Cpp2 reference + +- [Common concepts](cpp2/common.md) +- [Expressions](cpp2/expressions.md) +- [Declarations and aliases](cpp2/declarations.md) +- [Objects, initialization, and memory](cpp2/objects.md) +- [Functions, branches, and loops](cpp2/functions.md) +- [Contracts](cpp2/contracts.md) +- [Types and inheritance](cpp2/types.md) +- [Metafunctions and reflection](cpp2/metafunctions.md) +- [Namespaces](cpp2/namespaces.md) +- [Safety and "unchecked"](cpp2/safety.md) + +## Cppfront reference + +- [Using Cpp1 (today's syntax) and Cpp2 in the same source file](cppfront/mixed.md) +- [Cppfront command line options](cppfront/options.md) + +## Notes and supplemental topics + +- [`@regex` status: Regular expression features](notes/regex_status.md) + diff --git a/docs/notes/regex_status.md b/docs/notes/regex_status.md new file mode 100644 index 0000000000..0ef8ef7f8e --- /dev/null +++ b/docs/notes/regex_status.md @@ -0,0 +1,233 @@ +# Supported regular expression features + +The listings are taken from the [Perl regex docs](https://perldoc.perl.org/perlre). Regular expressions are applied via the [`regex` metafunction](../cpp2/metafunctions.md#regex). + + +## Currently supported or planned features + + +### Modifiers + +| Modifier | Notes | Status | +| --- | --- | --- | +| **`i`** | Do case-insensitive pattern matching. For example, "A" will match "a" under `/i`. | Supported | +| **`m`** | Treat the string being matched against as multiple lines. That is, change `^` and `$` from matching the start of the string's first line and the end of its last line to matching the start and end of each line within the string. | Supported | +| **`s`** | Treat the string as single line. That is, change `.` to match any character whatsoever, even a newline, which normally it would not match. 
| Supported | +| **`x` and `xx`** | Extend your pattern's legibility by permitting whitespace and comments. For details see: [Perl regex docs: `/x` and `/xx`](https://perldoc.perl.org/perlre#/x-and-/xx). | Supported | +| **`n`** | Prevent the grouping metacharacters `(` and `)` from capturing. This modifier will stop `$1`, `$2`, etc. from being filled in. | Supported | +| **`c`** | Keep the current position during repeated matching. | Supported | + + +### Escape sequences __(Complete)__ + +| Escape sequence | Notes | Status | +| --- | --- | --- | +| **`\t`** | Tab (HT, TAB) | Supported | +| **`\n`** | Newline (LF, NL) | Supported | +| **`\r`** | Return (CR) | Supported | +| **`\f`** | Form feed (FF) | Supported | +| **`\a`** | Alarm (bell) (BEL) | Supported | +| **`\e`** | Escape (think troff) (ESC) | Supported | +| **`\x{}`, `\x00`** | Character whose ordinal is the given hexadecimal number | Supported | +| **`\o{}`, `\000`** | Character whose ordinal is the given octal number | Supported | + + +### Quantifiers __(Complete)__ + +| Quantifier | Notes | Status | +| --- | --- | --- | +| **`*`** | Match 0 or more times | Supported | +| **`+`** | Match 1 or more times | Supported | +| **`?`** | Match 1 or 0 times | Supported | +| **`{n}`** | Match exactly n times | Supported | +| **`{n,}`** | Match at least n times | Supported | +| **`{,n}`** | Match at most n times | Supported | +| **`{n,m}`** | Match at least n but not more than m times | Supported | +| | | | +| **`*?`** | Match 0 or more times, not greedily | Supported | +| **`+?`** | Match 1 or more times, not greedily | Supported | +| **`??`** | Match 0 or 1 time, not greedily | Supported | +| **`{n}?`** | Match exactly n times, not greedily (redundant) | Supported | +| **`{n,}?`** | Match at least n times, not greedily | Supported | +| **`{,n}?`** | Match at most n times, not greedily | Supported | +| **`{n,m}?`** | Match at least n but not more than m times, not greedily | Supported | +| | | | +| **`*+`** | 
Match 0 or more times and give nothing back | Supported | +| **`++`** | Match 1 or more times and give nothing back | Supported | +| **`?+`** | Match 0 or 1 time and give nothing back | Supported | +| **`{n}+`** | Match exactly n times and give nothing back (redundant) | Supported | +| **`{n,}+`** | Match at least n times and give nothing back | Supported | +| **`{,n}+`** | Match at most n times and give nothing back | Supported | +| **`{n,m}+`** | Match at least n but not more than m times and give nothing back | Supported | + + +### Character Classes and other Special Escapes __(Complete)__ + +| Feature | Notes | Status | +| --- | --- | --- | +| **`[`...`]`** | Match a character according to the rules of the bracketed character class defined by the "...". Example: `[a-z]` matches "a" or "b" or "c" ... or "z" | Supported | +| **`[[:`...`:]]`** | Match a character according to the rules of the POSIX character class "..." within the outer bracketed character class. Example: `[[:upper:]]` matches any uppercase character. | Supported | +| **`\g1`** or **`\g{-1}`** | Backreference to a specific or previous group. The number may be negative indicating a relative previous group and may optionally be wrapped in curly brackets for safer parsing. 
| Supported | +| **`\g{name}`** | Named backreference | Supported | +| **`\k`** | Named backreference | Supported | +| **`\k'name'`** | Named backreference | Supported | +| **`\k{name}`** | Named backreference | Supported | +| **`\w`** | Match a "word" character (alphanumeric plus "_", plus other connector punctuation chars plus Unicode marks) | Supported | +| **`\W`** | Match a non-"word" character | Supported | +| **`\s`** | Match a whitespace character | Supported | +| **`\S`** | Match a non-whitespace character | Supported | +| **`\d`** | Match a decimal digit character | Supported | +| **`\D`** | Match a non-digit character | Supported | +| **`\v`** | Vertical whitespace | Supported | +| **`\V`** | Not vertical whitespace | Supported | +| **`\h`** | Horizontal whitespace | Supported | +| **`\H`** | Not horizontal whitespace | Supported | +| **`\1`** | Backreference to a specific capture group or buffer. '1' may actually be any positive integer. | Supported | +| **`\N`** | Any character but \n. Not affected by /s modifier | Supported | +| **`\K`** | Keep the stuff left of the \K, don't include it in $& | Supported | + + +### Assertions + +| Assertion | Notes | Status | +| --- | --- | --- | +| **`\b`** | Match a \w\W or \W\w boundary | Supported | +| **`\B`** | Match except at a \w\W or \W\w boundary | Supported | +| **`\A`** | Match only at beginning of string | Supported | +| **`\Z`** | Match only at end of string, or before newline at the end | Supported | +| **`\z`** | Match only at end of string | Supported | +| **`\G`** | Match only at pos() (e.g. 
at the end-of-match position of prior m//g) | Supported | + + +### Capture groups __(Complete)__ + +| Feature | Status | +| --- | --- | +| **`(`...`)`** | Supported | + + +### Quoting metacharacters __(Complete)__ + +| Feature | Status | +| --- | --- | +| **For `^.[]$()*{}?+|\`** | Supported | + + +### Extended Patterns + +| Extended pattern | Notes | Status | +| --- | --- | --- | +| **`(?pattern)`** | Named capture group | Supported | +| **`(?#text)`** | Comments | Supported | +| **`(?adlupimnsx-imnsx)`** | Modification for surrounding context | Supported | +| **`(?^alupimnsx)`** | Modification for surrounding context | Supported | +| **`(?:pattern)`** | Clustering, does not generate a group index. | Supported | +| **`(?adluimnsx-imnsx:pattern)`** | Clustering, does not generate a group index and modifications for the cluster. | Supported | +| **`(?^aluimnsx:pattern)`** | Clustering, does not generate a group index and modifications for the cluster. | Supported | +| **`(?`|`pattern)`** | Branch reset | Supported | +| **`(?'NAME'pattern)`** | Named capture group | Supported | +| **`(?(condition)yes-pattern`|`no-pattern)`** | Conditional patterns. | Planned | +| **`(?(condition)yes-pattern)`** | Conditional patterns. | Planned | +| **`(?>pattern)`** | Atomic patterns. (Disable backtrack.) | Supported | +| **`(*atomic:pattern)`** | Atomic patterns. (Disable backtrack.) | Supported | + + +### Lookaround Assertions + +| Lookaround assertion | Notes | Status | +| --- | --- | --- | +| **`(?=pattern)`** | Positive look ahead. | Supported | +| **`(*pla:pattern)`** | Positive look ahead. | Supported | +| **`(*positive_lookahead:pattern)`** | Positive look ahead. | Supported | +| **`(?!pattern)`** | Negative look ahead. | Supported | +| **`(*nla:pattern)`** | Negative look ahead. | Supported | +| **`(*negative_lookahead:pattern)`** | Negative look ahead. | Supported | +| **`(?<=pattern)`** | Positive look behind. | Supported | +| **`(*plb:pattern)`** | Positive look behind. 
| Supported | +| **`(*positive_lookbehind:pattern)`** | Positive look behind. | Supported | +| **`(?<!pattern)`** | Negative look behind. | Supported | +| **`(*nlb:pattern)`** | Negative look behind. | Supported | +| **`(*negative_lookbehind:pattern)`** | Negative look behind. | Supported | + + +### Special Backtracking Control Verbs + +| Backtracking control verb | Notes | Status | +| --- | --- | --- | +| **`(*SKIP) (*SKIP:NAME)`** | Start next search here. | Planned | +| **`(*PRUNE) (*PRUNE:NAME)`** | No backtracking over this point. | Planned | +| **`(*MARK:NAME) (*:NAME)`** | Place a named mark. | Planned | +| **`(*THEN) (*THEN:NAME)`** | Like PRUNE. | Planned | +| **`(*COMMIT) (*COMMIT:arg)`** | Stop searching. | Planned | +| **`(*FAIL) (*F) (*FAIL:arg)`** | Fail the pattern/branch. | Planned | +| **`(*ACCEPT) (*ACCEPT:arg)`** | Accept the pattern/subpattern. | Planned | + + +## Not planned (Mainly because of Unicode or perl specifics) + +### Modifiers + +| Modifier | Notes | Status | +| --- | --- | --- | +| `p` | Preserve the string matched such that ${^PREMATCH}, ${^MATCH}, and ${^POSTMATCH} are available for use after matching. | Not planned | +| `a`, `d`, `l`, and `u` | These modifiers affect which character-set rules (Unicode, etc.) are used, as described below in "Character set modifiers". 
| Not planned | +| `g` | globally match the pattern repeatedly in the string | Not planned | +| `e` | evaluate the right-hand side as an expression | Not planned | +| `ee` | evaluate the right side as a string then eval the result | Not planned | +| `o` | pretend to optimize your code, but actually introduce bugs | Not planned | +| `r` | perform non-destructive substitution and return the new value | Not planned | + + +### Escape sequences + +| Escape sequence | Notes | Status | +| --- | --- | --- | +| `\cK` | control char (example: VT) | Not planned | +| `\N{name}` | named Unicode character or character sequence | Not planned | +| `\N{U+263D}` | Unicode character (example: FIRST QUARTER MOON) | Not planned | +| `\l` | lowercase next char (think vi) | Not planned | +| `\u` | uppercase next char (think vi) | Not planned | +| `\L` | lowercase until \E (think vi) | Not planned | +| `\U` | uppercase until \E (think vi) | Not planned | +| `\Q` | quote (disable) pattern metacharacters until \E | Not planned | +| `\E` | end either case modification or quoted section, think vi | Not planned | + + +### Character Classes and other Special Escapes + +| Character class or escape | Notes | Status | +| --- | --- | --- | +| `(?[...])` | Extended bracketed character class | Not planned | +| `\pP` | Match P, named property. Use \p{Prop} for longer names | Not planned | +| `\PP` | Match non-P | Not planned | +| `\X` | Match Unicode "eXtended grapheme cluster" | Not planned | +| `\R` | Linebreak | Not planned | + + +### Assertions + +| Assertion | Notes | Status | +| --- | --- | --- | +| `\b{}` | Match at Unicode boundary of specified type | Not planned | +| `\B{}` | Match where corresponding \b{} doesn't match | Not planned | + +### Extended Patterns + + +| Extended pattern | Notes | Status | +| --- | --- | --- | +| `(?{ code })` | Perl code execution. | Not planned | +| `(*{ code })` | Perl code execution. | Not planned | +| `(??{ code })` | Perl code execution. 
| Not planned | +| `(?PARNO)` `(?-PARNO)` `(?+PARNO)` `(?R)` `(?0)` | Recursive subpattern. | Not planned | +| `(?&NAME)` | Recursive subpattern. | Not planned | + + +### Script runs + +| Script runs | Notes | Status | +| --- | --- | --- | +| `(*script_run:pattern)` | All chars in pattern need to be of the same script. | Not planned | +| `(*sr:pattern)` | All chars in pattern need to be of the same script. | Not planned | +| `(*atomic_script_run:pattern)` | Without backtracking. | Not planned | +| `(*asr:pattern)` | Without backtracking. | Not planned | diff --git a/docs/stylesheets/extra.css b/docs/stylesheets/extra.css new file mode 100644 index 0000000000..16198feddb --- /dev/null +++ b/docs/stylesheets/extra.css @@ -0,0 +1,15 @@ +/* + the default font sizes look small and encourage zooming, + which loses the navigation side panels +*/ +p { font-size: 16px; } +td { font-size: 16px; } + +/* + todo: try to make the nav pane section labels larger + + for now, this at least adds space between sections + so that section starts are easier to see +*/ +.md-nav__item { font-size: 20pt; } +.md-nav__link { font-size: medium; } diff --git a/docs/welcome/hello-world.md b/docs/welcome/hello-world.md new file mode 100644 index 0000000000..5a59a60302 --- /dev/null +++ b/docs/welcome/hello-world.md @@ -0,0 +1,155 @@ +# **Hello, world!** + +``` mermaid +graph LR + A["` hello.cpp**2** `"] ==> B(["` **cppfront** `"]); + B ==> C[hello.cpp]; + C ==> D([Your favorite
C++ compiler

... and IDE / libraries / build
system / in-house tools / ...]); +``` + +## A `hello.cpp2` program + +Here is the usual one-line starter program that prints `Hello, world!`. Note that this is a complete program, no `#!cpp #include` required: + +``` cpp title="hello.cpp2 — on one line" +main: () = std::cout << "Hello, world!\n"; +``` + +But let's add a little more, just to show a few things: + +``` cpp title="hello.cpp2 — slightly more interesting" +main: () = { + words: std::vector = ( "Alice", "Bob" ); + hello( words[0] ); + hello( words[1] ); +} + +hello: (msg: std::string_view) = { + std::cout << "Hello, (msg)$!\n"; +} +``` + +This short program code already illustrates a few Cpp2 essentials. + +**Consistent context-free syntax.** Cpp2 is designed so that there is one general way to spell a given thing, that works consistently everywhere. All Cpp2 types/functions/objects/namespaces are written using the unambiguous and context-free [declaration syntax](../cpp2/declarations.md) **"_name_ `:` _kind_ `=` _statement_"**. The `:` is pronounced **"is a,"** and the `=` is pronounced **"defined as."** + +- `main` **is a** function that takes no arguments and returns nothing, and is **defined as** the code body shown. + +- `words` **is a** `std::vector`, initially **defined as** holding `#!cpp "Alice"` and `#!cpp "Bob"`. + +- `hello` **is a** function that takes a `std::string_view` it will only read from and that returns nothing, and is **defined as** code that prints the string to `cout` the usual C++ way. + +All grammar is context-free. In particular, we (the human reading the code, and the compiler) never need to do name lookup to figure out how to parse something — there is never a ["vexing parse"](https://en.wikipedia.org/wiki/Most_vexing_parse) in Cpp2. For details, see [Design note: Unambiguous parsing](https://github.com/hsutter/cppfront/wiki/Design-note%3A-Unambiguous-parsing). 
+ +**Simple, safe, and efficient by default.** Cpp2 has contracts (tracking draft C++26 contracts), `inspect` pattern matching, string interpolation, automatic move from last use, and more. + +- Declaring `words` uses **"CTAD"** (C++'s normal [constructor template argument deduction](https://en.cppreference.com/w/cpp/language/class_template_argument_deduction)) to deduce the type of elements in the `vector`. + +- Calling `#!cpp words[0]` and `#!cpp words[1]` is **bounds-checked by default**. From Cpp2 code, ordinary `std::vector` subscript accesses are safely bounds-checked by default without requiring any upgrade to your favorite standard library, and that's true for any similar subscript of something whose size can be queried using `std::size()` and `std::ssize()`, and for which `std::begin()` returns a random access iterator, including any in-house integer-indexed container types you already have that can easily provide `std::size()` and `std::ssize()` if they don't already. + +- `hello` uses **string interpolation** to be able to write `#!cpp "Hello, (msg)$!\n"` instead of `#!cpp "Hello, " << msg << "!\n"`. String interpolation also supports [standard C++ format specifications](https://en.cppreference.com/w/cpp/utility/format/spec), so you won't need iostream manipulators. + +**Simplicity through generality + defaults.** A major way that Cpp2 delivers simplicity is by providing just one powerful general syntax for a given thing (e.g., one function definition syntax), but designing it so you can omit the parts you're not currently using (e.g., where you're happy with the defaults). We're already using some of those defaults above: + +- We can omit writing the `#!cpp -> void` return type for a function that doesn't return anything, as both of these functions do. + +- We can omit the `{` `}` around single-statement function bodies, as `hello` does. + +- We can omit the `in` on the `msg` parameter. 
Cpp2 has just six ways to pass parameters: The most common ones are `in` for read-only (the default so we can omit it, as `hello` does), and `inout` for read-write. The others are `copy`, `out`, `move`, and `forward`. + +For details, see [Design note: Defaults are one way to say the same thing](https://github.com/hsutter/cppfront/wiki/Design-note%3A-Defaults-are-one-way-to-say-the-same-thing). + +**Order-independent by default.** Did you notice that `main` called `hello`, which was defined later? Cpp2 code is order-independent by default — there are no forward declarations. + +**Seamless compatibility and interop.** We can just use `std::cout` and `#!cpp std::operator<<` and `std::string_view` directly as usual. Cpp2 code works with any C++ code or library, including the standard library, using ordinary direct calls without any wrapping/marshaling/thunking. + +**C++ standard library is always available.** We didn't need `#!cpp #include ` or `#!cpp import std;`. The full C++ standard library is always available by default if your source file contains only syntax-2 code and you compile using cppfront's `-p` (short for `-pure-cpp2`), or if you use `-im` (short for `-import-std`). Cppfront is regularly updated to be compatible with C++23 and the latest draft C++26 library additions as soon as the ISO C++ committee votes them into the C++26 working draft, so as soon as you have a C++ implementation that has a new standard (or bleeding-edge draft standard!) C++ library feature, you'll be able to fully use it in Cpp2 code. 
+ + +## Building `hello.cpp2` + +Now use `cppfront` to compile `hello.cpp2` to a standard C++ file `hello.cpp`: + +``` bash title="Call cppfront to produce hello.cpp" +cppfront hello.cpp2 -p +``` + +The result is an ordinary C++ file that looks like this: [^clean-cpp1] + +``` cpp title="hello.cpp — created by cppfront" linenums="1" +#define CPP2_IMPORT_STD Yes + +#include "cpp2util.h" + +auto main() -> int; + +auto hello(cpp2::in msg) -> void; +auto main() -> int{ + std::vector words {"Alice", "Bob"}; + hello(CPP2_ASSERT_IN_BOUNDS_LITERAL(words, 0)); + hello(CPP2_ASSERT_IN_BOUNDS_LITERAL(std::move(words), 1)); +} + +auto hello(cpp2::in msg) -> void { + std::cout << ("Hello, " + cpp2::to_string(msg) + "!\n"); } +``` + +Here we can see more of how Cpp2 makes its features work. + +**How: Consistent context-free syntax.** + +- **All compiled lines are portable C++20 code** we can build with pretty much any C++ compiler released circa 2019 or later. Cpp2's context-free syntax converts directly to today's Cpp1 syntax. We can write and read our C++ types/functions/objects in simpler Cpp2 syntax without wrestling with context sensitivity and ambiguity, and they're all still just ordinary types/functions/objects. + +**How: Simple, safe, and efficient by default.** + +- **Line 9: CTAD** just works, because it turns into ordinary C++ code which already supports CTAD. +- **Lines 10-11: Automatic bounds checking** is added to `#!cpp words[0]` and `#!cpp words[1]` nonintrusively at the call site by default. Because it's nonintrusive, it works seamlessly with all existing container types that are `std::size` and `std::ssize`-aware, when you use them from safe Cpp2 code. +- **Line 11: Automatic move from last use** ensures the last use of `words` will automatically avoid a copy if it's being passed to something that's optimized for rvalues. +- **Line 15: String interpolation** performs the string capture of `msg`'s current value via `cpp2::to_string`. 
That uses `std::to_string` when available, and it also works for additional types (such as `#!cpp bool`, to print `#!cpp false` and `#!cpp true` instead of `0` and `1`, without having to remember to use `std::boolalpha`). + +**How: Simplicity through generality + defaults.** + +- **Line 7: `in` parameters** are implemented using `#!cpp cpp2::in<>`, which is smart enough to pass by `#!cpp const` value when that's safe and appropriate, otherwise by `#!cpp const&`, so you don't have to choose the right one by hand. + +**How: Order-independent by default.** + +- **Lines 5 and 7: Order independence** happens because cppfront generates all the type and function forward declarations for you, so you don't have to. That's why `main` can just call `hello`: both are forward-declared, so they can both see each other. + +**How: Seamless compatibility and interop.** + +- **Lines 9-11 and 15: Ordinary direct calls** to existing C++ code, so there's never a need for wrapping/marshaling/thunking. + +**How: C++ standard library always available.** + +- **Lines 1 and 3: `std::` is available** because cppfront was invoked with `-p`, which implies either `-im` (short for `-import-std`) or `-in` (short for `-include-std`, for compilers that don't support modules yet). The generated code tells `cpp2util.h` to `#!cpp import` the entire standard library as a module (or do the equivalent via headers if modules are not available). + + +## Building and running `hello.cpp` with any recent C++ compiler + +Finally, just build `hello.cpp` using your favorite C++20 compiler, where `CPPFRONT_INCLUDE` is the path to `/cppfront/include`: + + + +``` title="MSVC (Visual Studio 2019 version 16.11 or higher)" +> cl hello.cpp -std:c++20 -EHsc -I CPPFRONT_INCLUDE +> hello.exe +Hello, world! +``` + +``` bash title="GCC (GCC 10 or higher)" +$ g++ hello.cpp -std=c++20 -ICPPFRONT_INCLUDE -o hello +$ ./hello +Hello, world! 
+``` + +``` bash title="Clang (Clang 12 or higher)" +$ clang++ hello.cpp -std=c++20 -ICPPFRONT_INCLUDE -o hello +$ ./hello +Hello, world! +``` + + +### ➤ Next: [Adding cppfront to your existing C++ project](integration.md) + + +[^clean-cpp1]: For presentation purposes, this documentation generally shows the `.cpp` as generated when using cppfront's `-cl` (short for `-clean-cpp1`), which suppresses extra information cppfront normally emits in the `.cpp` to light up C++ tools (e.g., to let IDEs integrate cppfront error message output, debuggers step to the right lines in Cpp2 source code, and so forth). In normal use, you won't need or even want `-cl`. diff --git a/docs/welcome/integration.md b/docs/welcome/integration.md new file mode 100644 index 0000000000..16c1151555 --- /dev/null +++ b/docs/welcome/integration.md @@ -0,0 +1,45 @@ + +# Adding cppfront in your IDE / build system + +To start trying out Cpp2 syntax in any existing C++ project, just add a build step to translate the Cpp2 to Cpp1 syntax: + +- Copy the `.cpp` file to the same name with a `.cpp2` extension. +- Add the `.cpp2` file to the project, and ensure the `.cpp` is in C++20 mode. +- Tell the IDE to build that file using a custom build tool to invoke cppfront. + +That's it... The result Just Works with every C++20 or higher compiler and all existing C++ tools (debuggers, build systems, sanitizers, etc.). The IDE build should just pick up the `.cpp2` file source locations for any error messages, and the debugger should just step through the `.cpp2` file. + +The following uses Visual Studio as an example, but others have done the same in Xcode, Qt Creator, CMake, and other IDEs. + +## 1. Add the `.cpp2` file to the project, and ensure the `.cpp` is in C++20 mode + +For Visual Studio: In the Solution Explorer, right-click on Source Files and pick Add to add the file to the project. + +

+ +Also in Solution Explorer, right-click on the `.cpp` file Properties and make sure it's in C++20 (or C++latest) mode. + +

+ + +## 2. Tell the project system to build that file using a custom build tool to invoke cppfront, and add `cppfront/include` to the include path + +For Visual Studio: In Solution Explorer, right-click on the `.cpp2` file and select Properties, and add the custom build tool. Remember to also tell it that the custom build tool produces the `.cpp` file, so that it knows about the build dependency: + +

+ +Finally, put the `/cppfront/include` directory on your `INCLUDE` path. In Solution Explorer, right-click the app and select Properties, and add it to the VC++ Directories > Include Directories: + +

+ + +## That's it: Error message outputs, debuggers, visualizers, and other tools should just work + +That's enough to enable builds, and the IDE just picks up the rest from the `.cpp` file that cppfront generated: + +- **The cppfront error messages in `filename(line, col)` format.** Most C++ IDEs recognize these, and usually automatically merge any diagnostic output wherever compiler error output normally appears. If your IDE prefers `filename:line:col`, just use the cppfront `-format-colon-errors` command line option. + +- **The `#line` directives cppfront emits in the generated `.cpp` file.** Most C++ debuggers recognize these and will know to step through the `.cpp2` file. Note that `#line` emission is on by default, but if you choose `-cl` (short for `-clean-cpp1`) these will be suppressed and then the debugger will step through the generated C++ code instead. If your debugger can't find the files, you may need to use `-line-paths` to have absolute paths instead of relative paths in the `#line` directives. + +- **Regardless of syntax, every type/function/object/namespace/etc. is still just an ordinary C++ type/function/object/namespace/etc.** Most C++ debugger visualizers will just work and show beautiful output for the types your program uses, including any in-the-box visualizers for all the `std::` types (since those are used directly as usual) and any custom visualizers you may have already written for your own types or popular library types. + diff --git a/docs/welcome/overview.md b/docs/welcome/overview.md new file mode 100644 index 0000000000..9c4abdc499 --- /dev/null +++ b/docs/welcome/overview.md @@ -0,0 +1,69 @@ + +# Overview: What are Cpp2 and cppfront? How do I get and build cppfront? + +``` cpp title="hello.cpp2" +main: () = { + std::cout << "Hello, world!\n"; +} +``` + +## What is Cpp2? 
+ +"Cpp2," short for "C++ syntax 2," is my ([Herb Sutter's](https://github.com/hsutter)) personal project to try to make writing ordinary C++ types/functions/objects be much **simpler and safer**, without breaking backward compatibility. + +**What it isn't.** Cpp2 is not a successor or alternate language with its own divergent or incompatible ecosystem. For example, it does not have its own nonstandard incompatible modules/concepts/etc. that compete with the Standard C++ features; it does not replace your Standard C++ compiler or other tools; and it does not require any changes to your Standard C++ compiler or standard library or other libraries or tools to keep fully using all of them. + +**What it is.** Cpp2 aims to be another "skin" for C++ itself, just a simpler and safer way to write ordinary C++ types/functions/objects, and a faster way to experiment with proposals for future new Standard C++ features in a simpler compiler and syntax flavor. It seamlessly uses Standard C++ modules and concepts requirements and other features, and it works with all existing C++20 or higher compilers and libraries and tools right out of the box with no changes required to use them all seamlessly and directly with zero overhead. + + Bjarne Stroustrup said it best: + + > "Inside C++, there is a much smaller and cleaner language struggling to get out."
  — Bjarne Stroustrup, _The Design and Evolution of C++_ (D&E), 1994 +> +> "Say 10% of the size of C++ in definition and similar in front-end compiler size. ... most of the simplification would come from generalization."
  — Bjarne Stroustrup, _ACM History of Programming Languages III_, 2007 + +**My goal is to try to prove that Stroustrup is right:** that it's possible and desirable to have true C++ with all its expressive power and control and with full backward compatibility, but in a flavor that's 10x simpler with fewer quirks and special cases to remember, [^simpler] and 50x safer where it's far easier to not write security bugs by accident. + +We can't make an improvement that large to C++ via gradual evolution of today's syntax, because some important changes would require changing the meaning of code written in today's syntax. For example, we can never change a language feature default in today's syntax, not even if the default creates a security vulnerability pitfall, because changing a default would break vast swathes of existing code. Having a distinct alternative syntax gives us a "bubble of new code" that doesn't exist today, where we have: + +- **Freedom to make any desired improvement, without breaking any of today's code.** Cpp2 is designed to take all the consensus C++ best-practices guidance we already teach, and make them the default when using "syntax 2." Examples: Writing type-unsafe casts is just not possible in Cpp2 syntax; and Cpp2 can change language defaults to make them simpler and safer. You can always "break the glass" when needed to violate the guidance, but you have to opt out explicitly to write type-unsafe code (usually using the word `unchecked`), so if the program has a bug you can grep for those places to look at first. For details, see [Design note: unsafe code](https://github.com/hsutter/cppfront/wiki/Design-note%3A-Unsafe-code). 
+ +- **Perfect link compatibility always on, perfect source compatibility always available (but you pay for it only if you use it).** Any type/function/object/namespace written in either syntax is always still just a normal C++ type/function/object/namespace, so any code or library written in either Cpp2 or today's C++ syntax ("Cpp1" for short) can seamlessly call each other, with no wrapping/marshaling/thunking. You can write a "mixed" source file that has both Cpp2 and Cpp1 code and get perfect backward C++ source compatibility (even SFINAE and macros), or you can write a "pure" all-Cpp2 source file and write code in a 10x simpler syntax. + + +## What is cppfront? + +[**Cppfront**](https://github.com/hsutter/cppfront) is a compiler that compiles Cpp2 syntax to today's Cpp1 syntax. This lets you start trying out Cpp2 syntax in any existing C++ project and build system just by renaming a source file from `.cpp` to `.cpp2` and [adding a build step](integration.md), and the result Just Works with every C++20 or higher compiler and all existing C++ tools (debuggers, build systems, sanitizers, etc.). + +This deliberately follows Bjarne Stroustrup's wise approach with [**cfront**](https://en.wikipedia.org/wiki/Cfront), the original C++ compiler: In the 1980s and 1990s, Stroustrup created cfront to translate C++ to pure C, and similarly ensured that C++ could be interleaved with C in the same source file, and that C++ could always call any C code with no wrapping/marshaling/thunking. By providing a C++ compiler that emitted pure C, Stroustrup ensured full compatibility with the C ecosystems that already existed, and made it easy for people to start trying out C++ code in any existing C project by adding just another build step to translate the C++ to C first, and the result Just Worked with existing C tools. + + +## How do I get and build cppfront? + +The full source code for cppfront is at the [**Cppfront GitHub repo**](https://github.com/hsutter/cppfront). 
+ +Cppfront builds with any recent C++ compiler. Go to the `/cppfront/source` directory, and run one of the following: + + + +``` bash title="MSVC build instructions (Visual Studio 2019 version 16.11 or higher)" +cl cppfront.cpp -std:c++20 -EHsc +``` + +``` bash title="GCC build instructions (GCC 10 or higher)" +g++ cppfront.cpp -std=c++20 -o cppfront +``` + +``` bash title="Clang build instructions (Clang 12 or higher)" +clang++ cppfront.cpp -std=c++20 -o cppfront +``` + +That's it! + +> Note: On Windows (including MinGW), if you get a "too many sections" message, specify the switch `/bigobj` (MSVC) or `-Wa,-mbig-obj` (GCC). + + +### ➤ Next: [Hello, world!](hello-world.md) + + +[^simpler]: I'd ideally love to obsolete ~90% of my own books. I know that Cpp2 can eliminate that much of the C++ guidance I've personally had to write and teach over the past quarter century, by removing inconsistencies and pitfalls and gotchas, so that they're either impossible to write or are compile-time errors (either way, we don't have to teach them). I love writing C++ code... I just want it to be easier and safer by default. + diff --git a/experimental/extrinsic_storage.h b/experimental/extrinsic_storage.h new file mode 100644 index 0000000000..aeea072bfd --- /dev/null +++ b/experimental/extrinsic_storage.h @@ -0,0 +1,379 @@ + +// Copyright 2022-2024 Herb Sutter +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Part of the Cppfront Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://github.com/hsutter/cppfront/blob/main/LICENSE for license information. 
+ +#ifndef CPP2_EXPERIMENTAL_EXTRINSIC_STORAGE_H +#define CPP2_EXPERIMENTAL_EXTRINSIC_STORAGE_H + +// ***************************************************************** +// Enable/disable debug instrumentation and statistics printing here +constexpr inline auto debug_instrumentation = true; + +// Try with/without m_o_relaxed +#define M_O_RELAXED , std::memory_order_relaxed +#define M_O_RELAXED_NOCOMMA std::memory_order_relaxed +//#define M_O_RELAXED +//#define M_O_RELAXED_NOCOMMA +// ***************************************************************** + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +//----------------------------------------------------------------------------------- +// Some helpers +// +auto print(std::integral auto val) -> std::string { + auto ret = std::to_string(val % 10); + auto pos = 0; + while ((val /= 10) > 0) { + if ((++pos % 3) == 0) { ret = ',' + ret; } + ret = std::to_string(val % 10) + ret; + } + return ret; +} + +constexpr inline auto is_prime(auto value) noexcept -> bool { + constexpr int primes[] = { 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293, 307, 311, 313, 317, 331, 337, 347, 349, 353, 359, 367, 373, 379, 383, 389, 397, 401, 409, 419, 421, 431, 433, 439, 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509, 521, 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599, 601, 607, 613, 617, 619, 631, 641, 643, 647, 653, 659, 661, 673, 677, 683, 691, 701, 709, 719, 727, 733, 739, 743, 751, 757, 761, 769, 773, 787, 797, 809, 811, 821, 823, 827, 829, 839, 853, 857, 859, 863, 877, 881, 883, 887, 907, 911, 919, 929, 937, 941, 947, 953, 967, 971, 977, 983, 991, 997, 1009, 1013, 1019, 1021, 1031, 1033, 1039, 1049, 1051, 1061, 
1063, 1069, 1087, 1091, 1093, 1097, 1103, 1109, 1117, 1123, 1129, 1151, 1153, 1163, 1171, 1181, 1187, 1193, 1201, 1213, 1217, 1223, 1229, 1231, 1237, 1249, 1259, 1277, 1279, 1283, 1289, 1291, 1297, 1301, 1303, 1307, 1319, 1321, 1327, 1361, 1367, 1373, 1381, 1399, 1409, 1423, 1427, 1429, 1433, 1439, 1447, 1451, 1453, 1459, 1471, 1481, 1483, 1487, 1489, 1493, 1499, 1511, 1523, 1531, 1543, 1549, 1553, 1559, 1567, 1571, 1579, 1583, 1597, 1601, 1607, 1609, 1613, 1619, 1621, 1627, 1637, 1657, 1663, 1667, 1669, 1693, 1697, 1699, 1709, 1721, 1723, 1733, 1741, 1747, 1753, 1759, 1777, 1783, 1787, 1789, 1801, 1811, 1823, 1831, 1847, 1861, 1867, 1871, 1873, 1877, 1879, 1889, 1901, 1907, 1913, 1931, 1933, 1949, 1951, 1973, 1979, 1987, 1993, 1997, 1999, 2003, 2011, 2017, 2027, 2029, 2039, 2053, 2063, 2069, 2081, 2083, 2087, 2089, 2099, 2111, 2113, 2129, 2131, 2137, 2141, 2143, 2153, 2161, 2179, 2203, 2207, 2213, 2221, 2237, 2239, 2243, 2251, 2267, 2269, 2273, 2281, 2287, 2293, 2297, 2309, 2311, 2333, 2339, 2341, 2347, 2351, 2357, 2371, 2377, 2381, 2383, 2389, 2393, 2399, 2411, 2417, 2423, 2437, 2441, 2447, 2459, 2467, 2473, 2477, 2503, 2521, 2531, 2539, 2543, 2549, 2551, 2557, 2579, 2591, 2593, 2609, 2617, 2621, 2633, 2647, 2657, 2659, 2663, 2671, 2677, 2683, 2687, 2689, 2693, 2699, 2707, 2711, 2713, 2719, 2729, 2731, 2741, 2749, 2753, 2767, 2777, 2789, 2791, 2797, 2801, 2803, 2819, 2833, 2837, 2843, 2851, 2857, 2861, 2879, 2887, 2897, 2903, 2909, 2917, 2927, 2939, 2953, 2957, 2963, 2969, 2971, 2999, 3001, 3011, 3019, 3023, 3037, 3041, 3049, 3061, 3067, 3079, 3083, 3089, 3109, 3119, 3121, 3137, 3163, 3167, 3169, 3181, 3187, 3191, 3203, 3209, 3217, 3221, 3229, 3251, 3253, 3257, 3259, 3271, 3299, 3301, 3307, 3313, 3319, 3323, 3329, 3331, 3343, 3347, 3359, 3361, 3371, 3373, 3389, 3391, 3407, 3413, 3433, 3449, 3457, 3461, 3463, 3467, 3469, 3491, 3499, 3511, 3517, 3527, 3529, 3533, 3539, 3541, 3547, 3557, 3559, 3571, 3581, 3583, 3593, 3607, 3613, 3617, 3623, 3631, 3637, 3643, 3659, 
3671, 3673, 3677, 3691, 3697, 3701, 3709, 3719, 3727, 3733, 3739, 3761, 3767, 3769, 3779, 3793, 3797, 3803, 3821, 3823, 3833, 3847, 3851, 3853, 3863, 3877, 3881, 3889, 3907, 3911, 3917, 3919, 3923, 3929, 3931, 3943, 3947, 3967, 3989, 4001, 4003, 4007, 4013, 4019, 4021, 4027, 4049, 4051, 4057, 4073, 4079, 4091, 4093, 4099, 4111, 4127, 4129, 4133, 4139, 4153, 4157, 4159, 4177, 4201, 4211, 4217, 4219, 4229, 4231, 4241, 4243, 4253, 4259, 4261, 4271, 4273, 4283, 4289, 4297, 4327, 4337, 4339, 4349, 4357, 4363, 4373, 4391, 4397, 4409, 4421, 4423, 4441, 4447, 4451, 4457, 4463, 4481, 4483, 4493, 4507, 4513, 4517, 4519, 4523, 4547, 4549, 4561, 4567, 4583, 4591, 4597, 4603, 4621, 4637, 4639, 4643, 4649, 4651, 4657, 4663, 4673, 4679, 4691, 4703, 4721, 4723, 4729, 4733, 4751, 4759, 4783, 4787, 4789, 4793, 4799, 4801, 4813, 4817, 4831, 4861, 4871, 4877, 4889, 4903, 4909, 4919, 4931, 4933, 4937, 4943, 4951, 4957, 4967, 4969, 4973, 4987, 4993, 4999, 5003, 5009, 5011, 5021, 5023, 5039, 5051, 5059, 5077, 5081, 5087, 5099, 5101, 5107, 5113, 5119, 5147, 5153, 5167, 5171, 5179, 5189, 5197, 5209, 5227, 5231, 5233, 5237, 5261, 5273, 5279, 5281, 5297, 5303, 5309, 5323, 5333, 5347, 5351, 5381, 5387, 5393, 5399, 5407, 5413, 5417, 5419, 5431, 5437, 5441, 5443, 5449, 5471, 5477, 5479, 5483, 5501, 5503, 5507, 5519, 5521, 5527, 5531, 5557, 5563, 5569, 5573, 5581, 5591, 5623, 5639, 5641, 5647, 5651, 5653, 5657, 5659, 5669, 5683, 5689, 5693, 5701, 5711, 5717, 5737, 5741, 5743, 5749, 5779, 5783, 5791, 5801, 5807, 5813, 5821, 5827, 5839, 5843, 5849, 5851, 5857, 5861, 5867, 5869, 5879, 5881, 5897, 5903, 5923, 5927, 5939, 5953, 5981, 5987, 6007, 6011, 6029, 6037, 6043, 6047, 6053, 6067, 6073, 6079, 6089, 6091, 6101, 6113, 6121, 6131, 6133, 6143, 6151, 6163, 6173, 6197, 6199, 6203, 6211, 6217, 6221, 6229, 6247, 6257, 6263, 6269, 6271, 6277, 6287, 6299, 6301, 6311, 6317, 6323, 6329, 6337, 6343, 6353, 6359, 6361, 6367, 6373, 6379, 6389, 6397, 6421, 6427, 6449, 6451, 6469, 6473, 6481, 6491, 6521, 6529, 
6547, 6551, 6553, 6563, 6569, 6571, 6577, 6581, 6599, 6607, 6619, 6637, 6653, 6659, 6661, 6673, 6679, 6689, 6691, 6701, 6703, 6709, 6719, 6733, 6737, 6761, 6763, 6779, 6781, 6791, 6793, 6803, 6823, 6827, 6829, 6833, 6841, 6857, 6863, 6869, 6871, 6883, 6899, 6907, 6911, 6917, 6947, 6949, 6959, 6961, 6967, 6971, 6977, 6983, 6991, 6997, 7001, 7013, 7019, 7027, 7039, 7043, 7057, 7069, 7079, 7103, 7109, 7121, 7127, 7129, 7151, 7159, 7177, 7187, 7193, 7207, 7211, 7213, 7219, 7229, 7237, 7243, 7247, 7253, 7283, 7297, 7307, 7309, 7321, 7331, 7333, 7349, 7351, 7369, 7393, 7411, 7417, 7433, 7451, 7457, 7459, 7477, 7481, 7487, 7489, 7499, 7507, 7517, 7523, 7529, 7537, 7541, 7547, 7549, 7559, 7561, 7573, 7577, 7583, 7589, 7591, 7603, 7607, 7621, 7639, 7643, 7649, 7669, 7673, 7681, 7687, 7691, 7699, 7703, 7717, 7723, 7727, 7741, 7753, 7757, 7759, 7789, 7793, 7817, 7823, 7829, 7841, 7853, 7867, 7873, 7877, 7879, 7883, 7901, 7907, 7919 }; + return std::find( std::begin(primes), std::end(primes), value ) != std::end(primes); +} + + +//----------------------------------------------------------------------------------- +// Stub in just enough of atomic for this use +// +template +class atomic_unique_ptr { + std::atomic p = {}; +public: + atomic_unique_ptr () noexcept = default; + atomic_unique_ptr (atomic_unique_ptr const&) noexcept = delete; // this use doesn't need movability + ~atomic_unique_ptr() noexcept { delete p.load(); } + + explicit atomic_unique_ptr(std::unique_ptr&& up) noexcept : p{up.release()} { } // but we do need convertability + auto release() noexcept -> std::unique_ptr { auto ret = std::unique_ptr(p.load()); p.store(nullptr); return ret; } + // and for an emptying mutating move function I prefer a named function + // to a conversion operator (even one that's 'explicit' and &&-qualified) + + auto load(std::memory_order m_o = std::memory_order_seq_cst) const noexcept -> T* { return p.load(m_o); } + + // We only need c_e_weak(null, desired), so can provide this 
simpler API + auto compare_exchange_weak_null(std::unique_ptr& desired) noexcept -> bool { + T* null = nullptr; + if (p.compare_exchange_weak(null, desired.get())) { + desired.release(); + return true; + } + return false; + } +}; + + +//----------------------------------------------------------------------------------- +// extrinsic_storage - constant-time lock-free data structure to nonintrusively +// store external additional data for program objects +// +// Template parameters +// Data additional data to store for each object +// Buckets #buckets in hash table +// +// Concurrency +// Lock-free for insertion if additional storage is needed +// Wait-free for all other operations, including insertion that doesn't grow storage +// Note: Every use of explicit std::memory_order_* (aka m_o_*) MUST include rationale +// +// Complexity +// Constant-time could be achieved by periodically growing the hash table (not implemented) +// + +template // fixed is ok for prototyping; can be made dynamic to maintain O(K) + requires (is_prime(Buckets)) +class extrinsic_storage { +public: + using Key = std::atomic; + using Value = Data; + + ~extrinsic_storage() { + debug_report(); + + // Explicitly iterate the cleanup work to minimize stack use + // Without this cleanup traversal, automatic safe-by-construction + // cleanup would still occur but would be recursive and could + // require stack space proportional to the largest bucket size + for (auto& bucket : buckets) { + auto pnext = bucket.next.release(); + while (pnext) { + auto pdelete = std::move(pnext); + pnext = pdelete->next.release(); + } + } + } + + //-------------------------------------------------------------------------- + // find_or_insert( pobj ) - returns the data entry for pobj + // + // If pobj does not yet have an entry, creates it + // Returns null only if not present and allocation is needed but fails + // + auto find_or_insert(void* pobj) noexcept -> Value* { + if constexpr (debug_instrumentation) { + // 
m_o_relaxed is enough, inc order doesn't matter for totals + instrument_access_count.fetch_add(1 M_O_RELAXED); + } + return lookup(pobj, lookup_mode::find_or_insert); + } + + //-------------------------------------------------------------------------- + // find( pobj ) - returns the data entry for pobj or null if not present + // + auto find(void* pobj) noexcept -> Value* { + if constexpr (debug_instrumentation) { + // m_o_relaxed is enough, inc order doesn't matter for totals + instrument_access_count.fetch_add(1 M_O_RELAXED); + } + return lookup(pobj, lookup_mode::find); + } + + //-------------------------------------------------------------------------- + // erase( pobj ) - removes the entry for pobj, if present + // (only the key is reset to null; the stored value is left as-is) + auto erase(void* pobj) noexcept -> void { + if constexpr (debug_instrumentation) { + // m_o_relaxed is enough, inc order doesn't matter for totals + instrument_erase_count.fetch_add(1 M_O_RELAXED); + } + lookup(pobj, lookup_mode::erase); + } + +private: + static inline constexpr std::size_t ChunkSize = 32; + struct chunk { + std::array keys = { }; // SOA for better key locality + std::array values = { }; + atomic_unique_ptr next = { }; + }; + std::array buckets = {}; // prime for some minimal QoI + + //-------------------------------------------------------------------------- + // lookup( pobj, mode ) - shared helper for find/find_or_insert/erase + // + // Parameters + // pobj the key to look up + // mode if erase, reset key to null and return nullptr + // if find, return a pointer to the value if it exists, or null + // if find_or_insert, return a pointer to the value (inserted if + // not present) or null if allocation was needed and failed + // + // (*) This function requires that the calling code has exclusive access to + // *pobj, and if *pobj is shared has done any necessary synchronization + // to access *pobj (else the calling code already has a data race on + // *pobj). 
This function uses the knowledge that no other thread can be + // concurrently calling us with this pobj value to perform m_o_relaxed + // loads of .keys and .next, noted with (*) below. + // + // Using m_o_relaxed on loads will likely not be significantly faster + // on modern Intel and ARM architectures, where SC loads are already + // the same or similar speed as relaxed loads. But they should be a + // good performance gain on PPC, older ARM, and any other hardware + // that has inefficient SC loads, so I'll use m_o_relaxed with care + // as I think they can be correct here and they are in the hot path + // of the data structure traversal. + // + enum class lookup_mode { find, find_or_insert, erase }; + auto lookup( + void* pobj, + lookup_mode mode + ) noexcept + -> Value* + { + //auto hash = std::hash{}(pobj) % Buckets; // A + auto hash = (((std::size_t)pobj)>>2) % Buckets; // B + // across the three major C++ implementations I tried, hash B has + // smoother utilization (2% to 5% difference between the most vs + // least popular bucket) than hash A (3.5% to 16% difference) + + assert( 0 <= hash && hash < Buckets ); + if constexpr (debug_instrumentation) { + // m_o_relaxed is enough, inc order doesn't matter for totals + instrument_bucket_access[hash].fetch_add(1 M_O_RELAXED); + } + + // 1. 
If we find key==pobj, we're done + auto pchunk = &buckets[hash]; + while (pchunk) { + for ( auto i = std::size_t{0}; i < ChunkSize; ++i ) { + // (*) m_o_relaxed is enough, equality means we own the slot + // and so this thread already has exclusive access to *pobj + // and its .values data + if (pchunk->keys[i].load(M_O_RELAXED_NOCOMMA) == pobj) { + if (mode == lookup_mode::erase) { + pchunk->keys[i].store(nullptr M_O_RELAXED); + return nullptr; + } + // Else + return &pchunk->values[i]; + } + } + // (*) m_o_relaxed is enough here, because .next is immutable after + // it is first set to non-null, and if a new chunk(s) was just + // concurrently added by a different thread then that new + // chunk(s) cannot contain an entry for pobj + pchunk = pchunk->next.load(M_O_RELAXED_NOCOMMA); + } + + // 2. Otherwise, if we're not allowed to insert we're done + // but we didn't actually find something so return null + if (mode != lookup_mode::find_or_insert) { + if constexpr (debug_instrumentation) { + if (mode == lookup_mode::erase) { + // m_o_relaxed is enough, inc order doesn't matter for totals + instrument_erase_fail_count.fetch_add(1 M_O_RELAXED); + } + } + return nullptr; + } + + // 3. Otherwise, we need to insert it + // a) Prefer claiming an existing null slot if one exists + pchunk = &buckets[hash]; + while (true) { + auto i = std::size_t{0}; + for ( ; i < ChunkSize; ++i ) { + void* null = nullptr; + if ( + // m_o_relaxed is enough for this first load... + pchunk->keys[i].load(M_O_RELAXED_NOCOMMA) == nullptr + // ... 
because it's just a best-effort optimization to + // avoid this maybe-unneeded c_e_weak (which is safely SC) + && pchunk->keys[i].compare_exchange_weak( null, pobj ) + ) { + if constexpr (debug_instrumentation) { + // m_o_relaxed is enough, inc order doesn't matter for totals + instrument_insert_count.fetch_add(1 M_O_RELAXED); + } + return &pchunk->values[i]; + } + } + // (*) m_o_relaxed is enough here, because if a new chunk(s) + // was just concurrently added by a different thread then we'll + // just add an extra chunk which is fine + if ( pchunk->next.load(M_O_RELAXED_NOCOMMA) == nullptr ) { + break; + } + pchunk = pchunk->next.load(); + } + + // b) Otherwise, we need to allocate a new chunk for it + // At this point, pchunk points to the last chunk in this bucket + assert (pchunk); + + // Not using make_unique: In principle, if allocation fails we don't + // want to change well-formed program behavior. (In practice, if this + // small allocation ever fails the program is already in deep trouble; + // unless Key or Data are large, a chunk is usually well under 1KB) + auto pnew = std::unique_ptr( new (std::nothrow) chunk{} ); + if (pnew == nullptr) { return nullptr; } + + pnew->keys[0] = pobj; + auto ret = &pnew->values[0]; + while (!pchunk->next.compare_exchange_weak_null(pnew)) { + pchunk = pchunk->next.load(); + assert (pchunk); + } + + if constexpr (debug_instrumentation) { + // m_o_relaxed is enough, inc order doesn't matter for totals + instrument_alloc_count.fetch_add(1 M_O_RELAXED); + } + return ret; + } + + // Debug instrumentation + // + static inline std::atomic instrument_access_count = {}; + static inline std::atomic instrument_insert_count = {}; + static inline std::atomic instrument_alloc_count = {}; + static inline std::atomic instrument_erase_count = {}; + static inline std::atomic instrument_erase_fail_count = {}; + static inline std::array, Buckets> instrument_bucket_access = {}; + + auto debug_report() -> void { + if constexpr 
(debug_instrumentation) { + std::cout << "Report for extrinsic_storage<" << typeid(Data).name() << ">\n\n" + << " all accesses (incl. inserts) " << print(instrument_access_count .load()) << "\n" + << " inserts " << print(instrument_insert_count .load() + +instrument_alloc_count .load()) << "\n" + << " used existing storage " << print(instrument_insert_count .load()) << "\n" + << " allocated new storage " << print(instrument_alloc_count .load()) << "\n" + << " erases " << print(instrument_erase_count .load()) << "\n" + << " succeeded " << print(instrument_erase_count .load() + -instrument_erase_fail_count .load()) << "\n" + << " failed " << print(instrument_erase_fail_count .load()) << "\n\n"; + + auto empty = std::size_t{0}; + auto tot = 0; + auto min = instrument_bucket_access[0].load(); + auto max = 0; + for (auto const& count : instrument_bucket_access) { + if (count == 0) { ++empty; } + tot += count; + if (count < min) { min = count; } + if (count > max) { max = count; } + } + + auto sizes = std::map{}; + for (auto const& ch : buckets) { + auto size = 0; + for (auto next = ch.next.load(); next; ++size, next = next->next.load()) { ; } + ++sizes[size]; + } + std::cout << " bucket utilization\n" + << " # buckets\n" + << " total " << print(Buckets) << "\n" + << " empty " << print(empty) << "\n" + << " # accesses\n" + << " total " << print(tot) << "\n" + << " to least popular bucket " << print(min) << "\n" + << " to most popular bucket " << print(max) << "\n" + << " # extra allocations\n"; + for (auto [size, count] : sizes | std::views::reverse) { + std::cout << " " << size << " - " << count << " buckets\n"; + } + std::cout << "\n"; + + auto static_space = sizeof(extrinsic_storage); + auto dynamic_space = instrument_alloc_count.load() * sizeof(chunk); + std::cout << " total extrinsic storage used = keys + data + housekeeping + padding (in bytes)\n" + << " static " << print(static_space) << "\n" + << " dynamic " << print(dynamic_space) << "\n" + << " total " << 
print(static_space + dynamic_space) << "\n" + << " for comparison, intrinsic would have used approx. " + << print((instrument_alloc_count.load()+Buckets) * sizeof(chunk::values)) << "\n\n"; + + std::cout << " sizes (in bytes)\n" + << " sizeof(chunk) " << print(sizeof(chunk)) << "\n" + << " sizeof(chunk.keys) " << print(sizeof(chunk::keys)) << "\n" + << " sizeof(chunk.values) " << print(sizeof(chunk::values)) << "\n" + << " sizeof(chunk.next) " << print(sizeof(chunk::next)) << "\n" + << " wasted chunk.* padding " << print(sizeof(chunk) - sizeof(chunk::keys) - sizeof(chunk::values) - sizeof(chunk::next)) << "\n" + << " sizeof(buckets) " << print(sizeof(buckets)) << "\n" + << " #buckets * sizeof(chunk) " << print(Buckets * sizeof(chunk)) << "\n" + << "\n"; + } + } +}; + +#endif diff --git a/experimental/extrinsic_storage_std_locked.h b/experimental/extrinsic_storage_std_locked.h new file mode 100644 index 0000000000..bfbea55336 --- /dev/null +++ b/experimental/extrinsic_storage_std_locked.h @@ -0,0 +1,76 @@ + +// Copyright 2022-2024 Herb Sutter +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Part of the Cppfront Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://github.com/hsutter/cppfront/blob/main/LICENSE for license information. 
+ +#ifndef CPP2_EXPERIMENTAL_EXTRINSIC_STORAGE_STD_LOCKED_H +#define CPP2_EXPERIMENTAL_EXTRINSIC_STORAGE_STD_LOCKED_H + +#include +#include +#include +#include + + +//----------------------------------------------------------------------------------- +// Some helpers +// +auto print(std::integral auto val) -> std::string { + auto ret = std::to_string(val % 10); + auto pos = 0; + while ((val /= 10) > 0) { + if ((++pos % 3) == 0) { ret = ',' + ret; } + ret = std::to_string(val % 10) + ret; + } + return ret; +} + + +//----------------------------------------------------------------------------------- +// A "brute-force" locked implementation to measure against +// +// NOTE: For performance comparison only, not recommended +// +template +class extrinsic_storage { + std::mutex mut; + //std::map data; + std::unordered_map data; +public: + //-------------------------------------------------------------------------- + // find_or_insert( pobj ) - returns the data entry for pobj + // + // If pobj does not yet have an entry, creates it + // + auto find_or_insert(void* pobj) -> Data* { + auto _ = std::lock_guard{mut}; + return &data[pobj]; + } + + //-------------------------------------------------------------------------- + // find( pobj ) - returns the data entry for pobj or null if not present + // + auto find(void* pobj) noexcept -> Data* { + auto _ = std::lock_guard{mut}; + if (auto iter = data.find(pobj); + iter != data.end() + ) + { + return &iter->second; + } + // Else + return nullptr; + } + + //-------------------------------------------------------------------------- + // erase( pobj ) - removes the entry for pobj + // + auto erase(void* pobj) noexcept -> void { + auto _ = std::lock_guard{mut}; + data.erase(pobj); + } +}; + +#endif diff --git a/experimental/union_test.cpp b/experimental/union_test.cpp new file mode 100644 index 0000000000..9a06626d5a --- /dev/null +++ b/experimental/union_test.cpp @@ -0,0 +1,213 @@ + +// Copyright 2022-2024 Herb Sutter +// 
SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Part of the Cppfront Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://github.com/hsutter/cppfront/blob/main/LICENSE for license information. + +//#include "extrinsic_storage_std_locked.h" +#include "extrinsic_storage.h" + +#include +#include +#include +#include +#include +#include +#include +#include + + +//------------------------------------------------------------------------------- +// Union instrumentation API for compiler integration +// +// Template parameters +// Tag discriminator tag to store for each object (uintNN_t where +// NN is large enough to hold the #alternatives in the union) +// +// For an object U of union type that +// has a unique address, when Inject a call to this (zero-based alternative #s) +// +// U is created initialized on_set_alternative(&U,0) = the first alternative# is active +// +// U is created uninitialized on_set_alternative(&U,invalid) +// +// U.A = xxx (alt A is assigned to) on_set_alternative(&U,#A) +// +// U or U.A is passed to a function by on_set_alternative(&U,unknown) +// pointer/reference to non-const +// and we don't know the function +// is compiled in this mode +// +// U.A (alt A is otherwise used) on_get_alternative(&U,#A) +// and A is not a common initial +// sequence +// +// U is destroyed / goes out of scope on_destroy(&U) +// +// That's it. 
Here's an example: +// { +// union Test { int a; double b; }; +// Test t = {42}; union_registry<>::on_set_alternative(&t,0); +// std::cout << t.a; union_registry<>::on_get_alternative(&t,0); +// t.b = 3.14159; union_registry<>::on_set_alternative(&t,1); +// std::cout << t.b; union_registry<>::on_get_alternative(&t,1); +// } union_registry<>::on_destroy(&t); +// +// For all unions with up to 254 alternatives, use union_registry<> +// For all unions with between 255 and 16k-2 alternatives, use union_registry +// If you find a union with >16k-2 alternatives, email me the story and use union_registry +// +template +class union_registry { + static inline auto tags = extrinsic_storage{}; + static inline auto log = std::ofstream{ "union-violations.log" }; +public: + static inline auto invalid = std::numeric_limits::max(); + static inline auto unknown = std::numeric_limits::max()-1; + + static inline auto on_destroy(void* pobj) noexcept -> void { tags.erase(pobj); } + + static inline auto on_set_alternative(void* pobj, uint32_t alt) noexcept -> void { + if (auto p = tags.find_or_insert(pobj)) { *p = alt; } + } + + static inline auto on_get_alternative(void* pobj, uint32_t alt, std::source_location where = std::source_location::current()) -> void { + if (auto active = tags.find(pobj); + active // if we have discriminator info for this union + && *active != alt // and the discriminator is not what is expected + && *active != unknown // and is not unknown + ) + { + log << where.file_name() << '(' << where.line() + << "): union type safety violation - active member " << (*active == invalid ? 
"invalid" : std::to_string(*active)) + << ", attempted to access " << alt << "\n"; + } + } +}; + + +//------------------------------------------------------------------------------- +// Sample union +// +union Union { + char alt0; + int alt1; + long double alt2; +}; + + +//------------------------------------------------------------------------------- +// Multithreaded test harness +// +template +auto test(int threads = 1) -> void +{ + auto fault_inject_counter = std::atomic{9900}; + auto size = 10'000/threads; + + // 1M unions, 10K at a time + auto run = [&] { + for (auto iteration = 0; iteration < 100; ++iteration) + { + auto us = std::vector{}; + us.reserve(size); + for (int i = 0; i < size; ++i) { + us.emplace_back('x'); + if constexpr (SafetyChecks) { union_registry<>::on_set_alternative(&us[i],0); } // 1st access for this union + } + + for (auto& u : us) + { + if (--fault_inject_counter != 0) { // occasionally forget to set .alt1 + u.alt1 = 123; + if constexpr (SafetyChecks) { union_registry<>::on_set_alternative(&u,1); } // 2nd + } + + if constexpr (SafetyChecks) { union_registry<>::on_get_alternative(&u,1); } // 3rd + u.alt1 += 456; + + u.alt2 = 12.345678; + if constexpr (SafetyChecks) { union_registry<>::on_set_alternative(&u,2); } // 4th + + if constexpr (SafetyChecks) { union_registry<>::on_get_alternative(&u,2); } // 5th + u.alt2 += 3.14169265; + + u.alt0 = 'y'; + if constexpr (SafetyChecks) { union_registry<>::on_set_alternative(&u,0); } // 6th + + if constexpr (SafetyChecks) { union_registry<>::on_get_alternative(&u,0); } // 7th + auto _ = u.alt0; + + u.alt2 = 3.1415926535; + if constexpr (SafetyChecks) { union_registry<>::on_set_alternative(&u,2); } // 8th + + if constexpr (SafetyChecks) { union_registry<>::on_get_alternative(&u,2); } // 9th + u.alt2 += 3.14169265; + } + + for (int i = 0; i < size; ++i) { + if constexpr (SafetyChecks) { union_registry<>::on_destroy(&us[i]); } // 10th and last + } + } + }; + + std::vector thds; + for (auto i = 0; i < 
threads; ++i) { + thds.emplace_back( run ); + } +} + + +class timer { + std::chrono::time_point start; +public: + timer() : start{ std::chrono::high_resolution_clock::now() } { } + auto microseconds() const { return std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start).count(); } +}; + + +int main() +{ + auto tot_raw = int64_t{0}; + auto tot_chk = int64_t{0}; + + auto stats = std::map{}; + + // Repeat test sequence a few times + for (auto reps = 0; reps < 5; ++reps) + { + // Run "raw" vs "checked" test for 1, 2, 4, 8, 16, 32, 64, and 128 threads + for (auto i = 1; i <= 128; i *= 2) + { + ////std::cout << "# threads: " << i << "\n"; + + // First without checks + auto t = timer{}; + test(i); + auto raw_time = t.microseconds(); + ////std::cout << " raw: " << print(raw_time) << "\n"; + tot_raw += raw_time; + + // Then with checks, via specifying + t = timer{}; + test(i); + auto chk_time = t.microseconds(); + ////std::cout << " checked: " << print(chk_time) << "\n"; + stats[i] += chk_time-raw_time; + tot_chk += chk_time; + } + } + + // Print each #threads timings in an Excel-friendly format + for (auto [threads, timings] : stats) { + if (threads != 1) { std::cout << " "; } + std::cout << timings; + } + std::cout << "\n"; + + std::cout << "totals\n" + << " raw: " << print(tot_raw) << "\n" + << " checked: " << print(tot_chk) << "\n"; + +} diff --git a/gen_build.bat b/gen_build.bat new file mode 100644 index 0000000000..cb75c7697b --- /dev/null +++ b/gen_build.bat @@ -0,0 +1,4 @@ +@echo off +cppfront -_gen_build > source\build.info +@echo Build stamp updated to: +type source\build.info diff --git a/include/cpp2regex.h b/include/cpp2regex.h new file mode 100644 index 0000000000..e2a69946ca --- /dev/null +++ b/include/cpp2regex.h @@ -0,0 +1,1497 @@ + +#ifndef CPP2REGEX_H_CPP2 +#define CPP2REGEX_H_CPP2 + + +//=== Cpp2 type declarations ==================================================== + + +#include "cpp2util.h" + +#line 1 "cpp2regex.h2" + +#line 20 
"cpp2regex.h2" +namespace cpp2 { + +namespace regex { + +#line 36 "cpp2regex.h2" +template class match_group; + +#line 46 "cpp2regex.h2" +template class match_return; + +#line 54 "cpp2regex.h2" +template class match_context; + +#line 125 "cpp2regex.h2" +template class reverse_match_context; + +#line 194 "cpp2regex.h2" +class true_end_func; + +#line 202 "cpp2regex.h2" +class no_reset; + +#line 209 "cpp2regex.h2" +template class on_return; + +#line 236 "cpp2regex.h2" +template class single_class_entry; + +#line 245 "cpp2regex.h2" +template class range_class_entry; + +#line 254 "cpp2regex.h2" +template class combined_class_entry; + +#line 263 "cpp2regex.h2" +template class list_class_entry; + +#line 272 "cpp2regex.h2" +template class named_class_entry; + +#line 279 "cpp2regex.h2" +template class negated_class_entry; + +#line 288 "cpp2regex.h2" +template class shorthand_class_entry; + +#line 336 "cpp2regex.h2" +template class alternative_token_matcher; + +#line 427 "cpp2regex.h2" +template class class_token_matcher; + +#line 604 "cpp2regex.h2" +class range_flags; + + +#line 613 "cpp2regex.h2" +template class range_token_matcher; + +#line 785 "cpp2regex.h2" +template class regular_expression; + +#line 921 "cpp2regex.h2" +} +} + + +//=== Cpp2 type definitions and function declarations =========================== + +#line 1 "cpp2regex.h2" + +// Copyright 2022-2025 Herb Sutter +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Part of the Cppfront Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://github.com/hsutter/cppfront/blob/main/LICENSE for license information. 
+ + +//=========================================================================== +// Regex support +//=========================================================================== + +#ifndef CPP2_CPP2REGEX_H +#define CPP2_CPP2REGEX_H + + +template +using matcher_context_type = typename matcher::template context; + +#line 20 "cpp2regex.h2" +namespace cpp2 { + +namespace regex { + +template using bstring = std::basic_string; +template using bview = std::basic_string_view; + +//----------------------------------------------------------------------- +// +// Helper structures for the expression matching. +// +//----------------------------------------------------------------------- +// + +// Structure for storing group information. +// +template class match_group + { + public: Iter start {}; + public: Iter end {}; + + public: bool matched {false}; + public: match_group(auto const& start_, auto const& end_, auto const& matched_); +public: match_group(); + +#line 42 "cpp2regex.h2" +}; + +// Return value for every matcher. +// +template class match_return + { + public: bool matched {false}; + public: Iter pos {}; + public: match_return(auto const& matched_, auto const& pos_); +public: match_return(); + +#line 50 "cpp2regex.h2" +}; + +// Modifiable state during matching. 
+// +template class match_context + { + public: Iter begin; + public: Iter end; + + private: std::array,max_groups> groups {}; + + public: match_context(Iter const& begin_, Iter const& end_); + +#line 66 "cpp2regex.h2" + public: match_context(match_context const& that); +#line 66 "cpp2regex.h2" + public: auto operator=(match_context const& that) -> match_context& ; +#line 66 "cpp2regex.h2" + public: match_context(match_context&& that) noexcept; +#line 66 "cpp2regex.h2" + public: auto operator=(match_context&& that) noexcept -> match_context& ; + + // String end and start positions + // + public: [[nodiscard]] auto get_string_start() const& -> decltype(auto); + public: [[nodiscard]] auto get_string_end() const& -> decltype(auto); + + // Getter and setter for groups + // + public: [[nodiscard]] auto get_group(auto const& group) const& -> decltype(auto); + + public: [[nodiscard]] auto get_group_end(auto const& group) const& -> int; + +#line 83 "cpp2regex.h2" + public: [[nodiscard]] auto get_group_start(auto const& group) const& -> int; + +#line 89 "cpp2regex.h2" + public: [[nodiscard]] auto get_group_string(auto const& group) const& -> std::string; + +#line 96 "cpp2regex.h2" + public: auto set_group_end(auto const& group, auto const& pos) & -> void; + +#line 101 "cpp2regex.h2" + public: auto set_group_invalid(auto const& group) & -> void; + +#line 105 "cpp2regex.h2" + public: auto set_group_start(auto const& group, auto const& pos) & -> void; + +#line 109 "cpp2regex.h2" + public: [[nodiscard]] auto size() const& -> decltype(auto); + + // Misc functions + // + public: [[nodiscard]] auto fail() const& -> decltype(auto); + public: [[nodiscard]] auto pass(cpp2::impl::in cur) const& -> decltype(auto); + + public: auto reset() & -> void; + +#line 121 "cpp2regex.h2" +}; + +// Wrapper of context for reverse matches. Implements only the minimal interface for matching. 
+// +template class reverse_match_context + { + public: using ReverseIter = std::reverse_iterator; + public: match_context* forward_context; + + public: ReverseIter begin; + public: ReverseIter end; + + public: reverse_match_context(auto const& forward_context_); +#line 133 "cpp2regex.h2" + public: auto operator=(auto const& forward_context_) -> reverse_match_context& ; + +#line 139 "cpp2regex.h2" + public: reverse_match_context(reverse_match_context const& that); +#line 139 "cpp2regex.h2" + public: auto operator=(reverse_match_context const& that) -> reverse_match_context& ; +#line 139 "cpp2regex.h2" + public: reverse_match_context(reverse_match_context&& that) noexcept; +#line 139 "cpp2regex.h2" + public: auto operator=(reverse_match_context&& that) noexcept -> reverse_match_context& ; + + // String end and start positions + // + public: [[nodiscard]] auto get_string_start() const& -> decltype(auto); + public: [[nodiscard]] auto get_string_end() const& -> decltype(auto); + + // Getter and setter for groups + // + public: auto set_group_end(auto const& group, auto const& pos) & -> void; + +#line 152 "cpp2regex.h2" + public: auto set_group_invalid(auto const& group) & -> void; + +#line 156 "cpp2regex.h2" + public: auto set_group_start(auto const& group, auto const& pos) & -> void; + +#line 160 "cpp2regex.h2" + // Misc functions + // + public: [[nodiscard]] auto fail() const& -> decltype(auto); + public: [[nodiscard]] auto pass(cpp2::impl::in cur) const& -> decltype(auto); +}; + +// Helpers for creating wrappers of the match context. 
+// +template [[nodiscard]] auto make_forward_match_context(match_context& ctx) -> decltype(auto); + +#line 172 "cpp2regex.h2" +template [[nodiscard]] auto make_forward_match_context(reverse_match_context& ctx) -> decltype(auto); + +#line 176 "cpp2regex.h2" +template [[nodiscard]] auto make_reverse_match_context(match_context& ctx) -> auto; + +#line 180 "cpp2regex.h2" +template [[nodiscard]] auto make_reverse_match_context(reverse_match_context& ctx) -> decltype(auto); + +#line 184 "cpp2regex.h2" +// Helpers for creating wrappers of the iterators. +// +template [[nodiscard]] auto make_forward_iterator(Iter const& pos) -> auto; +template [[nodiscard]] auto make_forward_iterator(std::reverse_iterator const& pos) -> auto; +template [[nodiscard]] auto make_reverse_iterator(Iter const& pos) -> auto; +template [[nodiscard]] auto make_reverse_iterator(std::reverse_iterator const& pos) -> auto; + +#line 192 "cpp2regex.h2" +// End function that returns a valid match. +// +class true_end_func + { + public: [[nodiscard]] auto operator()(auto const& cur, auto& ctx) const& -> decltype(auto); +}; + +#line 200 "cpp2regex.h2" +// Empty group reset function. +// +class no_reset + { + public: auto operator()([[maybe_unused]] auto& unnamed_param_2) const& -> void; +}; + +#line 208 "cpp2regex.h2" +// Evaluate func on destruction of the handle. +template class on_return + { + private: Func func; + + public: on_return(Func const& f); +#line 213 "cpp2regex.h2" + public: auto operator=(Func const& f) -> on_return& ; + +#line 217 "cpp2regex.h2" + public: ~on_return() noexcept; + public: on_return(on_return const&) = delete; /* No 'that' constructor, suppress copy */ + public: auto operator=(on_return const&) -> void = delete; + + +#line 220 "cpp2regex.h2" +}; + +#line 223 "cpp2regex.h2" +// Helper for auto deduction of the Func type. 
+template [[nodiscard]] auto make_on_return(Func const& func) -> decltype(auto); + +#line 227 "cpp2regex.h2" +//----------------------------------------------------------------------- +// +// Character classes for regular expressions. +// +//----------------------------------------------------------------------- +// + +// Class syntax: Example: a +// +template class single_class_entry + { + public: [[nodiscard]] static auto includes(cpp2::impl::in c) -> decltype(auto); + public: [[nodiscard]] static auto to_string() -> decltype(auto); + public: single_class_entry() = default; + public: single_class_entry(single_class_entry const&) = delete; /* No 'that' constructor, suppress copy */ + public: auto operator=(single_class_entry const&) -> void = delete; + +#line 240 "cpp2regex.h2" +}; + +#line 243 "cpp2regex.h2" +// Class syntax: - Example: a-c +// +template class range_class_entry + { + public: [[nodiscard]] static auto includes(cpp2::impl::in c) -> decltype(auto); + public: [[nodiscard]] static auto to_string() -> decltype(auto); + public: range_class_entry() = default; + public: range_class_entry(range_class_entry const&) = delete; /* No 'that' constructor, suppress copy */ + public: auto operator=(range_class_entry const&) -> void = delete; + +#line 249 "cpp2regex.h2" +}; + +#line 252 "cpp2regex.h2" +// Helper for combining two character classes +// +template class combined_class_entry + { + public: [[nodiscard]] static auto includes(cpp2::impl::in c) -> decltype(auto); + public: [[nodiscard]] static auto to_string() -> decltype(auto); + public: combined_class_entry() = default; + public: combined_class_entry(combined_class_entry const&) = delete; /* No 'that' constructor, suppress copy */ + public: auto operator=(combined_class_entry const&) -> void = delete; + +#line 258 "cpp2regex.h2" +}; + +#line 261 "cpp2regex.h2" +// Class syntax: Example: abcd +// +template class list_class_entry + { + public: [[nodiscard]] static auto includes(cpp2::impl::in c) -> 
decltype(auto); + public: [[nodiscard]] static auto to_string() -> decltype(auto); + public: list_class_entry() = default; + public: list_class_entry(list_class_entry const&) = delete; /* No 'that' constructor, suppress copy */ + public: auto operator=(list_class_entry const&) -> void = delete; + +#line 267 "cpp2regex.h2" +}; + +#line 270 "cpp2regex.h2" +// Class syntax: [: class named_class_entry + { + public: [[nodiscard]] static auto includes(cpp2::impl::in c) -> decltype(auto); + public: [[nodiscard]] static auto to_string() -> decltype(auto); + public: named_class_entry() = default; + public: named_class_entry(named_class_entry const&) = delete; /* No 'that' constructor, suppress copy */ + public: auto operator=(named_class_entry const&) -> void = delete; + +#line 276 "cpp2regex.h2" +}; + +#line 279 "cpp2regex.h2" +template class negated_class_entry +: public Inner { + + public: [[nodiscard]] static auto includes(cpp2::impl::in c) -> decltype(auto); + public: negated_class_entry() = default; + public: negated_class_entry(negated_class_entry const&) = delete; /* No 'that' constructor, suppress copy */ + public: auto operator=(negated_class_entry const&) -> void = delete; + +#line 283 "cpp2regex.h2" +}; + +#line 286 "cpp2regex.h2" +// Short class syntax: \ Example: \w +// +template class shorthand_class_entry + { + public: [[nodiscard]] static auto includes(cpp2::impl::in c) -> decltype(auto); + public: [[nodiscard]] static auto to_string() -> decltype(auto); + public: shorthand_class_entry() = default; + public: shorthand_class_entry(shorthand_class_entry const&) = delete; /* No 'that' constructor, suppress copy */ + public: auto operator=(shorthand_class_entry const&) -> void = delete; + +#line 292 "cpp2regex.h2" +}; + +#line 295 "cpp2regex.h2" +// Named basic character classes +// +template using digits_class = named_class_entry>; +template using lower_class = named_class_entry>; +template using upper_class = named_class_entry>; + +// Named other classes +// 
+template using alnum_class = named_class_entry,upper_class,digits_class>>; +template using alpha_class = named_class_entry,upper_class>>; +template using ascii_class = named_class_entry>; +template using blank_class = named_class_entry>; +template using cntrl_class = named_class_entry,single_class_entry>>; +template using graph_class = named_class_entry>; +template using hor_space_class = named_class_entry>; +template using print_class = named_class_entry>; +template using punct_class = named_class_entry','?','@','[','\\',']','^','_','`','{','|','}','~',']'>>; +template using space_class = named_class_entry>; +template using ver_space_class = named_class_entry>; +template using word_class = named_class_entry,single_class_entry>>; +template using xdigit_class = named_class_entry,range_class_entry,digits_class>>; + +// Shorthand class entries +// +template using short_digits_class = shorthand_class_entry>; +template using short_hor_space_class = shorthand_class_entry>; +template using short_space_class = shorthand_class_entry>; +template using short_vert_space_class = shorthand_class_entry>; +template using short_word_class = shorthand_class_entry>; + +template using short_not_digits_class = negated_class_entry>>; +template using short_not_hor_space_class = negated_class_entry>>; +template using short_not_space_class = negated_class_entry>>; +template using short_not_vert_space_class = negated_class_entry>>; +template using short_not_word_class = negated_class_entry>>; + +#line 332 "cpp2regex.h2" +// Regex syntax: | Example: ab|ba +// +// Non greedy implementation. First alternative that matches is chosen. 
+// +template class alternative_token_matcher + { + public: [[nodiscard]] static auto match(auto const& cur, auto& ctx, auto const& end_func, auto const& tail, auto const& ...functions) -> auto; + +#line 342 "cpp2regex.h2" + private: template [[nodiscard]] static auto match_first(auto const& cur, auto& ctx, auto const& end_func, auto const& tail, auto const& cur_func, auto const& cur_reset, Other const& ...other) -> auto; + public: alternative_token_matcher() = default; + public: alternative_token_matcher(alternative_token_matcher const&) = delete; /* No 'that' constructor, suppress copy */ + public: auto operator=(alternative_token_matcher const&) -> void = delete; + + +#line 360 "cpp2regex.h2" +}; + +#line 363 "cpp2regex.h2" +// Regex syntax: . +// +template [[nodiscard]] auto any_token_matcher(auto& cur, auto& ctx) -> bool; + +#line 377 "cpp2regex.h2" +// Regex syntax: (?>) Example: a(?>bc|c)c +// +template [[nodiscard]] auto atomic_group_matcher(Iter const& cur, auto& ctx, auto const& inner, auto const& end_func, auto const& other) -> auto; + +#line 391 "cpp2regex.h2" +// TODO: Check if vectorization works at some point with this implementation. +// char_token_matcher: (inout cur, inout ctx) -> bool = { +// if !(std::distance(cur, ctx.end) < tokens..size()) { +// return false; +// } +// matched : bool = true; +// (copy i: int = 0) while i < tokens..size() next i += 1 { +// if tokens..data()[i] != cur[i] { +// matched = false; // No break for performance optimization. Without break, the loop vectorizes. +// } +// } +// if matched { +// cur += tokens..size(); +// } +// return matched; +// } + +// char_token_case_insensitive_matcher: (inout cur, inout ctx) -> bool = { +// if !(std::distance(cur, ctx.end) < lower..size()) { +// return false; +// } +// matched : bool = true; +// (copy i : int = 0) while i < lower..size() next i += 1 { +// if !(lower..data()[i] == cur[i] || upper..data()[i] == cur[i]) { +// matched = false; // No break for performance optimization. 
Without break, the loop vectorizes. +// } +// } +// if matched { +// cur += lower..size(); +// } +// return matched; +// } + +#line 425 "cpp2regex.h2" +// Regex syntax: [] Example: [abcx-y[:digits:]] +// +template class class_token_matcher + { + public: [[nodiscard]] static auto match(auto& cur, auto& ctx) -> bool; + +#line 458 "cpp2regex.h2" + private: template [[nodiscard]] static auto match_any(cpp2::impl::in c) -> bool; + public: class_token_matcher() = default; + public: class_token_matcher(class_token_matcher const&) = delete; /* No 'that' constructor, suppress copy */ + public: auto operator=(class_token_matcher const&) -> void = delete; + + +#line 471 "cpp2regex.h2" + // TODO: Implement proper to string + // to_string: () -> bstring = { + // r: bstring = "["; + // if negate { + // r += "^"; + // } + // r += (bstring() + ... + List::to_string()); + // r += "]"; + + // return r; + // } +#line 482 "cpp2regex.h2" +}; + +#line 485 "cpp2regex.h2" +// Named short classes +// +template using named_class_no_new_line = class_token_matcher>; +template using named_class_digits = class_token_matcher>; +template using named_class_hor_space = class_token_matcher>; +template using named_class_space = class_token_matcher>; +template using named_class_ver_space = class_token_matcher>; +template using named_class_word = class_token_matcher>; + +template using named_class_not_digits = class_token_matcher>; +template using named_class_not_hor_space = class_token_matcher>; +template using named_class_not_space = class_token_matcher>; +template using named_class_not_ver_space = class_token_matcher>; +template using named_class_not_word = class_token_matcher>; + +#line 501 "cpp2regex.h2" +// Regex syntax: \ Example: \1 +// \g{name_or_number} +// \k{name_or_number} +// \k +// \k'name_or_number' +// +template [[nodiscard]] auto group_ref_token_matcher(auto& cur, auto& ctx) -> bool; + +#line 546 "cpp2regex.h2" +// Regex syntax: $ Example: aa$ +// +template [[nodiscard]] auto 
line_end_token_matcher(auto const& cur, auto& ctx) -> bool; + +#line 562 "cpp2regex.h2" +// Regex syntax: ^ Example: ^aa +// +template [[nodiscard]] auto line_start_token_matcher(auto const& cur, auto& ctx) -> bool; + +#line 571 "cpp2regex.h2" +// Regex syntax: (?=) or (?!) or (*pla), etc. Example: (?=AA) +// +// Parsed in group_token. +// +template [[nodiscard]] auto lookahead_token_matcher(auto const& cur, auto& ctx, auto const& func) -> bool; + +#line 585 "cpp2regex.h2" +// Regex syntax: (?<=) or (? [[nodiscard]] auto lookbehind_token_matcher(auto const& cur, auto& ctx, auto const& func) -> bool; + +#line 600 "cpp2regex.h2" +// TODO: @enum as template parameter currently not working. See issue https://github.com/hsutter/cppfront/issues/1147 + +#line 603 "cpp2regex.h2" +// Options for range matching. +class range_flags { + public: static const int not_greedy;// Try to take as few as possible. + public: static const int greedy;// Try to take as many as possible. + public: static const int possessive;// Do not give back after a greedy match. No backtracking. 
+ + public: range_flags() = default; + public: range_flags(range_flags const&) = delete; /* No 'that' constructor, suppress copy */ + public: auto operator=(range_flags const&) -> void = delete; +}; +#line 609 "cpp2regex.h2" + +#line 611 "cpp2regex.h2" +// Regex syntax: {min, max} Example: a{2,4} +// +template class range_token_matcher + { + + public: template [[nodiscard]] static auto match(Iter const& cur, auto& ctx, auto const& inner, auto const& reset_func, auto const& end_func, auto const& tail) -> auto; + +#line 629 "cpp2regex.h2" + private: [[nodiscard]] static auto is_below_upper_bound(cpp2::impl::in count) -> bool; + +#line 634 "cpp2regex.h2" + private: [[nodiscard]] static auto is_below_lower_bound(cpp2::impl::in count) -> bool; + +#line 639 "cpp2regex.h2" + private: [[nodiscard]] static auto is_in_range(cpp2::impl::in count) -> bool; + +#line 645 "cpp2regex.h2" + private: template [[nodiscard]] static auto match_min_count(Iter const& cur, auto& ctx, auto const& inner, auto const& end_func, int& count_r) -> auto; + +#line 661 "cpp2regex.h2" + private: template [[nodiscard]] static auto match_greedy(cpp2::impl::in count, Iter const& cur, Iter const& last_valid, auto& ctx, auto const& inner, auto const& reset_func, auto const& end_func, auto const& other) -> match_return; + +#line 691 "cpp2regex.h2" + private: template [[nodiscard]] static auto match_possessive(Iter const& cur, auto& ctx, auto const& inner, auto const& end_func, auto const& other) -> match_return; + +#line 719 "cpp2regex.h2" + private: template [[nodiscard]] static auto match_not_greedy(Iter const& cur, auto& ctx, auto const& inner, auto const& end_func, auto const& other) -> match_return; + public: range_token_matcher() = default; + public: range_token_matcher(range_token_matcher const&) = delete; /* No 'that' constructor, suppress copy */ + public: auto operator=(range_token_matcher const&) -> void = delete; + + +#line 745 "cpp2regex.h2" +}; + +#line 748 "cpp2regex.h2" +// Regex syntax: 
\b or \B Example: \bword\b +// +// Matches the start end end of word boundaries. +// +template [[nodiscard]] auto word_boundary_token_matcher(auto& cur, auto& ctx) -> bool; + +#line 777 "cpp2regex.h2" +//----------------------------------------------------------------------- +// +// Regular expression implementation. +// +//----------------------------------------------------------------------- +// + +// Regular expression implementation +template class regular_expression + { + public: template using context = matcher_context_type;// TODO: Remove when nested types are allowed: https://github.com/hsutter/cppfront/issues/727 + + public: template class search_return + { + public: bool matched; + public: context ctx; + public: int pos; + + public: search_return(cpp2::impl::in matched_, context const& ctx_, Iter const& pos_); + +#line 801 "cpp2regex.h2" + public: search_return(Iter const& begin, Iter const& end); + +#line 807 "cpp2regex.h2" + public: [[nodiscard]] auto group_number() const& -> decltype(auto); + public: [[nodiscard]] auto group(cpp2::impl::in g) const& -> decltype(auto); + public: [[nodiscard]] auto group_start(cpp2::impl::in g) const& -> decltype(auto); + public: [[nodiscard]] auto group_end(cpp2::impl::in g) const& -> decltype(auto); + + public: [[nodiscard]] auto group(cpp2::impl::in> g) const& -> decltype(auto); + public: [[nodiscard]] auto group_start(cpp2::impl::in> g) const& -> decltype(auto); + public: [[nodiscard]] auto group_end(cpp2::impl::in> g) const& -> decltype(auto); + + public: auto update(cpp2::impl::in> r) & -> void; + +#line 821 "cpp2regex.h2" + private: [[nodiscard]] auto get_group_id(cpp2::impl::in> g) const& -> auto; + public: search_return(search_return const&) = delete; /* No 'that' constructor, suppress copy */ + public: auto operator=(search_return const&) -> void = delete; + + +#line 828 "cpp2regex.h2" + }; + + public: [[nodiscard]] auto find_all(auto&& func, cpp2::impl::in> str) const& -> decltype(auto); + public: 
[[nodiscard]] auto find_all(auto&& func, cpp2::impl::in> str, cpp2::impl::in start) const& -> decltype(auto); + public: [[nodiscard]] auto find_all(auto&& func, cpp2::impl::in> str, cpp2::impl::in start, cpp2::impl::in length) const& -> decltype(auto); + public: template auto find_all(auto const& func, Iter const& start, Iter const& end) const& -> void; + +#line 861 "cpp2regex.h2" + public: [[nodiscard]] auto match(cpp2::impl::in> str) const& -> decltype(auto); + public: [[nodiscard]] auto match(cpp2::impl::in> str, auto const& start) const& -> decltype(auto); + public: [[nodiscard]] auto match(cpp2::impl::in> str, auto const& start, auto const& length) const& -> decltype(auto); + public: template [[nodiscard]] auto match(Iter const& start, Iter const& end) const& -> search_return; + +#line 872 "cpp2regex.h2" + public: [[nodiscard]] auto search(cpp2::impl::in> str) const& -> decltype(auto); + public: [[nodiscard]] auto search(cpp2::impl::in> str, auto const& start) const& -> decltype(auto); + public: [[nodiscard]] auto search(cpp2::impl::in> str, auto const& start, auto const& length) const& -> decltype(auto); + public: template [[nodiscard]] auto search(Iter const& start, Iter const& end) const& -> search_return; + +#line 882 "cpp2regex.h2" + private: template [[nodiscard]] auto search_with_context(context& ctx, Iter const& start) const& -> match_return; + +#line 907 "cpp2regex.h2" + public: [[nodiscard]] auto to_string() const& -> decltype(auto); + + // Helper functions + // + private: [[nodiscard]] static auto get_iter(cpp2::impl::in> str, auto const& pos) -> auto; + public: regular_expression() = default; + public: regular_expression(regular_expression const&) = delete; /* No 'that' constructor, suppress copy */ + public: auto operator=(regular_expression const&) -> void = delete; + + +#line 919 "cpp2regex.h2" +}; + +} +} +#endif + + +//=== Cpp2 function definitions ================================================= + +#line 1 "cpp2regex.h2" + +#line 20 
"cpp2regex.h2" +namespace cpp2 { + +namespace regex { + +template match_group::match_group(auto const& start_, auto const& end_, auto const& matched_) + : start{ start_ } + , end{ end_ } + , matched{ matched_ }{} +template match_group::match_group(){} +template match_return::match_return(auto const& matched_, auto const& pos_) + : matched{ matched_ } + , pos{ pos_ }{} +template match_return::match_return(){} + +#line 61 "cpp2regex.h2" + template match_context::match_context(Iter const& begin_, Iter const& end_) + : begin{ begin_ } + , end{ end_ }{ + +#line 64 "cpp2regex.h2" + } + +#line 66 "cpp2regex.h2" + template match_context::match_context(match_context const& that) + : begin{ that.begin } + , end{ that.end } + , groups{ that.groups }{} +#line 66 "cpp2regex.h2" + template auto match_context::operator=(match_context const& that) -> match_context& { + begin = that.begin; + end = that.end; + groups = that.groups; + return *this; } +#line 66 "cpp2regex.h2" + template match_context::match_context(match_context&& that) noexcept + : begin{ std::move(that).begin } + , end{ std::move(that).end } + , groups{ std::move(that).groups }{} +#line 66 "cpp2regex.h2" + template auto match_context::operator=(match_context&& that) noexcept -> match_context& { + begin = std::move(that).begin; + end = std::move(that).end; + groups = std::move(that).groups; + return *this; } + +#line 70 "cpp2regex.h2" + template [[nodiscard]] auto match_context::get_string_start() const& -> decltype(auto) { return begin; } +#line 71 "cpp2regex.h2" + template [[nodiscard]] auto match_context::get_string_end() const& -> decltype(auto) { return end; } + +#line 75 "cpp2regex.h2" + template [[nodiscard]] auto match_context::get_group(auto const& group) const& -> decltype(auto) { return CPP2_ASSERT_IN_BOUNDS(groups, group); } + +#line 77 "cpp2regex.h2" + template [[nodiscard]] auto match_context::get_group_end(auto const& group) const& -> int{ + if (cpp2::impl::cmp_greater_eq(group,max_groups) || 
!(CPP2_ASSERT_IN_BOUNDS(groups, group).matched)) { + return 0; + } + return cpp2::unchecked_narrow(std::distance(begin, CPP2_ASSERT_IN_BOUNDS(groups, group).end)); + } +#line 83 "cpp2regex.h2" + template [[nodiscard]] auto match_context::get_group_start(auto const& group) const& -> int{ + if (cpp2::impl::cmp_greater_eq(group,max_groups) || !(CPP2_ASSERT_IN_BOUNDS(groups, group).matched)) { + return 0; + } + return cpp2::unchecked_narrow(std::distance(begin, CPP2_ASSERT_IN_BOUNDS(groups, group).start)); + } +#line 89 "cpp2regex.h2" + template [[nodiscard]] auto match_context::get_group_string(auto const& group) const& -> std::string{ + if (cpp2::impl::cmp_greater_eq(group,max_groups) || !(CPP2_ASSERT_IN_BOUNDS(groups, group).matched)) { + return ""; + } + return std::string(CPP2_ASSERT_IN_BOUNDS(groups, group).start, CPP2_ASSERT_IN_BOUNDS(groups, group).end); + } + +#line 96 "cpp2regex.h2" + template auto match_context::set_group_end(auto const& group, auto const& pos) & -> void{ + CPP2_ASSERT_IN_BOUNDS(groups, group).end = pos; + CPP2_ASSERT_IN_BOUNDS(groups, group).matched = true; + } + +#line 101 "cpp2regex.h2" + template auto match_context::set_group_invalid(auto const& group) & -> void{ + CPP2_ASSERT_IN_BOUNDS(groups, group).matched = false; + } + +#line 105 "cpp2regex.h2" + template auto match_context::set_group_start(auto const& group, auto const& pos) & -> void{ + CPP2_ASSERT_IN_BOUNDS(groups, group).start = pos; + } + +#line 109 "cpp2regex.h2" + template [[nodiscard]] auto match_context::size() const& -> decltype(auto) { return max_groups; } + +#line 113 "cpp2regex.h2" + template [[nodiscard]] auto match_context::fail() const& -> decltype(auto) { return match_return(false, end); } +#line 114 "cpp2regex.h2" + template [[nodiscard]] auto match_context::pass(cpp2::impl::in cur) const& -> decltype(auto) { return match_return(true, cur); } + +#line 116 "cpp2regex.h2" + template auto match_context::reset() & -> void{ + for ( auto& g : groups ) { + g.matched = 
false; + } + } + +#line 133 "cpp2regex.h2" + template reverse_match_context::reverse_match_context(auto const& forward_context_) + : forward_context{ forward_context_ } + , begin{ std::make_reverse_iterator((*cpp2::impl::assert_not_null(forward_context)).end) } + , end{ std::make_reverse_iterator((*cpp2::impl::assert_not_null(forward_context)).begin) }{ + +#line 137 "cpp2regex.h2" + } +#line 133 "cpp2regex.h2" + template auto reverse_match_context::operator=(auto const& forward_context_) -> reverse_match_context& { + forward_context = forward_context_; + begin = std::make_reverse_iterator((*cpp2::impl::assert_not_null(forward_context)).end); + end = std::make_reverse_iterator((*cpp2::impl::assert_not_null(forward_context)).begin); + return *this; + +#line 137 "cpp2regex.h2" + } + +#line 139 "cpp2regex.h2" + template reverse_match_context::reverse_match_context(reverse_match_context const& that) + : forward_context{ that.forward_context } + , begin{ that.begin } + , end{ that.end }{} +#line 139 "cpp2regex.h2" + template auto reverse_match_context::operator=(reverse_match_context const& that) -> reverse_match_context& { + forward_context = that.forward_context; + begin = that.begin; + end = that.end; + return *this; } +#line 139 "cpp2regex.h2" + template reverse_match_context::reverse_match_context(reverse_match_context&& that) noexcept + : forward_context{ std::move(that).forward_context } + , begin{ std::move(that).begin } + , end{ std::move(that).end }{} +#line 139 "cpp2regex.h2" + template auto reverse_match_context::operator=(reverse_match_context&& that) noexcept -> reverse_match_context& { + forward_context = std::move(that).forward_context; + begin = std::move(that).begin; + end = std::move(that).end; + return *this; } + +#line 143 "cpp2regex.h2" + template [[nodiscard]] auto reverse_match_context::get_string_start() const& -> decltype(auto) { return end; } +#line 144 "cpp2regex.h2" + template [[nodiscard]] auto reverse_match_context::get_string_end() const& 
-> decltype(auto) { return begin; } + +#line 148 "cpp2regex.h2" + template auto reverse_match_context::set_group_end(auto const& group, auto const& pos) & -> void{ + (*cpp2::impl::assert_not_null(forward_context)).set_group_end(group, CPP2_UFCS(base)((pos))); + } + +#line 152 "cpp2regex.h2" + template auto reverse_match_context::set_group_invalid(auto const& group) & -> void{ + (*cpp2::impl::assert_not_null(forward_context)).set_group_invalid(group); + } + +#line 156 "cpp2regex.h2" + template auto reverse_match_context::set_group_start(auto const& group, auto const& pos) & -> void{ + (*cpp2::impl::assert_not_null(forward_context)).set_group_start(group, CPP2_UFCS(base)((pos))); + } + +#line 162 "cpp2regex.h2" + template [[nodiscard]] auto reverse_match_context::fail() const& -> decltype(auto) { return match_return(false, end); } +#line 163 "cpp2regex.h2" + template [[nodiscard]] auto reverse_match_context::pass(cpp2::impl::in cur) const& -> decltype(auto) { return match_return(true, cur); } + +#line 168 "cpp2regex.h2" +template [[nodiscard]] auto make_forward_match_context(match_context& ctx) -> decltype(auto){ + return ctx; +} + +#line 172 "cpp2regex.h2" +template [[nodiscard]] auto make_forward_match_context(reverse_match_context& ctx) -> decltype(auto){ + return *cpp2::impl::assert_not_null(ctx.forward_context); +} + +#line 176 "cpp2regex.h2" +template [[nodiscard]] auto make_reverse_match_context(match_context& ctx) -> auto{ + return reverse_match_context(&ctx); +} + +#line 180 "cpp2regex.h2" +template [[nodiscard]] auto make_reverse_match_context(reverse_match_context& ctx) -> decltype(auto){ + return ctx; +} + +#line 186 "cpp2regex.h2" +template [[nodiscard]] auto make_forward_iterator(Iter const& pos) -> auto { return pos; } +#line 187 "cpp2regex.h2" +template [[nodiscard]] auto make_forward_iterator(std::reverse_iterator const& pos) -> auto { return CPP2_UFCS(base)(pos); } +#line 188 "cpp2regex.h2" +template [[nodiscard]] auto make_reverse_iterator(Iter 
const& pos) -> auto { return std::make_reverse_iterator(pos); } +#line 189 "cpp2regex.h2" +template [[nodiscard]] auto make_reverse_iterator(std::reverse_iterator const& pos) -> auto { return pos; } + +#line 196 "cpp2regex.h2" + [[nodiscard]] auto true_end_func::operator()(auto const& cur, auto& ctx) const& -> decltype(auto) { return ctx.pass(cur); } + +#line 204 "cpp2regex.h2" + auto no_reset::operator()([[maybe_unused]] auto& unnamed_param_2) const& -> void{} + +#line 213 "cpp2regex.h2" + template on_return::on_return(Func const& f) + : func{ f }{ + +#line 215 "cpp2regex.h2" + } +#line 213 "cpp2regex.h2" + template auto on_return::operator=(Func const& f) -> on_return& { + func = f; + return *this; + +#line 215 "cpp2regex.h2" + } + +#line 217 "cpp2regex.h2" + template on_return::~on_return() noexcept{ + cpp2::move(*this).func(); + } + +#line 224 "cpp2regex.h2" +template [[nodiscard]] auto make_on_return(Func const& func) -> decltype(auto) { return on_return(func); } + +#line 238 "cpp2regex.h2" + template [[nodiscard]] auto single_class_entry::includes(cpp2::impl::in c) -> decltype(auto) { return c == C; } +#line 239 "cpp2regex.h2" + template [[nodiscard]] auto single_class_entry::to_string() -> decltype(auto) { return bstring(1, C); } + +#line 247 "cpp2regex.h2" + template [[nodiscard]] auto range_class_entry::includes(cpp2::impl::in c) -> decltype(auto) { return [_0 = Start, _1 = c, _2 = End]{ return cpp2::impl::cmp_less_eq(_0,_1) && cpp2::impl::cmp_less_eq(_1,_2); }(); } +#line 248 "cpp2regex.h2" + template [[nodiscard]] auto range_class_entry::to_string() -> decltype(auto) { return "" + cpp2::to_string(Start) + "-" + cpp2::to_string(End) + ""; } + +#line 256 "cpp2regex.h2" + template [[nodiscard]] auto combined_class_entry::includes(cpp2::impl::in c) -> decltype(auto) { return (false || ... || List::includes(c)); } +#line 257 "cpp2regex.h2" + template [[nodiscard]] auto combined_class_entry::to_string() -> decltype(auto) { return (bstring() + ... 
+ List::to_string()); } + +#line 265 "cpp2regex.h2" + template [[nodiscard]] auto list_class_entry::includes(cpp2::impl::in c) -> decltype(auto) { return (false || ... || (List == c)); } +#line 266 "cpp2regex.h2" + template [[nodiscard]] auto list_class_entry::to_string() -> decltype(auto) { return (bstring() + ... + List); } + +#line 274 "cpp2regex.h2" + template [[nodiscard]] auto named_class_entry::includes(cpp2::impl::in c) -> decltype(auto) { return Inner::includes(c); } +#line 275 "cpp2regex.h2" + template [[nodiscard]] auto named_class_entry::to_string() -> decltype(auto) { return "[:" + cpp2::to_string(Name.data()) + ":]"; } + +#line 282 "cpp2regex.h2" + template [[nodiscard]] auto negated_class_entry::includes(cpp2::impl::in c) -> decltype(auto) { return !(Inner::includes(c)); } + +#line 290 "cpp2regex.h2" + template [[nodiscard]] auto shorthand_class_entry::includes(cpp2::impl::in c) -> decltype(auto) { return Inner::includes(c); } +#line 291 "cpp2regex.h2" + template [[nodiscard]] auto shorthand_class_entry::to_string() -> decltype(auto) { return Name.str(); } + +#line 338 "cpp2regex.h2" + template [[nodiscard]] auto alternative_token_matcher::match(auto const& cur, auto& ctx, auto const& end_func, auto const& tail, auto const& ...functions) -> auto{ + return match_first(cur, ctx, end_func, tail, functions...); + } + +#line 342 "cpp2regex.h2" + template template [[nodiscard]] auto alternative_token_matcher::match_first(auto const& cur, auto& ctx, auto const& end_func, auto const& tail, auto const& cur_func, auto const& cur_reset, Other const& ...other) -> auto + { + auto inner_call {[_0 = (tail), _1 = (end_func)](auto const& tail_cur, auto& tail_ctx) -> auto{ + return _0(tail_cur, tail_ctx, _1); + }}; + auto r {cur_func(cur, ctx, cpp2::move(inner_call))}; + if (r.matched) { + return r; + }else { + cur_reset(ctx); + + if constexpr (0 != sizeof...(Other)) { + return match_first(cur, ctx, end_func, tail, other...); + }else { + return ctx.fail(); + } + } + } 
+ +#line 365 "cpp2regex.h2" +template [[nodiscard]] auto any_token_matcher(auto& cur, auto& ctx) -> bool +{ + if ( cur != ctx.end // any char except the end + && (single_line || *cpp2::impl::assert_not_null(cur) != '\n')) // do not match new lines in multi line mode + { + cur += 1; + return true; + } + // Else + return false; +} + +#line 379 "cpp2regex.h2" +template [[nodiscard]] auto atomic_group_matcher(Iter const& cur, auto& ctx, auto const& inner, auto const& end_func, auto const& other) -> auto +{ + auto r {inner(cur, ctx, true_end_func())}; + + if (r.matched) { + r = other(r.pos, ctx, end_func); + } + + return r; +} + +#line 429 "cpp2regex.h2" + template [[nodiscard]] auto class_token_matcher::match(auto& cur, auto& ctx) -> bool + { + if constexpr (case_insensitive) + { + if ( cur != ctx.end + && negate != ( + match_any(string_util::safe_tolower(*cpp2::impl::assert_not_null(cur))) + || match_any(string_util::safe_toupper(*cpp2::impl::assert_not_null(cur))))) + + { + cur += 1; + return true; + } + else { + return false; + } + } + else + { + if (cur != ctx.end && negate != match_any(*cpp2::impl::assert_not_null(cur))) { + cur += 1; + return true; + } + else { + return false; + } + } + } + +#line 458 "cpp2regex.h2" + template template [[nodiscard]] auto class_token_matcher::match_any(cpp2::impl::in c) -> bool + { + bool r {First::includes(c)}; + + if (!(r)) { + if constexpr (0 != sizeof...(Other)) { + r = match_any(c); + } + } + + return r; + } + +#line 507 "cpp2regex.h2" +template [[nodiscard]] auto group_ref_token_matcher(auto& cur, auto& ctx) -> bool +{ + auto g {ctx.get_group(group)}; + + auto group_pos {g.start}; + auto group_end {g.end}; + int group_inc {1}; + if (reverse) { + group_pos = g.end; + group_end = cpp2::move(g).start; + group_inc = -1; + } + + for( ; + group_pos != group_end + && cur != ctx.end; + (group_pos += group_inc, ++cur) ) + { + if constexpr (case_insensitive) { + if (string_util::safe_tolower(*cpp2::impl::assert_not_null(group_pos)) != 
string_util::safe_tolower(*cpp2::impl::assert_not_null(cur))) { + return false; + } + } + else { + if (*cpp2::impl::assert_not_null(group_pos) != *cpp2::impl::assert_not_null(cur)) { + return false; + } + } + } + + if (cpp2::move(group_pos) == cpp2::move(group_end)) { + return true; + } + else { + return false; + } +} + +#line 548 "cpp2regex.h2" +template [[nodiscard]] auto line_end_token_matcher(auto const& cur, auto& ctx) -> bool +{ + if (cur == CPP2_UFCS(get_string_end)(ctx) || (match_new_line && *cpp2::impl::assert_not_null(cur) == '\n')) { + return true; + } + else {if (match_new_line_before_end && (*cpp2::impl::assert_not_null(cur) == '\n' && (cur + 1) == CPP2_UFCS(get_string_end)(ctx))) {// Special case for new line at end. + return true; + } + else { + return false; + }} +} + +#line 564 "cpp2regex.h2" +template [[nodiscard]] auto line_start_token_matcher(auto const& cur, auto& ctx) -> bool +{ + return cur == CPP2_UFCS(get_string_start)(ctx) || // Start of string + (match_new_line && *cpp2::impl::assert_not_null((cur - 1)) == '\n'); // Start of new line +} + +#line 575 "cpp2regex.h2" +template [[nodiscard]] auto lookahead_token_matcher(auto const& cur, auto& ctx, auto const& func) -> bool +{ + auto r {func(make_forward_iterator(cur), make_forward_match_context(ctx), true_end_func())}; + if (!(positive)) { + r.matched = !(r.matched); + } + + return cpp2::move(r).matched; +} + +#line 589 "cpp2regex.h2" +template [[nodiscard]] auto lookbehind_token_matcher(auto const& cur, auto& ctx, auto const& func) -> bool +{ + auto r {func(make_reverse_iterator(cur), make_reverse_match_context(ctx), true_end_func())}; + if (!(positive)) { + r.matched = !(r.matched); + } + + return cpp2::move(r).matched; +} + +#line 605 "cpp2regex.h2" + inline CPP2_CONSTEXPR int range_flags::not_greedy{ 1 }; + inline CPP2_CONSTEXPR int range_flags::greedy{ 2 }; + inline CPP2_CONSTEXPR int range_flags::possessive{ 3 }; + +#line 616 "cpp2regex.h2" + template template [[nodiscard]] auto 
range_token_matcher::match(Iter const& cur, auto& ctx, auto const& inner, auto const& reset_func, auto const& end_func, auto const& tail) -> auto + { + if (range_flags::possessive == kind) { + return match_possessive(cur, ctx, inner, end_func, tail); + } + else {if (range_flags::greedy == kind) { + return match_greedy(0, cur, ctx.end, ctx, inner, reset_func, end_func, tail); + } + else { // range_flags::not_greedy == kind + return match_not_greedy(cur, ctx, inner, end_func, tail); + }} + } + +#line 629 "cpp2regex.h2" + template [[nodiscard]] auto range_token_matcher::is_below_upper_bound(cpp2::impl::in count) -> bool{ + if (-1 == max_count) {return true; } + else {return cpp2::impl::cmp_less(count,max_count); } + } + +#line 634 "cpp2regex.h2" + template [[nodiscard]] auto range_token_matcher::is_below_lower_bound(cpp2::impl::in count) -> bool{ + if (-1 == min_count) {return false; } + else {return cpp2::impl::cmp_less(count,min_count); } + } + +#line 639 "cpp2regex.h2" + template [[nodiscard]] auto range_token_matcher::is_in_range(cpp2::impl::in count) -> bool{ + if (-1 != min_count && cpp2::impl::cmp_less(count,min_count)) {return false; } + if (-1 != max_count && cpp2::impl::cmp_greater(count,max_count)) {return false; } + return true; + } + +#line 645 "cpp2regex.h2" + template template [[nodiscard]] auto range_token_matcher::match_min_count(Iter const& cur, auto& ctx, auto const& inner, auto const& end_func, int& count_r) -> auto + { // TODO: count_r as out parameter introduces a performance loss. 
+ auto res {ctx.pass(cur)}; + auto count {0}; + + while( is_below_lower_bound(count) && res.matched ) { + res = inner(res.pos, ctx, end_func); + if (res.matched) { + count += 1; + } + } + + count_r = cpp2::move(count); + return res; + } + +#line 661 "cpp2regex.h2" + template template [[nodiscard]] auto range_token_matcher::match_greedy(cpp2::impl::in count, Iter const& cur, Iter const& last_valid, auto& ctx, auto const& inner, auto const& reset_func, auto const& end_func, auto const& other) -> match_return + { + auto inner_call {[_0 = (count + 1), _1 = (cur), _2 = (inner), _3 = (reset_func), _4 = (end_func), _5 = (other)](auto const& tail_cur, auto& tail_ctx) -> auto{ + return match_greedy(_0, tail_cur, _1, tail_ctx, _2, _3, _4, _5); + }}; + auto is_m_valid {true}; + auto r {ctx.fail()}; + if (is_below_upper_bound(count) && (is_below_lower_bound(count) || cur != last_valid)) { + is_m_valid = false; // Group ranges in M are invalidated through the call. + r = inner(cur, ctx, cpp2::move(inner_call)); + } + + if (!(r.matched) && is_in_range(count)) + { + // The recursion did not yield a match try now the tail + r = other(cur, ctx, end_func); + + if (r.matched && !(cpp2::move(is_m_valid))) { + // We have a match rematch M if required + reset_func(ctx); + + if (cpp2::impl::cmp_greater(count,0)) { + static_cast(inner(last_valid, ctx, true_end_func())); + } + } + } + + return r; + } + +#line 691 "cpp2regex.h2" + template template [[nodiscard]] auto range_token_matcher::match_possessive(Iter const& cur, auto& ctx, auto const& inner, auto const& end_func, auto const& other) -> match_return + { + auto count {0}; + auto r {match_min_count(cur, ctx, inner, end_func, count)}; + + if (!(r.matched)) { + return r; + } + + auto pos {r.pos}; + while( + r.matched + && is_below_upper_bound(count) ) + { + r = inner(pos, ctx, true_end_func()); + + if (pos == r.pos) { + break; // Break infinite loop. 
+ } + if (r.matched) { + count += 1; + pos = r.pos; + } + } + + return other(cpp2::move(pos), ctx, end_func); + } + +#line 719 "cpp2regex.h2" + template template [[nodiscard]] auto range_token_matcher::match_not_greedy(Iter const& cur, auto& ctx, auto const& inner, auto const& end_func, auto const& other) -> match_return + { + auto count {0}; + auto start {match_min_count(cur, ctx, inner, end_func, count)}; + if (!(start.matched)) { + return start; + } + + auto pos {cpp2::move(start).pos}; + while( is_below_upper_bound(count) ) + { + auto o {other(pos, ctx, end_func)}; + if (o.matched) { + return o; + } + + auto r {inner(pos, ctx, end_func)}; + if (!(r.matched)) { + return ctx.fail(); + } + count += 1; + pos = cpp2::move(r).pos; + } + + return other(cpp2::move(pos), ctx, end_func); // Upper bound reached. + } + +#line 752 "cpp2regex.h2" +template [[nodiscard]] auto word_boundary_token_matcher(auto& cur, auto& ctx) -> bool +{ + word_class words {}; + auto is_match {false}; + if (cur == ctx.begin) {// Iteration start + if (cur != ctx.end) {// No empty string + is_match = cpp2::move(words).includes(*cpp2::impl::assert_not_null(cur)); + } + } + else {if (cur == ctx.end) {// Iteration end + is_match = cpp2::move(words).includes(*cpp2::impl::assert_not_null((cur - 1))); + } + else { // Middle of iteration + is_match = + (words.includes(*cpp2::impl::assert_not_null((cur - 1))) && !(words.includes(*cpp2::impl::assert_not_null(cur)))) // End of word: \w\W + || (!(words.includes(*cpp2::impl::assert_not_null((cur - 1)))) && words.includes(*cpp2::impl::assert_not_null(cur)));// Start of word: \W\w + }} + if (negate) { + is_match = !(is_match); + } + + return is_match; +} + +#line 795 "cpp2regex.h2" + template template regular_expression::search_return::search_return(cpp2::impl::in matched_, context const& ctx_, Iter const& pos_) + : matched{ matched_ } + , ctx{ ctx_ } + , pos{ cpp2::unchecked_narrow(std::distance(ctx_.begin, pos_)) }{ + +#line 799 "cpp2regex.h2" + } + +#line 
801 "cpp2regex.h2" + template template regular_expression::search_return::search_return(Iter const& begin, Iter const& end) + : matched{ false } + , ctx{ begin, end } + , pos{ 0 }{ + +#line 805 "cpp2regex.h2" + } + +#line 807 "cpp2regex.h2" + template template [[nodiscard]] auto regular_expression::search_return::group_number() const& -> decltype(auto) { return ctx.size(); } +#line 808 "cpp2regex.h2" + template template [[nodiscard]] auto regular_expression::search_return::group(cpp2::impl::in g) const& -> decltype(auto) { return ctx.get_group_string(g); } +#line 809 "cpp2regex.h2" + template template [[nodiscard]] auto regular_expression::search_return::group_start(cpp2::impl::in g) const& -> decltype(auto) { return ctx.get_group_start(g); } +#line 810 "cpp2regex.h2" + template template [[nodiscard]] auto regular_expression::search_return::group_end(cpp2::impl::in g) const& -> decltype(auto) { return ctx.get_group_end(g); } + +#line 812 "cpp2regex.h2" + template template [[nodiscard]] auto regular_expression::search_return::group(cpp2::impl::in> g) const& -> decltype(auto) { return group(get_group_id(g)); } +#line 813 "cpp2regex.h2" + template template [[nodiscard]] auto regular_expression::search_return::group_start(cpp2::impl::in> g) const& -> decltype(auto) { return group_start(get_group_id(g)); } +#line 814 "cpp2regex.h2" + template template [[nodiscard]] auto regular_expression::search_return::group_end(cpp2::impl::in> g) const& -> decltype(auto) { return group_end(get_group_id(g)); } + +#line 816 "cpp2regex.h2" + template template auto regular_expression::search_return::update(cpp2::impl::in> r) & -> void{ + matched = r.matched; + pos = cpp2::unchecked_narrow(std::distance(ctx.begin, r.pos)); + } + +#line 821 "cpp2regex.h2" + template template [[nodiscard]] auto regular_expression::search_return::get_group_id(cpp2::impl::in> g) const& -> auto{ + auto group_id {matcher::get_named_group_index(g)}; + if (-1 == group_id) { + // TODO: Throw error. 
+ } + return group_id; + } + +#line 830 "cpp2regex.h2" + template [[nodiscard]] auto regular_expression::find_all(auto&& func, cpp2::impl::in> str) const& -> decltype(auto) { return find_all(CPP2_FORWARD(func), str.begin(), str.end()); } +#line 831 "cpp2regex.h2" + template [[nodiscard]] auto regular_expression::find_all(auto&& func, cpp2::impl::in> str, cpp2::impl::in start) const& -> decltype(auto) { return find_all(CPP2_FORWARD(func), get_iter(str, start), str.end()); } +#line 832 "cpp2regex.h2" + template [[nodiscard]] auto regular_expression::find_all(auto&& func, cpp2::impl::in> str, cpp2::impl::in start, cpp2::impl::in length) const& -> decltype(auto) { return find_all(CPP2_FORWARD(func), get_iter(str, start), get_iter(str, start + length)); } +#line 833 "cpp2regex.h2" + template template auto regular_expression::find_all(auto const& func, Iter const& start, Iter const& end) const& -> void + { + auto sr {search_return(start, end)}; + auto cont {true}; + auto cur {start}; + + while( cont ) { + auto r {search_with_context(sr.ctx, cur)}; + cont = r.matched; // First update the continue so that the user can override it. + + if (r.matched) { + sr.update(cpp2::move(r)); + cont = func(sr); + } + + if (cont) { + // Prevent infinity loop for zero length match. 
+ cont = 0 != sr.ctx.get_group_end(0) - sr.ctx.get_group_start(0); + } + + // Check now if to continue, user may override + if (cont) { + cur = start + sr.ctx.get_group_end(0); + CPP2_UFCS(reset)(sr.ctx); + } + } + } + +#line 861 "cpp2regex.h2" + template [[nodiscard]] auto regular_expression::match(cpp2::impl::in> str) const& -> decltype(auto) { return match(str.begin(), str.end()); } +#line 862 "cpp2regex.h2" + template [[nodiscard]] auto regular_expression::match(cpp2::impl::in> str, auto const& start) const& -> decltype(auto) { return match(get_iter(str, start), str.end()); } +#line 863 "cpp2regex.h2" + template [[nodiscard]] auto regular_expression::match(cpp2::impl::in> str, auto const& start, auto const& length) const& -> decltype(auto) { return match(get_iter(str, start), get_iter(str, start + length)); } +#line 864 "cpp2regex.h2" + template template [[nodiscard]] auto regular_expression::match(Iter const& start, Iter const& end) const& -> search_return + { + context ctx {start, end}; + + auto r {matcher::entry(start, ctx)}; + return search_return(r.matched && r.pos == end, cpp2::move(ctx), r.pos); + } + +#line 872 "cpp2regex.h2" + template [[nodiscard]] auto regular_expression::search(cpp2::impl::in> str) const& -> decltype(auto) { return search(str.begin(), str.end()); } +#line 873 "cpp2regex.h2" + template [[nodiscard]] auto regular_expression::search(cpp2::impl::in> str, auto const& start) const& -> decltype(auto) { return search(get_iter(str, start), str.end()); } +#line 874 "cpp2regex.h2" + template [[nodiscard]] auto regular_expression::search(cpp2::impl::in> str, auto const& start, auto const& length) const& -> decltype(auto) { return search(get_iter(str, start), get_iter(str, start + length)); } +#line 875 "cpp2regex.h2" + template template [[nodiscard]] auto regular_expression::search(Iter const& start, Iter const& end) const& -> search_return + { + context ctx {start, end}; + auto r {search_with_context(ctx, start)}; + return 
search_return(r.matched, cpp2::move(ctx), cpp2::move(r).pos); + } + +#line 882 "cpp2regex.h2" + template template [[nodiscard]] auto regular_expression::search_with_context(context& ctx, Iter const& start) const& -> match_return + { + auto r {ctx.fail()}; + + auto cur {start}; + for( ; true; (++cur) ) { + r = matcher::entry(cur, ctx); + if (r.matched) { + break; + } + + if (cur == ctx.end) { + break; + } + + if (matcher::is_start_match()) { + break; // Always break with \G option. + } + } + + return r; + } + +#line 907 "cpp2regex.h2" + template [[nodiscard]] auto regular_expression::to_string() const& -> decltype(auto) { return matcher::to_string(); } + +#line 911 "cpp2regex.h2" + template [[nodiscard]] auto regular_expression::get_iter(cpp2::impl::in> str, auto const& pos) -> auto{ + if (cpp2::impl::cmp_less(pos,str.size())) { + return str.begin() + pos; + } + else { + return str.end(); + } + } + +#line 921 "cpp2regex.h2" +} +} + +#endif diff --git a/include/cpp2regex.h2 b/include/cpp2regex.h2 new file mode 100644 index 0000000000..763c5638be --- /dev/null +++ b/include/cpp2regex.h2 @@ -0,0 +1,923 @@ + +// Copyright 2022-2025 Herb Sutter +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Part of the Cppfront Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://github.com/hsutter/cppfront/blob/main/LICENSE for license information. + + +//=========================================================================== +// Regex support +//=========================================================================== + +#ifndef CPP2_CPP2REGEX_H +#define CPP2_CPP2REGEX_H + + +template +using matcher_context_type = typename matcher::template context; + +cpp2: namespace = { + +regex: namespace = { + +bstring: type == std::basic_string; +bview : type == std::basic_string_view; + +//----------------------------------------------------------------------- +// +// Helper structures for the expression matching. 
+// +//----------------------------------------------------------------------- +// + +// Structure for storing group information. +// +match_group: @struct type = +{ + start: Iter = (); + end: Iter = (); + + matched: bool = false; +} + +// Return value for every matcher. +// +match_return: @struct type = +{ + matched: bool = false; + pos: Iter = (); +} + +// Modifiable state during matching. +// +match_context: type = +{ + public begin : Iter; + public end : Iter; + + private groups: std::array, max_groups> = (); + + operator=: (out this, begin_: Iter, end_: Iter) = { + begin = begin_; + end = end_; + } + + operator=: (out this, that) = {} + + // String end and start positions + // + get_string_start: (in this) = begin; + get_string_end: (in this) = end; + + // Getter and setter for groups + // + get_group: (in this, group) = groups[group]; + + get_group_end: (in this, group) -> int = { + if group >= max_groups || !groups[group].matched { + return 0; + } + return cpp2::unchecked_narrow( std::distance(begin, groups[group].end) ); + } + get_group_start: (in this, group) -> int = { + if group >= max_groups || !groups[group].matched { + return 0; + } + return cpp2::unchecked_narrow( std::distance(begin, groups[group].start) ); + } + get_group_string: (in this, group) -> std::string = { + if group >= max_groups || !groups[group].matched { + return ""; + } + return std::string(groups[group].start, groups[group].end); + } + + set_group_end: (inout this, group, pos) = { + groups[group].end = pos; + groups[group].matched = true; + } + + set_group_invalid: (inout this, group) = { + groups[group].matched = false; + } + + set_group_start: (inout this, group, pos) = { + groups[group].start = pos; + } + + size: (in this) = max_groups; + + // Misc functions + // + fail: (in this) = match_return(false, end); + pass: (in this, cur: Iter) = match_return(true, cur); + + reset: (inout this) = { + for groups do (inout g) { + g.matched = false; + } + } +} + +// Wrapper of context for 
reverse matches. Implements only the minimal interface for matching. +// +reverse_match_context: type = +{ + public ReverseIter : type == std::reverse_iterator; + public forward_context : *match_context; + + public begin : ReverseIter; + public end : ReverseIter; + + operator=: (out this, forward_context_) = { + forward_context = forward_context_; + begin = std::make_reverse_iterator(forward_context*.end); + end = std::make_reverse_iterator(forward_context*.begin); + } + + operator=: (out this, that) = {} + + // String end and start positions + // + get_string_start: (in this) = end; + get_string_end: (in this) = begin; + + // Getter and setter for groups + // + set_group_end: (inout this, group, pos) = { + forward_context*..set_group_end(group, (pos).base()); + } + + set_group_invalid: (inout this, group) = { + forward_context*..set_group_invalid(group); + } + + set_group_start: (inout this, group, pos) = { + forward_context*..set_group_start(group, (pos).base()); + } + + // Misc functions + // + fail: (in this) = match_return(false, end); + pass: (in this, cur: ReverseIter) = match_return(true, cur); +} + +// Helpers for creating wrappers of the match context. +// +make_forward_match_context: (inout ctx: match_context) -> forward _ = { + return ctx; +} + +make_forward_match_context: (inout ctx: reverse_match_context) -> forward _ = { + return ctx.forward_context*; +} + +make_reverse_match_context: (inout ctx: match_context) -> _ = { + return reverse_match_context(ctx&); +} + +make_reverse_match_context: (inout ctx: reverse_match_context) -> forward _ = { + return ctx; +} + +// Helpers for creating wrappers of the iterators. +// +make_forward_iterator: (pos: Iter) -> _ = pos; +make_forward_iterator: (pos: std::reverse_iterator) -> _ = pos.base(); +make_reverse_iterator: (pos: Iter) -> _ = std::make_reverse_iterator(pos); +make_reverse_iterator: (pos: std::reverse_iterator) -> _ = pos; + + +// End function that returns a valid match. 
+// +true_end_func: @struct type = +{ + operator(): (in this, cur, inout ctx) = ctx..pass(cur); +} + + +// Empty group reset function. +// +no_reset: @struct type = +{ + operator(): (this, inout _:) = {} +} + + +// Evaluate func on destruction of the handle. +on_return: type = +{ + func: Func; + + operator=: (out this, f: Func) = { + func = f; + } + + operator=: (move this) = { + func(); + } +} + + +// Helper for auto deduction of the Func type. +make_on_return: (func: Func) = on_return(func); + + +//----------------------------------------------------------------------- +// +// Character classes for regular expressions. +// +//----------------------------------------------------------------------- +// + +// Class syntax: Example: a +// +single_class_entry: type = +{ + includes : (c: CharT) = c == C; + to_string: () = bstring(1, C); +} + + +// Class syntax: - Example: a-c +// +range_class_entry: type = +{ + includes : (c: CharT) = Start <= c <= End; + to_string: () = "(Start)$-(End)$"; +} + + +// Helper for combining two character classes +// +combined_class_entry: type = +{ + includes : (c: CharT) = (false || ... || List::includes(c)); + to_string: () = (bstring() + ... + List::to_string()); +} + + +// Class syntax: Example: abcd +// +list_class_entry: type = +{ + includes : (c: CharT) = (false || ... || (List == c)); + to_string: () = (bstring() + ... 
+ List); +} + + +// Class syntax: [: type = +{ + includes : (c: CharT) = Inner::includes(c); + to_string: () = "[:(Name..data())$:]"; +} + + +negated_class_entry: type = +{ + this : Inner = (); + includes: (c: CharT) = !Inner::includes(c); +} + + +// Short class syntax: \ Example: \w +// +shorthand_class_entry: type = +{ + includes : (c: CharT) = Inner::includes(c); + to_string: () = Name..str(); +} + + +// Named basic character classes +// +digits_class : type == named_class_entry>; +lower_class : type == named_class_entry>; +upper_class : type == named_class_entry>; + +// Named other classes +// +alnum_class : type == named_class_entry, upper_class, digits_class>>; +alpha_class : type == named_class_entry, upper_class>>; +ascii_class : type == named_class_entry>; +blank_class : type == named_class_entry>; +cntrl_class : type == named_class_entry, single_class_entry>>; +graph_class : type == named_class_entry>; +hor_space_class : type == named_class_entry>; +print_class : type == named_class_entry>; +punct_class : type == named_class_entry','?','@','[','\\',']','^','_','`','{','|','}','~',']'>>; +space_class : type == named_class_entry>; +ver_space_class : type == named_class_entry>; +word_class : type == named_class_entry, single_class_entry>>; +xdigit_class : type == named_class_entry, range_class_entry, digits_class>>; + +// Shorthand class entries +// +short_digits_class : type == shorthand_class_entry>; +short_hor_space_class : type == shorthand_class_entry>; +short_space_class : type == shorthand_class_entry>; +short_vert_space_class : type == shorthand_class_entry>; +short_word_class : type == shorthand_class_entry>; + +short_not_digits_class : type == negated_class_entry>>; +short_not_hor_space_class : type == negated_class_entry>>; +short_not_space_class : type == negated_class_entry>>; +short_not_vert_space_class : type == negated_class_entry>>; +short_not_word_class : type == negated_class_entry>>; + + +// Regex syntax: | Example: ab|ba +// +// Non 
greedy implementation. First alternative that matches is chosen. +// +alternative_token_matcher: type = +{ + match: (cur, inout ctx, end_func, tail, functions ...) -> _ = { + return match_first(cur, ctx, end_func, tail, functions...); + } + + private match_first: (cur, inout ctx, end_func, tail, cur_func, cur_reset, other ...: Other) -> _ = + { + inner_call := :(tail_cur, inout tail_ctx) -> _ == { + return (tail)$(tail_cur, tail_ctx, (end_func)$); + }; + r := cur_func(cur, ctx, inner_call); + if r.matched { + return r; + } else { + cur_reset(ctx); + + if constexpr 0 != sizeof...(Other) { + return match_first(cur, ctx, end_func, tail, other...); + } else { + return ctx..fail(); + } + } + } +} + + +// Regex syntax: . +// +any_token_matcher: (inout cur, inout ctx) -> bool = +{ + if cur != ctx.end // any char except the end + && (single_line || cur* != '\n') // do not match new lines in multi line mode + { + cur += 1; + return true; + } + // Else + return false; +} + +// Regex syntax: (?>) Example: a(?>bc|c)c +// +atomic_group_matcher: (cur: Iter, inout ctx, inner, end_func, other) -> _ = +{ + r := inner(cur, ctx, true_end_func()); + + if r.matched { + r = other(r.pos, ctx, end_func); + } + + return r; +} + + +// TODO: Check if vectorization works at some point with this implementation. +// char_token_matcher: (inout cur, inout ctx) -> bool = { +// if !(std::distance(cur, ctx.end) < tokens..size()) { +// return false; +// } +// matched : bool = true; +// (copy i: int = 0) while i < tokens..size() next i += 1 { +// if tokens..data()[i] != cur[i] { +// matched = false; // No break for performance optimization. Without break, the loop vectorizes. 
+// } +// } +// if matched { +// cur += tokens..size(); +// } +// return matched; +// } + +// char_token_case_insensitive_matcher: (inout cur, inout ctx) -> bool = { +// if !(std::distance(cur, ctx.end) < lower..size()) { +// return false; +// } +// matched : bool = true; +// (copy i : int = 0) while i < lower..size() next i += 1 { +// if !(lower..data()[i] == cur[i] || upper..data()[i] == cur[i]) { +// matched = false; // No break for performance optimization. Without break, the loop vectorizes. +// } +// } +// if matched { +// cur += lower..size(); +// } +// return matched; +// } + + +// Regex syntax: [] Example: [abcx-y[:digits:]] +// +class_token_matcher: type = +{ + match: (inout cur, inout ctx) -> bool = + { + if constexpr case_insensitive + { + if cur != ctx.end + && negate != ( + match_any(string_util::safe_tolower(cur*)) + || match_any(string_util::safe_toupper(cur*)) + ) + { + cur += 1; + return true; + } + else { + return false; + } + } + else + { + if cur != ctx.end && negate != match_any(cur*) { + cur += 1; + return true; + } + else { + return false; + } + } + } + + private match_any: (c: CharT) -> bool = + { + r: bool = First::includes(c); + + if !r { + if constexpr 0 != sizeof...(Other) { + r = match_any(c); + } + } + + return r; + } + + // TODO: Implement proper to string + // to_string: () -> bstring = { + // r: bstring = "["; + // if negate { + // r += "^"; + // } + // r += (bstring() + ... 
+ List::to_string()); + // r += "]"; + + // return r; + // } +} + + +// Named short classes +// +named_class_no_new_line : type == class_token_matcher>; +named_class_digits : type == class_token_matcher>; +named_class_hor_space : type == class_token_matcher>; +named_class_space : type == class_token_matcher>; +named_class_ver_space : type == class_token_matcher>; +named_class_word : type == class_token_matcher>; + +named_class_not_digits : type == class_token_matcher>; +named_class_not_hor_space : type == class_token_matcher>; +named_class_not_space : type == class_token_matcher>; +named_class_not_ver_space : type == class_token_matcher>; +named_class_not_word : type == class_token_matcher>; + + +// Regex syntax: \ Example: \1 +// \g{name_or_number} +// \k{name_or_number} +// \k +// \k'name_or_number' +// +group_ref_token_matcher: (inout cur, inout ctx) -> bool = +{ + g := ctx..get_group(group); + + group_pos := g.start; + group_end := g.end; + group_inc : int = 1; + if reverse { + group_pos = g.end; + group_end = g.start; + group_inc = -1; + } + + while + group_pos != group_end + && cur != ctx.end + next (group_pos += group_inc, cur++) + { + if constexpr case_insensitive { + if string_util::safe_tolower(group_pos*) != string_util::safe_tolower(cur*) { + return false; + } + } + else { + if group_pos* != cur* { + return false; + } + } + } + + if group_pos == group_end { + return true; + } + else { + return false; + } +} + + +// Regex syntax: $ Example: aa$ +// +line_end_token_matcher: (cur, inout ctx) -> bool = +{ + if cur == ctx.get_string_end() || (match_new_line && cur* == '\n') { + return true; + } + else if match_new_line_before_end && (cur* == '\n' && (cur + 1) == ctx.get_string_end()) { // Special case for new line at end. 
+ return true; + } + else { + return false; + } +} + + +// Regex syntax: ^ Example: ^aa +// +line_start_token_matcher: (cur, inout ctx) -> bool = +{ + return cur == ctx.get_string_start() || // Start of string + (match_new_line && (cur - 1)* == '\n'); // Start of new line +} + + +// Regex syntax: (?=) or (?!) or (*pla), etc. Example: (?=AA) +// +// Parsed in group_token. +// +lookahead_token_matcher: (cur, inout ctx, func) -> bool = +{ + r := func(make_forward_iterator(cur), make_forward_match_context(ctx), true_end_func()); + if !positive { + r.matched = !r.matched; + } + + return r.matched; +} + +// Regex syntax: (?<=) or (? (cur, inout ctx, func) -> bool = +{ + r := func(make_reverse_iterator(cur), make_reverse_match_context(ctx), true_end_func()); + if !positive { + r.matched = !r.matched; + } + + return r.matched; +} + + +// TODO: @enum as template parameter currently not working. See issue https://github.com/hsutter/cppfront/issues/1147 + + +// Options for range matching. +range_flags: type = { + not_greedy: int == 1; // Try to take as few as possible. + greedy: int == 2; // Try to take as many as possible. + possessive: int == 3; // Do not give back after a greedy match. No backtracking. 
+} + + +// Regex syntax: {min, max} Example: a{2,4} +// +range_token_matcher: type = +{ + + match: (cur: Iter, inout ctx, inner, reset_func, end_func, tail) -> _ = + { + if range_flags::possessive == kind { + return match_possessive(cur, ctx, inner, end_func, tail); + } + else if range_flags::greedy == kind { + return match_greedy(0, cur, ctx.end, ctx, inner, reset_func, end_func, tail); + } + else { // range_flags::not_greedy == kind + return match_not_greedy(cur, ctx, inner, end_func, tail); + } + } + + private is_below_upper_bound: (count: int) -> bool = { + if -1 == max_count { return true; } + else { return count < max_count; } + } + + private is_below_lower_bound: (count: int) -> bool = { + if -1 == min_count { return false; } + else { return count < min_count; } + } + + private is_in_range: (count: int) -> bool = { + if -1 != min_count && count < min_count { return false; } + if -1 != max_count && count > max_count { return false; } + return true; + } + + private match_min_count: (cur: Iter, inout ctx, inner, end_func, inout count_r: int) -> _ = + { // TODO: count_r as out parameter introduces a performance loss. + res := ctx..pass(cur); + count := 0; + + while is_below_lower_bound(count) && res.matched { + res = inner(res.pos, ctx, end_func); + if res.matched { + count += 1; + } + } + + count_r = count; + return res; + } + + private match_greedy: (count: int, cur: Iter, last_valid: Iter, inout ctx, inner, reset_func, end_func, other) -> match_return = + { + inner_call := :(tail_cur, inout tail_ctx) -> _ == { + return match_greedy((count + 1)$, tail_cur, (cur)$, tail_ctx, (inner)$, (reset_func)$, (end_func)$, (other)$); + }; + is_m_valid := true; + r := ctx..fail(); + if is_below_upper_bound(count) && (is_below_lower_bound(count) || cur != last_valid) { + is_m_valid = false; // Group ranges in M are invalidated through the call. 
+ r = inner(cur, ctx, inner_call); + } + + if !r.matched && is_in_range(count) + { + // The recursion did not yield a match try now the tail + r = other(cur, ctx, end_func); + + if r.matched && !is_m_valid{ + // We have a match rematch M if required + reset_func(ctx); + + if count > 0 { + _ = inner(last_valid, ctx, true_end_func()); + } + } + } + + return r; + } + + private match_possessive: (cur: Iter, inout ctx, inner, end_func, other) -> match_return = + { + count :=0; + r := match_min_count(cur, ctx, inner, end_func, count); + + if !r.matched { + return r; + } + + pos := r.pos; + while + r.matched + && is_below_upper_bound(count) + { + r = inner(pos, ctx, true_end_func()); + + if pos == r.pos { + break; // Break infinite loop. + } + if r.matched { + count += 1; + pos = r.pos; + } + } + + return other(pos, ctx, end_func); + } + + private match_not_greedy: (cur: Iter, inout ctx, inner, end_func, other) -> match_return = + { + count := 0; + start := match_min_count(cur, ctx, inner, end_func, count); + if !start.matched { + return start; + } + + pos := start.pos; + while is_below_upper_bound(count) + { + o:= other(pos, ctx, end_func); + if o.matched { + return o; + } + + r:= inner(pos, ctx, end_func); + if !r.matched { + return ctx..fail(); + } + count += 1; + pos = r.pos; + } + + return other(pos, ctx, end_func); // Upper bound reached. + } +} + + +// Regex syntax: \b or \B Example: \bword\b +// +// Matches the start and end of word boundaries. 
+// +word_boundary_token_matcher: (inout cur, inout ctx) -> bool = +{ + words : word_class = (); + is_match := false; + if cur == ctx.begin { // Iteration start + if cur != ctx.end { // No empty string + is_match = words..includes(cur*); + } + } + else if cur == ctx.end { // Iteration end + is_match = words..includes((cur - 1)*); + } + else { // Middle of iteration + is_match = + (words..includes((cur - 1)*) && !words..includes(cur*)) // End of word: \w\W + || (!words..includes((cur - 1)*) && words..includes(cur*)); // Start of word: \W\w + } + if negate { + is_match = !is_match; + } + + return is_match; +} + + +//----------------------------------------------------------------------- +// +// Regular expression implementation. +// +//----------------------------------------------------------------------- +// + +// Regular expression implementation +regular_expression: type = +{ + context: type == matcher_context_type; // TODO: Remove when nested types are allowed: https://github.com/hsutter/cppfront/issues/727 + + search_return: type = + { + public matched: bool; + public ctx: context; + public pos: int; + + operator=:(out this, matched_: bool, ctx_: context, pos_: Iter) = { + matched = matched_; + ctx = ctx_; + pos = unchecked_narrow(std::distance(ctx_.begin, pos_)); + } + + operator=:(out this, begin: Iter, end: Iter) = { + matched = false; + ctx = (begin, end); + pos = 0; + } + + group_number: (this) = ctx..size(); + group: (this, g: int) = ctx..get_group_string(g); + group_start: (this, g: int) = ctx..get_group_start(g); + group_end: (this, g: int) = ctx..get_group_end(g); + + group: (this, g: bstring) = group(get_group_id(g)); + group_start: (this, g: bstring) = group_start(get_group_id(g)); + group_end: (this, g: bstring) = group_end(get_group_id(g)); + + update: (inout this, r: match_return) = { + matched = r.matched; + pos = unchecked_narrow(std::distance(ctx.begin, r.pos)); + } + + private get_group_id: (this, g: bstring) -> _ = { + group_id := 
matcher::get_named_group_index(g); + if -1 == group_id { + // TODO: Throw error. + } + return group_id; + } + } + + find_all: (in this, forward func, str: bview) = find_all(func, str..begin(), str..end()); + find_all: (in this, forward func, str: bview, start: int) = find_all(func, get_iter(str, start), str..end()); + find_all: (in this, forward func, str: bview, start: int, length : int) = find_all(func, get_iter(str, start), get_iter(str, start + length)); + find_all: (in this, func, start: Iter, end: Iter) = + { + sr := search_return(start, end); + cont := true; + cur := start; + + while cont { + r := search_with_context(sr.ctx, cur); + cont = r.matched; // First update the continue so that the user can override it. + + if r.matched { + sr..update(r); + cont = func(sr); + } + + if cont { + // Prevent an infinite loop for zero length match. + cont = 0 != sr.ctx..get_group_end(0) - sr.ctx..get_group_start(0); + } + + // Check now if to continue, user may override + if cont { + cur = start + sr.ctx..get_group_end(0); + sr.ctx.reset(); + } + } + } + + match: (in this, str: bview) = match(str..begin(), str..end()); + match: (in this, str: bview, start) = match(get_iter(str, start), str..end()); + match: (in this, str: bview, start, length) = match(get_iter(str, start), get_iter(str, start + length)); + match: (in this, start: Iter, end: Iter) -> search_return = + { + ctx: context = (start, end); + + r := matcher::entry(start, ctx); + return search_return(r.matched && r.pos == end, ctx, r.pos); + } + + search: (in this, str: bview) = search(str..begin(), str..end()); + search: (in this, str: bview, start) = search(get_iter(str, start), str..end()); + search: (in this, str: bview, start, length) = search(get_iter(str, start), get_iter(str, start + length)); + search: (in this, start: Iter, end: Iter) -> search_return = + { + ctx: context = (start, end); + r := search_with_context(ctx, start); + return search_return(r.matched, ctx, r.pos); + } + + private 
search_with_context: (in this, inout ctx: context, start: Iter) -> match_return = + { + r := ctx..fail(); + + cur:= start; + while true next (cur++) { + r = matcher::entry(cur, ctx); + if r.matched { + break; + } + + if cur == ctx.end { + break; + } + + if matcher::is_start_match() { + break; // Always break with \G option. + } + } + + return r; + } + + + + to_string: (in this) = matcher::to_string(); + + // Helper functions + // + private get_iter: (str: bview, pos) -> _ = { + if pos < str..size() { + return str..begin() + pos; + } + else { + return str..end(); + } + } +} + +} +} +#endif diff --git a/include/cpp2util.h b/include/cpp2util.h index bf942b1594..57ed3c6518 100644 --- a/include/cpp2util.h +++ b/include/cpp2util.h @@ -1,64 +1,69 @@ -// Copyright (c) Herb Sutter -// SPDX-License-Identifier: CC-BY-NC-ND-4.0 - -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. +// Copyright 2022-2025 Herb Sutter +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Part of the Cppfront Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://github.com/hsutter/cppfront/blob/main/LICENSE for license information. //=========================================================================== // Cpp2 utilities: // Language support implementations // #include'd by generated Cpp1 code + +// There are three kinds of entities in this file. 
+// +// 1) Entities in namespace cpp2:: itself, and documented at /cppfront/docs +// +// These are intended for programs to use directly, to the extent +// described in the documentation. Using any parts not described in the +// documentation is not supported. +// +// 2) Entities in namespace cpp2::impl::, and macros +// +// These should not be used by the program. They form the language +// support library intended to be called only from generated code. +// +// For example, if a Cpp2 function leaves a local variable +// uninitialized, cppfront will generate uses of impl::deferred_init<> +// under the covers and guarantee it is constructed exactly once, so +// the implementation here doesn't need to check for double construction +// because it can't happen; using the name impl::deferred_init directly +// from program code is not supported. +// +// 3) Entities in other subnamespaces, such as cpp2::string_util +// +// These are typically metafunction "runtime-library" functions, +// implementation details called by metafunction-generated code. +// For example, @regex generates code that uses string_util:: functions. +// //=========================================================================== -#ifndef CPP2_UTIL_H -#define CPP2_UTIL_H +#ifndef CPP2_CPP2UTIL_H +#define CPP2_CPP2UTIL_H // If this implementation doesn't support source_location yet, disable it #include -#if !defined(_MSC_VER) && !defined(__cpp_lib_source_location) - #undef CPP2_USE_SOURCE_LOCATION + +#undef CPP2_USE_SOURCE_LOCATION +#if defined(__cpp_lib_source_location) + #define CPP2_USE_SOURCE_LOCATION Yes #endif -// If the cppfront user requested making the entire C++ standard library -// available via module import or header include, do that +// If the user requested making the entire C++ standard library available +// via module import (incl. 
via -pure-cpp2) or header include, do that #if defined(CPP2_IMPORT_STD) || defined(CPP2_INCLUDE_STD) - // If C++23 'import std;' was requested and is available, use that + // If C++23 'import std;' was requested but isn't available, fall back + // to the 'include std' path #if defined(CPP2_IMPORT_STD) && defined(__cpp_lib_modules) - - #ifndef _MSC_VER - // This is the ideal -- note that we just voted "import std;" - // into draft C++23 in late July 2022, so implementers haven't - // had time to catch up yet - import std; - #else // MSVC - // Note: When C++23 "import std;" is available, we will switch to that here - // In the meantime, this is what works on MSVC which is the only compiler - // I've been able to get access to that implements modules enough to demo - // (but we'll have more full-C++20 compilers soon!) - #ifdef _MSC_VER - #include "intrin.h" - // Suppress spurious MSVC modules warning - #pragma warning(disable:5050) - #endif - import std.core; - import std.filesystem; - import std.memory; - import std.regex; - import std.threading; - #endif - - // Otherwise, as a fallback if 'import std;' was requested, or else - // because 'include all std' was requested, include all the standard - // headers, with a feature test #ifdef for each header that - // isn't yet supported by all of { VS 2022, g++-10, clang++-12 } + import std.compat; + #include + // If 'include std' was requested, include all standard headers. + // This list tracks the current draft standard, so as of this + // writing includes draft C++26 headers like . 
+ // Use a feature test #ifdef for each header that isn't supported + // by all of { VS 2022, g++-10, clang++-12 } #else #ifdef _MSC_VER #include "intrin.h" @@ -105,6 +110,9 @@ #endif #include #include + #ifdef __cpp_lib_debugging + #include + #endif #include #ifndef CPP2_NO_EXCEPTIONS #include @@ -137,7 +145,13 @@ #ifdef __cpp_lib_generator #include #endif + #ifdef __cpp_lib_hazard_pointer + #include + #endif #include + #ifdef __cpp_lib_inplace_vector + #include + #endif #include #include #include @@ -149,6 +163,9 @@ #include #endif #include + #ifdef __cpp_lib_linalg + #include + #endif #include #include #include @@ -172,6 +189,9 @@ #include #include #include + #ifdef __cpp_lib_rcu + #include + #endif #include #include #ifdef __cpp_lib_semaphore @@ -206,10 +226,13 @@ #include #include #include - #ifdef __cpp_lib_syncstream + #ifdef __cpp_lib_syncbuf #include #endif #include + #ifdef __cpp_lib_text_encoding + #include + #endif #include #include #include @@ -232,6 +255,7 @@ #endif #include #include + #include #include #include #include @@ -240,21 +264,30 @@ #ifndef CPP2_NO_EXCEPTIONS #include #endif + #ifdef __cpp_lib_expected + #include + #endif #if defined(__cpp_lib_format) || (defined(_MSC_VER) && _MSC_VER >= 1929) #include #endif #include #include + #include #include #include + #include #include + #include #include #include #include #if defined(CPP2_USE_SOURCE_LOCATION) #include #endif + #include + #include #include + #include #include #include #include @@ -268,25 +301,128 @@ #include #endif +// Required for pure Cpp2 tests to pass on MSVC +// #include causes C2995 of math templates +#ifndef EXIT_FAILURE + #define EXIT_FAILURE 1 +#endif + +// cpp2util.h uses signed integer types for indices and container sizes +// so disable clang signed-to-unsigned conversion warnings in this header. 
+#ifdef __clang__ + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wsign-conversion" +#endif +//----------------------------------------------------------------------- +// +// Macros +// +//----------------------------------------------------------------------- +// #define CPP2_TYPEOF(x) std::remove_cvref_t +#if __cplusplus >= 202302L && \ + ( \ + (defined(__clang_major__) && __clang_major__ >= 15) \ + || (defined(__GNUC__) && __GNUC__ >= 12) \ + ) +#define CPP2_COPY(x) auto(x) +#else +#define CPP2_COPY(x) CPP2_TYPEOF(x)(x) +#endif #define CPP2_FORWARD(x) std::forward(x) #define CPP2_PACK_EMPTY(x) (sizeof...(x) == 0) #define CPP2_CONTINUE_BREAK(NAME) goto CONTINUE_##NAME; CONTINUE_##NAME: continue; goto BREAK_##NAME; BREAK_##NAME: break; // these redundant goto's to avoid 'unused label' warnings +// Compiler version identification. +// +// This can be useful with 'if constexpr' to disable code known not to +// work on some otherwise-supported compilers (without macros), for example: +// +// // Disable tests on lower-level compilers that have blocking bugs +// [] () { if constexpr (V) { +// // ... tests that would fail due to older compilers' bugs ... +// }}(); +// +// Note: Test Clang first because it pretends to be other compilers. 
+// +#if defined(__clang_major__) + constexpr auto gcc_ver = 0; + constexpr auto clang_ver = __clang_major__ * 100 + __clang_minor__; + constexpr auto msvc_ver = 0; +#elif defined(_MSC_VER) + constexpr auto gcc_ver = 0; + constexpr auto clang_ver = 0; + constexpr auto msvc_ver = _MSC_VER; +#elif defined(__GNUC__) + constexpr auto gcc_ver = __GNUC__ * 100 + __GNUC_MINOR__; + constexpr auto clang_ver = 0; + constexpr auto msvc_ver = 0; +#endif -#if defined(_MSC_VER) - // MSVC can't handle 'inline constexpr' yet in all cases +constexpr auto gcc_clang_msvc_min_versions( + auto gcc, + auto clang, + auto msvc +) +{ + return gcc_ver >= gcc || clang_ver >= clang || msvc_ver >= msvc; +} + + +#if defined(_MSC_VER) && !defined(__clang_major__) + // MSVC can't handle 'inline constexpr' variables yet in all cases #define CPP2_CONSTEXPR const #else #define CPP2_CONSTEXPR constexpr #endif -namespace cpp2 { +// Workaround . +#define CPP2_FORCE_INLINE_LAMBDA_CLANG /* empty */ + +#if defined(_MSC_VER) && !defined(__clang_major__) + #define CPP2_FORCE_INLINE __forceinline + #define CPP2_FORCE_INLINE_LAMBDA [[msvc::forceinline]] + #define CPP2_LAMBDA_NO_DISCARD +#else + #define CPP2_FORCE_INLINE __attribute__((always_inline)) + #if defined(__clang__) + #define CPP2_FORCE_INLINE_LAMBDA /* empty */ + #undef CPP2_FORCE_INLINE_LAMBDA_CLANG + #define CPP2_FORCE_INLINE_LAMBDA_CLANG __attribute__((always_inline)) + #else + #define CPP2_FORCE_INLINE_LAMBDA __attribute__((always_inline)) + #endif + + #if defined(__clang_major__) + // Also check __cplusplus, only to satisfy Clang -pedantic-errors + #if __cplusplus >= 202302L && (__clang_major__ > 13 || (__clang_major__ == 13 && __clang_minor__ >= 2)) + #define CPP2_LAMBDA_NO_DISCARD [[nodiscard]] + #else + #define CPP2_LAMBDA_NO_DISCARD + #endif + #elif defined(__GNUC__) + #if __GNUC__ >= 9 + #define CPP2_LAMBDA_NO_DISCARD [[nodiscard]] + #else + #define CPP2_LAMBDA_NO_DISCARD + #endif + #if ((__GNUC__ * 100) + __GNUC_MINOR__) < 1003 + // GCC 
10.2 doesn't support this feature (10.3 is fine) + #undef CPP2_FORCE_INLINE_LAMBDA + #define CPP2_FORCE_INLINE_LAMBDA + #endif + #else + #define CPP2_LAMBDA_NO_DISCARD + #endif +#endif + +namespace cpp2 { + //----------------------------------------------------------------------- // // Convenience names for fundamental types @@ -325,43 +461,52 @@ using _uchar = unsigned char; // normally use u8 instead //----------------------------------------------------------------------- // -// General helpers +// An implementation of GSL's narrow_cast with a clearly 'unchecked' name // //----------------------------------------------------------------------- // +namespace impl { -inline constexpr auto max(auto... values) { - return std::max( { values... } ); -} - -template -inline constexpr auto is_any = std::disjunction_v...>; +template< typename To, typename From > +constexpr auto is_narrowing_v = + // [dcl.init.list] 7.1 + (std::is_floating_point_v && std::is_integral_v) || + // [dcl.init.list] 7.2 + (std::is_floating_point_v && std::is_floating_point_v && sizeof(From) > sizeof(To)) || // NOLINT(misc-redundant-expression) + // [dcl.init.list] 7.3 + (std::is_integral_v && std::is_floating_point_v) || + (std::is_enum_v && std::is_floating_point_v) || + // [dcl.init.list] 7.4 + (std::is_integral_v && std::is_integral_v && sizeof(From) > sizeof(To)) || // NOLINT(misc-redundant-expression) + (std::is_enum_v && std::is_integral_v && sizeof(From) > sizeof(To)) || + // [dcl.init.list] 7.5 + (std::is_pointer_v && std::is_same_v) + ; -template -struct aligned_storage { - alignas(Align) unsigned char data[Len]; -}; +} -//----------------------------------------------------------------------- -// -// String: A helper workaround for passing a string literal as a -// template argument -// -//----------------------------------------------------------------------- -// -template -struct String +template +constexpr auto unchecked_narrow( X x ) noexcept + -> decltype(auto) + requires ( + 
impl::is_narrowing_v + || ( + std::is_arithmetic_v + && std::is_arithmetic_v + ) + ) { - constexpr String(const char (&str)[N]) - { - std::copy_n(str, N, value); - } + return static_cast(x); +} - auto operator<=>(String const&) const = default; - char value[N] = {}; -}; +template +constexpr auto unchecked_cast( X&& x ) noexcept + -> decltype(auto) +{ + return static_cast(CPP2_FORWARD(x)); +} //----------------------------------------------------------------------- @@ -372,15 +517,17 @@ struct String // #ifdef CPP2_USE_SOURCE_LOCATION - #define CPP2_SOURCE_LOCATION_PARAM , std::source_location where - #define CPP2_SOURCE_LOCATION_PARAM_WITH_DEFAULT , std::source_location where = std::source_location::current() - #define CPP2_SOURCE_LOCATION_PARAM_SOLO std::source_location where + #define CPP2_SOURCE_LOCATION_PARAM , [[maybe_unused]] std::source_location where + #define CPP2_SOURCE_LOCATION_PARAM_WITH_DEFAULT , [[maybe_unused]] std::source_location where = std::source_location::current() + #define CPP2_SOURCE_LOCATION_PARAM_SOLO [[maybe_unused]] std::source_location where #define CPP2_SOURCE_LOCATION_ARG , where + #define CPP2_SOURCE_LOCATION_VALUE (cpp2::to_string(where.file_name()) + "(" + cpp2::to_string(where.line()) + ") " + where.function_name()) #else #define CPP2_SOURCE_LOCATION_PARAM #define CPP2_SOURCE_LOCATION_PARAM_WITH_DEFAULT #define CPP2_SOURCE_LOCATION_PARAM_SOLO #define CPP2_SOURCE_LOCATION_ARG + #define CPP2_SOURCE_LOCATION_VALUE std::string("") #endif // For C++23: make this std::string_view and drop the macro @@ -398,8 +545,7 @@ class contract_group { constexpr contract_group (handler h = {}) : reporter{h} { } constexpr auto set_handler(handler h = {}) { reporter = h; } - constexpr auto get_handler() const -> handler { return reporter; } - constexpr auto has_handler() const -> bool { return reporter != handler{}; } + constexpr auto is_active () const -> bool { return reporter != handler{}; } constexpr auto enforce(bool b, CPP2_MESSAGE_PARAM msg = 
"" CPP2_SOURCE_LOCATION_PARAM_WITH_DEFAULT) -> void { if (!b) report_violation(msg CPP2_SOURCE_LOCATION_ARG); } @@ -421,175 +567,832 @@ class contract_group { std::cerr << ": " << msg; } std::cerr << "\n"; - std::terminate(); + std::exit(EXIT_FAILURE); } -auto inline Default = contract_group( +auto inline cpp2_default = contract_group( [](CPP2_MESSAGE_PARAM msg CPP2_SOURCE_LOCATION_PARAM)noexcept { report_and_terminate("Contract", msg CPP2_SOURCE_LOCATION_ARG); } ); -auto inline Bounds = contract_group( +auto inline bounds_safety = contract_group( [](CPP2_MESSAGE_PARAM msg CPP2_SOURCE_LOCATION_PARAM)noexcept { report_and_terminate("Bounds safety", msg CPP2_SOURCE_LOCATION_ARG); } ); -auto inline Null = contract_group( +auto inline null_safety = contract_group( [](CPP2_MESSAGE_PARAM msg CPP2_SOURCE_LOCATION_PARAM)noexcept { report_and_terminate("Null safety", msg CPP2_SOURCE_LOCATION_ARG); } ); -auto inline Type = contract_group( +auto inline type_safety = contract_group( [](CPP2_MESSAGE_PARAM msg CPP2_SOURCE_LOCATION_PARAM)noexcept { report_and_terminate("Type safety", msg CPP2_SOURCE_LOCATION_ARG); } ); -auto inline Testing = contract_group( +auto inline testing = contract_group( [](CPP2_MESSAGE_PARAM msg CPP2_SOURCE_LOCATION_PARAM)noexcept { report_and_terminate("Testing", msg CPP2_SOURCE_LOCATION_ARG); } ); -// Null pointer deref checking +//----------------------------------------------------------------------- // -auto assert_not_null(auto&& p CPP2_SOURCE_LOCATION_PARAM_WITH_DEFAULT) -> decltype(auto) +// String utilities +// + +namespace string_util { + +// Break a string_view into a vector of views of simple qidentifier +// substrings separated by other characters +inline auto split_string_list(std::string_view str) + -> std::vector { - // NOTE: This "!= T{}" test may or may not work for STL iterators. The standard - // doesn't guarantee that using == and != will reliably report whether an - // STL iterator has the default-constructed value. 
So use it only for raw *... - if constexpr (std::is_pointer_v) { - if (p == CPP2_TYPEOF(p){}) { - Null.report_violation("dynamic null dereference attempt detected" CPP2_SOURCE_LOCATION_ARG); - }; + std::vector ret; + + auto is_id_char = [](char c) { + return std::isalnum(c) || c == '_'; + }; + + auto pos = decltype(std::ssize(str)){ 0 }; + while( pos < std::ssize(str) ) { + // Skip non-alnum + while (pos < std::ssize(str) && !is_id_char(str[pos])) { + ++pos; + } + auto start = pos; + + // Find the end of the current component + while (pos < std::ssize(str) && is_id_char(str[pos])) { + ++pos; + } + + // Add nonempty substring to the vector + if (start < pos) { + ret.emplace_back(str.substr(start, pos - start)); + } } - return CPP2_FORWARD(p); -} -// Subscript bounds checking -// -#define CPP2_ASSERT_IN_BOUNDS_IMPL \ - requires (std::is_integral_v && \ - requires { std::size(x); std::ssize(x); x[arg]; std::begin(x) + 2; }) \ -{ \ - auto max = [&]() -> auto { \ - if constexpr (std::is_signed_v) { return std::ssize(x); } \ - else { return std::size(x); } \ - }; \ - auto msg = "out of bounds access attempt detected - attempted access at index " + std::to_string(arg) + ", "; \ - if (max() > 0 ) { \ - msg += "[min,max] range is [0," + std::to_string(max()-1) + "]"; \ - } \ - else { \ - msg += "but container is empty"; \ - } \ - if (!(0 <= arg && arg < max())) { \ - Bounds.report_violation(msg.c_str() CPP2_SOURCE_LOCATION_ARG); \ - } \ - return CPP2_FORWARD(x) [ arg ]; \ + return ret; } -template -auto assert_in_bounds(auto&& x CPP2_SOURCE_LOCATION_PARAM_WITH_DEFAULT) -> decltype(auto) - CPP2_ASSERT_IN_BOUNDS_IMPL -auto assert_in_bounds(auto&& x, auto&& arg CPP2_SOURCE_LOCATION_PARAM_WITH_DEFAULT) -> decltype(auto) - CPP2_ASSERT_IN_BOUNDS_IMPL +// From https://stackoverflow.com/questions/216823/how-to-trim-a-stdstring -template -auto assert_in_bounds(auto&& x CPP2_SOURCE_LOCATION_PARAM_WITH_DEFAULT) -> decltype(auto) -{ - return CPP2_FORWARD(x) [ arg ]; +// Trim from start 
(in place) +inline void ltrim(std::string &s) { + s.erase( + s.begin(), + std::find_if(s.begin(), s.end(), [](unsigned char ch) { return !std::isspace(ch); }) + ); } -auto assert_in_bounds(auto&& x, auto&& arg CPP2_SOURCE_LOCATION_PARAM_WITH_DEFAULT) -> decltype(auto) -{ - return CPP2_FORWARD(x) [ CPP2_FORWARD(arg) ]; +// Trim from end (in place) +inline void rtrim(std::string &s) { + s.erase( + std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !std::isspace(ch); }).base(), + s.end() + ); } -#define CPP2_ASSERT_IN_BOUNDS(x,arg) (cpp2::assert_in_bounds((x),(arg))) -#define CPP2_ASSERT_IN_BOUNDS_LITERAL(x,arg) (cpp2::assert_in_bounds<(arg)>(x)) +// Trim from both ends (in place) +inline void trim(std::string &s) { + rtrim(s); + ltrim(s); +} +// Trim from both ends (copying) +inline std::string trim_copy(std::string_view s) { + std::string t(s); + trim(t); + return t; +} -//----------------------------------------------------------------------- -// -// Support wrappers that unblock using this file in environments that -// disable EH or RTTI -// -// Note: This is not endorsing disabling those features, it's just -// recognizing that disabling them is popular (e.g., games, WASM) -// and so we should remove a potential adoption blocker... 
only a -// few features in this file depend on EH or RTTI anyway, and -// wouldn't be exercised in such an environment anyway so there -// is no real net loss here -// -//----------------------------------------------------------------------- -// +// From https://oleksandrkvl.github.io/2021/04/02/cpp-20-overview.html#nttp -[[noreturn]] auto Throw(auto&& x, [[maybe_unused]] char const* msg) -> void { -#ifdef CPP2_NO_EXCEPTIONS - auto err = std::string{"exceptions are disabled with -fno-exceptions - attempted to throw exception with type \"" + typeid(decltype(x)).name() + "\""}; - if (msg) { - err += " and the message \"" + msg + "\""; +template +struct fixed_string { + constexpr fixed_string(const CharT (&s)[N+1]) { + std::copy_n(s, N + 1, c_str); } - Type.report_violation( err ); - std::terminate(); -#else - throw CPP2_FORWARD(x); -#endif + constexpr const CharT* data() const { + return c_str; + } + constexpr std::size_t size() const { + return N; + } + + constexpr auto str() const { + return std::basic_string(c_str); + } + + CharT c_str[N+1]; +}; + +template +fixed_string(const CharT (&)[N])->fixed_string; + +// Other string utility functions. 
+ +constexpr bool is_escaped(std::string_view s) { + return + s.starts_with("\"") + && s.ends_with("\"") + ; } -inline auto Uncaught_exceptions() -> int { -#ifdef CPP2_NO_EXCEPTIONS - return 0; +inline bool string_to_int(std::string const& s, int& v, int base = 10) { +#ifndef CPP2_NO_EXCEPTIONS + try { + v = stoi(s, nullptr, base); + return true; + } + catch (std::invalid_argument const&) + { + return false; + } + catch (std::out_of_range const&) + { + return false; + } #else - return std::uncaught_exceptions(); + errno = 0; + char* end = nullptr; + + auto const num = std::strtol(s.c_str(), &end, base); + + cpp2_default.enforce(end != nullptr); + if ( + end == s.c_str() + || *end != '\0' + ) + { + return false; // invalid argument + } + if ( + errno == ERANGE + || num < std::numeric_limits::min() + || num > std::numeric_limits::max() + ) + { + return false; // out of range + } + + v = unchecked_narrow(num); + return true; #endif } -template -auto Dynamic_cast( [[maybe_unused]] auto&& x ) -> decltype(auto) { -#ifdef CPP2_NO_RTTI - Type.report_violation( "'as' dynamic casting is disabled with -fno-rtti" ); - return nullptr; -#else - return dynamic_cast(CPP2_FORWARD(x)); -#endif +template +inline std::string int_to_string(int i) { + if constexpr (8 == Base) { + std::ostringstream oss; + oss << std::oct << i; + return oss.str(); + } + else if constexpr (10 == Base) { + return std::to_string(i); + } + else if constexpr (16 == Base) { + std::ostringstream oss; + oss << std::hex << i; + return oss.str(); + } + else { + [] () { + static_assert(flag, "Unsupported int_to_string Base"); + }(); + } } -template -auto Typeid() -> decltype(auto) { -#ifdef CPP2_NO_RTTI - Type.report_violation( "'any' dynamic casting is disabled with -fno-rtti" ); -#else - return typeid(T); -#endif +inline char safe_toupper(char ch) { + return static_cast(std::toupper(static_cast(ch))); } -auto Typeid( [[maybe_unused]] auto&& x ) -> decltype(auto) { -#ifdef CPP2_NO_RTTI - Type.report_violation( 
"'typeid' is disabled with -fno-rtti" ); -#else - return typeid(CPP2_FORWARD(x)); -#endif +inline char safe_tolower(char ch) { + return static_cast(std::tolower(static_cast(ch))); +} + +inline std::string replace_all( + std::string str, + const std::string& from, + const std::string& to +) +{ + size_t start_pos = 0; + while((start_pos = str.find(from, start_pos)) != std::string::npos) { + str.replace(start_pos, from.length(), to); + start_pos += to.length(); // skip the inserted text: safe also when 'to' contains 'from' + } + return str; +} + +template +inline std::string join(List const& list) { + std::string r = ""; + std::string sep = ""; + + for (auto const& cur : list) { + r += sep + cur; + sep = ", "; + } + + return r; } +} // namespace string_util + //----------------------------------------------------------------------- // -// Arena objects for std::allocators +// Conveniences for expressing Cpp1 references (rarely useful) +// +// Note: Only needed in rare cases to take full control of matching an +// odd Cpp1 signature exactly. Most cases don't need this... for +// example, a Cpp1 virtual function signature declaration like +// +// virtual void myfunc(int& val) const +// +// can already be directly overridden by a Cpp2 declaration of +// +// myfunc: (override this, inout val: int) +// // identical to this in Cpp1 syntax: +// // void myfunc(int& val) const override +// +// without any need to say cpp1_ref on the int parameter. +// +//----------------------------------------------------------------------- // -// Note: cppfront translates "new" to "cpp2_new", so in Cpp2 code -// these are invoked by simply "unique.new" etc. 
+template +using cpp1_ref = std::add_lvalue_reference_t; + +template +using cpp1_rvalue_ref = std::add_rvalue_reference_t; + + +//----------------------------------------------------------------------- +// +// Helper for concepts // //----------------------------------------------------------------------- // -struct { - template - [[nodiscard]] auto cpp2_new(auto&& ...args) const -> std::unique_ptr { - // Prefer { } to ( ) so that initializing a vector with - // (10), (10, 20), and (10, 20, 30) is consistent - if constexpr (requires { T{CPP2_FORWARD(args)...}; }) { + +template +auto argument_of_helper(Ret(*) (Arg)) -> Arg; + +template +auto argument_of_helper(Ret(F::*) (Arg)) -> Arg; + +template +auto argument_of_helper(Ret(F::*) (Arg)&) -> Arg; + +template +auto argument_of_helper(Ret(F::*) (Arg)&&) -> Arg; + +template +auto argument_of_helper(Ret(F::*) (Arg) const) -> Arg; + +template +auto argument_of_helper(Ret(F::*) (Arg) const&) -> Arg; + +template +auto argument_of_helper(Ret(F::*) (Arg) const&&) -> Arg; + +template +auto argument_of_helper(F const&) -> CPP2_TYPEOF(argument_of_helper(&F::operator())); + +template +using argument_of_t = CPP2_TYPEOF(argument_of_helper(std::declval())); + +template +auto argument_of_helper_op_is(F const&) -> CPP2_TYPEOF(argument_of_helper(&F::op_is)); + +template +using argument_of_op_is_t = CPP2_TYPEOF(argument_of_helper_op_is(std::declval())); + +template +using pointee_t = std::iter_value_t; + +template