From 187fc5cc3a248c7e53285ec81531e72967eabe5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Zasso?= Date: Wed, 29 Jan 2025 08:47:42 +0100 Subject: [PATCH] deps: remove deps/simdutf We depend on V8's version of simdutf now. PR-URL: https://github.com/nodejs/node/pull/58070 Reviewed-By: Antoine du Hamel Reviewed-By: Darshan Sen Reviewed-By: Joyee Cheung Reviewed-By: Rafael Gonzaga --- .github/workflows/tools.yml | 9 - LICENSE | 2 +- deps/simdutf/BUILD.gn | 14 - deps/simdutf/LICENSE-MIT | 18 - deps/simdutf/README.md | 13 - deps/simdutf/simdutf.cpp | 66420 ---------------- deps/simdutf/simdutf.gyp | 21 - deps/simdutf/simdutf.h | 6085 -- deps/simdutf/unofficial.gni | 32 - .../maintaining/maintaining-dependencies.md | 7 - tools/dep_updaters/README.md | 25 - tools/dep_updaters/update-simdutf.sh | 65 - tools/license-builder.sh | 4 +- 13 files changed, 3 insertions(+), 72712 deletions(-) delete mode 100644 deps/simdutf/BUILD.gn delete mode 100644 deps/simdutf/LICENSE-MIT delete mode 100644 deps/simdutf/README.md delete mode 100644 deps/simdutf/simdutf.cpp delete mode 100644 deps/simdutf/simdutf.gyp delete mode 100644 deps/simdutf/simdutf.h delete mode 100644 deps/simdutf/unofficial.gni delete mode 100755 tools/dep_updaters/update-simdutf.sh diff --git a/.github/workflows/tools.yml b/.github/workflows/tools.yml index 6e60911d5d9..60781a8528c 100644 --- a/.github/workflows/tools.yml +++ b/.github/workflows/tools.yml @@ -36,7 +36,6 @@ on: - postject - root-certificates - simdjson - - simdutf - sqlite - undici - uvwasi @@ -244,14 +243,6 @@ jobs: cat temp-output tail -n1 temp-output | grep "NEW_VERSION=" >> "$GITHUB_ENV" || true rm temp-output - - id: simdutf - subsystem: deps - label: dependencies - run: | - ./tools/dep_updaters/update-simdutf.sh > temp-output - cat temp-output - tail -n1 temp-output | grep "NEW_VERSION=" >> "$GITHUB_ENV" || true - rm temp-output - id: sqlite subsystem: deps label: dependencies, sqlite diff --git a/LICENSE b/LICENSE index 966aeac83d3..8be7420ddd0 100644 --- a/LICENSE +++ b/LICENSE @@ -1774,7 +1774,7 @@ The externally maintained libraries used by Node.js are: limitations under the License. """ -- simdutf, located at deps/simdutf, is licensed as follows: +- simdutf, located at deps/v8/third_party/simdutf, is licensed as follows: """ Copyright 2021 The simdutf authors diff --git a/deps/simdutf/BUILD.gn b/deps/simdutf/BUILD.gn deleted file mode 100644 index 119d4945691..00000000000 --- a/deps/simdutf/BUILD.gn +++ /dev/null @@ -1,14 +0,0 @@ -############################################################################## -# # -# DO NOT EDIT THIS FILE! # -# # -############################################################################## - -# This file is used by GN for building, which is NOT the build system used for -# building official binaries. -# Please modify the gyp files if you are making changes to build system. - -import("unofficial.gni") - -simdutf_gn_build("simdutf") { -} diff --git a/deps/simdutf/LICENSE-MIT b/deps/simdutf/LICENSE-MIT deleted file mode 100644 index 74c8302ba92..00000000000 --- a/deps/simdutf/LICENSE-MIT +++ /dev/null @@ -1,18 +0,0 @@ -Copyright 2021 The simdutf authors - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/deps/simdutf/README.md b/deps/simdutf/README.md deleted file mode 100644 index e80aa1e7fb7..00000000000 --- a/deps/simdutf/README.md +++ /dev/null @@ -1,13 +0,0 @@ -# simdutf - -This project boosts unicode validation and transcoding performance by -utilizing SIMD operations where possible. - -The source is pulled from: https://github.com/simdutf/simdutf - -Active development occurs in the default branch (currently named `master`). - -## Updating - -See [tools/dep_updaters/README.md#simdutf](../../tools/dep_updaters/README.md#simdutf) -for instructions. diff --git a/deps/simdutf/simdutf.cpp b/deps/simdutf/simdutf.cpp deleted file mode 100644 index 6d2278b7180..00000000000 --- a/deps/simdutf/simdutf.cpp +++ /dev/null @@ -1,66420 +0,0 @@ -/* auto-generated on 2025-04-14 21:04:55 -0400. Do not edit! */ -/* begin file src/simdutf.cpp */ -#include "simdutf.h" - -#if SIMDUTF_FEATURE_BASE64 - // We include base64_tables once. -/* begin file src/tables/base64_tables.h */ -#ifndef SIMDUTF_BASE64_TABLES_H -#define SIMDUTF_BASE64_TABLES_H -#include -#include - -namespace simdutf { -namespace { -namespace tables { -namespace base64 { -namespace base64_default { - -const char e0[256] = { - 'A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'C', 'C', 'C', 'C', 'D', 'D', 'D', - 'D', 'E', 'E', 'E', 'E', 'F', 'F', 'F', 'F', 'G', 'G', 'G', 'G', 'H', 'H', - 'H', 'H', 'I', 'I', 'I', 'I', 'J', 'J', 'J', 'J', 'K', 'K', 'K', 'K', 'L', - 'L', 'L', 'L', 'M', 'M', 'M', 'M', 'N', 'N', 'N', 'N', 'O', 'O', 'O', 'O', - 'P', 'P', 'P', 'P', 'Q', 'Q', 'Q', 'Q', 'R', 'R', 'R', 'R', 'S', 'S', 'S', - 'S', 'T', 'T', 'T', 'T', 'U', 'U', 'U', 'U', 'V', 'V', 'V', 'V', 'W', 'W', - 'W', 'W', 'X', 'X', 'X', 'X', 'Y', 'Y', 'Y', 'Y', 'Z', 'Z', 'Z', 'Z', 'a', - 'a', 'a', 'a', 'b', 'b', 'b', 'b', 'c', 'c', 'c', 'c', 'd', 'd', 'd', 'd', - 'e', 'e', 'e', 'e', 'f', 'f', 'f', 'f', 'g', 'g', 'g', 'g', 'h', 'h', 'h', - 'h', 'i', 'i', 'i', 'i', 'j', 'j', 'j', 'j', 'k', 'k', 'k', 'k', 'l', 'l', - 'l', 'l', 'm', 'm', 'm', 'm', 'n', 'n', 'n', 'n', 'o', 'o', 'o', 'o', 'p', - 'p', 'p', 'p', 'q', 'q', 'q', 'q', 'r', 'r', 'r', 'r', 's', 's', 's', 's', - 't', 't', 't', 't', 'u', 'u', 'u', 'u', 'v', 'v', 'v', 'v', 'w', 'w', 'w', - 'w', 'x', 'x', 'x', 'x', 'y', 'y', 'y', 'y', 'z', 'z', 'z', 'z', '0', '0', - '0', '0', '1', '1', '1', '1', '2', '2', '2', '2', '3', '3', '3', '3', '4', - '4', '4', '4', '5', '5', '5', '5', '6', '6', '6', '6', '7', '7', '7', '7', - '8', '8', '8', '8', '9', '9', '9', '9', '+', '+', '+', '+', '/', '/', '/', - '/'}; - -const char e1[256] = { - 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', - 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', - 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', - 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', - '8', '9', '+', '/', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', - 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', - 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', - 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', - '4', '5', '6', '7', '8', '9', '+', '/', 'A', 'B', 'C', 'D', 'E', 'F', 'G', - 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', - 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', - 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/', 'A', 'B', 'C', - 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', - 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', - 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', - 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', - '/'}; - -const char e2[256] = { - 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', - 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', - 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', - 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', - '8', '9', '+', '/', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', - 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', - 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', - 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', - '4', '5', '6', '7', '8', '9', '+', '/', 'A', 'B', 'C', 'D', 'E', 'F', 'G', - 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', - 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', - 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/', 'A', 'B', 'C', - 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', - 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', - 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', - 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', - '/'}; - -const uint32_t d0[256] = { - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x000000f8, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x000000fc, - 0x000000d0, 0x000000d4, 0x000000d8, 0x000000dc, 0x000000e0, 0x000000e4, - 0x000000e8, 0x000000ec, 0x000000f0, 0x000000f4, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, - 0x00000004, 0x00000008, 0x0000000c, 0x00000010, 0x00000014, 0x00000018, - 0x0000001c, 0x00000020, 0x00000024, 0x00000028, 0x0000002c, 0x00000030, - 0x00000034, 0x00000038, 0x0000003c, 0x00000040, 0x00000044, 0x00000048, - 0x0000004c, 0x00000050, 0x00000054, 0x00000058, 0x0000005c, 0x00000060, - 0x00000064, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x00000068, 0x0000006c, 0x00000070, 0x00000074, 0x00000078, - 0x0000007c, 0x00000080, 0x00000084, 0x00000088, 0x0000008c, 0x00000090, - 0x00000094, 0x00000098, 0x0000009c, 0x000000a0, 0x000000a4, 0x000000a8, - 0x000000ac, 0x000000b0, 0x000000b4, 0x000000b8, 0x000000bc, 0x000000c0, - 0x000000c4, 0x000000c8, 0x000000cc, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; - -const uint32_t d1[256] = { - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x0000e003, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x0000f003, - 0x00004003, 0x00005003, 0x00006003, 0x00007003, 0x00008003, 0x00009003, - 0x0000a003, 0x0000b003, 0x0000c003, 0x0000d003, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, - 0x00001000, 0x00002000, 0x00003000, 0x00004000, 0x00005000, 0x00006000, - 0x00007000, 0x00008000, 0x00009000, 0x0000a000, 0x0000b000, 0x0000c000, - 0x0000d000, 0x0000e000, 0x0000f000, 0x00000001, 0x00001001, 0x00002001, - 0x00003001, 0x00004001, 0x00005001, 0x00006001, 0x00007001, 0x00008001, - 0x00009001, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x0000a001, 0x0000b001, 0x0000c001, 0x0000d001, 0x0000e001, - 0x0000f001, 0x00000002, 0x00001002, 0x00002002, 0x00003002, 0x00004002, - 0x00005002, 0x00006002, 0x00007002, 0x00008002, 0x00009002, 0x0000a002, - 0x0000b002, 0x0000c002, 0x0000d002, 0x0000e002, 0x0000f002, 0x00000003, - 0x00001003, 0x00002003, 0x00003003, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; - -const uint32_t d2[256] = { - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x00800f00, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00c00f00, - 0x00000d00, 0x00400d00, 0x00800d00, 0x00c00d00, 0x00000e00, 0x00400e00, - 0x00800e00, 0x00c00e00, 0x00000f00, 0x00400f00, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, - 0x00400000, 0x00800000, 0x00c00000, 0x00000100, 0x00400100, 0x00800100, - 0x00c00100, 0x00000200, 0x00400200, 0x00800200, 0x00c00200, 0x00000300, - 0x00400300, 0x00800300, 0x00c00300, 0x00000400, 0x00400400, 0x00800400, - 0x00c00400, 0x00000500, 0x00400500, 0x00800500, 0x00c00500, 0x00000600, - 0x00400600, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x00800600, 0x00c00600, 0x00000700, 0x00400700, 0x00800700, - 0x00c00700, 0x00000800, 0x00400800, 0x00800800, 0x00c00800, 0x00000900, - 0x00400900, 0x00800900, 0x00c00900, 0x00000a00, 0x00400a00, 0x00800a00, - 0x00c00a00, 0x00000b00, 0x00400b00, 0x00800b00, 0x00c00b00, 0x00000c00, - 0x00400c00, 0x00800c00, 0x00c00c00, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; - -const uint32_t d3[256] = { - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x003e0000, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x003f0000, - 0x00340000, 0x00350000, 0x00360000, 0x00370000, 0x00380000, 0x00390000, - 0x003a0000, 0x003b0000, 0x003c0000, 0x003d0000, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, - 0x00010000, 0x00020000, 0x00030000, 0x00040000, 0x00050000, 0x00060000, - 0x00070000, 0x00080000, 0x00090000, 0x000a0000, 0x000b0000, 0x000c0000, - 0x000d0000, 0x000e0000, 0x000f0000, 0x00100000, 0x00110000, 0x00120000, - 0x00130000, 0x00140000, 0x00150000, 0x00160000, 0x00170000, 0x00180000, - 0x00190000, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x001a0000, 0x001b0000, 0x001c0000, 0x001d0000, 0x001e0000, - 0x001f0000, 0x00200000, 0x00210000, 0x00220000, 0x00230000, 0x00240000, - 0x00250000, 0x00260000, 0x00270000, 0x00280000, 0x00290000, 0x002a0000, - 0x002b0000, 0x002c0000, 0x002d0000, 0x002e0000, 0x002f0000, 0x00300000, - 0x00310000, 0x00320000, 0x00330000, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; -} // namespace base64_default - -namespace base64_url { - -const char e0[256] = { - 'A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'C', 'C', 'C', 'C', 'D', 'D', 'D', - 'D', 'E', 'E', 'E', 'E', 'F', 'F', 'F', 'F', 'G', 'G', 'G', 'G', 'H', 'H', - 'H', 'H', 'I', 'I', 'I', 'I', 'J', 'J', 'J', 'J', 'K', 'K', 'K', 'K', 'L', - 'L', 'L', 'L', 'M', 'M', 'M', 'M', 'N', 'N', 'N', 'N', 'O', 'O', 'O', 'O', - 'P', 'P', 'P', 'P', 'Q', 'Q', 'Q', 'Q', 'R', 'R', 'R', 'R', 'S', 'S', 'S', - 'S', 'T', 'T', 'T', 'T', 'U', 'U', 'U', 'U', 'V', 'V', 'V', 'V', 'W', 'W', - 'W', 'W', 'X', 'X', 'X', 'X', 'Y', 'Y', 'Y', 'Y', 'Z', 'Z', 'Z', 'Z', 'a', - 'a', 'a', 'a', 'b', 'b', 'b', 'b', 'c', 'c', 'c', 'c', 'd', 'd', 'd', 'd', - 'e', 'e', 'e', 'e', 'f', 'f', 'f', 'f', 'g', 'g', 'g', 'g', 'h', 'h', 'h', - 'h', 'i', 'i', 'i', 'i', 'j', 'j', 'j', 'j', 'k', 'k', 'k', 'k', 'l', 'l', - 'l', 'l', 'm', 'm', 'm', 'm', 'n', 'n', 'n', 'n', 'o', 'o', 'o', 'o', 'p', - 'p', 'p', 'p', 'q', 'q', 'q', 'q', 'r', 'r', 'r', 'r', 's', 's', 's', 's', - 't', 't', 't', 't', 'u', 'u', 'u', 'u', 'v', 'v', 'v', 'v', 'w', 'w', 'w', - 'w', 'x', 'x', 'x', 'x', 'y', 'y', 'y', 'y', 'z', 'z', 'z', 'z', '0', '0', - '0', '0', '1', '1', '1', '1', '2', '2', '2', '2', '3', '3', '3', '3', '4', - '4', '4', '4', '5', '5', '5', '5', '6', '6', '6', '6', '7', '7', '7', '7', - '8', '8', '8', '8', '9', '9', '9', '9', '-', '-', '-', '-', '_', '_', '_', - '_'}; - -const char e1[256] = { - 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', - 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', - 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', - 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', - '8', '9', '-', '_', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', - 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', - 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', - 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', - '4', '5', '6', '7', '8', '9', '-', '_', 'A', 'B', 'C', 'D', 'E', 'F', 'G', - 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', - 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', - 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_', 'A', 'B', 'C', - 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', - 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', - 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', - 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', - '_'}; - -const char e2[256] = { - 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', - 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', - 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', - 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', - '8', '9', '-', '_', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', - 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', - 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', - 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', - '4', '5', '6', '7', '8', '9', '-', '_', 'A', 'B', 'C', 'D', 'E', 'F', 'G', - 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', - 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', - 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_', 'A', 'B', 'C', - 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', - 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', - 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', - 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', - '_'}; - -const uint32_t d0[256] = { - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x000000f8, 0x01ffffff, 0x01ffffff, - 0x000000d0, 0x000000d4, 0x000000d8, 0x000000dc, 0x000000e0, 0x000000e4, - 0x000000e8, 0x000000ec, 0x000000f0, 0x000000f4, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, - 0x00000004, 0x00000008, 0x0000000c, 0x00000010, 0x00000014, 0x00000018, - 0x0000001c, 0x00000020, 0x00000024, 0x00000028, 0x0000002c, 0x00000030, - 0x00000034, 0x00000038, 0x0000003c, 0x00000040, 0x00000044, 0x00000048, - 0x0000004c, 0x00000050, 0x00000054, 0x00000058, 0x0000005c, 0x00000060, - 0x00000064, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x000000fc, - 0x01ffffff, 0x00000068, 0x0000006c, 0x00000070, 0x00000074, 0x00000078, - 0x0000007c, 0x00000080, 0x00000084, 0x00000088, 0x0000008c, 0x00000090, - 0x00000094, 0x00000098, 0x0000009c, 0x000000a0, 0x000000a4, 0x000000a8, - 0x000000ac, 0x000000b0, 0x000000b4, 0x000000b8, 0x000000bc, 0x000000c0, - 0x000000c4, 0x000000c8, 0x000000cc, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; -const uint32_t d1[256] = { - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x0000e003, 0x01ffffff, 0x01ffffff, - 0x00004003, 0x00005003, 0x00006003, 0x00007003, 0x00008003, 0x00009003, - 0x0000a003, 0x0000b003, 0x0000c003, 0x0000d003, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, - 0x00001000, 0x00002000, 0x00003000, 0x00004000, 0x00005000, 0x00006000, - 0x00007000, 0x00008000, 0x00009000, 0x0000a000, 0x0000b000, 0x0000c000, - 0x0000d000, 0x0000e000, 0x0000f000, 0x00000001, 0x00001001, 0x00002001, - 0x00003001, 0x00004001, 0x00005001, 0x00006001, 0x00007001, 0x00008001, - 0x00009001, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x0000f003, - 0x01ffffff, 0x0000a001, 0x0000b001, 0x0000c001, 0x0000d001, 0x0000e001, - 0x0000f001, 0x00000002, 0x00001002, 0x00002002, 0x00003002, 0x00004002, - 0x00005002, 0x00006002, 0x00007002, 0x00008002, 0x00009002, 0x0000a002, - 0x0000b002, 0x0000c002, 0x0000d002, 0x0000e002, 0x0000f002, 0x00000003, - 0x00001003, 0x00002003, 0x00003003, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; -const uint32_t d2[256] = { - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00800f00, 0x01ffffff, 0x01ffffff, - 0x00000d00, 0x00400d00, 0x00800d00, 0x00c00d00, 0x00000e00, 0x00400e00, - 0x00800e00, 0x00c00e00, 0x00000f00, 0x00400f00, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, - 0x00400000, 0x00800000, 0x00c00000, 0x00000100, 0x00400100, 0x00800100, - 0x00c00100, 0x00000200, 0x00400200, 0x00800200, 0x00c00200, 0x00000300, - 0x00400300, 0x00800300, 0x00c00300, 0x00000400, 0x00400400, 0x00800400, - 0x00c00400, 0x00000500, 0x00400500, 0x00800500, 0x00c00500, 0x00000600, - 0x00400600, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00c00f00, - 0x01ffffff, 0x00800600, 0x00c00600, 0x00000700, 0x00400700, 0x00800700, - 0x00c00700, 0x00000800, 0x00400800, 0x00800800, 0x00c00800, 0x00000900, - 0x00400900, 0x00800900, 0x00c00900, 0x00000a00, 0x00400a00, 0x00800a00, - 0x00c00a00, 0x00000b00, 0x00400b00, 0x00800b00, 0x00c00b00, 0x00000c00, - 0x00400c00, 0x00800c00, 0x00c00c00, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; -const uint32_t d3[256] = { - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x003e0000, 0x01ffffff, 0x01ffffff, - 0x00340000, 0x00350000, 0x00360000, 0x00370000, 0x00380000, 0x00390000, - 0x003a0000, 0x003b0000, 0x003c0000, 0x003d0000, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x00000000, - 0x00010000, 0x00020000, 0x00030000, 0x00040000, 0x00050000, 0x00060000, - 0x00070000, 0x00080000, 0x00090000, 0x000a0000, 0x000b0000, 0x000c0000, - 0x000d0000, 0x000e0000, 0x000f0000, 0x00100000, 0x00110000, 0x00120000, - 0x00130000, 0x00140000, 0x00150000, 0x00160000, 0x00170000, 0x00180000, - 0x00190000, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x003f0000, - 0x01ffffff, 0x001a0000, 0x001b0000, 0x001c0000, 0x001d0000, 0x001e0000, - 0x001f0000, 0x00200000, 0x00210000, 0x00220000, 0x00230000, 0x00240000, - 0x00250000, 0x00260000, 0x00270000, 0x00280000, 0x00290000, 0x002a0000, - 0x002b0000, 0x002c0000, 0x002d0000, 0x002e0000, 0x002f0000, 0x00300000, - 0x00310000, 0x00320000, 0x00330000, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, - 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}; -} // namespace base64_url -const uint64_t thintable_epi8[256] = { - 0x0706050403020100, 0x0007060504030201, 0x0007060504030200, - 0x0000070605040302, 0x0007060504030100, 0x0000070605040301, - 0x0000070605040300, 0x0000000706050403, 0x0007060504020100, - 0x0000070605040201, 0x0000070605040200, 0x0000000706050402, - 0x0000070605040100, 0x0000000706050401, 0x0000000706050400, - 0x0000000007060504, 0x0007060503020100, 0x0000070605030201, - 0x0000070605030200, 0x0000000706050302, 0x0000070605030100, - 0x0000000706050301, 0x0000000706050300, 0x0000000007060503, - 0x0000070605020100, 0x0000000706050201, 0x0000000706050200, - 0x0000000007060502, 0x0000000706050100, 0x0000000007060501, - 0x0000000007060500, 0x0000000000070605, 0x0007060403020100, - 0x0000070604030201, 0x0000070604030200, 0x0000000706040302, - 0x0000070604030100, 0x0000000706040301, 0x0000000706040300, - 0x0000000007060403, 0x0000070604020100, 0x0000000706040201, - 0x0000000706040200, 0x0000000007060402, 0x0000000706040100, - 0x0000000007060401, 0x0000000007060400, 0x0000000000070604, - 0x0000070603020100, 0x0000000706030201, 0x0000000706030200, - 0x0000000007060302, 0x0000000706030100, 0x0000000007060301, - 0x0000000007060300, 0x0000000000070603, 0x0000000706020100, - 0x0000000007060201, 0x0000000007060200, 0x0000000000070602, - 0x0000000007060100, 0x0000000000070601, 0x0000000000070600, - 0x0000000000000706, 0x0007050403020100, 0x0000070504030201, - 0x0000070504030200, 0x0000000705040302, 0x0000070504030100, - 0x0000000705040301, 0x0000000705040300, 0x0000000007050403, - 0x0000070504020100, 0x0000000705040201, 0x0000000705040200, - 0x0000000007050402, 0x0000000705040100, 0x0000000007050401, - 0x0000000007050400, 0x0000000000070504, 0x0000070503020100, - 0x0000000705030201, 0x0000000705030200, 0x0000000007050302, - 0x0000000705030100, 0x0000000007050301, 0x0000000007050300, - 0x0000000000070503, 0x0000000705020100, 0x0000000007050201, - 0x0000000007050200, 0x0000000000070502, 0x0000000007050100, - 0x0000000000070501, 0x0000000000070500, 0x0000000000000705, - 0x0000070403020100, 0x0000000704030201, 0x0000000704030200, - 0x0000000007040302, 0x0000000704030100, 0x0000000007040301, - 0x0000000007040300, 0x0000000000070403, 0x0000000704020100, - 0x0000000007040201, 0x0000000007040200, 0x0000000000070402, - 0x0000000007040100, 0x0000000000070401, 0x0000000000070400, - 0x0000000000000704, 0x0000000703020100, 0x0000000007030201, - 0x0000000007030200, 0x0000000000070302, 0x0000000007030100, - 0x0000000000070301, 0x0000000000070300, 0x0000000000000703, - 0x0000000007020100, 0x0000000000070201, 0x0000000000070200, - 0x0000000000000702, 0x0000000000070100, 0x0000000000000701, - 0x0000000000000700, 0x0000000000000007, 0x0006050403020100, - 0x0000060504030201, 0x0000060504030200, 0x0000000605040302, - 0x0000060504030100, 0x0000000605040301, 0x0000000605040300, - 0x0000000006050403, 0x0000060504020100, 0x0000000605040201, - 0x0000000605040200, 0x0000000006050402, 0x0000000605040100, - 0x0000000006050401, 0x0000000006050400, 0x0000000000060504, - 0x0000060503020100, 0x0000000605030201, 0x0000000605030200, - 0x0000000006050302, 0x0000000605030100, 0x0000000006050301, - 0x0000000006050300, 0x0000000000060503, 0x0000000605020100, - 0x0000000006050201, 0x0000000006050200, 0x0000000000060502, - 0x0000000006050100, 0x0000000000060501, 0x0000000000060500, - 0x0000000000000605, 0x0000060403020100, 0x0000000604030201, - 0x0000000604030200, 0x0000000006040302, 0x0000000604030100, - 0x0000000006040301, 0x0000000006040300, 0x0000000000060403, - 0x0000000604020100, 0x0000000006040201, 0x0000000006040200, - 0x0000000000060402, 0x0000000006040100, 0x0000000000060401, - 0x0000000000060400, 0x0000000000000604, 0x0000000603020100, - 0x0000000006030201, 0x0000000006030200, 0x0000000000060302, - 0x0000000006030100, 0x0000000000060301, 0x0000000000060300, - 0x0000000000000603, 0x0000000006020100, 0x0000000000060201, - 0x0000000000060200, 0x0000000000000602, 0x0000000000060100, - 0x0000000000000601, 0x0000000000000600, 0x0000000000000006, - 0x0000050403020100, 0x0000000504030201, 0x0000000504030200, - 0x0000000005040302, 0x0000000504030100, 0x0000000005040301, - 0x0000000005040300, 0x0000000000050403, 0x0000000504020100, - 0x0000000005040201, 0x0000000005040200, 0x0000000000050402, - 0x0000000005040100, 0x0000000000050401, 0x0000000000050400, - 0x0000000000000504, 0x0000000503020100, 0x0000000005030201, - 0x0000000005030200, 0x0000000000050302, 0x0000000005030100, - 0x0000000000050301, 0x0000000000050300, 0x0000000000000503, - 0x0000000005020100, 0x0000000000050201, 0x0000000000050200, - 0x0000000000000502, 0x0000000000050100, 0x0000000000000501, - 0x0000000000000500, 0x0000000000000005, 0x0000000403020100, - 0x0000000004030201, 0x0000000004030200, 0x0000000000040302, - 0x0000000004030100, 0x0000000000040301, 0x0000000000040300, - 0x0000000000000403, 0x0000000004020100, 0x0000000000040201, - 0x0000000000040200, 0x0000000000000402, 0x0000000000040100, - 0x0000000000000401, 0x0000000000000400, 0x0000000000000004, - 0x0000000003020100, 0x0000000000030201, 0x0000000000030200, - 0x0000000000000302, 0x0000000000030100, 0x0000000000000301, - 0x0000000000000300, 0x0000000000000003, 0x0000000000020100, - 0x0000000000000201, 0x0000000000000200, 0x0000000000000002, - 0x0000000000000100, 0x0000000000000001, 0x0000000000000000, - 0x0000000000000000, -}; - -const uint8_t pshufb_combine_table[272] = { - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, - 0x0c, 0x0d, 0x0e, 0x0f, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x08, - 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0x00, 0x01, 0x02, 0x03, - 0x04, 0x05, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, - 0x00, 0x01, 0x02, 0x03, 0x04, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, - 0x0f, 0xff, 0xff, 0xff, 0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b, - 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x02, 0x08, - 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, - 0x00, 0x01, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0x00, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, - 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x08, 0x09, 0x0a, 0x0b, - 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -}; - -const unsigned char BitsSetTable256mul2[256] = { - 0, 2, 2, 4, 2, 4, 4, 6, 2, 4, 4, 6, 4, 6, 6, 8, 2, 4, 4, - 6, 4, 6, 6, 8, 4, 6, 6, 8, 6, 8, 8, 10, 2, 4, 4, 6, 4, 6, - 6, 8, 4, 6, 6, 8, 6, 8, 8, 10, 4, 6, 6, 8, 6, 8, 8, 10, 6, - 8, 8, 10, 8, 10, 10, 12, 2, 4, 4, 6, 4, 6, 6, 8, 4, 6, 6, 8, - 6, 8, 8, 10, 4, 6, 6, 8, 6, 8, 8, 10, 6, 8, 8, 10, 8, 10, 10, - 12, 4, 6, 6, 8, 6, 8, 8, 10, 6, 8, 8, 10, 8, 10, 10, 12, 6, 8, - 8, 10, 8, 10, 10, 12, 8, 10, 10, 12, 10, 12, 12, 14, 2, 4, 4, 6, 4, - 6, 6, 8, 4, 6, 6, 8, 6, 8, 8, 10, 4, 6, 6, 8, 6, 8, 8, 10, - 6, 8, 8, 10, 8, 10, 10, 12, 4, 6, 6, 8, 6, 8, 8, 10, 6, 8, 8, - 10, 8, 10, 10, 12, 6, 8, 8, 10, 8, 10, 10, 12, 8, 10, 10, 12, 10, 12, - 12, 14, 4, 6, 6, 8, 6, 8, 8, 10, 6, 8, 8, 10, 8, 10, 10, 12, 6, - 8, 8, 10, 8, 10, 10, 12, 8, 10, 10, 12, 10, 12, 12, 14, 6, 8, 8, 10, - 8, 10, 10, 12, 8, 10, 10, 12, 10, 12, 12, 14, 8, 10, 10, 12, 10, 12, 12, - 14, 10, 12, 12, 14, 12, 14, 14, 16}; - -constexpr uint8_t to_base64_value[] = { - 255, 255, 255, 255, 255, 255, 255, 255, 255, 64, 64, 255, 64, 64, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 64, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 255, - 255, 255, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, - 255, 255, 255, 255, 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, - 25, 255, 255, 255, 255, 255, 255, 26, 27, 28, 29, 30, 31, 32, 33, - 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, - 49, 50, 51, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255}; - -constexpr uint8_t to_base64_url_value[] = { - 255, 255, 255, 255, 255, 255, 255, 255, 255, 64, 64, 255, 64, 64, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 64, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 62, 255, 255, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, - 255, 255, 255, 255, 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, - 25, 255, 255, 255, 255, 63, 255, 26, 27, 28, 29, 30, 31, 32, 33, - 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, - 49, 50, 51, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255}; -static_assert(sizeof(to_base64_value) == 256, - "to_base64_value must have 256 elements"); -static_assert(sizeof(to_base64_url_value) == 256, - "to_base64_url_value must have 256 elements"); -static_assert(to_base64_value[uint8_t(' ')] == 64, - "space must be == 64 in to_base64_value"); -static_assert(to_base64_url_value[uint8_t(' ')] == 64, - "space must be == 64 in to_base64_url_value"); -static_assert(to_base64_value[uint8_t('\t')] == 64, - "tab must be == 64 in to_base64_value"); -static_assert(to_base64_url_value[uint8_t('\t')] == 64, - "tab must be == 64 in to_base64_url_value"); -static_assert(to_base64_value[uint8_t('\r')] == 64, - "cr must be == 64 in to_base64_value"); -static_assert(to_base64_url_value[uint8_t('\r')] == 64, - "cr must be == 64 in to_base64_url_value"); -static_assert(to_base64_value[uint8_t('\n')] == 64, - "lf must be == 64 in to_base64_value"); -static_assert(to_base64_url_value[uint8_t('\n')] == 64, - "lf must be == 64 in to_base64_url_value"); -static_assert(to_base64_value[uint8_t('\f')] == 64, - "ff must be == 64 in to_base64_value"); -static_assert(to_base64_url_value[uint8_t('\f')] == 64, - "ff must be == 64 in to_base64_url_value"); -static_assert(to_base64_value[uint8_t('+')] == 62, - "+ must be == 62 in to_base64_value"); -static_assert(to_base64_url_value[uint8_t('-')] == 62, - "- must be == 62 in to_base64_url_value"); -static_assert(to_base64_value[uint8_t('/')] == 63, - "/ must be == 62 in to_base64_value"); -static_assert(to_base64_url_value[uint8_t('_')] == 63, - "_ must be == 62 in to_base64_url_value"); -} // namespace base64 -} // namespace tables -} // unnamed namespace -} // namespace simdutf - -#endif // SIMDUTF_BASE64_TABLES_H -/* end file src/tables/base64_tables.h */ -#endif // SIMDUTF_FEATURE_BASE64 - -/* begin file src/encoding_types.cpp */ - -namespace simdutf { -bool match_system(endianness e) { -#if SIMDUTF_IS_BIG_ENDIAN - return e == endianness::BIG; -#else - return e == endianness::LITTLE; -#endif -} - -std::string to_string(encoding_type bom) { - switch (bom) { - case UTF16_LE: - return "UTF16 little-endian"; - case UTF16_BE: - return "UTF16 big-endian"; - case UTF32_LE: - return "UTF32 little-endian"; - case UTF32_BE: - return "UTF32 big-endian"; - case UTF8: - return "UTF8"; - case unspecified: - return "unknown"; - default: - return "error"; - } -} - -namespace BOM { -// Note that BOM for UTF8 is discouraged. -encoding_type check_bom(const uint8_t *byte, size_t length) { - if (length >= 2 && byte[0] == 0xff and byte[1] == 0xfe) { - if (length >= 4 && byte[2] == 0x00 and byte[3] == 0x0) { - return encoding_type::UTF32_LE; - } else { - return encoding_type::UTF16_LE; - } - } else if (length >= 2 && byte[0] == 0xfe and byte[1] == 0xff) { - return encoding_type::UTF16_BE; - } else if (length >= 4 && byte[0] == 0x00 and byte[1] == 0x00 and - byte[2] == 0xfe and byte[3] == 0xff) { - return encoding_type::UTF32_BE; - } else if (length >= 4 && byte[0] == 0xef and byte[1] == 0xbb and - byte[2] == 0xbf) { - return encoding_type::UTF8; - } - return encoding_type::unspecified; -} - -encoding_type check_bom(const char *byte, size_t length) { - return check_bom(reinterpret_cast(byte), length); -} - -size_t bom_byte_size(encoding_type bom) { - switch (bom) { - case UTF16_LE: - return 2; - case UTF16_BE: - return 2; - case UTF32_LE: - return 4; - case UTF32_BE: - return 4; - case UTF8: - return 3; - case unspecified: - return 0; - default: - return 0; - } -} - -} // namespace BOM -} // namespace simdutf -/* end file src/encoding_types.cpp */ -/* begin file src/error.cpp */ -namespace simdutf { -// deliberately empty -} -/* end file src/error.cpp */ -// The large tables should be included once and they -// should not depend on a kernel. -/* begin file src/tables/utf8_to_utf16_tables.h */ -#ifndef SIMDUTF_UTF8_TO_UTF16_TABLES_H -#define SIMDUTF_UTF8_TO_UTF16_TABLES_H -#include - -namespace simdutf { -namespace { -namespace tables { -namespace utf8_to_utf16 { -/** - * utf8bigindex uses about 8 kB - * shufutf8 uses about 3344 B - * - * So we use a bit over 11 kB. It would be - * easy to save about 4 kB by only - * storing the index in utf8bigindex, and - * deriving the consumed bytes otherwise. - * However, this may come at a significant (10% to 20%) - * performance penalty. - */ - -const uint8_t shufutf8[209][16] = { - {0, 255, 1, 255, 2, 255, 3, 255, 4, 255, 5, 255, 0, 0, 0, 0}, - {0, 255, 1, 255, 2, 255, 3, 255, 4, 255, 6, 5, 0, 0, 0, 0}, - {0, 255, 1, 255, 2, 255, 3, 255, 5, 4, 6, 255, 0, 0, 0, 0}, - {0, 255, 1, 255, 2, 255, 3, 255, 5, 4, 7, 6, 0, 0, 0, 0}, - {0, 255, 1, 255, 2, 255, 4, 3, 5, 255, 6, 255, 0, 0, 0, 0}, - {0, 255, 1, 255, 2, 255, 4, 3, 5, 255, 7, 6, 0, 0, 0, 0}, - {0, 255, 1, 255, 2, 255, 4, 3, 6, 5, 7, 255, 0, 0, 0, 0}, - {0, 255, 1, 255, 2, 255, 4, 3, 6, 5, 8, 7, 0, 0, 0, 0}, - {0, 255, 1, 255, 3, 2, 4, 255, 5, 255, 6, 255, 0, 0, 0, 0}, - {0, 255, 1, 255, 3, 2, 4, 255, 5, 255, 7, 6, 0, 0, 0, 0}, - {0, 255, 1, 255, 3, 2, 4, 255, 6, 5, 7, 255, 0, 0, 0, 0}, - {0, 255, 1, 255, 3, 2, 4, 255, 6, 5, 8, 7, 0, 0, 0, 0}, - {0, 255, 1, 255, 3, 2, 5, 4, 6, 255, 7, 255, 0, 0, 0, 0}, - {0, 255, 1, 255, 3, 2, 5, 4, 6, 255, 8, 7, 0, 0, 0, 0}, - {0, 255, 1, 255, 3, 2, 5, 4, 7, 6, 8, 255, 0, 0, 0, 0}, - {0, 255, 1, 255, 3, 2, 5, 4, 7, 6, 9, 8, 0, 0, 0, 0}, - {0, 255, 2, 1, 3, 255, 4, 255, 5, 255, 6, 255, 0, 0, 0, 0}, - {0, 255, 2, 1, 3, 255, 4, 255, 5, 255, 7, 6, 0, 0, 0, 0}, - {0, 255, 2, 1, 3, 255, 4, 255, 6, 5, 7, 255, 0, 0, 0, 0}, - {0, 255, 2, 1, 3, 255, 4, 255, 6, 5, 8, 7, 0, 0, 0, 0}, - {0, 255, 2, 1, 3, 255, 5, 4, 6, 255, 7, 255, 0, 0, 0, 0}, - {0, 255, 2, 1, 3, 255, 5, 4, 6, 255, 8, 7, 0, 0, 0, 0}, - {0, 255, 2, 1, 3, 255, 5, 4, 7, 6, 8, 255, 0, 0, 0, 0}, - {0, 255, 2, 1, 3, 255, 5, 4, 7, 6, 9, 8, 0, 0, 0, 0}, - {0, 255, 2, 1, 4, 3, 5, 255, 6, 255, 7, 255, 0, 0, 0, 0}, - {0, 255, 2, 1, 4, 3, 5, 255, 6, 255, 8, 7, 0, 0, 0, 0}, - {0, 255, 2, 1, 4, 3, 5, 255, 7, 6, 8, 255, 0, 0, 0, 0}, - {0, 255, 2, 1, 4, 3, 5, 255, 7, 6, 9, 8, 0, 0, 0, 0}, - {0, 255, 2, 1, 4, 3, 6, 5, 7, 255, 8, 255, 0, 0, 0, 0}, - {0, 255, 2, 1, 4, 3, 6, 5, 7, 255, 9, 8, 0, 0, 0, 0}, - {0, 255, 2, 1, 4, 3, 6, 5, 8, 7, 9, 255, 0, 0, 0, 0}, - {0, 255, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 0, 0, 0, 0}, - {1, 0, 2, 255, 3, 255, 4, 255, 5, 255, 6, 255, 0, 0, 0, 0}, - {1, 0, 2, 255, 3, 255, 4, 255, 5, 255, 7, 6, 0, 0, 0, 0}, - {1, 0, 2, 255, 3, 255, 4, 255, 6, 5, 7, 255, 0, 0, 0, 0}, - {1, 0, 2, 255, 3, 255, 4, 255, 6, 5, 8, 7, 0, 0, 0, 0}, - {1, 0, 2, 255, 3, 255, 5, 4, 6, 255, 7, 255, 0, 0, 0, 0}, - {1, 0, 2, 255, 3, 255, 5, 4, 6, 255, 8, 7, 0, 0, 0, 0}, - {1, 0, 2, 255, 3, 255, 5, 4, 7, 6, 8, 255, 0, 0, 0, 0}, - {1, 0, 2, 255, 3, 255, 5, 4, 7, 6, 9, 8, 0, 0, 0, 0}, - {1, 0, 2, 255, 4, 3, 5, 255, 6, 255, 7, 255, 0, 0, 0, 0}, - {1, 0, 2, 255, 4, 3, 5, 255, 6, 255, 8, 7, 0, 0, 0, 0}, - {1, 0, 2, 255, 4, 3, 5, 255, 7, 6, 8, 255, 0, 0, 0, 0}, - {1, 0, 2, 255, 4, 3, 5, 255, 7, 6, 9, 8, 0, 0, 0, 0}, - {1, 0, 2, 255, 4, 3, 6, 5, 7, 255, 8, 255, 0, 0, 0, 0}, - {1, 0, 2, 255, 4, 3, 6, 5, 7, 255, 9, 8, 0, 0, 0, 0}, - {1, 0, 2, 255, 4, 3, 6, 5, 8, 7, 9, 255, 0, 0, 0, 0}, - {1, 0, 2, 255, 4, 3, 6, 5, 8, 7, 10, 9, 0, 0, 0, 0}, - {1, 0, 3, 2, 4, 255, 5, 255, 6, 255, 7, 255, 0, 0, 0, 0}, - {1, 0, 3, 2, 4, 255, 5, 255, 6, 255, 8, 7, 0, 0, 0, 0}, - {1, 0, 3, 2, 4, 255, 5, 255, 7, 6, 8, 255, 0, 0, 0, 0}, - {1, 0, 3, 2, 4, 255, 5, 255, 7, 6, 9, 8, 0, 0, 0, 0}, - {1, 0, 3, 2, 4, 255, 6, 5, 7, 255, 8, 255, 0, 0, 0, 0}, - {1, 0, 3, 2, 4, 255, 6, 5, 7, 255, 9, 8, 0, 0, 0, 0}, - {1, 0, 3, 2, 4, 255, 6, 5, 8, 7, 9, 255, 0, 0, 0, 0}, - {1, 0, 3, 2, 4, 255, 6, 5, 8, 7, 10, 9, 0, 0, 0, 0}, - {1, 0, 3, 2, 5, 4, 6, 255, 7, 255, 8, 255, 0, 0, 0, 0}, - {1, 0, 3, 2, 5, 4, 6, 255, 7, 255, 9, 8, 0, 0, 0, 0}, - {1, 0, 3, 2, 5, 4, 6, 255, 8, 7, 9, 255, 0, 0, 0, 0}, - {1, 0, 3, 2, 5, 4, 6, 255, 8, 7, 10, 9, 0, 0, 0, 0}, - {1, 0, 3, 2, 5, 4, 7, 6, 8, 255, 9, 255, 0, 0, 0, 0}, - {1, 0, 3, 2, 5, 4, 7, 6, 8, 255, 10, 9, 0, 0, 0, 0}, - {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 10, 255, 0, 0, 0, 0}, - {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 0, 0, 0, 0}, - {0, 255, 255, 255, 1, 255, 255, 255, 2, 255, 255, 255, 3, 255, 255, 255}, - {0, 255, 255, 255, 1, 255, 255, 255, 2, 255, 255, 255, 4, 3, 255, 255}, - {0, 255, 255, 255, 1, 255, 255, 255, 2, 255, 255, 255, 5, 4, 3, 255}, - {0, 255, 255, 255, 1, 255, 255, 255, 3, 2, 255, 255, 4, 255, 255, 255}, - {0, 255, 255, 255, 1, 255, 255, 255, 3, 2, 255, 255, 5, 4, 255, 255}, - {0, 255, 255, 255, 1, 255, 255, 255, 3, 2, 255, 255, 6, 5, 4, 255}, - {0, 255, 255, 255, 1, 255, 255, 255, 4, 3, 2, 255, 5, 255, 255, 255}, - {0, 255, 255, 255, 1, 255, 255, 255, 4, 3, 2, 255, 6, 5, 255, 255}, - {0, 255, 255, 255, 1, 255, 255, 255, 4, 3, 2, 255, 7, 6, 5, 255}, - {0, 255, 255, 255, 2, 1, 255, 255, 3, 255, 255, 255, 4, 255, 255, 255}, - {0, 255, 255, 255, 2, 1, 255, 255, 3, 255, 255, 255, 5, 4, 255, 255}, - {0, 255, 255, 255, 2, 1, 255, 255, 3, 255, 255, 255, 6, 5, 4, 255}, - {0, 255, 255, 255, 2, 1, 255, 255, 4, 3, 255, 255, 5, 255, 255, 255}, - {0, 255, 255, 255, 2, 1, 255, 255, 4, 3, 255, 255, 6, 5, 255, 255}, - {0, 255, 255, 255, 2, 1, 255, 255, 4, 3, 255, 255, 7, 6, 5, 255}, - {0, 255, 255, 255, 2, 1, 255, 255, 5, 4, 3, 255, 6, 255, 255, 255}, - {0, 255, 255, 255, 2, 1, 255, 255, 5, 4, 3, 255, 7, 6, 255, 255}, - {0, 255, 255, 255, 2, 1, 255, 255, 5, 4, 3, 255, 8, 7, 6, 255}, - {0, 255, 255, 255, 3, 2, 1, 255, 4, 255, 255, 255, 5, 255, 255, 255}, - {0, 255, 255, 255, 3, 2, 1, 255, 4, 255, 255, 255, 6, 5, 255, 255}, - {0, 255, 255, 255, 3, 2, 1, 255, 4, 255, 255, 255, 7, 6, 5, 255}, - {0, 255, 255, 255, 3, 2, 1, 255, 5, 4, 255, 255, 6, 255, 255, 255}, - {0, 255, 255, 255, 3, 2, 1, 255, 5, 4, 255, 255, 7, 6, 255, 255}, - {0, 255, 255, 255, 3, 2, 1, 255, 5, 4, 255, 255, 8, 7, 6, 255}, - {0, 255, 255, 255, 3, 2, 1, 255, 6, 5, 4, 255, 7, 255, 255, 255}, - {0, 255, 255, 255, 3, 2, 1, 255, 6, 5, 4, 255, 8, 7, 255, 255}, - {0, 255, 255, 255, 3, 2, 1, 255, 6, 5, 4, 255, 9, 8, 7, 255}, - {1, 0, 255, 255, 2, 255, 255, 255, 3, 255, 255, 255, 4, 255, 255, 255}, - {1, 0, 255, 255, 2, 255, 255, 255, 3, 255, 255, 255, 5, 4, 255, 255}, - {1, 0, 255, 255, 2, 255, 255, 255, 3, 255, 255, 255, 6, 5, 4, 255}, - {1, 0, 255, 255, 2, 255, 255, 255, 4, 3, 255, 255, 5, 255, 255, 255}, - {1, 0, 255, 255, 2, 255, 255, 255, 4, 3, 255, 255, 6, 5, 255, 255}, - {1, 0, 255, 255, 2, 255, 255, 255, 4, 3, 255, 255, 7, 6, 5, 255}, - {1, 0, 255, 255, 2, 255, 255, 255, 5, 4, 3, 255, 6, 255, 255, 255}, - {1, 0, 255, 255, 2, 255, 255, 255, 5, 4, 3, 255, 7, 6, 255, 255}, - {1, 0, 255, 255, 2, 255, 255, 255, 5, 4, 3, 255, 8, 7, 6, 255}, - {1, 0, 255, 255, 3, 2, 255, 255, 4, 255, 255, 255, 5, 255, 255, 255}, - {1, 0, 255, 255, 3, 2, 255, 255, 4, 255, 255, 255, 6, 5, 255, 255}, - {1, 0, 255, 255, 3, 2, 255, 255, 4, 255, 255, 255, 7, 6, 5, 255}, - {1, 0, 255, 255, 3, 2, 255, 255, 5, 4, 255, 255, 6, 255, 255, 255}, - {1, 0, 255, 255, 3, 2, 255, 255, 5, 4, 255, 255, 7, 6, 255, 255}, - {1, 0, 255, 255, 3, 2, 255, 255, 5, 4, 255, 255, 8, 7, 6, 255}, - {1, 0, 255, 255, 3, 2, 255, 255, 6, 5, 4, 255, 7, 255, 255, 255}, - {1, 0, 255, 255, 3, 2, 255, 255, 6, 5, 4, 255, 8, 7, 255, 255}, - {1, 0, 255, 255, 3, 2, 255, 255, 6, 5, 4, 255, 9, 8, 7, 255}, - {1, 0, 255, 255, 4, 3, 2, 255, 5, 255, 255, 255, 6, 255, 255, 255}, - {1, 0, 255, 255, 4, 3, 2, 255, 5, 255, 255, 255, 7, 6, 255, 255}, - {1, 0, 255, 255, 4, 3, 2, 255, 5, 255, 255, 255, 8, 7, 6, 255}, - {1, 0, 255, 255, 4, 3, 2, 255, 6, 5, 255, 255, 7, 255, 255, 255}, - {1, 0, 255, 255, 4, 3, 2, 255, 6, 5, 255, 255, 8, 7, 255, 255}, - {1, 0, 255, 255, 4, 3, 2, 255, 6, 5, 255, 255, 9, 8, 7, 255}, - {1, 0, 255, 255, 4, 3, 2, 255, 7, 6, 5, 255, 8, 255, 255, 255}, - {1, 0, 255, 255, 4, 3, 2, 255, 7, 6, 5, 255, 9, 8, 255, 255}, - {1, 0, 255, 255, 4, 3, 2, 255, 7, 6, 5, 255, 10, 9, 8, 255}, - {2, 1, 0, 255, 3, 255, 255, 255, 4, 255, 255, 255, 5, 255, 255, 255}, - {2, 1, 0, 255, 3, 255, 255, 255, 4, 255, 255, 255, 6, 5, 255, 255}, - {2, 1, 0, 255, 3, 255, 255, 255, 4, 255, 255, 255, 7, 6, 5, 255}, - {2, 1, 0, 255, 3, 255, 255, 255, 5, 4, 255, 255, 6, 255, 255, 255}, - {2, 1, 0, 255, 3, 255, 255, 255, 5, 4, 255, 255, 7, 6, 255, 255}, - {2, 1, 0, 255, 3, 255, 255, 255, 5, 4, 255, 255, 8, 7, 6, 255}, - {2, 1, 0, 255, 3, 255, 255, 255, 6, 5, 4, 255, 7, 255, 255, 255}, - {2, 1, 0, 255, 3, 255, 255, 255, 6, 5, 4, 255, 8, 7, 255, 255}, - {2, 1, 0, 255, 3, 255, 255, 255, 6, 5, 4, 255, 9, 8, 7, 255}, - {2, 1, 0, 255, 4, 3, 255, 255, 5, 255, 255, 255, 6, 255, 255, 255}, - {2, 1, 0, 255, 4, 3, 255, 255, 5, 255, 255, 255, 7, 6, 255, 255}, - {2, 1, 0, 255, 4, 3, 255, 255, 5, 255, 255, 255, 8, 7, 6, 255}, - {2, 1, 0, 255, 4, 3, 255, 255, 6, 5, 255, 255, 7, 255, 255, 255}, - {2, 1, 0, 255, 4, 3, 255, 255, 6, 5, 255, 255, 8, 7, 255, 255}, - {2, 1, 0, 255, 4, 3, 255, 255, 6, 5, 255, 255, 9, 8, 7, 255}, - {2, 1, 0, 255, 4, 3, 255, 255, 7, 6, 5, 255, 8, 255, 255, 255}, - {2, 1, 0, 255, 4, 3, 255, 255, 7, 6, 5, 255, 9, 8, 255, 255}, - {2, 1, 0, 255, 4, 3, 255, 255, 7, 6, 5, 255, 10, 9, 8, 255}, - {2, 1, 0, 255, 5, 4, 3, 255, 6, 255, 255, 255, 7, 255, 255, 255}, - {2, 1, 0, 255, 5, 4, 3, 255, 6, 255, 255, 255, 8, 7, 255, 255}, - {2, 1, 0, 255, 5, 4, 3, 255, 6, 255, 255, 255, 9, 8, 7, 255}, - {2, 1, 0, 255, 5, 4, 3, 255, 7, 6, 255, 255, 8, 255, 255, 255}, - {2, 1, 0, 255, 5, 4, 3, 255, 7, 6, 255, 255, 9, 8, 255, 255}, - {2, 1, 0, 255, 5, 4, 3, 255, 7, 6, 255, 255, 10, 9, 8, 255}, - {2, 1, 0, 255, 5, 4, 3, 255, 8, 7, 6, 255, 9, 255, 255, 255}, - {2, 1, 0, 255, 5, 4, 3, 255, 8, 7, 6, 255, 10, 9, 255, 255}, - {2, 1, 0, 255, 5, 4, 3, 255, 8, 7, 6, 255, 11, 10, 9, 255}, - {0, 255, 255, 255, 1, 255, 255, 255, 2, 255, 255, 255, 0, 0, 0, 0}, - {0, 255, 255, 255, 1, 255, 255, 255, 3, 2, 255, 255, 0, 0, 0, 0}, - {0, 255, 255, 255, 1, 255, 255, 255, 4, 3, 2, 255, 0, 0, 0, 0}, - {0, 255, 255, 255, 1, 255, 255, 255, 5, 4, 3, 2, 0, 0, 0, 0}, - {0, 255, 255, 255, 2, 1, 255, 255, 3, 255, 255, 255, 0, 0, 0, 0}, - {0, 255, 255, 255, 2, 1, 255, 255, 4, 3, 255, 255, 0, 0, 0, 0}, - {0, 255, 255, 255, 2, 1, 255, 255, 5, 4, 3, 255, 0, 0, 0, 0}, - {0, 255, 255, 255, 2, 1, 255, 255, 6, 5, 4, 3, 0, 0, 0, 0}, - {0, 255, 255, 255, 3, 2, 1, 255, 4, 255, 255, 255, 0, 0, 0, 0}, - {0, 255, 255, 255, 3, 2, 1, 255, 5, 4, 255, 255, 0, 0, 0, 0}, - {0, 255, 255, 255, 3, 2, 1, 255, 6, 5, 4, 255, 0, 0, 0, 0}, - {0, 255, 255, 255, 3, 2, 1, 255, 7, 6, 5, 4, 0, 0, 0, 0}, - {0, 255, 255, 255, 4, 3, 2, 1, 5, 255, 255, 255, 0, 0, 0, 0}, - {0, 255, 255, 255, 4, 3, 2, 1, 6, 5, 255, 255, 0, 0, 0, 0}, - {0, 255, 255, 255, 4, 3, 2, 1, 7, 6, 5, 255, 0, 0, 0, 0}, - {0, 255, 255, 255, 4, 3, 2, 1, 8, 7, 6, 5, 0, 0, 0, 0}, - {1, 0, 255, 255, 2, 255, 255, 255, 3, 255, 255, 255, 0, 0, 0, 0}, - {1, 0, 255, 255, 2, 255, 255, 255, 4, 3, 255, 255, 0, 0, 0, 0}, - {1, 0, 255, 255, 2, 255, 255, 255, 5, 4, 3, 255, 0, 0, 0, 0}, - {1, 0, 255, 255, 2, 255, 255, 255, 6, 5, 4, 3, 0, 0, 0, 0}, - {1, 0, 255, 255, 3, 2, 255, 255, 4, 255, 255, 255, 0, 0, 0, 0}, - {1, 0, 255, 255, 3, 2, 255, 255, 5, 4, 255, 255, 0, 0, 0, 0}, - {1, 0, 255, 255, 3, 2, 255, 255, 6, 5, 4, 255, 0, 0, 0, 0}, - {1, 0, 255, 255, 3, 2, 255, 255, 7, 6, 5, 4, 0, 0, 0, 0}, - {1, 0, 255, 255, 4, 3, 2, 255, 5, 255, 255, 255, 0, 0, 0, 0}, - {1, 0, 255, 255, 4, 3, 2, 255, 6, 5, 255, 255, 0, 0, 0, 0}, - {1, 0, 255, 255, 4, 3, 2, 255, 7, 6, 5, 255, 0, 0, 0, 0}, - {1, 0, 255, 255, 4, 3, 2, 255, 8, 7, 6, 5, 0, 0, 0, 0}, - {1, 0, 255, 255, 5, 4, 3, 2, 6, 255, 255, 255, 0, 0, 0, 0}, - {1, 0, 255, 255, 5, 4, 3, 2, 7, 6, 255, 255, 0, 0, 0, 0}, - {1, 0, 255, 255, 5, 4, 3, 2, 8, 7, 6, 255, 0, 0, 0, 0}, - {1, 0, 255, 255, 5, 4, 3, 2, 9, 8, 7, 6, 0, 0, 0, 0}, - {2, 1, 0, 255, 3, 255, 255, 255, 4, 255, 255, 255, 0, 0, 0, 0}, - {2, 1, 0, 255, 3, 255, 255, 255, 5, 4, 255, 255, 0, 0, 0, 0}, - {2, 1, 0, 255, 3, 255, 255, 255, 6, 5, 4, 255, 0, 0, 0, 0}, - {2, 1, 0, 255, 3, 255, 255, 255, 7, 6, 5, 4, 0, 0, 0, 0}, - {2, 1, 0, 255, 4, 3, 255, 255, 5, 255, 255, 255, 0, 0, 0, 0}, - {2, 1, 0, 255, 4, 3, 255, 255, 6, 5, 255, 255, 0, 0, 0, 0}, - {2, 1, 0, 255, 4, 3, 255, 255, 7, 6, 5, 255, 0, 0, 0, 0}, - {2, 1, 0, 255, 4, 3, 255, 255, 8, 7, 6, 5, 0, 0, 0, 0}, - {2, 1, 0, 255, 5, 4, 3, 255, 6, 255, 255, 255, 0, 0, 0, 0}, - {2, 1, 0, 255, 5, 4, 3, 255, 7, 6, 255, 255, 0, 0, 0, 0}, - {2, 1, 0, 255, 5, 4, 3, 255, 8, 7, 6, 255, 0, 0, 0, 0}, - {2, 1, 0, 255, 5, 4, 3, 255, 9, 8, 7, 6, 0, 0, 0, 0}, - {2, 1, 0, 255, 6, 5, 4, 3, 7, 255, 255, 255, 0, 0, 0, 0}, - {2, 1, 0, 255, 6, 5, 4, 3, 8, 7, 255, 255, 0, 0, 0, 0}, - {2, 1, 0, 255, 6, 5, 4, 3, 9, 8, 7, 255, 0, 0, 0, 0}, - {2, 1, 0, 255, 6, 5, 4, 3, 10, 9, 8, 7, 0, 0, 0, 0}, - {3, 2, 1, 0, 4, 255, 255, 255, 5, 255, 255, 255, 0, 0, 0, 0}, - {3, 2, 1, 0, 4, 255, 255, 255, 6, 5, 255, 255, 0, 0, 0, 0}, - {3, 2, 1, 0, 4, 255, 255, 255, 7, 6, 5, 255, 0, 0, 0, 0}, - {3, 2, 1, 0, 4, 255, 255, 255, 8, 7, 6, 5, 0, 0, 0, 0}, - {3, 2, 1, 0, 5, 4, 255, 255, 6, 255, 255, 255, 0, 0, 0, 0}, - {3, 2, 1, 0, 5, 4, 255, 255, 7, 6, 255, 255, 0, 0, 0, 0}, - {3, 2, 1, 0, 5, 4, 255, 255, 8, 7, 6, 255, 0, 0, 0, 0}, - {3, 2, 1, 0, 5, 4, 255, 255, 9, 8, 7, 6, 0, 0, 0, 0}, - {3, 2, 1, 0, 6, 5, 4, 255, 7, 255, 255, 255, 0, 0, 0, 0}, - {3, 2, 1, 0, 6, 5, 4, 255, 8, 7, 255, 255, 0, 0, 0, 0}, - {3, 2, 1, 0, 6, 5, 4, 255, 9, 8, 7, 255, 0, 0, 0, 0}, - {3, 2, 1, 0, 6, 5, 4, 255, 10, 9, 8, 7, 0, 0, 0, 0}, - {3, 2, 1, 0, 7, 6, 5, 4, 8, 255, 255, 255, 0, 0, 0, 0}, - {3, 2, 1, 0, 7, 6, 5, 4, 9, 8, 255, 255, 0, 0, 0, 0}, - {3, 2, 1, 0, 7, 6, 5, 4, 10, 9, 8, 255, 0, 0, 0, 0}, - {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 0, 0, 0, 0}}; -/* number of two bytes : 64 */ -/* number of two + three bytes : 145 */ -/* number of two + three + four bytes : 209 */ -const uint8_t utf8bigindex[4096][2] = { - {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, - {145, 3}, {209, 12}, {209, 12}, {209, 12}, {146, 4}, {209, 12}, {149, 4}, - {161, 4}, {64, 4}, {209, 12}, {209, 12}, {209, 12}, {147, 5}, {209, 12}, - {150, 5}, {162, 5}, {65, 5}, {209, 12}, {153, 5}, {165, 5}, {67, 5}, - {177, 5}, {73, 5}, {91, 5}, {64, 4}, {209, 12}, {209, 12}, {209, 12}, - {148, 6}, {209, 12}, {151, 6}, {163, 6}, {66, 6}, {209, 12}, {154, 6}, - {166, 6}, {68, 6}, {178, 6}, {74, 6}, {92, 6}, {64, 4}, {209, 12}, - {157, 6}, {169, 6}, {70, 6}, {181, 6}, {76, 6}, {94, 6}, {65, 5}, - {193, 6}, {82, 6}, {100, 6}, {67, 5}, {118, 6}, {73, 5}, {91, 5}, - {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {152, 7}, - {164, 7}, {145, 3}, {209, 12}, {155, 7}, {167, 7}, {69, 7}, {179, 7}, - {75, 7}, {93, 7}, {64, 4}, {209, 12}, {158, 7}, {170, 7}, {71, 7}, - {182, 7}, {77, 7}, {95, 7}, {65, 5}, {194, 7}, {83, 7}, {101, 7}, - {67, 5}, {119, 7}, {73, 5}, {91, 5}, {1, 7}, {209, 12}, {209, 12}, - {173, 7}, {148, 6}, {185, 7}, {79, 7}, {97, 7}, {66, 6}, {197, 7}, - {85, 7}, {103, 7}, {68, 6}, {121, 7}, {74, 6}, {92, 6}, {2, 7}, - {209, 12}, {157, 6}, {109, 7}, {70, 6}, {127, 7}, {76, 6}, {94, 6}, - {4, 7}, {193, 6}, {82, 6}, {100, 6}, {8, 7}, {118, 6}, {16, 7}, - {32, 7}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, - {209, 12}, {209, 12}, {145, 3}, {209, 12}, {156, 8}, {168, 8}, {146, 4}, - {180, 8}, {149, 4}, {161, 4}, {64, 4}, {209, 12}, {159, 8}, {171, 8}, - {72, 8}, {183, 8}, {78, 8}, {96, 8}, {65, 5}, {195, 8}, {84, 8}, - {102, 8}, {67, 5}, {120, 8}, {73, 5}, {91, 5}, {64, 4}, {209, 12}, - {209, 12}, {174, 8}, {148, 6}, {186, 8}, {80, 8}, {98, 8}, {66, 6}, - {198, 8}, {86, 8}, {104, 8}, {68, 6}, {122, 8}, {74, 6}, {92, 6}, - {3, 8}, {209, 12}, {157, 6}, {110, 8}, {70, 6}, {128, 8}, {76, 6}, - {94, 6}, {5, 8}, {193, 6}, {82, 6}, {100, 6}, {9, 8}, {118, 6}, - {17, 8}, {33, 8}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, - {189, 8}, {152, 7}, {164, 7}, {145, 3}, {201, 8}, {88, 8}, {106, 8}, - {69, 7}, {124, 8}, {75, 7}, {93, 7}, {64, 4}, {209, 12}, {158, 7}, - {112, 8}, {71, 7}, {130, 8}, {77, 7}, {95, 7}, {6, 8}, {194, 7}, - {83, 7}, {101, 7}, {10, 8}, {119, 7}, {18, 8}, {34, 8}, {1, 7}, - {209, 12}, {209, 12}, {173, 7}, {148, 6}, {136, 8}, {79, 7}, {97, 7}, - {66, 6}, {197, 7}, {85, 7}, {103, 7}, {12, 8}, {121, 7}, {20, 8}, - {36, 8}, {2, 7}, {209, 12}, {157, 6}, {109, 7}, {70, 6}, {127, 7}, - {24, 8}, {40, 8}, {4, 7}, {193, 6}, {82, 6}, {48, 8}, {8, 7}, - {118, 6}, {16, 7}, {32, 7}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, - {209, 12}, {209, 12}, {209, 12}, {209, 12}, {145, 3}, {209, 12}, {209, 12}, - {209, 12}, {146, 4}, {209, 12}, {149, 4}, {161, 4}, {64, 4}, {209, 12}, - {160, 9}, {172, 9}, {147, 5}, {184, 9}, {150, 5}, {162, 5}, {65, 5}, - {196, 9}, {153, 5}, {165, 5}, {67, 5}, {177, 5}, {73, 5}, {91, 5}, - {64, 4}, {209, 12}, {209, 12}, {175, 9}, {148, 6}, {187, 9}, {81, 9}, - {99, 9}, {66, 6}, {199, 9}, {87, 9}, {105, 9}, {68, 6}, {123, 9}, - {74, 6}, {92, 6}, {64, 4}, {209, 12}, {157, 6}, {111, 9}, {70, 6}, - {129, 9}, {76, 6}, {94, 6}, {65, 5}, {193, 6}, {82, 6}, {100, 6}, - {67, 5}, {118, 6}, {73, 5}, {91, 5}, {0, 6}, {209, 12}, {209, 12}, - {209, 12}, {209, 12}, {190, 9}, {152, 7}, {164, 7}, {145, 3}, {202, 9}, - {89, 9}, {107, 9}, {69, 7}, {125, 9}, {75, 7}, {93, 7}, {64, 4}, - {209, 12}, {158, 7}, {113, 9}, {71, 7}, {131, 9}, {77, 7}, {95, 7}, - {7, 9}, {194, 7}, {83, 7}, {101, 7}, {11, 9}, {119, 7}, {19, 9}, - {35, 9}, {1, 7}, {209, 12}, {209, 12}, {173, 7}, {148, 6}, {137, 9}, - {79, 7}, {97, 7}, {66, 6}, {197, 7}, {85, 7}, {103, 7}, {13, 9}, - {121, 7}, {21, 9}, {37, 9}, {2, 7}, {209, 12}, {157, 6}, {109, 7}, - {70, 6}, {127, 7}, {25, 9}, {41, 9}, {4, 7}, {193, 6}, {82, 6}, - {49, 9}, {8, 7}, {118, 6}, {16, 7}, {32, 7}, {0, 6}, {209, 12}, - {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {145, 3}, - {205, 9}, {156, 8}, {168, 8}, {146, 4}, {180, 8}, {149, 4}, {161, 4}, - {64, 4}, {209, 12}, {159, 8}, {115, 9}, {72, 8}, {133, 9}, {78, 8}, - {96, 8}, {65, 5}, {195, 8}, {84, 8}, {102, 8}, {67, 5}, {120, 8}, - {73, 5}, {91, 5}, {64, 4}, {209, 12}, {209, 12}, {174, 8}, {148, 6}, - {139, 9}, {80, 8}, {98, 8}, {66, 6}, {198, 8}, {86, 8}, {104, 8}, - {14, 9}, {122, 8}, {22, 9}, {38, 9}, {3, 8}, {209, 12}, {157, 6}, - {110, 8}, {70, 6}, {128, 8}, {26, 9}, {42, 9}, {5, 8}, {193, 6}, - {82, 6}, {50, 9}, {9, 8}, {118, 6}, {17, 8}, {33, 8}, {0, 6}, - {209, 12}, {209, 12}, {209, 12}, {209, 12}, {189, 8}, {152, 7}, {164, 7}, - {145, 3}, {201, 8}, {88, 8}, {106, 8}, {69, 7}, {124, 8}, {75, 7}, - {93, 7}, {64, 4}, {209, 12}, {158, 7}, {112, 8}, {71, 7}, {130, 8}, - {28, 9}, {44, 9}, {6, 8}, {194, 7}, {83, 7}, {52, 9}, {10, 8}, - {119, 7}, {18, 8}, {34, 8}, {1, 7}, {209, 12}, {209, 12}, {173, 7}, - {148, 6}, {136, 8}, {79, 7}, {97, 7}, {66, 6}, {197, 7}, {85, 7}, - {56, 9}, {12, 8}, {121, 7}, {20, 8}, {36, 8}, {2, 7}, {209, 12}, - {157, 6}, {109, 7}, {70, 6}, {127, 7}, {24, 8}, {40, 8}, {4, 7}, - {193, 6}, {82, 6}, {48, 8}, {8, 7}, {118, 6}, {16, 7}, {32, 7}, - {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, - {209, 12}, {145, 3}, {209, 12}, {209, 12}, {209, 12}, {146, 4}, {209, 12}, - {149, 4}, {161, 4}, {64, 4}, {209, 12}, {209, 12}, {209, 12}, {147, 5}, - {209, 12}, {150, 5}, {162, 5}, {65, 5}, {209, 12}, {153, 5}, {165, 5}, - {67, 5}, {177, 5}, {73, 5}, {91, 5}, {64, 4}, {209, 12}, {209, 12}, - {176, 10}, {148, 6}, {188, 10}, {151, 6}, {163, 6}, {66, 6}, {200, 10}, - {154, 6}, {166, 6}, {68, 6}, {178, 6}, {74, 6}, {92, 6}, {64, 4}, - {209, 12}, {157, 6}, {169, 6}, {70, 6}, {181, 6}, {76, 6}, {94, 6}, - {65, 5}, {193, 6}, {82, 6}, {100, 6}, {67, 5}, {118, 6}, {73, 5}, - {91, 5}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {191, 10}, - {152, 7}, {164, 7}, {145, 3}, {203, 10}, {90, 10}, {108, 10}, {69, 7}, - {126, 10}, {75, 7}, {93, 7}, {64, 4}, {209, 12}, {158, 7}, {114, 10}, - {71, 7}, {132, 10}, {77, 7}, {95, 7}, {65, 5}, {194, 7}, {83, 7}, - {101, 7}, {67, 5}, {119, 7}, {73, 5}, {91, 5}, {1, 7}, {209, 12}, - {209, 12}, {173, 7}, {148, 6}, {138, 10}, {79, 7}, {97, 7}, {66, 6}, - {197, 7}, {85, 7}, {103, 7}, {68, 6}, {121, 7}, {74, 6}, {92, 6}, - {2, 7}, {209, 12}, {157, 6}, {109, 7}, {70, 6}, {127, 7}, {76, 6}, - {94, 6}, {4, 7}, {193, 6}, {82, 6}, {100, 6}, {8, 7}, {118, 6}, - {16, 7}, {32, 7}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, - {209, 12}, {209, 12}, {209, 12}, {145, 3}, {206, 10}, {156, 8}, {168, 8}, - {146, 4}, {180, 8}, {149, 4}, {161, 4}, {64, 4}, {209, 12}, {159, 8}, - {116, 10}, {72, 8}, {134, 10}, {78, 8}, {96, 8}, {65, 5}, {195, 8}, - {84, 8}, {102, 8}, {67, 5}, {120, 8}, {73, 5}, {91, 5}, {64, 4}, - {209, 12}, {209, 12}, {174, 8}, {148, 6}, {140, 10}, {80, 8}, {98, 8}, - {66, 6}, {198, 8}, {86, 8}, {104, 8}, {15, 10}, {122, 8}, {23, 10}, - {39, 10}, {3, 8}, {209, 12}, {157, 6}, {110, 8}, {70, 6}, {128, 8}, - {27, 10}, {43, 10}, {5, 8}, {193, 6}, {82, 6}, {51, 10}, {9, 8}, - {118, 6}, {17, 8}, {33, 8}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, - {209, 12}, {189, 8}, {152, 7}, {164, 7}, {145, 3}, {201, 8}, {88, 8}, - {106, 8}, {69, 7}, {124, 8}, {75, 7}, {93, 7}, {64, 4}, {209, 12}, - {158, 7}, {112, 8}, {71, 7}, {130, 8}, {29, 10}, {45, 10}, {6, 8}, - {194, 7}, {83, 7}, {53, 10}, {10, 8}, {119, 7}, {18, 8}, {34, 8}, - {1, 7}, {209, 12}, {209, 12}, {173, 7}, {148, 6}, {136, 8}, {79, 7}, - {97, 7}, {66, 6}, {197, 7}, {85, 7}, {57, 10}, {12, 8}, {121, 7}, - {20, 8}, {36, 8}, {2, 7}, {209, 12}, {157, 6}, {109, 7}, {70, 6}, - {127, 7}, {24, 8}, {40, 8}, {4, 7}, {193, 6}, {82, 6}, {48, 8}, - {8, 7}, {118, 6}, {16, 7}, {32, 7}, {0, 6}, {209, 12}, {209, 12}, - {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {145, 3}, {209, 12}, - {209, 12}, {209, 12}, {146, 4}, {209, 12}, {149, 4}, {161, 4}, {64, 4}, - {209, 12}, {160, 9}, {172, 9}, {147, 5}, {184, 9}, {150, 5}, {162, 5}, - {65, 5}, {196, 9}, {153, 5}, {165, 5}, {67, 5}, {177, 5}, {73, 5}, - {91, 5}, {64, 4}, {209, 12}, {209, 12}, {175, 9}, {148, 6}, {142, 10}, - {81, 9}, {99, 9}, {66, 6}, {199, 9}, {87, 9}, {105, 9}, {68, 6}, - {123, 9}, {74, 6}, {92, 6}, {64, 4}, {209, 12}, {157, 6}, {111, 9}, - {70, 6}, {129, 9}, {76, 6}, {94, 6}, {65, 5}, {193, 6}, {82, 6}, - {100, 6}, {67, 5}, {118, 6}, {73, 5}, {91, 5}, {0, 6}, {209, 12}, - {209, 12}, {209, 12}, {209, 12}, {190, 9}, {152, 7}, {164, 7}, {145, 3}, - {202, 9}, {89, 9}, {107, 9}, {69, 7}, {125, 9}, {75, 7}, {93, 7}, - {64, 4}, {209, 12}, {158, 7}, {113, 9}, {71, 7}, {131, 9}, {30, 10}, - {46, 10}, {7, 9}, {194, 7}, {83, 7}, {54, 10}, {11, 9}, {119, 7}, - {19, 9}, {35, 9}, {1, 7}, {209, 12}, {209, 12}, {173, 7}, {148, 6}, - {137, 9}, {79, 7}, {97, 7}, {66, 6}, {197, 7}, {85, 7}, {58, 10}, - {13, 9}, {121, 7}, {21, 9}, {37, 9}, {2, 7}, {209, 12}, {157, 6}, - {109, 7}, {70, 6}, {127, 7}, {25, 9}, {41, 9}, {4, 7}, {193, 6}, - {82, 6}, {49, 9}, {8, 7}, {118, 6}, {16, 7}, {32, 7}, {0, 6}, - {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, - {145, 3}, {205, 9}, {156, 8}, {168, 8}, {146, 4}, {180, 8}, {149, 4}, - {161, 4}, {64, 4}, {209, 12}, {159, 8}, {115, 9}, {72, 8}, {133, 9}, - {78, 8}, {96, 8}, {65, 5}, {195, 8}, {84, 8}, {102, 8}, {67, 5}, - {120, 8}, {73, 5}, {91, 5}, {64, 4}, {209, 12}, {209, 12}, {174, 8}, - {148, 6}, {139, 9}, {80, 8}, {98, 8}, {66, 6}, {198, 8}, {86, 8}, - {60, 10}, {14, 9}, {122, 8}, {22, 9}, {38, 9}, {3, 8}, {209, 12}, - {157, 6}, {110, 8}, {70, 6}, {128, 8}, {26, 9}, {42, 9}, {5, 8}, - {193, 6}, {82, 6}, {50, 9}, {9, 8}, {118, 6}, {17, 8}, {33, 8}, - {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {189, 8}, {152, 7}, - {164, 7}, {145, 3}, {201, 8}, {88, 8}, {106, 8}, {69, 7}, {124, 8}, - {75, 7}, {93, 7}, {64, 4}, {209, 12}, {158, 7}, {112, 8}, {71, 7}, - {130, 8}, {28, 9}, {44, 9}, {6, 8}, {194, 7}, {83, 7}, {52, 9}, - {10, 8}, {119, 7}, {18, 8}, {34, 8}, {1, 7}, {209, 12}, {209, 12}, - {173, 7}, {148, 6}, {136, 8}, {79, 7}, {97, 7}, {66, 6}, {197, 7}, - {85, 7}, {56, 9}, {12, 8}, {121, 7}, {20, 8}, {36, 8}, {2, 7}, - {209, 12}, {157, 6}, {109, 7}, {70, 6}, {127, 7}, {24, 8}, {40, 8}, - {4, 7}, {193, 6}, {82, 6}, {48, 8}, {8, 7}, {118, 6}, {16, 7}, - {32, 7}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, - {209, 12}, {209, 12}, {145, 3}, {209, 12}, {209, 12}, {209, 12}, {146, 4}, - {209, 12}, {149, 4}, {161, 4}, {64, 4}, {209, 12}, {209, 12}, {209, 12}, - {147, 5}, {209, 12}, {150, 5}, {162, 5}, {65, 5}, {209, 12}, {153, 5}, - {165, 5}, {67, 5}, {177, 5}, {73, 5}, {91, 5}, {64, 4}, {209, 12}, - {209, 12}, {209, 12}, {148, 6}, {209, 12}, {151, 6}, {163, 6}, {66, 6}, - {209, 12}, {154, 6}, {166, 6}, {68, 6}, {178, 6}, {74, 6}, {92, 6}, - {64, 4}, {209, 12}, {157, 6}, {169, 6}, {70, 6}, {181, 6}, {76, 6}, - {94, 6}, {65, 5}, {193, 6}, {82, 6}, {100, 6}, {67, 5}, {118, 6}, - {73, 5}, {91, 5}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, - {192, 11}, {152, 7}, {164, 7}, {145, 3}, {204, 11}, {155, 7}, {167, 7}, - {69, 7}, {179, 7}, {75, 7}, {93, 7}, {64, 4}, {209, 12}, {158, 7}, - {170, 7}, {71, 7}, {182, 7}, {77, 7}, {95, 7}, {65, 5}, {194, 7}, - {83, 7}, {101, 7}, {67, 5}, {119, 7}, {73, 5}, {91, 5}, {1, 7}, - {209, 12}, {209, 12}, {173, 7}, {148, 6}, {185, 7}, {79, 7}, {97, 7}, - {66, 6}, {197, 7}, {85, 7}, {103, 7}, {68, 6}, {121, 7}, {74, 6}, - {92, 6}, {2, 7}, {209, 12}, {157, 6}, {109, 7}, {70, 6}, {127, 7}, - {76, 6}, {94, 6}, {4, 7}, {193, 6}, {82, 6}, {100, 6}, {8, 7}, - {118, 6}, {16, 7}, {32, 7}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, - {209, 12}, {209, 12}, {209, 12}, {209, 12}, {145, 3}, {207, 11}, {156, 8}, - {168, 8}, {146, 4}, {180, 8}, {149, 4}, {161, 4}, {64, 4}, {209, 12}, - {159, 8}, {117, 11}, {72, 8}, {135, 11}, {78, 8}, {96, 8}, {65, 5}, - {195, 8}, {84, 8}, {102, 8}, {67, 5}, {120, 8}, {73, 5}, {91, 5}, - {64, 4}, {209, 12}, {209, 12}, {174, 8}, {148, 6}, {141, 11}, {80, 8}, - {98, 8}, {66, 6}, {198, 8}, {86, 8}, {104, 8}, {68, 6}, {122, 8}, - {74, 6}, {92, 6}, {3, 8}, {209, 12}, {157, 6}, {110, 8}, {70, 6}, - {128, 8}, {76, 6}, {94, 6}, {5, 8}, {193, 6}, {82, 6}, {100, 6}, - {9, 8}, {118, 6}, {17, 8}, {33, 8}, {0, 6}, {209, 12}, {209, 12}, - {209, 12}, {209, 12}, {189, 8}, {152, 7}, {164, 7}, {145, 3}, {201, 8}, - {88, 8}, {106, 8}, {69, 7}, {124, 8}, {75, 7}, {93, 7}, {64, 4}, - {209, 12}, {158, 7}, {112, 8}, {71, 7}, {130, 8}, {77, 7}, {95, 7}, - {6, 8}, {194, 7}, {83, 7}, {101, 7}, {10, 8}, {119, 7}, {18, 8}, - {34, 8}, {1, 7}, {209, 12}, {209, 12}, {173, 7}, {148, 6}, {136, 8}, - {79, 7}, {97, 7}, {66, 6}, {197, 7}, {85, 7}, {103, 7}, {12, 8}, - {121, 7}, {20, 8}, {36, 8}, {2, 7}, {209, 12}, {157, 6}, {109, 7}, - {70, 6}, {127, 7}, {24, 8}, {40, 8}, {4, 7}, {193, 6}, {82, 6}, - {48, 8}, {8, 7}, {118, 6}, {16, 7}, {32, 7}, {0, 6}, {209, 12}, - {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {145, 3}, - {209, 12}, {209, 12}, {209, 12}, {146, 4}, {209, 12}, {149, 4}, {161, 4}, - {64, 4}, {209, 12}, {160, 9}, {172, 9}, {147, 5}, {184, 9}, {150, 5}, - {162, 5}, {65, 5}, {196, 9}, {153, 5}, {165, 5}, {67, 5}, {177, 5}, - {73, 5}, {91, 5}, {64, 4}, {209, 12}, {209, 12}, {175, 9}, {148, 6}, - {143, 11}, {81, 9}, {99, 9}, {66, 6}, {199, 9}, {87, 9}, {105, 9}, - {68, 6}, {123, 9}, {74, 6}, {92, 6}, {64, 4}, {209, 12}, {157, 6}, - {111, 9}, {70, 6}, {129, 9}, {76, 6}, {94, 6}, {65, 5}, {193, 6}, - {82, 6}, {100, 6}, {67, 5}, {118, 6}, {73, 5}, {91, 5}, {0, 6}, - {209, 12}, {209, 12}, {209, 12}, {209, 12}, {190, 9}, {152, 7}, {164, 7}, - {145, 3}, {202, 9}, {89, 9}, {107, 9}, {69, 7}, {125, 9}, {75, 7}, - {93, 7}, {64, 4}, {209, 12}, {158, 7}, {113, 9}, {71, 7}, {131, 9}, - {31, 11}, {47, 11}, {7, 9}, {194, 7}, {83, 7}, {55, 11}, {11, 9}, - {119, 7}, {19, 9}, {35, 9}, {1, 7}, {209, 12}, {209, 12}, {173, 7}, - {148, 6}, {137, 9}, {79, 7}, {97, 7}, {66, 6}, {197, 7}, {85, 7}, - {59, 11}, {13, 9}, {121, 7}, {21, 9}, {37, 9}, {2, 7}, {209, 12}, - {157, 6}, {109, 7}, {70, 6}, {127, 7}, {25, 9}, {41, 9}, {4, 7}, - {193, 6}, {82, 6}, {49, 9}, {8, 7}, {118, 6}, {16, 7}, {32, 7}, - {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, - {209, 12}, {145, 3}, {205, 9}, {156, 8}, {168, 8}, {146, 4}, {180, 8}, - {149, 4}, {161, 4}, {64, 4}, {209, 12}, {159, 8}, {115, 9}, {72, 8}, - {133, 9}, {78, 8}, {96, 8}, {65, 5}, {195, 8}, {84, 8}, {102, 8}, - {67, 5}, {120, 8}, {73, 5}, {91, 5}, {64, 4}, {209, 12}, {209, 12}, - {174, 8}, {148, 6}, {139, 9}, {80, 8}, {98, 8}, {66, 6}, {198, 8}, - {86, 8}, {61, 11}, {14, 9}, {122, 8}, {22, 9}, {38, 9}, {3, 8}, - {209, 12}, {157, 6}, {110, 8}, {70, 6}, {128, 8}, {26, 9}, {42, 9}, - {5, 8}, {193, 6}, {82, 6}, {50, 9}, {9, 8}, {118, 6}, {17, 8}, - {33, 8}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {189, 8}, - {152, 7}, {164, 7}, {145, 3}, {201, 8}, {88, 8}, {106, 8}, {69, 7}, - {124, 8}, {75, 7}, {93, 7}, {64, 4}, {209, 12}, {158, 7}, {112, 8}, - {71, 7}, {130, 8}, {28, 9}, {44, 9}, {6, 8}, {194, 7}, {83, 7}, - {52, 9}, {10, 8}, {119, 7}, {18, 8}, {34, 8}, {1, 7}, {209, 12}, - {209, 12}, {173, 7}, {148, 6}, {136, 8}, {79, 7}, {97, 7}, {66, 6}, - {197, 7}, {85, 7}, {56, 9}, {12, 8}, {121, 7}, {20, 8}, {36, 8}, - {2, 7}, {209, 12}, {157, 6}, {109, 7}, {70, 6}, {127, 7}, {24, 8}, - {40, 8}, {4, 7}, {193, 6}, {82, 6}, {48, 8}, {8, 7}, {118, 6}, - {16, 7}, {32, 7}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, - {209, 12}, {209, 12}, {209, 12}, {145, 3}, {209, 12}, {209, 12}, {209, 12}, - {146, 4}, {209, 12}, {149, 4}, {161, 4}, {64, 4}, {209, 12}, {209, 12}, - {209, 12}, {147, 5}, {209, 12}, {150, 5}, {162, 5}, {65, 5}, {209, 12}, - {153, 5}, {165, 5}, {67, 5}, {177, 5}, {73, 5}, {91, 5}, {64, 4}, - {209, 12}, {209, 12}, {176, 10}, {148, 6}, {188, 10}, {151, 6}, {163, 6}, - {66, 6}, {200, 10}, {154, 6}, {166, 6}, {68, 6}, {178, 6}, {74, 6}, - {92, 6}, {64, 4}, {209, 12}, {157, 6}, {169, 6}, {70, 6}, {181, 6}, - {76, 6}, {94, 6}, {65, 5}, {193, 6}, {82, 6}, {100, 6}, {67, 5}, - {118, 6}, {73, 5}, {91, 5}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, - {209, 12}, {191, 10}, {152, 7}, {164, 7}, {145, 3}, {203, 10}, {90, 10}, - {108, 10}, {69, 7}, {126, 10}, {75, 7}, {93, 7}, {64, 4}, {209, 12}, - {158, 7}, {114, 10}, {71, 7}, {132, 10}, {77, 7}, {95, 7}, {65, 5}, - {194, 7}, {83, 7}, {101, 7}, {67, 5}, {119, 7}, {73, 5}, {91, 5}, - {1, 7}, {209, 12}, {209, 12}, {173, 7}, {148, 6}, {138, 10}, {79, 7}, - {97, 7}, {66, 6}, {197, 7}, {85, 7}, {103, 7}, {68, 6}, {121, 7}, - {74, 6}, {92, 6}, {2, 7}, {209, 12}, {157, 6}, {109, 7}, {70, 6}, - {127, 7}, {76, 6}, {94, 6}, {4, 7}, {193, 6}, {82, 6}, {100, 6}, - {8, 7}, {118, 6}, {16, 7}, {32, 7}, {0, 6}, {209, 12}, {209, 12}, - {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {145, 3}, {206, 10}, - {156, 8}, {168, 8}, {146, 4}, {180, 8}, {149, 4}, {161, 4}, {64, 4}, - {209, 12}, {159, 8}, {116, 10}, {72, 8}, {134, 10}, {78, 8}, {96, 8}, - {65, 5}, {195, 8}, {84, 8}, {102, 8}, {67, 5}, {120, 8}, {73, 5}, - {91, 5}, {64, 4}, {209, 12}, {209, 12}, {174, 8}, {148, 6}, {140, 10}, - {80, 8}, {98, 8}, {66, 6}, {198, 8}, {86, 8}, {62, 11}, {15, 10}, - {122, 8}, {23, 10}, {39, 10}, {3, 8}, {209, 12}, {157, 6}, {110, 8}, - {70, 6}, {128, 8}, {27, 10}, {43, 10}, {5, 8}, {193, 6}, {82, 6}, - {51, 10}, {9, 8}, {118, 6}, {17, 8}, {33, 8}, {0, 6}, {209, 12}, - {209, 12}, {209, 12}, {209, 12}, {189, 8}, {152, 7}, {164, 7}, {145, 3}, - {201, 8}, {88, 8}, {106, 8}, {69, 7}, {124, 8}, {75, 7}, {93, 7}, - {64, 4}, {209, 12}, {158, 7}, {112, 8}, {71, 7}, {130, 8}, {29, 10}, - {45, 10}, {6, 8}, {194, 7}, {83, 7}, {53, 10}, {10, 8}, {119, 7}, - {18, 8}, {34, 8}, {1, 7}, {209, 12}, {209, 12}, {173, 7}, {148, 6}, - {136, 8}, {79, 7}, {97, 7}, {66, 6}, {197, 7}, {85, 7}, {57, 10}, - {12, 8}, {121, 7}, {20, 8}, {36, 8}, {2, 7}, {209, 12}, {157, 6}, - {109, 7}, {70, 6}, {127, 7}, {24, 8}, {40, 8}, {4, 7}, {193, 6}, - {82, 6}, {48, 8}, {8, 7}, {118, 6}, {16, 7}, {32, 7}, {0, 6}, - {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, - {145, 3}, {209, 12}, {209, 12}, {209, 12}, {146, 4}, {209, 12}, {149, 4}, - {161, 4}, {64, 4}, {209, 12}, {160, 9}, {172, 9}, {147, 5}, {184, 9}, - {150, 5}, {162, 5}, {65, 5}, {196, 9}, {153, 5}, {165, 5}, {67, 5}, - {177, 5}, {73, 5}, {91, 5}, {64, 4}, {209, 12}, {209, 12}, {175, 9}, - {148, 6}, {142, 10}, {81, 9}, {99, 9}, {66, 6}, {199, 9}, {87, 9}, - {105, 9}, {68, 6}, {123, 9}, {74, 6}, {92, 6}, {64, 4}, {209, 12}, - {157, 6}, {111, 9}, {70, 6}, {129, 9}, {76, 6}, {94, 6}, {65, 5}, - {193, 6}, {82, 6}, {100, 6}, {67, 5}, {118, 6}, {73, 5}, {91, 5}, - {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {190, 9}, {152, 7}, - {164, 7}, {145, 3}, {202, 9}, {89, 9}, {107, 9}, {69, 7}, {125, 9}, - {75, 7}, {93, 7}, {64, 4}, {209, 12}, {158, 7}, {113, 9}, {71, 7}, - {131, 9}, {30, 10}, {46, 10}, {7, 9}, {194, 7}, {83, 7}, {54, 10}, - {11, 9}, {119, 7}, {19, 9}, {35, 9}, {1, 7}, {209, 12}, {209, 12}, - {173, 7}, {148, 6}, {137, 9}, {79, 7}, {97, 7}, {66, 6}, {197, 7}, - {85, 7}, {58, 10}, {13, 9}, {121, 7}, {21, 9}, {37, 9}, {2, 7}, - {209, 12}, {157, 6}, {109, 7}, {70, 6}, {127, 7}, {25, 9}, {41, 9}, - {4, 7}, {193, 6}, {82, 6}, {49, 9}, {8, 7}, {118, 6}, {16, 7}, - {32, 7}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, - {209, 12}, {209, 12}, {145, 3}, {205, 9}, {156, 8}, {168, 8}, {146, 4}, - {180, 8}, {149, 4}, {161, 4}, {64, 4}, {209, 12}, {159, 8}, {115, 9}, - {72, 8}, {133, 9}, {78, 8}, {96, 8}, {65, 5}, {195, 8}, {84, 8}, - {102, 8}, {67, 5}, {120, 8}, {73, 5}, {91, 5}, {64, 4}, {209, 12}, - {209, 12}, {174, 8}, {148, 6}, {139, 9}, {80, 8}, {98, 8}, {66, 6}, - {198, 8}, {86, 8}, {60, 10}, {14, 9}, {122, 8}, {22, 9}, {38, 9}, - {3, 8}, {209, 12}, {157, 6}, {110, 8}, {70, 6}, {128, 8}, {26, 9}, - {42, 9}, {5, 8}, {193, 6}, {82, 6}, {50, 9}, {9, 8}, {118, 6}, - {17, 8}, {33, 8}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, - {189, 8}, {152, 7}, {164, 7}, {145, 3}, {201, 8}, {88, 8}, {106, 8}, - {69, 7}, {124, 8}, {75, 7}, {93, 7}, {64, 4}, {209, 12}, {158, 7}, - {112, 8}, {71, 7}, {130, 8}, {28, 9}, {44, 9}, {6, 8}, {194, 7}, - {83, 7}, {52, 9}, {10, 8}, {119, 7}, {18, 8}, {34, 8}, {1, 7}, - {209, 12}, {209, 12}, {173, 7}, {148, 6}, {136, 8}, {79, 7}, {97, 7}, - {66, 6}, {197, 7}, {85, 7}, {56, 9}, {12, 8}, {121, 7}, {20, 8}, - {36, 8}, {2, 7}, {209, 12}, {157, 6}, {109, 7}, {70, 6}, {127, 7}, - {24, 8}, {40, 8}, {4, 7}, {193, 6}, {82, 6}, {48, 8}, {8, 7}, - {118, 6}, {16, 7}, {32, 7}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, - {209, 12}, {209, 12}, {209, 12}, {209, 12}, {145, 3}, {209, 12}, {209, 12}, - {209, 12}, {146, 4}, {209, 12}, {149, 4}, {161, 4}, {64, 4}, {209, 12}, - {209, 12}, {209, 12}, {147, 5}, {209, 12}, {150, 5}, {162, 5}, {65, 5}, - {209, 12}, {153, 5}, {165, 5}, {67, 5}, {177, 5}, {73, 5}, {91, 5}, - {64, 4}, {209, 12}, {209, 12}, {209, 12}, {148, 6}, {209, 12}, {151, 6}, - {163, 6}, {66, 6}, {209, 12}, {154, 6}, {166, 6}, {68, 6}, {178, 6}, - {74, 6}, {92, 6}, {64, 4}, {209, 12}, {157, 6}, {169, 6}, {70, 6}, - {181, 6}, {76, 6}, {94, 6}, {65, 5}, {193, 6}, {82, 6}, {100, 6}, - {67, 5}, {118, 6}, {73, 5}, {91, 5}, {0, 6}, {209, 12}, {209, 12}, - {209, 12}, {209, 12}, {209, 12}, {152, 7}, {164, 7}, {145, 3}, {209, 12}, - {155, 7}, {167, 7}, {69, 7}, {179, 7}, {75, 7}, {93, 7}, {64, 4}, - {209, 12}, {158, 7}, {170, 7}, {71, 7}, {182, 7}, {77, 7}, {95, 7}, - {65, 5}, {194, 7}, {83, 7}, {101, 7}, {67, 5}, {119, 7}, {73, 5}, - {91, 5}, {1, 7}, {209, 12}, {209, 12}, {173, 7}, {148, 6}, {185, 7}, - {79, 7}, {97, 7}, {66, 6}, {197, 7}, {85, 7}, {103, 7}, {68, 6}, - {121, 7}, {74, 6}, {92, 6}, {2, 7}, {209, 12}, {157, 6}, {109, 7}, - {70, 6}, {127, 7}, {76, 6}, {94, 6}, {4, 7}, {193, 6}, {82, 6}, - {100, 6}, {8, 7}, {118, 6}, {16, 7}, {32, 7}, {0, 6}, {209, 12}, - {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {145, 3}, - {208, 12}, {156, 8}, {168, 8}, {146, 4}, {180, 8}, {149, 4}, {161, 4}, - {64, 4}, {209, 12}, {159, 8}, {171, 8}, {72, 8}, {183, 8}, {78, 8}, - {96, 8}, {65, 5}, {195, 8}, {84, 8}, {102, 8}, {67, 5}, {120, 8}, - {73, 5}, {91, 5}, {64, 4}, {209, 12}, {209, 12}, {174, 8}, {148, 6}, - {186, 8}, {80, 8}, {98, 8}, {66, 6}, {198, 8}, {86, 8}, {104, 8}, - {68, 6}, {122, 8}, {74, 6}, {92, 6}, {3, 8}, {209, 12}, {157, 6}, - {110, 8}, {70, 6}, {128, 8}, {76, 6}, {94, 6}, {5, 8}, {193, 6}, - {82, 6}, {100, 6}, {9, 8}, {118, 6}, {17, 8}, {33, 8}, {0, 6}, - {209, 12}, {209, 12}, {209, 12}, {209, 12}, {189, 8}, {152, 7}, {164, 7}, - {145, 3}, {201, 8}, {88, 8}, {106, 8}, {69, 7}, {124, 8}, {75, 7}, - {93, 7}, {64, 4}, {209, 12}, {158, 7}, {112, 8}, {71, 7}, {130, 8}, - {77, 7}, {95, 7}, {6, 8}, {194, 7}, {83, 7}, {101, 7}, {10, 8}, - {119, 7}, {18, 8}, {34, 8}, {1, 7}, {209, 12}, {209, 12}, {173, 7}, - {148, 6}, {136, 8}, {79, 7}, {97, 7}, {66, 6}, {197, 7}, {85, 7}, - {103, 7}, {12, 8}, {121, 7}, {20, 8}, {36, 8}, {2, 7}, {209, 12}, - {157, 6}, {109, 7}, {70, 6}, {127, 7}, {24, 8}, {40, 8}, {4, 7}, - {193, 6}, {82, 6}, {48, 8}, {8, 7}, {118, 6}, {16, 7}, {32, 7}, - {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, - {209, 12}, {145, 3}, {209, 12}, {209, 12}, {209, 12}, {146, 4}, {209, 12}, - {149, 4}, {161, 4}, {64, 4}, {209, 12}, {160, 9}, {172, 9}, {147, 5}, - {184, 9}, {150, 5}, {162, 5}, {65, 5}, {196, 9}, {153, 5}, {165, 5}, - {67, 5}, {177, 5}, {73, 5}, {91, 5}, {64, 4}, {209, 12}, {209, 12}, - {175, 9}, {148, 6}, {144, 12}, {81, 9}, {99, 9}, {66, 6}, {199, 9}, - {87, 9}, {105, 9}, {68, 6}, {123, 9}, {74, 6}, {92, 6}, {64, 4}, - {209, 12}, {157, 6}, {111, 9}, {70, 6}, {129, 9}, {76, 6}, {94, 6}, - {65, 5}, {193, 6}, {82, 6}, {100, 6}, {67, 5}, {118, 6}, {73, 5}, - {91, 5}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {190, 9}, - {152, 7}, {164, 7}, {145, 3}, {202, 9}, {89, 9}, {107, 9}, {69, 7}, - {125, 9}, {75, 7}, {93, 7}, {64, 4}, {209, 12}, {158, 7}, {113, 9}, - {71, 7}, {131, 9}, {77, 7}, {95, 7}, {7, 9}, {194, 7}, {83, 7}, - {101, 7}, {11, 9}, {119, 7}, {19, 9}, {35, 9}, {1, 7}, {209, 12}, - {209, 12}, {173, 7}, {148, 6}, {137, 9}, {79, 7}, {97, 7}, {66, 6}, - {197, 7}, {85, 7}, {103, 7}, {13, 9}, {121, 7}, {21, 9}, {37, 9}, - {2, 7}, {209, 12}, {157, 6}, {109, 7}, {70, 6}, {127, 7}, {25, 9}, - {41, 9}, {4, 7}, {193, 6}, {82, 6}, {49, 9}, {8, 7}, {118, 6}, - {16, 7}, {32, 7}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, - {209, 12}, {209, 12}, {209, 12}, {145, 3}, {205, 9}, {156, 8}, {168, 8}, - {146, 4}, {180, 8}, {149, 4}, {161, 4}, {64, 4}, {209, 12}, {159, 8}, - {115, 9}, {72, 8}, {133, 9}, {78, 8}, {96, 8}, {65, 5}, {195, 8}, - {84, 8}, {102, 8}, {67, 5}, {120, 8}, {73, 5}, {91, 5}, {64, 4}, - {209, 12}, {209, 12}, {174, 8}, {148, 6}, {139, 9}, {80, 8}, {98, 8}, - {66, 6}, {198, 8}, {86, 8}, {104, 8}, {14, 9}, {122, 8}, {22, 9}, - {38, 9}, {3, 8}, {209, 12}, {157, 6}, {110, 8}, {70, 6}, {128, 8}, - {26, 9}, {42, 9}, {5, 8}, {193, 6}, {82, 6}, {50, 9}, {9, 8}, - {118, 6}, {17, 8}, {33, 8}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, - {209, 12}, {189, 8}, {152, 7}, {164, 7}, {145, 3}, {201, 8}, {88, 8}, - {106, 8}, {69, 7}, {124, 8}, {75, 7}, {93, 7}, {64, 4}, {209, 12}, - {158, 7}, {112, 8}, {71, 7}, {130, 8}, {28, 9}, {44, 9}, {6, 8}, - {194, 7}, {83, 7}, {52, 9}, {10, 8}, {119, 7}, {18, 8}, {34, 8}, - {1, 7}, {209, 12}, {209, 12}, {173, 7}, {148, 6}, {136, 8}, {79, 7}, - {97, 7}, {66, 6}, {197, 7}, {85, 7}, {56, 9}, {12, 8}, {121, 7}, - {20, 8}, {36, 8}, {2, 7}, {209, 12}, {157, 6}, {109, 7}, {70, 6}, - {127, 7}, {24, 8}, {40, 8}, {4, 7}, {193, 6}, {82, 6}, {48, 8}, - {8, 7}, {118, 6}, {16, 7}, {32, 7}, {0, 6}, {209, 12}, {209, 12}, - {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {145, 3}, {209, 12}, - {209, 12}, {209, 12}, {146, 4}, {209, 12}, {149, 4}, {161, 4}, {64, 4}, - {209, 12}, {209, 12}, {209, 12}, {147, 5}, {209, 12}, {150, 5}, {162, 5}, - {65, 5}, {209, 12}, {153, 5}, {165, 5}, {67, 5}, {177, 5}, {73, 5}, - {91, 5}, {64, 4}, {209, 12}, {209, 12}, {176, 10}, {148, 6}, {188, 10}, - {151, 6}, {163, 6}, {66, 6}, {200, 10}, {154, 6}, {166, 6}, {68, 6}, - {178, 6}, {74, 6}, {92, 6}, {64, 4}, {209, 12}, {157, 6}, {169, 6}, - {70, 6}, {181, 6}, {76, 6}, {94, 6}, {65, 5}, {193, 6}, {82, 6}, - {100, 6}, {67, 5}, {118, 6}, {73, 5}, {91, 5}, {0, 6}, {209, 12}, - {209, 12}, {209, 12}, {209, 12}, {191, 10}, {152, 7}, {164, 7}, {145, 3}, - {203, 10}, {90, 10}, {108, 10}, {69, 7}, {126, 10}, {75, 7}, {93, 7}, - {64, 4}, {209, 12}, {158, 7}, {114, 10}, {71, 7}, {132, 10}, {77, 7}, - {95, 7}, {65, 5}, {194, 7}, {83, 7}, {101, 7}, {67, 5}, {119, 7}, - {73, 5}, {91, 5}, {1, 7}, {209, 12}, {209, 12}, {173, 7}, {148, 6}, - {138, 10}, {79, 7}, {97, 7}, {66, 6}, {197, 7}, {85, 7}, {103, 7}, - {68, 6}, {121, 7}, {74, 6}, {92, 6}, {2, 7}, {209, 12}, {157, 6}, - {109, 7}, {70, 6}, {127, 7}, {76, 6}, {94, 6}, {4, 7}, {193, 6}, - {82, 6}, {100, 6}, {8, 7}, {118, 6}, {16, 7}, {32, 7}, {0, 6}, - {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, - {145, 3}, {206, 10}, {156, 8}, {168, 8}, {146, 4}, {180, 8}, {149, 4}, - {161, 4}, {64, 4}, {209, 12}, {159, 8}, {116, 10}, {72, 8}, {134, 10}, - {78, 8}, {96, 8}, {65, 5}, {195, 8}, {84, 8}, {102, 8}, {67, 5}, - {120, 8}, {73, 5}, {91, 5}, {64, 4}, {209, 12}, {209, 12}, {174, 8}, - {148, 6}, {140, 10}, {80, 8}, {98, 8}, {66, 6}, {198, 8}, {86, 8}, - {63, 12}, {15, 10}, {122, 8}, {23, 10}, {39, 10}, {3, 8}, {209, 12}, - {157, 6}, {110, 8}, {70, 6}, {128, 8}, {27, 10}, {43, 10}, {5, 8}, - {193, 6}, {82, 6}, {51, 10}, {9, 8}, {118, 6}, {17, 8}, {33, 8}, - {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {189, 8}, {152, 7}, - {164, 7}, {145, 3}, {201, 8}, {88, 8}, {106, 8}, {69, 7}, {124, 8}, - {75, 7}, {93, 7}, {64, 4}, {209, 12}, {158, 7}, {112, 8}, {71, 7}, - {130, 8}, {29, 10}, {45, 10}, {6, 8}, {194, 7}, {83, 7}, {53, 10}, - {10, 8}, {119, 7}, {18, 8}, {34, 8}, {1, 7}, {209, 12}, {209, 12}, - {173, 7}, {148, 6}, {136, 8}, {79, 7}, {97, 7}, {66, 6}, {197, 7}, - {85, 7}, {57, 10}, {12, 8}, {121, 7}, {20, 8}, {36, 8}, {2, 7}, - {209, 12}, {157, 6}, {109, 7}, {70, 6}, {127, 7}, {24, 8}, {40, 8}, - {4, 7}, {193, 6}, {82, 6}, {48, 8}, {8, 7}, {118, 6}, {16, 7}, - {32, 7}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, - {209, 12}, {209, 12}, {145, 3}, {209, 12}, {209, 12}, {209, 12}, {146, 4}, - {209, 12}, {149, 4}, {161, 4}, {64, 4}, {209, 12}, {160, 9}, {172, 9}, - {147, 5}, {184, 9}, {150, 5}, {162, 5}, {65, 5}, {196, 9}, {153, 5}, - {165, 5}, {67, 5}, {177, 5}, {73, 5}, {91, 5}, {64, 4}, {209, 12}, - {209, 12}, {175, 9}, {148, 6}, {142, 10}, {81, 9}, {99, 9}, {66, 6}, - {199, 9}, {87, 9}, {105, 9}, {68, 6}, {123, 9}, {74, 6}, {92, 6}, - {64, 4}, {209, 12}, {157, 6}, {111, 9}, {70, 6}, {129, 9}, {76, 6}, - {94, 6}, {65, 5}, {193, 6}, {82, 6}, {100, 6}, {67, 5}, {118, 6}, - {73, 5}, {91, 5}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, - {190, 9}, {152, 7}, {164, 7}, {145, 3}, {202, 9}, {89, 9}, {107, 9}, - {69, 7}, {125, 9}, {75, 7}, {93, 7}, {64, 4}, {209, 12}, {158, 7}, - {113, 9}, {71, 7}, {131, 9}, {30, 10}, {46, 10}, {7, 9}, {194, 7}, - {83, 7}, {54, 10}, {11, 9}, {119, 7}, {19, 9}, {35, 9}, {1, 7}, - {209, 12}, {209, 12}, {173, 7}, {148, 6}, {137, 9}, {79, 7}, {97, 7}, - {66, 6}, {197, 7}, {85, 7}, {58, 10}, {13, 9}, {121, 7}, {21, 9}, - {37, 9}, {2, 7}, {209, 12}, {157, 6}, {109, 7}, {70, 6}, {127, 7}, - {25, 9}, {41, 9}, {4, 7}, {193, 6}, {82, 6}, {49, 9}, {8, 7}, - {118, 6}, {16, 7}, {32, 7}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, - {209, 12}, {209, 12}, {209, 12}, {209, 12}, {145, 3}, {205, 9}, {156, 8}, - {168, 8}, {146, 4}, {180, 8}, {149, 4}, {161, 4}, {64, 4}, {209, 12}, - {159, 8}, {115, 9}, {72, 8}, {133, 9}, {78, 8}, {96, 8}, {65, 5}, - {195, 8}, {84, 8}, {102, 8}, {67, 5}, {120, 8}, {73, 5}, {91, 5}, - {64, 4}, {209, 12}, {209, 12}, {174, 8}, {148, 6}, {139, 9}, {80, 8}, - {98, 8}, {66, 6}, {198, 8}, {86, 8}, {60, 10}, {14, 9}, {122, 8}, - {22, 9}, {38, 9}, {3, 8}, {209, 12}, {157, 6}, {110, 8}, {70, 6}, - {128, 8}, {26, 9}, {42, 9}, {5, 8}, {193, 6}, {82, 6}, {50, 9}, - {9, 8}, {118, 6}, {17, 8}, {33, 8}, {0, 6}, {209, 12}, {209, 12}, - {209, 12}, {209, 12}, {189, 8}, {152, 7}, {164, 7}, {145, 3}, {201, 8}, - {88, 8}, {106, 8}, {69, 7}, {124, 8}, {75, 7}, {93, 7}, {64, 4}, - {209, 12}, {158, 7}, {112, 8}, {71, 7}, {130, 8}, {28, 9}, {44, 9}, - {6, 8}, {194, 7}, {83, 7}, {52, 9}, {10, 8}, {119, 7}, {18, 8}, - {34, 8}, {1, 7}, {209, 12}, {209, 12}, {173, 7}, {148, 6}, {136, 8}, - {79, 7}, {97, 7}, {66, 6}, {197, 7}, {85, 7}, {56, 9}, {12, 8}, - {121, 7}, {20, 8}, {36, 8}, {2, 7}, {209, 12}, {157, 6}, {109, 7}, - {70, 6}, {127, 7}, {24, 8}, {40, 8}, {4, 7}, {193, 6}, {82, 6}, - {48, 8}, {8, 7}, {118, 6}, {16, 7}, {32, 7}, {0, 6}, {209, 12}, - {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {145, 3}, - {209, 12}, {209, 12}, {209, 12}, {146, 4}, {209, 12}, {149, 4}, {161, 4}, - {64, 4}, {209, 12}, {209, 12}, {209, 12}, {147, 5}, {209, 12}, {150, 5}, - {162, 5}, {65, 5}, {209, 12}, {153, 5}, {165, 5}, {67, 5}, {177, 5}, - {73, 5}, {91, 5}, {64, 4}, {209, 12}, {209, 12}, {209, 12}, {148, 6}, - {209, 12}, {151, 6}, {163, 6}, {66, 6}, {209, 12}, {154, 6}, {166, 6}, - {68, 6}, {178, 6}, {74, 6}, {92, 6}, {64, 4}, {209, 12}, {157, 6}, - {169, 6}, {70, 6}, {181, 6}, {76, 6}, {94, 6}, {65, 5}, {193, 6}, - {82, 6}, {100, 6}, {67, 5}, {118, 6}, {73, 5}, {91, 5}, {0, 6}, - {209, 12}, {209, 12}, {209, 12}, {209, 12}, {192, 11}, {152, 7}, {164, 7}, - {145, 3}, {204, 11}, {155, 7}, {167, 7}, {69, 7}, {179, 7}, {75, 7}, - {93, 7}, {64, 4}, {209, 12}, {158, 7}, {170, 7}, {71, 7}, {182, 7}, - {77, 7}, {95, 7}, {65, 5}, {194, 7}, {83, 7}, {101, 7}, {67, 5}, - {119, 7}, {73, 5}, {91, 5}, {1, 7}, {209, 12}, {209, 12}, {173, 7}, - {148, 6}, {185, 7}, {79, 7}, {97, 7}, {66, 6}, {197, 7}, {85, 7}, - {103, 7}, {68, 6}, {121, 7}, {74, 6}, {92, 6}, {2, 7}, {209, 12}, - {157, 6}, {109, 7}, {70, 6}, {127, 7}, {76, 6}, {94, 6}, {4, 7}, - {193, 6}, {82, 6}, {100, 6}, {8, 7}, {118, 6}, {16, 7}, {32, 7}, - {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, - {209, 12}, {145, 3}, {207, 11}, {156, 8}, {168, 8}, {146, 4}, {180, 8}, - {149, 4}, {161, 4}, {64, 4}, {209, 12}, {159, 8}, {117, 11}, {72, 8}, - {135, 11}, {78, 8}, {96, 8}, {65, 5}, {195, 8}, {84, 8}, {102, 8}, - {67, 5}, {120, 8}, {73, 5}, {91, 5}, {64, 4}, {209, 12}, {209, 12}, - {174, 8}, {148, 6}, {141, 11}, {80, 8}, {98, 8}, {66, 6}, {198, 8}, - {86, 8}, {104, 8}, {68, 6}, {122, 8}, {74, 6}, {92, 6}, {3, 8}, - {209, 12}, {157, 6}, {110, 8}, {70, 6}, {128, 8}, {76, 6}, {94, 6}, - {5, 8}, {193, 6}, {82, 6}, {100, 6}, {9, 8}, {118, 6}, {17, 8}, - {33, 8}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {189, 8}, - {152, 7}, {164, 7}, {145, 3}, {201, 8}, {88, 8}, {106, 8}, {69, 7}, - {124, 8}, {75, 7}, {93, 7}, {64, 4}, {209, 12}, {158, 7}, {112, 8}, - {71, 7}, {130, 8}, {77, 7}, {95, 7}, {6, 8}, {194, 7}, {83, 7}, - {101, 7}, {10, 8}, {119, 7}, {18, 8}, {34, 8}, {1, 7}, {209, 12}, - {209, 12}, {173, 7}, {148, 6}, {136, 8}, {79, 7}, {97, 7}, {66, 6}, - {197, 7}, {85, 7}, {103, 7}, {12, 8}, {121, 7}, {20, 8}, {36, 8}, - {2, 7}, {209, 12}, {157, 6}, {109, 7}, {70, 6}, {127, 7}, {24, 8}, - {40, 8}, {4, 7}, {193, 6}, {82, 6}, {48, 8}, {8, 7}, {118, 6}, - {16, 7}, {32, 7}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, - {209, 12}, {209, 12}, {209, 12}, {145, 3}, {209, 12}, {209, 12}, {209, 12}, - {146, 4}, {209, 12}, {149, 4}, {161, 4}, {64, 4}, {209, 12}, {160, 9}, - {172, 9}, {147, 5}, {184, 9}, {150, 5}, {162, 5}, {65, 5}, {196, 9}, - {153, 5}, {165, 5}, {67, 5}, {177, 5}, {73, 5}, {91, 5}, {64, 4}, - {209, 12}, {209, 12}, {175, 9}, {148, 6}, {143, 11}, {81, 9}, {99, 9}, - {66, 6}, {199, 9}, {87, 9}, {105, 9}, {68, 6}, {123, 9}, {74, 6}, - {92, 6}, {64, 4}, {209, 12}, {157, 6}, {111, 9}, {70, 6}, {129, 9}, - {76, 6}, {94, 6}, {65, 5}, {193, 6}, {82, 6}, {100, 6}, {67, 5}, - {118, 6}, {73, 5}, {91, 5}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, - {209, 12}, {190, 9}, {152, 7}, {164, 7}, {145, 3}, {202, 9}, {89, 9}, - {107, 9}, {69, 7}, {125, 9}, {75, 7}, {93, 7}, {64, 4}, {209, 12}, - {158, 7}, {113, 9}, {71, 7}, {131, 9}, {31, 11}, {47, 11}, {7, 9}, - {194, 7}, {83, 7}, {55, 11}, {11, 9}, {119, 7}, {19, 9}, {35, 9}, - {1, 7}, {209, 12}, {209, 12}, {173, 7}, {148, 6}, {137, 9}, {79, 7}, - {97, 7}, {66, 6}, {197, 7}, {85, 7}, {59, 11}, {13, 9}, {121, 7}, - {21, 9}, {37, 9}, {2, 7}, {209, 12}, {157, 6}, {109, 7}, {70, 6}, - {127, 7}, {25, 9}, {41, 9}, {4, 7}, {193, 6}, {82, 6}, {49, 9}, - {8, 7}, {118, 6}, {16, 7}, {32, 7}, {0, 6}, {209, 12}, {209, 12}, - {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {145, 3}, {205, 9}, - {156, 8}, {168, 8}, {146, 4}, {180, 8}, {149, 4}, {161, 4}, {64, 4}, - {209, 12}, {159, 8}, {115, 9}, {72, 8}, {133, 9}, {78, 8}, {96, 8}, - {65, 5}, {195, 8}, {84, 8}, {102, 8}, {67, 5}, {120, 8}, {73, 5}, - {91, 5}, {64, 4}, {209, 12}, {209, 12}, {174, 8}, {148, 6}, {139, 9}, - {80, 8}, {98, 8}, {66, 6}, {198, 8}, {86, 8}, {61, 11}, {14, 9}, - {122, 8}, {22, 9}, {38, 9}, {3, 8}, {209, 12}, {157, 6}, {110, 8}, - {70, 6}, {128, 8}, {26, 9}, {42, 9}, {5, 8}, {193, 6}, {82, 6}, - {50, 9}, {9, 8}, {118, 6}, {17, 8}, {33, 8}, {0, 6}, {209, 12}, - {209, 12}, {209, 12}, {209, 12}, {189, 8}, {152, 7}, {164, 7}, {145, 3}, - {201, 8}, {88, 8}, {106, 8}, {69, 7}, {124, 8}, {75, 7}, {93, 7}, - {64, 4}, {209, 12}, {158, 7}, {112, 8}, {71, 7}, {130, 8}, {28, 9}, - {44, 9}, {6, 8}, {194, 7}, {83, 7}, {52, 9}, {10, 8}, {119, 7}, - {18, 8}, {34, 8}, {1, 7}, {209, 12}, {209, 12}, {173, 7}, {148, 6}, - {136, 8}, {79, 7}, {97, 7}, {66, 6}, {197, 7}, {85, 7}, {56, 9}, - {12, 8}, {121, 7}, {20, 8}, {36, 8}, {2, 7}, {209, 12}, {157, 6}, - {109, 7}, {70, 6}, {127, 7}, {24, 8}, {40, 8}, {4, 7}, {193, 6}, - {82, 6}, {48, 8}, {8, 7}, {118, 6}, {16, 7}, {32, 7}, {0, 6}, - {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, - {145, 3}, {209, 12}, {209, 12}, {209, 12}, {146, 4}, {209, 12}, {149, 4}, - {161, 4}, {64, 4}, {209, 12}, {209, 12}, {209, 12}, {147, 5}, {209, 12}, - {150, 5}, {162, 5}, {65, 5}, {209, 12}, {153, 5}, {165, 5}, {67, 5}, - {177, 5}, {73, 5}, {91, 5}, {64, 4}, {209, 12}, {209, 12}, {176, 10}, - {148, 6}, {188, 10}, {151, 6}, {163, 6}, {66, 6}, {200, 10}, {154, 6}, - {166, 6}, {68, 6}, {178, 6}, {74, 6}, {92, 6}, {64, 4}, {209, 12}, - {157, 6}, {169, 6}, {70, 6}, {181, 6}, {76, 6}, {94, 6}, {65, 5}, - {193, 6}, {82, 6}, {100, 6}, {67, 5}, {118, 6}, {73, 5}, {91, 5}, - {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {191, 10}, {152, 7}, - {164, 7}, {145, 3}, {203, 10}, {90, 10}, {108, 10}, {69, 7}, {126, 10}, - {75, 7}, {93, 7}, {64, 4}, {209, 12}, {158, 7}, {114, 10}, {71, 7}, - {132, 10}, {77, 7}, {95, 7}, {65, 5}, {194, 7}, {83, 7}, {101, 7}, - {67, 5}, {119, 7}, {73, 5}, {91, 5}, {1, 7}, {209, 12}, {209, 12}, - {173, 7}, {148, 6}, {138, 10}, {79, 7}, {97, 7}, {66, 6}, {197, 7}, - {85, 7}, {103, 7}, {68, 6}, {121, 7}, {74, 6}, {92, 6}, {2, 7}, - {209, 12}, {157, 6}, {109, 7}, {70, 6}, {127, 7}, {76, 6}, {94, 6}, - {4, 7}, {193, 6}, {82, 6}, {100, 6}, {8, 7}, {118, 6}, {16, 7}, - {32, 7}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, - {209, 12}, {209, 12}, {145, 3}, {206, 10}, {156, 8}, {168, 8}, {146, 4}, - {180, 8}, {149, 4}, {161, 4}, {64, 4}, {209, 12}, {159, 8}, {116, 10}, - {72, 8}, {134, 10}, {78, 8}, {96, 8}, {65, 5}, {195, 8}, {84, 8}, - {102, 8}, {67, 5}, {120, 8}, {73, 5}, {91, 5}, {64, 4}, {209, 12}, - {209, 12}, {174, 8}, {148, 6}, {140, 10}, {80, 8}, {98, 8}, {66, 6}, - {198, 8}, {86, 8}, {62, 11}, {15, 10}, {122, 8}, {23, 10}, {39, 10}, - {3, 8}, {209, 12}, {157, 6}, {110, 8}, {70, 6}, {128, 8}, {27, 10}, - {43, 10}, {5, 8}, {193, 6}, {82, 6}, {51, 10}, {9, 8}, {118, 6}, - {17, 8}, {33, 8}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, - {189, 8}, {152, 7}, {164, 7}, {145, 3}, {201, 8}, {88, 8}, {106, 8}, - {69, 7}, {124, 8}, {75, 7}, {93, 7}, {64, 4}, {209, 12}, {158, 7}, - {112, 8}, {71, 7}, {130, 8}, {29, 10}, {45, 10}, {6, 8}, {194, 7}, - {83, 7}, {53, 10}, {10, 8}, {119, 7}, {18, 8}, {34, 8}, {1, 7}, - {209, 12}, {209, 12}, {173, 7}, {148, 6}, {136, 8}, {79, 7}, {97, 7}, - {66, 6}, {197, 7}, {85, 7}, {57, 10}, {12, 8}, {121, 7}, {20, 8}, - {36, 8}, {2, 7}, {209, 12}, {157, 6}, {109, 7}, {70, 6}, {127, 7}, - {24, 8}, {40, 8}, {4, 7}, {193, 6}, {82, 6}, {48, 8}, {8, 7}, - {118, 6}, {16, 7}, {32, 7}, {0, 6}, {209, 12}, {209, 12}, {209, 12}, - {209, 12}, {209, 12}, {209, 12}, {209, 12}, {145, 3}, {209, 12}, {209, 12}, - {209, 12}, {146, 4}, {209, 12}, {149, 4}, {161, 4}, {64, 4}, {209, 12}, - {160, 9}, {172, 9}, {147, 5}, {184, 9}, {150, 5}, {162, 5}, {65, 5}, - {196, 9}, {153, 5}, {165, 5}, {67, 5}, {177, 5}, {73, 5}, {91, 5}, - {64, 4}, {209, 12}, {209, 12}, {175, 9}, {148, 6}, {142, 10}, {81, 9}, - {99, 9}, {66, 6}, {199, 9}, {87, 9}, {105, 9}, {68, 6}, {123, 9}, - {74, 6}, {92, 6}, {64, 4}, {209, 12}, {157, 6}, {111, 9}, {70, 6}, - {129, 9}, {76, 6}, {94, 6}, {65, 5}, {193, 6}, {82, 6}, {100, 6}, - {67, 5}, {118, 6}, {73, 5}, {91, 5}, {0, 6}, {209, 12}, {209, 12}, - {209, 12}, {209, 12}, {190, 9}, {152, 7}, {164, 7}, {145, 3}, {202, 9}, - {89, 9}, {107, 9}, {69, 7}, {125, 9}, {75, 7}, {93, 7}, {64, 4}, - {209, 12}, {158, 7}, {113, 9}, {71, 7}, {131, 9}, {30, 10}, {46, 10}, - {7, 9}, {194, 7}, {83, 7}, {54, 10}, {11, 9}, {119, 7}, {19, 9}, - {35, 9}, {1, 7}, {209, 12}, {209, 12}, {173, 7}, {148, 6}, {137, 9}, - {79, 7}, {97, 7}, {66, 6}, {197, 7}, {85, 7}, {58, 10}, {13, 9}, - {121, 7}, {21, 9}, {37, 9}, {2, 7}, {209, 12}, {157, 6}, {109, 7}, - {70, 6}, {127, 7}, {25, 9}, {41, 9}, {4, 7}, {193, 6}, {82, 6}, - {49, 9}, {8, 7}, {118, 6}, {16, 7}, {32, 7}, {0, 6}, {209, 12}, - {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {209, 12}, {145, 3}, - {205, 9}, {156, 8}, {168, 8}, {146, 4}, {180, 8}, {149, 4}, {161, 4}, - {64, 4}, {209, 12}, {159, 8}, {115, 9}, {72, 8}, {133, 9}, {78, 8}, - {96, 8}, {65, 5}, {195, 8}, {84, 8}, {102, 8}, {67, 5}, {120, 8}, - {73, 5}, {91, 5}, {64, 4}, {209, 12}, {209, 12}, {174, 8}, {148, 6}, - {139, 9}, {80, 8}, {98, 8}, {66, 6}, {198, 8}, {86, 8}, {60, 10}, - {14, 9}, {122, 8}, {22, 9}, {38, 9}, {3, 8}, {209, 12}, {157, 6}, - {110, 8}, {70, 6}, {128, 8}, {26, 9}, {42, 9}, {5, 8}, {193, 6}, - {82, 6}, {50, 9}, {9, 8}, {118, 6}, {17, 8}, {33, 8}, {0, 6}, - {209, 12}, {209, 12}, {209, 12}, {209, 12}, {189, 8}, {152, 7}, {164, 7}, - {145, 3}, {201, 8}, {88, 8}, {106, 8}, {69, 7}, {124, 8}, {75, 7}, - {93, 7}, {64, 4}, {209, 12}, {158, 7}, {112, 8}, {71, 7}, {130, 8}, - {28, 9}, {44, 9}, {6, 8}, {194, 7}, {83, 7}, {52, 9}, {10, 8}, - {119, 7}, {18, 8}, {34, 8}, {1, 7}, {209, 12}, {209, 12}, {173, 7}, - {148, 6}, {136, 8}, {79, 7}, {97, 7}, {66, 6}, {197, 7}, {85, 7}, - {56, 9}, {12, 8}, {121, 7}, {20, 8}, {36, 8}, {2, 7}, {209, 12}, - {157, 6}, {109, 7}, {70, 6}, {127, 7}, {24, 8}, {40, 8}, {4, 7}, - {193, 6}, {82, 6}, {48, 8}, {8, 7}, {118, 6}, {16, 7}, {32, 7}, - {0, 6}}; -} // namespace utf8_to_utf16 -} // namespace tables -} // unnamed namespace -} // namespace simdutf - -#endif // SIMDUTF_UTF8_TO_UTF16_TABLES_H -/* end file src/tables/utf8_to_utf16_tables.h */ -/* begin file src/tables/utf16_to_utf8_tables.h */ -// file generated by scripts/sse_convert_utf16_to_utf8.py -#ifndef SIMDUTF_UTF16_TO_UTF8_TABLES_H -#define SIMDUTF_UTF16_TO_UTF8_TABLES_H - -namespace simdutf { -namespace { -namespace tables { -namespace utf16_to_utf8 { - -// 1 byte for length, 16 bytes for mask -const uint8_t pack_1_2_utf8_bytes[256][17] = { - {16, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}, - {15, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80}, - {15, 1, 0, 3, 2, 5, 4, 7, 6, 8, 11, 10, 13, 12, 15, 14, 0x80}, - {14, 0, 3, 2, 5, 4, 7, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, - {15, 1, 0, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80}, - {14, 0, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, - {14, 1, 0, 2, 5, 4, 7, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, - {13, 0, 2, 5, 4, 7, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, - {15, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 10, 13, 12, 15, 14, 0x80}, - {14, 0, 3, 2, 5, 4, 7, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80}, - {14, 1, 0, 3, 2, 5, 4, 7, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80}, - {13, 0, 3, 2, 5, 4, 7, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, - {14, 1, 0, 2, 5, 4, 7, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80}, - {13, 0, 2, 5, 4, 7, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, - {13, 1, 0, 2, 5, 4, 7, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, - {12, 0, 2, 5, 4, 7, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {15, 1, 0, 3, 2, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80}, - {14, 0, 3, 2, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, - {14, 1, 0, 3, 2, 4, 7, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, - {13, 0, 3, 2, 4, 7, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, - {14, 1, 0, 2, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, - {13, 0, 2, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, - {13, 1, 0, 2, 4, 7, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, - {12, 0, 2, 4, 7, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {14, 1, 0, 3, 2, 4, 7, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80}, - {13, 0, 3, 2, 4, 7, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, - {13, 1, 0, 3, 2, 4, 7, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, - {12, 0, 3, 2, 4, 7, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {13, 1, 0, 2, 4, 7, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, - {12, 0, 2, 4, 7, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 2, 4, 7, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 2, 4, 7, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {15, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 15, 14, 0x80}, - {14, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80}, - {14, 1, 0, 3, 2, 5, 4, 7, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80}, - {13, 0, 3, 2, 5, 4, 7, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80}, - {14, 1, 0, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80}, - {13, 0, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80}, - {13, 1, 0, 2, 5, 4, 7, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80}, - {12, 0, 2, 5, 4, 7, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {14, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80}, - {13, 0, 3, 2, 5, 4, 7, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80}, - {13, 1, 0, 3, 2, 5, 4, 7, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80}, - {12, 0, 3, 2, 5, 4, 7, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {13, 1, 0, 2, 5, 4, 7, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80}, - {12, 0, 2, 5, 4, 7, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 2, 5, 4, 7, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 2, 5, 4, 7, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {14, 1, 0, 3, 2, 4, 7, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80}, - {13, 0, 3, 2, 4, 7, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80}, - {13, 1, 0, 3, 2, 4, 7, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80}, - {12, 0, 3, 2, 4, 7, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {13, 1, 0, 2, 4, 7, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80}, - {12, 0, 2, 4, 7, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 2, 4, 7, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 2, 4, 7, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {13, 1, 0, 3, 2, 4, 7, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80}, - {12, 0, 3, 2, 4, 7, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 3, 2, 4, 7, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 3, 2, 4, 7, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 2, 4, 7, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 2, 4, 7, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 1, 0, 2, 4, 7, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 2, 4, 7, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {15, 1, 0, 3, 2, 5, 4, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80}, - {14, 0, 3, 2, 5, 4, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, - {14, 1, 0, 3, 2, 5, 4, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, - {13, 0, 3, 2, 5, 4, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, - {14, 1, 0, 2, 5, 4, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, - {13, 0, 2, 5, 4, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, - {13, 1, 0, 2, 5, 4, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, - {12, 0, 2, 5, 4, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {14, 1, 0, 3, 2, 5, 4, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80}, - {13, 0, 3, 2, 5, 4, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, - {13, 1, 0, 3, 2, 5, 4, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, - {12, 0, 3, 2, 5, 4, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {13, 1, 0, 2, 5, 4, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, - {12, 0, 2, 5, 4, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 2, 5, 4, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 2, 5, 4, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {14, 1, 0, 3, 2, 4, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, - {13, 0, 3, 2, 4, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, - {13, 1, 0, 3, 2, 4, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, - {12, 0, 3, 2, 4, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {13, 1, 0, 2, 4, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, - {12, 0, 2, 4, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 2, 4, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 2, 4, 6, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {13, 1, 0, 3, 2, 4, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80}, - {12, 0, 3, 2, 4, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 3, 2, 4, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 3, 2, 4, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 2, 4, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 2, 4, 6, 9, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 1, 0, 2, 4, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 2, 4, 6, 8, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {14, 1, 0, 3, 2, 5, 4, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80}, - {13, 0, 3, 2, 5, 4, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80}, - {13, 1, 0, 3, 2, 5, 4, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80}, - {12, 0, 3, 2, 5, 4, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {13, 1, 0, 2, 5, 4, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80}, - {12, 0, 2, 5, 4, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 2, 5, 4, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 2, 5, 4, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {13, 1, 0, 3, 2, 5, 4, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80}, - {12, 0, 3, 2, 5, 4, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 3, 2, 5, 4, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 3, 2, 5, 4, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 2, 5, 4, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 2, 5, 4, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 1, 0, 2, 5, 4, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 2, 5, 4, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {13, 1, 0, 3, 2, 4, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80}, - {12, 0, 3, 2, 4, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 3, 2, 4, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 3, 2, 4, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 2, 4, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 2, 4, 6, 9, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 1, 0, 2, 4, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 2, 4, 6, 8, 11, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 3, 2, 4, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 3, 2, 4, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 1, 0, 3, 2, 4, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 3, 2, 4, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 1, 0, 2, 4, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 2, 4, 6, 9, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 1, 0, 2, 4, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {9, 0, 2, 4, 6, 8, 10, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {15, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14, 0x80}, - {14, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80}, - {14, 1, 0, 3, 2, 5, 4, 7, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80}, - {13, 0, 3, 2, 5, 4, 7, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80}, - {14, 1, 0, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80}, - {13, 0, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80}, - {13, 1, 0, 2, 5, 4, 7, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80}, - {12, 0, 2, 5, 4, 7, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {14, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80}, - {13, 0, 3, 2, 5, 4, 7, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80}, - {13, 1, 0, 3, 2, 5, 4, 7, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80}, - {12, 0, 3, 2, 5, 4, 7, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {13, 1, 0, 2, 5, 4, 7, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80}, - {12, 0, 2, 5, 4, 7, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 2, 5, 4, 7, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 2, 5, 4, 7, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {14, 1, 0, 3, 2, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80}, - {13, 0, 3, 2, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80}, - {13, 1, 0, 3, 2, 4, 7, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80}, - {12, 0, 3, 2, 4, 7, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {13, 1, 0, 2, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80}, - {12, 0, 2, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 2, 4, 7, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 2, 4, 7, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {13, 1, 0, 3, 2, 4, 7, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80}, - {12, 0, 3, 2, 4, 7, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 3, 2, 4, 7, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 3, 2, 4, 7, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 2, 4, 7, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 2, 4, 7, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 1, 0, 2, 4, 7, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 2, 4, 7, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {14, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80}, - {13, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80}, - {13, 1, 0, 3, 2, 5, 4, 7, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80}, - {12, 0, 3, 2, 5, 4, 7, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {13, 1, 0, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80}, - {12, 0, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 2, 5, 4, 7, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 2, 5, 4, 7, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {13, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80}, - {12, 0, 3, 2, 5, 4, 7, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 3, 2, 5, 4, 7, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 3, 2, 5, 4, 7, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 2, 5, 4, 7, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 2, 5, 4, 7, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 1, 0, 2, 5, 4, 7, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 2, 5, 4, 7, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {13, 1, 0, 3, 2, 4, 7, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80}, - {12, 0, 3, 2, 4, 7, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 3, 2, 4, 7, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 3, 2, 4, 7, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 2, 4, 7, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 2, 4, 7, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 1, 0, 2, 4, 7, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 2, 4, 7, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 3, 2, 4, 7, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 3, 2, 4, 7, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 1, 0, 3, 2, 4, 7, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 3, 2, 4, 7, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 1, 0, 2, 4, 7, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 2, 4, 7, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 1, 0, 2, 4, 7, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {9, 0, 2, 4, 7, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {14, 1, 0, 3, 2, 5, 4, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80}, - {13, 0, 3, 2, 5, 4, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80}, - {13, 1, 0, 3, 2, 5, 4, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80}, - {12, 0, 3, 2, 5, 4, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {13, 1, 0, 2, 5, 4, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80}, - {12, 0, 2, 5, 4, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 2, 5, 4, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 2, 5, 4, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {13, 1, 0, 3, 2, 5, 4, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80}, - {12, 0, 3, 2, 5, 4, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 3, 2, 5, 4, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 3, 2, 5, 4, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 2, 5, 4, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 2, 5, 4, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 1, 0, 2, 5, 4, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 2, 5, 4, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {13, 1, 0, 3, 2, 4, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80}, - {12, 0, 3, 2, 4, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 3, 2, 4, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 3, 2, 4, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 2, 4, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 2, 4, 6, 9, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 1, 0, 2, 4, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 2, 4, 6, 8, 11, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 3, 2, 4, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 3, 2, 4, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 1, 0, 3, 2, 4, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 3, 2, 4, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 1, 0, 2, 4, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 2, 4, 6, 9, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 1, 0, 2, 4, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {9, 0, 2, 4, 6, 8, 10, 13, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {13, 1, 0, 3, 2, 5, 4, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80}, - {12, 0, 3, 2, 5, 4, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 3, 2, 5, 4, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 3, 2, 5, 4, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 2, 5, 4, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 2, 5, 4, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 1, 0, 2, 5, 4, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 2, 5, 4, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 3, 2, 5, 4, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 3, 2, 5, 4, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 1, 0, 3, 2, 5, 4, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 3, 2, 5, 4, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 1, 0, 2, 5, 4, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 2, 5, 4, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 1, 0, 2, 5, 4, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {9, 0, 2, 5, 4, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {12, 1, 0, 3, 2, 4, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 3, 2, 4, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 1, 0, 3, 2, 4, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 3, 2, 4, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 1, 0, 2, 4, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 2, 4, 6, 9, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 1, 0, 2, 4, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {9, 0, 2, 4, 6, 8, 11, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {11, 1, 0, 3, 2, 4, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 3, 2, 4, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 1, 0, 3, 2, 4, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {9, 0, 3, 2, 4, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 1, 0, 2, 4, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {9, 0, 2, 4, 6, 9, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {9, 1, 0, 2, 4, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {8, 0, 2, 4, 6, 8, 10, 12, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}}; - -// 1 byte for length, 16 bytes for mask -const uint8_t pack_1_2_3_utf8_bytes[256][17] = { - {12, 2, 3, 1, 6, 7, 5, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80}, - {9, 6, 7, 5, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {11, 3, 1, 6, 7, 5, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 0, 6, 7, 5, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {9, 2, 3, 1, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 3, 1, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 0, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {11, 2, 3, 1, 7, 5, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80}, - {8, 7, 5, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {10, 3, 1, 7, 5, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {9, 0, 7, 5, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {10, 2, 3, 1, 4, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {7, 4, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {9, 3, 1, 4, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 0, 4, 10, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {9, 2, 3, 1, 6, 7, 5, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {6, 6, 7, 5, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 3, 1, 6, 7, 5, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 0, 6, 7, 5, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 2, 3, 1, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 3, 1, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 0, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {8, 2, 3, 1, 7, 5, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 7, 5, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 3, 1, 7, 5, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 0, 7, 5, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 2, 3, 1, 4, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 4, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 3, 1, 4, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 0, 4, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {11, 2, 3, 1, 6, 7, 5, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80}, - {8, 6, 7, 5, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {10, 3, 1, 6, 7, 5, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {9, 0, 6, 7, 5, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 2, 3, 1, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 3, 1, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 0, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {10, 2, 3, 1, 7, 5, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {7, 7, 5, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {9, 3, 1, 7, 5, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 0, 7, 5, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {9, 2, 3, 1, 4, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 4, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 3, 1, 4, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 0, 4, 11, 9, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {10, 2, 3, 1, 6, 7, 5, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {7, 6, 7, 5, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {9, 3, 1, 6, 7, 5, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {8, 0, 6, 7, 5, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 2, 3, 1, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 3, 1, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 0, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {9, 2, 3, 1, 7, 5, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {6, 7, 5, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 3, 1, 7, 5, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 0, 7, 5, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 2, 3, 1, 4, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 4, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 3, 1, 4, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 0, 4, 8, 14, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {9, 2, 3, 1, 6, 7, 5, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {6, 6, 7, 5, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 3, 1, 6, 7, 5, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 0, 6, 7, 5, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 2, 3, 1, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 3, 1, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 0, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {8, 2, 3, 1, 7, 5, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 7, 5, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 3, 1, 7, 5, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 0, 7, 5, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 2, 3, 1, 4, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 4, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 3, 1, 4, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 0, 4, 10, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {6, 2, 3, 1, 6, 7, 5, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {3, 6, 7, 5, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 3, 1, 6, 7, 5, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 0, 6, 7, 5, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 2, 3, 1, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80}, - {2, 3, 1, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {1, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80}, - {5, 2, 3, 1, 7, 5, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {2, 7, 5, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {4, 3, 1, 7, 5, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 0, 7, 5, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {4, 2, 3, 1, 4, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {1, 4, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80}, - {3, 3, 1, 4, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {2, 0, 4, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {8, 2, 3, 1, 6, 7, 5, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 6, 7, 5, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 3, 1, 6, 7, 5, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 0, 6, 7, 5, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 2, 3, 1, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {2, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {4, 3, 1, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 0, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {7, 2, 3, 1, 7, 5, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 7, 5, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {6, 3, 1, 7, 5, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 0, 7, 5, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {6, 2, 3, 1, 4, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {3, 4, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 3, 1, 4, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 0, 4, 11, 9, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 2, 3, 1, 6, 7, 5, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 6, 7, 5, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {6, 3, 1, 6, 7, 5, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 0, 6, 7, 5, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 2, 3, 1, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {1, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80}, - {3, 3, 1, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {2, 0, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 2, 3, 1, 7, 5, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {3, 7, 5, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 3, 1, 7, 5, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 0, 7, 5, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 2, 3, 1, 4, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {2, 4, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {4, 3, 1, 4, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 0, 4, 8, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {11, 2, 3, 1, 6, 7, 5, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80}, - {8, 6, 7, 5, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {10, 3, 1, 6, 7, 5, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {9, 0, 6, 7, 5, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 2, 3, 1, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 3, 1, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 0, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {10, 2, 3, 1, 7, 5, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {7, 7, 5, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {9, 3, 1, 7, 5, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 0, 7, 5, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {9, 2, 3, 1, 4, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 4, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 3, 1, 4, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 0, 4, 10, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 2, 3, 1, 6, 7, 5, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 6, 7, 5, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 3, 1, 6, 7, 5, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 0, 6, 7, 5, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 2, 3, 1, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {2, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80}, - {4, 3, 1, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {3, 0, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {7, 2, 3, 1, 7, 5, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 7, 5, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 3, 1, 7, 5, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 0, 7, 5, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {6, 2, 3, 1, 4, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 4, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 3, 1, 4, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 0, 4, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {10, 2, 3, 1, 6, 7, 5, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {7, 6, 7, 5, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {9, 3, 1, 6, 7, 5, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {8, 0, 6, 7, 5, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 2, 3, 1, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 3, 1, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 0, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {9, 2, 3, 1, 7, 5, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {6, 7, 5, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 3, 1, 7, 5, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 0, 7, 5, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 2, 3, 1, 4, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 4, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 3, 1, 4, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 0, 4, 11, 9, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {9, 2, 3, 1, 6, 7, 5, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {6, 6, 7, 5, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 3, 1, 6, 7, 5, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 0, 6, 7, 5, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 2, 3, 1, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 3, 1, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 0, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {8, 2, 3, 1, 7, 5, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 7, 5, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 3, 1, 7, 5, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 0, 7, 5, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 2, 3, 1, 4, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 4, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 3, 1, 4, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 0, 4, 8, 15, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {10, 2, 3, 1, 6, 7, 5, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {7, 6, 7, 5, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {9, 3, 1, 6, 7, 5, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {8, 0, 6, 7, 5, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 2, 3, 1, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 3, 1, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 0, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {9, 2, 3, 1, 7, 5, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {6, 7, 5, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 3, 1, 7, 5, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 0, 7, 5, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 2, 3, 1, 4, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 4, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 3, 1, 4, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 0, 4, 10, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 2, 3, 1, 6, 7, 5, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 6, 7, 5, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {6, 3, 1, 6, 7, 5, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 0, 6, 7, 5, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 2, 3, 1, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {1, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80}, - {3, 3, 1, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {2, 0, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 2, 3, 1, 7, 5, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {3, 7, 5, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 3, 1, 7, 5, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 0, 7, 5, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 2, 3, 1, 4, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {2, 4, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {4, 3, 1, 4, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 0, 4, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {9, 2, 3, 1, 6, 7, 5, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {6, 6, 7, 5, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 3, 1, 6, 7, 5, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 0, 6, 7, 5, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 2, 3, 1, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 3, 1, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 0, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {8, 2, 3, 1, 7, 5, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 7, 5, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 3, 1, 7, 5, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 0, 7, 5, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 2, 3, 1, 4, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 4, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 3, 1, 4, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 0, 4, 11, 9, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 2, 3, 1, 6, 7, 5, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 6, 7, 5, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 3, 1, 6, 7, 5, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 0, 6, 7, 5, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 2, 3, 1, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {2, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {4, 3, 1, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 0, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {7, 2, 3, 1, 7, 5, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 7, 5, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {6, 3, 1, 7, 5, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 0, 7, 5, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {6, 2, 3, 1, 4, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {3, 4, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 3, 1, 4, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 0, 4, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}}; - -} // namespace utf16_to_utf8 -} // namespace tables -} // unnamed namespace -} // namespace simdutf - -#endif // SIMDUTF_UTF16_TO_UTF8_TABLES_H -/* end file src/tables/utf16_to_utf8_tables.h */ -/* begin file src/tables/utf32_to_utf16_tables.h */ -// file generated by scripts/sse_convert_utf32_to_utf16.py -#ifndef SIMDUTF_UTF32_TO_UTF16_TABLES_H -#define SIMDUTF_UTF32_TO_UTF16_TABLES_H - -namespace simdutf { -namespace { -namespace tables { -namespace utf32_to_utf16 { - -const uint8_t pack_utf32_to_utf16le[16][16] = { - {0, 1, 4, 5, 8, 9, 12, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {0, 1, 2, 3, 4, 5, 8, 9, 12, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {0, 1, 4, 5, 6, 7, 8, 9, 12, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 13, 0x80, 0x80, 0x80, 0x80}, - {0, 1, 4, 5, 8, 9, 10, 11, 12, 13, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {0, 1, 2, 3, 4, 5, 8, 9, 10, 11, 12, 13, 0x80, 0x80, 0x80, 0x80}, - {0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 0x80, 0x80, 0x80, 0x80}, - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 0x80, 0x80}, - {0, 1, 4, 5, 8, 9, 12, 13, 14, 15, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {0, 1, 2, 3, 4, 5, 8, 9, 12, 13, 14, 15, 0x80, 0x80, 0x80, 0x80}, - {0, 1, 4, 5, 6, 7, 8, 9, 12, 13, 14, 15, 0x80, 0x80, 0x80, 0x80}, - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 13, 14, 15, 0x80, 0x80}, - {0, 1, 4, 5, 8, 9, 10, 11, 12, 13, 14, 15, 0x80, 0x80, 0x80, 0x80}, - {0, 1, 2, 3, 4, 5, 8, 9, 10, 11, 12, 13, 14, 15, 0x80, 0x80}, - {0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0x80, 0x80}, - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, -}; - -const uint8_t pack_utf32_to_utf16be[16][16] = { - {1, 0, 5, 4, 9, 8, 13, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {1, 0, 3, 2, 5, 4, 9, 8, 13, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {1, 0, 5, 4, 7, 6, 9, 8, 13, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 13, 12, 0x80, 0x80, 0x80, 0x80}, - {1, 0, 5, 4, 9, 8, 11, 10, 13, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {1, 0, 3, 2, 5, 4, 9, 8, 11, 10, 13, 12, 0x80, 0x80, 0x80, 0x80}, - {1, 0, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 0x80, 0x80, 0x80, 0x80}, - {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 0x80, 0x80}, - {1, 0, 5, 4, 9, 8, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {1, 0, 3, 2, 5, 4, 9, 8, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {1, 0, 5, 4, 7, 6, 9, 8, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 13, 12, 15, 14, 0x80, 0x80}, - {1, 0, 5, 4, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80, 0x80, 0x80}, - {1, 0, 3, 2, 5, 4, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, - {1, 0, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 0x80, 0x80}, - {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}, -}; - -} // namespace utf32_to_utf16 -} // namespace tables -} // unnamed namespace -} // namespace simdutf - -#endif // SIMDUTF_UTF16_TO_UTF8_TABLES_H -/* end file src/tables/utf32_to_utf16_tables.h */ -// End of tables. - -// Implementations: they need to be setup before including -// scalar/* code, as the scalar code is sometimes enabled -// only for peculiar build targets. - -// The best choice should always come first! -/* begin file src/simdutf/arm64.h */ -#ifndef SIMDUTF_ARM64_H -#define SIMDUTF_ARM64_H - -#ifdef SIMDUTF_FALLBACK_H - #error "arm64.h must be included before fallback.h" -#endif - - -#ifndef SIMDUTF_IMPLEMENTATION_ARM64 - #define SIMDUTF_IMPLEMENTATION_ARM64 (SIMDUTF_IS_ARM64) -#endif -#if SIMDUTF_IMPLEMENTATION_ARM64 && SIMDUTF_IS_ARM64 - #define SIMDUTF_CAN_ALWAYS_RUN_ARM64 1 -#else - #define SIMDUTF_CAN_ALWAYS_RUN_ARM64 0 -#endif - - -#if SIMDUTF_IMPLEMENTATION_ARM64 - -namespace simdutf { -/** - * Implementation for NEON (ARMv8). - */ -namespace arm64 {} // namespace arm64 -} // namespace simdutf - -/* begin file src/simdutf/arm64/implementation.h */ -#ifndef SIMDUTF_ARM64_IMPLEMENTATION_H -#define SIMDUTF_ARM64_IMPLEMENTATION_H - - -namespace simdutf { -namespace arm64 { - -namespace { -using namespace simdutf; -} - -class implementation final : public simdutf::implementation { -public: - simdutf_really_inline implementation() - : simdutf::implementation("arm64", "ARM NEON", - internal::instruction_set::NEON) {} -#if SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused int detect_encodings(const char *input, - size_t length) const noexcept final; -#endif // SIMDUTF_FEATURE_DETECT_ENCODING -#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused bool validate_utf8(const char *buf, - size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING -#if SIMDUTF_FEATURE_UTF8 - simdutf_warn_unused result - validate_utf8_with_errors(const char *buf, size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 -#if SIMDUTF_FEATURE_ASCII - simdutf_warn_unused bool validate_ascii(const char *buf, - size_t len) const noexcept final; - simdutf_warn_unused result - validate_ascii_with_errors(const char *buf, size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_ASCII - -#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused bool validate_utf16le(const char16_t *buf, - size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING -#if SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused bool validate_utf16be(const char16_t *buf, - size_t len) const noexcept final; - simdutf_warn_unused result validate_utf16le_with_errors( - const char16_t *buf, size_t len) const noexcept final; - simdutf_warn_unused result validate_utf16be_with_errors( - const char16_t *buf, size_t len) const noexcept final; - void to_well_formed_utf16be(const char16_t *input, size_t len, - char16_t *output) const noexcept final; - void to_well_formed_utf16le(const char16_t *input, size_t len, - char16_t *output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused bool validate_utf32(const char32_t *buf, - size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING -#if SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused result validate_utf32_with_errors( - const char32_t *buf, size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF32 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_latin1_to_utf8( - const char *buf, size_t len, char *utf8_output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_latin1_to_utf16le( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_latin1_to_utf16be( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_latin1_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_latin1_with_errors( - const char *buf, size_t len, char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t convert_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t convert_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf32_with_errors( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - convert_utf16le_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16be_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( - const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( - const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16le_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16be_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t convert_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - convert_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; - simdutf_warn_unused result - convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t convert_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf8_with_errors( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - convert_utf32_to_utf16le(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf32_to_utf16be(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_utf16le(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_utf16be(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16le_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16be_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16le_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16be_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -#if SIMDUTF_FEATURE_UTF16 - void change_endianness_utf16(const char16_t *buf, size_t length, - char16_t *output) const noexcept final; - simdutf_warn_unused size_t count_utf16le(const char16_t *buf, - size_t length) const noexcept; - simdutf_warn_unused size_t count_utf16be(const char16_t *buf, - size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_UTF8 - simdutf_warn_unused size_t count_utf8(const char *buf, - size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t - utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t utf32_length_from_utf16le( - const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t utf32_length_from_utf16be( - const char16_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t - utf16_length_from_utf8(const char *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf32_length_from_utf8(const char *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - latin1_length_from_utf8(const char *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - utf8_length_from_latin1(const char *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_BASE64 - simdutf_warn_unused result base64_to_binary( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused full_result base64_to_binary_details( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused result - base64_to_binary(const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused full_result base64_to_binary_details( - const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - size_t binary_to_base64(const char *input, size_t length, char *output, - base64_options options) const noexcept; -#endif // SIMDUTF_FEATURE_BASE64 -}; - -} // namespace arm64 -} // namespace simdutf - -#endif // SIMDUTF_ARM64_IMPLEMENTATION_H -/* end file src/simdutf/arm64/implementation.h */ - -/* begin file src/simdutf/arm64/begin.h */ -// redefining SIMDUTF_IMPLEMENTATION to "arm64" -// #define SIMDUTF_IMPLEMENTATION arm64 -#define SIMDUTF_SIMD_HAS_BYTEMASK 1 -/* end file src/simdutf/arm64/begin.h */ - - // Declarations -/* begin file src/simdutf/arm64/intrinsics.h */ -#ifndef SIMDUTF_ARM64_INTRINSICS_H -#define SIMDUTF_ARM64_INTRINSICS_H - - -// This should be the correct header whether -// you use visual studio or other compilers. -#include - -#endif // SIMDUTF_ARM64_INTRINSICS_H -/* end file src/simdutf/arm64/intrinsics.h */ -/* begin file src/simdutf/arm64/bitmanipulation.h */ -#ifndef SIMDUTF_ARM64_BITMANIPULATION_H -#define SIMDUTF_ARM64_BITMANIPULATION_H - -namespace simdutf { -namespace arm64 { -namespace { - -/* result might be undefined when input_num is zero */ -simdutf_really_inline int count_ones(uint64_t input_num) { - return vaddv_u8(vcnt_u8(vcreate_u8(input_num))); -} - -#if SIMDUTF_NEED_TRAILING_ZEROES -simdutf_really_inline int trailing_zeroes(uint64_t input_num) { - #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - unsigned long ret; - // Search the mask data from least significant bit (LSB) - // to the most significant bit (MSB) for a set bit (1). - _BitScanForward64(&ret, input_num); - return (int)ret; - #else // SIMDUTF_REGULAR_VISUAL_STUDIO - return __builtin_ctzll(input_num); - #endif // SIMDUTF_REGULAR_VISUAL_STUDIO -} -#endif -template T clear_least_significant_bit(T x) { - return (x & (x - 1)); -} - -} // unnamed namespace -} // namespace arm64 -} // namespace simdutf - -#endif // SIMDUTF_ARM64_BITMANIPULATION_H -/* end file src/simdutf/arm64/bitmanipulation.h */ -/* begin file src/simdutf/arm64/simd.h */ -#ifndef SIMDUTF_ARM64_SIMD_H -#define SIMDUTF_ARM64_SIMD_H - -#include - -namespace simdutf { -namespace arm64 { -namespace { -namespace simd { - -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO -namespace { - // Start of private section with Visual Studio workaround - - #ifndef simdutf_make_uint8x16_t - #define simdutf_make_uint8x16_t(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, \ - x11, x12, x13, x14, x15, x16) \ - ([=]() { \ - uint8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, \ - x9, x10, x11, x12, x13, x14, x15, x16}; \ - return vld1q_u8(array); \ - }()) - #endif - #ifndef simdutf_make_int8x16_t - #define simdutf_make_int8x16_t(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, \ - x11, x12, x13, x14, x15, x16) \ - ([=]() { \ - int8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, \ - x9, x10, x11, x12, x13, x14, x15, x16}; \ - return vld1q_s8(array); \ - }()) - #endif - - #ifndef simdutf_make_uint8x8_t - #define simdutf_make_uint8x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ - ([=]() { \ - uint8_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ - return vld1_u8(array); \ - }()) - #endif - #ifndef simdutf_make_int8x8_t - #define simdutf_make_int8x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ - ([=]() { \ - int8_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ - return vld1_s8(array); \ - }()) - #endif - #ifndef simdutf_make_uint16x8_t - #define simdutf_make_uint16x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ - ([=]() { \ - uint16_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ - return vld1q_u16(array); \ - }()) - #endif - #ifndef simdutf_make_int16x8_t - #define simdutf_make_int16x8_t(x1, x2, x3, x4, x5, x6, x7, x8) \ - ([=]() { \ - int16_t array[8] = {x1, x2, x3, x4, x5, x6, x7, x8}; \ - return vld1q_s16(array); \ - }()) - #endif - -// End of private section with Visual Studio workaround -} // namespace -#endif // SIMDUTF_REGULAR_VISUAL_STUDIO - -template struct simd8; - -// -// Base class of simd8 and simd8, both of which use uint8x16_t -// internally. -// -template > struct base_u8 { - uint8x16_t value; - static const int SIZE = sizeof(value); - void dump() const { - uint8_t temp[16]; - vst1q_u8(temp, *this); - printf("[%04x, %04x, %04x, %04x, %04x, %04x, %04x, %04x,%04x, %04x, %04x, " - "%04x, %04x, %04x, %04x, %04x]\n", - temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], - temp[7], temp[8], temp[9], temp[10], temp[11], temp[12], temp[13], - temp[14], temp[15]); - } - // Conversion from/to SIMD register - simdutf_really_inline base_u8(const uint8x16_t _value) : value(_value) {} - simdutf_really_inline operator const uint8x16_t &() const { - return this->value; - } - - // Bit operations - simdutf_really_inline simd8 operator|(const simd8 other) const { - return vorrq_u8(*this, other); - } - simdutf_really_inline simd8 operator&(const simd8 other) const { - return vandq_u8(*this, other); - } - simdutf_really_inline simd8 operator^(const simd8 other) const { - return veorq_u8(*this, other); - } - simdutf_really_inline simd8 &operator|=(const simd8 other) { - auto this_cast = static_cast *>(this); - *this_cast = *this_cast | other; - return *this_cast; - } - - friend simdutf_really_inline Mask operator==(const simd8 lhs, - const simd8 rhs) { - return vceqq_u8(lhs, rhs); - } - - template - simdutf_really_inline simd8 prev(const simd8 prev_chunk) const { - return vextq_u8(prev_chunk, *this, 16 - N); - } -}; - -// SIMD byte mask type (returned by things like eq and gt) -template <> struct simd8 : base_u8 { - static simdutf_really_inline simd8 splat(bool _value) { - return vmovq_n_u8(uint8_t(-(!!_value))); - } - - simdutf_really_inline simd8(const uint8x16_t _value) - : base_u8(_value) {} - // False constructor - simdutf_really_inline simd8() : simd8(vdupq_n_u8(0)) {} - // Splat constructor - simdutf_really_inline simd8(bool _value) : simd8(splat(_value)) {} - simdutf_really_inline void store(uint8_t dst[16]) const { - return vst1q_u8(dst, *this); - } - - // We return uint32_t instead of uint16_t because that seems to be more - // efficient for most purposes (cutting it down to uint16_t costs performance - // in some compilers). - simdutf_really_inline uint32_t to_bitmask() const { -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - const uint8x16_t bit_mask = - simdutf_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); -#else - const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; -#endif - auto minput = *this & bit_mask; - uint8x16_t tmp = vpaddq_u8(minput, minput); - tmp = vpaddq_u8(tmp, tmp); - tmp = vpaddq_u8(tmp, tmp); - return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); - } - - // Returns 4-bit out of each byte, alternating between the high 4 bits and low - // bits result it is 64 bit. This method is expected to be faster than none() - // and is equivalent when the vector register is the result of a comparison, - // with byte values 0xff and 0x00. - simdutf_really_inline uint64_t to_bitmask64() const { - return vget_lane_u64( - vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(*this), 4)), 0); - } -}; - -// Unsigned bytes -template <> struct simd8 : base_u8 { - static simdutf_really_inline simd8 splat(uint8_t _value) { - return vmovq_n_u8(_value); - } - static simdutf_really_inline simd8 zero() { return vdupq_n_u8(0); } - static simdutf_really_inline simd8 load(const uint8_t *values) { - return vld1q_u8(values); - } - simdutf_really_inline simd8(const uint8x16_t _value) - : base_u8(_value) {} - // Zero constructor - simdutf_really_inline simd8() : simd8(zero()) {} - // Array constructor - simdutf_really_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} - // Splat constructor - simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} - // Member-by-member initialization -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - simdutf_really_inline - simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, - uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, - uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) - : simd8(simdutf_make_uint8x16_t(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, - v10, v11, v12, v13, v14, v15)) {} -#else - simdutf_really_inline - simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, - uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, - uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) - : simd8(uint8x16_t{v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, - v13, v14, v15}) {} -#endif - - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdutf_really_inline static simd8 - repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, - uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, - uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, - uint8_t v15) { - return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, - v13, v14, v15); - } - - // Store to array - simdutf_really_inline void store(uint8_t dst[16]) const { - return vst1q_u8(dst, *this); - } - - // Addition/subtraction are the same for signed and unsigned - simdutf_really_inline simd8 - operator-(const simd8 other) const { - return vsubq_u8(*this, other); - } - simdutf_really_inline simd8 &operator-=(const simd8 other) { - *this = *this - other; - return *this; - } - - // Order-specific operations - simdutf_really_inline uint8_t max_val() const { return vmaxvq_u8(*this); } - simdutf_really_inline simd8 - operator>=(const simd8 other) const { - return vcgeq_u8(*this, other); - } - simdutf_really_inline simd8 - operator>(const simd8 other) const { - return vcgtq_u8(*this, other); - } - // Same as >, but instead of guaranteeing all 1's == true, false = 0 and true - // = nonzero. For ARM, returns all 1's. - simdutf_really_inline simd8 - gt_bits(const simd8 other) const { - return simd8(*this > other); - } - - // Bit-specific operations - simdutf_really_inline simd8 any_bits_set(simd8 bits) const { - return vtstq_u8(*this, bits); - } - - simdutf_really_inline bool is_ascii() const { - return this->max_val() < 0b10000000u; - } - - simdutf_really_inline bool any_bits_set_anywhere() const { - return this->max_val() != 0; - } - template simdutf_really_inline simd8 shr() const { - return vshrq_n_u8(*this, N); - } - simdutf_really_inline uint16_t sum_bytes() const { return vaddvq_u8(*this); } - - // Perform a lookup assuming the value is between 0 and 16 (undefined behavior - // for out of range values) - template - simdutf_really_inline simd8 lookup_16(simd8 lookup_table) const { - return lookup_table.apply_lookup_16_to(*this); - } - - template - simdutf_really_inline simd8 - lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, - L replace5, L replace6, L replace7, L replace8, L replace9, - L replace10, L replace11, L replace12, L replace13, L replace14, - L replace15) const { - return lookup_16(simd8::repeat_16( - replace0, replace1, replace2, replace3, replace4, replace5, replace6, - replace7, replace8, replace9, replace10, replace11, replace12, - replace13, replace14, replace15)); - } - - template - simdutf_really_inline simd8 - apply_lookup_16_to(const simd8 original) const { - return vqtbl1q_u8(*this, simd8(original)); - } -}; - -// Signed bytes -template <> struct simd8 { - int8x16_t value; - static const int SIZE = sizeof(value); - - static simdutf_really_inline simd8 splat(int8_t _value) { - return vmovq_n_s8(_value); - } - static simdutf_really_inline simd8 zero() { return vdupq_n_s8(0); } - static simdutf_really_inline simd8 load(const int8_t values[16]) { - return vld1q_s8(values); - } - - // Use ST2 instead of UXTL+UXTL2 to interleave zeroes. UXTL is actually a - // USHLL #0, and shifting in NEON is actually quite slow. - // - // While this needs the registers to be in a specific order, bigger cores can - // interleave these with no overhead, and it still performs decently on little - // cores. - // movi v1.3d, #0 - // mov v0.16b, value[0] - // st2 {v0.16b, v1.16b}, [ptr], #32 - // mov v0.16b, value[1] - // st2 {v0.16b, v1.16b}, [ptr], #32 - // ... - template - simdutf_really_inline void store_ascii_as_utf16(char16_t *p) const { - int8x16x2_t pair = match_system(big_endian) - ? int8x16x2_t{{this->value, vmovq_n_s8(0)}} - : int8x16x2_t{{vmovq_n_s8(0), this->value}}; - vst2q_s8(reinterpret_cast(p), pair); - } - - // In places where the table can be reused, which is most uses in simdutf, it - // is worth it to do 4 table lookups, as there is no direct zero extension - // from u8 to u32. - simdutf_really_inline void store_ascii_as_utf32_tbl(char32_t *p) const { - const simd8 tb1{0, 255, 255, 255, 1, 255, 255, 255, - 2, 255, 255, 255, 3, 255, 255, 255}; - const simd8 tb2{4, 255, 255, 255, 5, 255, 255, 255, - 6, 255, 255, 255, 7, 255, 255, 255}; - const simd8 tb3{8, 255, 255, 255, 9, 255, 255, 255, - 10, 255, 255, 255, 11, 255, 255, 255}; - const simd8 tb4{12, 255, 255, 255, 13, 255, 255, 255, - 14, 255, 255, 255, 15, 255, 255, 255}; - - // encourage store pairing and interleaving - const auto shuf1 = this->apply_lookup_16_to(tb1); - const auto shuf2 = this->apply_lookup_16_to(tb2); - shuf1.store(reinterpret_cast(p)); - shuf2.store(reinterpret_cast(p + 4)); - - const auto shuf3 = this->apply_lookup_16_to(tb3); - const auto shuf4 = this->apply_lookup_16_to(tb4); - shuf3.store(reinterpret_cast(p + 8)); - shuf4.store(reinterpret_cast(p + 12)); - } - // Conversion from/to SIMD register - simdutf_really_inline simd8(const int8x16_t _value) : value{_value} {} - simdutf_really_inline operator const int8x16_t &() const { - return this->value; - } -#ifndef SIMDUTF_REGULAR_VISUAL_STUDIO - simdutf_really_inline operator const uint8x16_t() const { - return vreinterpretq_u8_s8(this->value); - } -#endif - simdutf_really_inline operator int8x16_t &() { return this->value; } - - // Zero constructor - simdutf_really_inline simd8() : simd8(zero()) {} - // Splat constructor - simdutf_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdutf_really_inline simd8(const int8_t *values) : simd8(load(values)) {} - // Member-by-member initialization -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - simdutf_really_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, - int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, - int8_t v12, int8_t v13, int8_t v14, int8_t v15) - : simd8(simdutf_make_int8x16_t(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, - v10, v11, v12, v13, v14, v15)) {} -#else - simdutf_really_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, - int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, - int8_t v12, int8_t v13, int8_t v14, int8_t v15) - : simd8(int8x16_t{v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, - v13, v14, v15}) {} -#endif - - // Store to array - simdutf_really_inline void store(int8_t dst[16]) const { - return vst1q_s8(dst, value); - } - // Explicit conversion to/from unsigned - // - // Under Visual Studio/ARM64 uint8x16_t and int8x16_t are apparently the same - // type. In theory, we could check this occurrence with std::same_as and - // std::enabled_if but it is C++14 and relatively ugly and hard to read. -#ifndef SIMDUTF_REGULAR_VISUAL_STUDIO - simdutf_really_inline explicit simd8(const uint8x16_t other) - : simd8(vreinterpretq_s8_u8(other)) {} -#endif - simdutf_really_inline operator simd8() const { - return vreinterpretq_u8_s8(this->value); - } - - simdutf_really_inline simd8 - operator|(const simd8 other) const { - return vorrq_s8(value, other.value); - } - - simdutf_really_inline int8_t max_val() const { return vmaxvq_s8(value); } - simdutf_really_inline int8_t min_val() const { return vminvq_s8(value); } - simdutf_really_inline bool is_ascii() const { return this->min_val() >= 0; } - - // Order-sensitive comparisons - simdutf_really_inline simd8 operator>(const simd8 other) const { - return vcgtq_s8(value, other.value); - } - simdutf_really_inline simd8 operator<(const simd8 other) const { - return vcltq_s8(value, other.value); - } - - template - simdutf_really_inline simd8 - apply_lookup_16_to(const simd8 original) const { - return vqtbl1q_s8(*this, simd8(original)); - } -}; - -template struct simd8x64 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); - static_assert(NUM_CHUNKS == 4, - "ARM kernel should use four registers per 64-byte block."); - simd8 chunks[NUM_CHUNKS]; - - simd8x64(const simd8x64 &o) = delete; // no copy allowed - simd8x64 & - operator=(const simd8 other) = delete; // no assignment allowed - simd8x64() = delete; // no default constructor allowed - - simdutf_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1, - const simd8 chunk2, const simd8 chunk3) - : chunks{chunk0, chunk1, chunk2, chunk3} {} - simdutf_really_inline simd8x64(const T *ptr) - : chunks{simd8::load(ptr), - simd8::load(ptr + sizeof(simd8) / sizeof(T)), - simd8::load(ptr + 2 * sizeof(simd8) / sizeof(T)), - simd8::load(ptr + 3 * sizeof(simd8) / sizeof(T))} {} - - simdutf_really_inline void store(T *ptr) const { - this->chunks[0].store(ptr + sizeof(simd8) * 0 / sizeof(T)); - this->chunks[1].store(ptr + sizeof(simd8) * 1 / sizeof(T)); - this->chunks[2].store(ptr + sizeof(simd8) * 2 / sizeof(T)); - this->chunks[3].store(ptr + sizeof(simd8) * 3 / sizeof(T)); - } - - simdutf_really_inline simd8x64 &operator|=(const simd8x64 &other) { - this->chunks[0] |= other.chunks[0]; - this->chunks[1] |= other.chunks[1]; - this->chunks[2] |= other.chunks[2]; - this->chunks[3] |= other.chunks[3]; - return *this; - } - - simdutf_really_inline simd8 reduce_or() const { - return (this->chunks[0] | this->chunks[1]) | - (this->chunks[2] | this->chunks[3]); - } - - simdutf_really_inline bool is_ascii() const { return reduce_or().is_ascii(); } - - template - simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { - this->chunks[0].template store_ascii_as_utf16(ptr + - sizeof(simd8) * 0); - this->chunks[1].template store_ascii_as_utf16(ptr + - sizeof(simd8) * 1); - this->chunks[2].template store_ascii_as_utf16(ptr + - sizeof(simd8) * 2); - this->chunks[3].template store_ascii_as_utf16(ptr + - sizeof(simd8) * 3); - } - - simdutf_really_inline void store_ascii_as_utf32(char32_t *ptr) const { - this->chunks[0].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 0); - this->chunks[1].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 1); - this->chunks[2].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 2); - this->chunks[3].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 3); - } - - simdutf_really_inline uint64_t to_bitmask() const { -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - const uint8x16_t bit_mask = - simdutf_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); -#else - const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; -#endif - // Add each of the elements next to each other, successively, to stuff each - // 8 byte mask into one. - uint8x16_t sum0 = - vpaddq_u8(vandq_u8(uint8x16_t(this->chunks[0]), bit_mask), - vandq_u8(uint8x16_t(this->chunks[1]), bit_mask)); - uint8x16_t sum1 = - vpaddq_u8(vandq_u8(uint8x16_t(this->chunks[2]), bit_mask), - vandq_u8(uint8x16_t(this->chunks[3]), bit_mask)); - sum0 = vpaddq_u8(sum0, sum1); - sum0 = vpaddq_u8(sum0, sum0); - return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); - } - - simdutf_really_inline uint64_t lt(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] < mask, this->chunks[1] < mask, - this->chunks[2] < mask, this->chunks[3] < mask) - .to_bitmask(); - } - simdutf_really_inline uint64_t gt(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] > mask, this->chunks[1] > mask, - this->chunks[2] > mask, this->chunks[3] > mask) - .to_bitmask(); - } - simdutf_really_inline uint64_t gteq_unsigned(const uint8_t m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(simd8(uint8x16_t(this->chunks[0])) >= mask, - simd8(uint8x16_t(this->chunks[1])) >= mask, - simd8(uint8x16_t(this->chunks[2])) >= mask, - simd8(uint8x16_t(this->chunks[3])) >= mask) - .to_bitmask(); - } -}; // struct simd8x64 -/* begin file src/simdutf/arm64/simd16-inl.h */ -template struct simd16; - -template > struct base_u16 { - uint16x8_t value; - /// the size of vector in bytes - static const int SIZE = sizeof(value); - /// the number of elements of type T a vector can hold - static const int ELEMENTS = SIZE / sizeof(T); - // Conversion from/to SIMD register - simdutf_really_inline base_u16() = default; - simdutf_really_inline base_u16(const uint16x8_t _value) : value(_value) {} - simdutf_really_inline operator const uint16x8_t &() const { - return this->value; - } - simdutf_really_inline operator uint16x8_t &() { return this->value; } - // Bit operations - simdutf_really_inline simd16 operator|(const simd16 other) const { - return vorrq_u16(*this, other); - } - simdutf_really_inline simd16 operator&(const simd16 other) const { - return vandq_u16(*this, other); - } - simdutf_really_inline simd16 operator^(const simd16 other) const { - return veorq_u16(*this, other); - } - simdutf_really_inline simd16 bit_andnot(const simd16 other) const { - return vbicq_u16(*this, other); - } - simdutf_really_inline simd16 operator~() const { return *this ^ 0xFFu; } - simdutf_really_inline simd16 &operator|=(const simd16 other) { - auto this_cast = static_cast *>(this); - *this_cast = *this_cast | other; - return *this_cast; - } - simdutf_really_inline simd16 &operator&=(const simd16 other) { - auto this_cast = static_cast *>(this); - *this_cast = *this_cast & other; - return *this_cast; - } - simdutf_really_inline simd16 &operator^=(const simd16 other) { - auto this_cast = static_cast *>(this); - *this_cast = *this_cast ^ other; - return *this_cast; - } - - friend simdutf_really_inline Mask operator==(const simd16 lhs, - const simd16 rhs) { - return vceqq_u16(lhs, rhs); - } - - template - simdutf_really_inline simd16 prev(const simd16 prev_chunk) const { - return vextq_u18(prev_chunk, *this, 8 - N); - } -}; - -template > -struct base16 : base_u16 { - typedef uint16_t bitmask_t; - typedef uint32_t bitmask2_t; - - simdutf_really_inline base16() : base_u16() {} - simdutf_really_inline base16(const uint16x8_t _value) : base_u16(_value) {} - template - simdutf_really_inline base16(const Pointer *ptr) : base16(vld1q_u16(ptr)) {} - - static const int SIZE = sizeof(base_u16::value); - void dump() const { - uint16_t temp[8]; - vst1q_u16(temp, *this); - printf("[%04x, %04x, %04x, %04x, %04x, %04x, %04x, %04x]\n", temp[0], - temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], temp[7]); - } - template - simdutf_really_inline simd16 prev(const simd16 prev_chunk) const { - return vextq_u18(prev_chunk, *this, 8 - N); - } -}; - -// SIMD byte mask type (returned by things like eq and gt) -template <> struct simd16 : base16 { - static simdutf_really_inline simd16 splat(bool _value) { - return vmovq_n_u16(uint16_t(-(!!_value))); - } - - simdutf_really_inline simd16() : base16() {} - simdutf_really_inline simd16(const uint16x8_t _value) - : base16(_value) {} - // Splat constructor - simdutf_really_inline simd16(bool _value) : base16(splat(_value)) {} -}; - -template struct base16_numeric : base16 { - static simdutf_really_inline simd16 splat(T _value) { - return vmovq_n_u16(_value); - } - static simdutf_really_inline simd16 zero() { return vdupq_n_u16(0); } - static simdutf_really_inline simd16 load(const T values[8]) { - return vld1q_u16(reinterpret_cast(values)); - } - - simdutf_really_inline base16_numeric() : base16() {} - simdutf_really_inline base16_numeric(const uint16x8_t _value) - : base16(_value) {} - - // Store to array - simdutf_really_inline void store(T dst[8]) const { - return vst1q_u16(dst, *this); - } - - // Override to distinguish from bool version - simdutf_really_inline simd16 operator~() const { return *this ^ 0xFFu; } - - // Addition/subtraction are the same for signed and unsigned - simdutf_really_inline simd16 operator+(const simd16 other) const { - return vaddq_u16(*this, other); - } - simdutf_really_inline simd16 operator-(const simd16 other) const { - return vsubq_u16(*this, other); - } - simdutf_really_inline simd16 &operator+=(const simd16 other) { - *this = *this + other; - return *static_cast *>(this); - } - simdutf_really_inline simd16 &operator-=(const simd16 other) { - *this = *this - other; - return *static_cast *>(this); - } -}; - -// Signed code units -template <> struct simd16 : base16_numeric { - simdutf_really_inline simd16() : base16_numeric() {} -#ifndef SIMDUTF_REGULAR_VISUAL_STUDIO - simdutf_really_inline simd16(const uint16x8_t _value) - : base16_numeric(_value) {} -#endif - simdutf_really_inline simd16(const int16x8_t _value) - : base16_numeric(vreinterpretq_u16_s16(_value)) {} - - // Splat constructor - simdutf_really_inline simd16(int16_t _value) : simd16(splat(_value)) {} - // Array constructor - simdutf_really_inline simd16(const int16_t *values) : simd16(load(values)) {} - simdutf_really_inline simd16(const char16_t *values) - : simd16(load(reinterpret_cast(values))) {} - simdutf_really_inline operator simd16() const; - simdutf_really_inline operator const uint16x8_t &() const { - return this->value; - } - simdutf_really_inline operator const int16x8_t() const { - return vreinterpretq_s16_u16(this->value); - } - - simdutf_really_inline int16_t max_val() const { - return vmaxvq_s16(vreinterpretq_s16_u16(this->value)); - } - simdutf_really_inline int16_t min_val() const { - return vminvq_s16(vreinterpretq_s16_u16(this->value)); - } - // Order-sensitive comparisons - simdutf_really_inline simd16 - max_val(const simd16 other) const { - return vmaxq_s16(vreinterpretq_s16_u16(this->value), - vreinterpretq_s16_u16(other.value)); - } - simdutf_really_inline simd16 - min_val(const simd16 other) const { - return vmaxq_s16(vreinterpretq_s16_u16(this->value), - vreinterpretq_s16_u16(other.value)); - } - simdutf_really_inline simd16 - operator>(const simd16 other) const { - return vcgtq_s16(vreinterpretq_s16_u16(this->value), - vreinterpretq_s16_u16(other.value)); - } - simdutf_really_inline simd16 - operator<(const simd16 other) const { - return vcltq_s16(vreinterpretq_s16_u16(this->value), - vreinterpretq_s16_u16(other.value)); - } -}; - -// Unsigned code units -template <> struct simd16 : base16_numeric { - simdutf_really_inline simd16() : base16_numeric() {} - simdutf_really_inline simd16(const uint16x8_t _value) - : base16_numeric(_value) {} - - // Splat constructor - simdutf_really_inline simd16(uint16_t _value) : simd16(splat(_value)) {} - // Array constructor - simdutf_really_inline simd16(const uint16_t *values) : simd16(load(values)) {} - simdutf_really_inline simd16(const char16_t *values) - : simd16(load(reinterpret_cast(values))) {} - - simdutf_really_inline int16_t max_val() const { return vmaxvq_u16(*this); } - simdutf_really_inline int16_t min_val() const { return vminvq_u16(*this); } - // Saturated math - simdutf_really_inline simd16 - saturating_add(const simd16 other) const { - return vqaddq_u16(*this, other); - } - simdutf_really_inline simd16 - saturating_sub(const simd16 other) const { - return vqsubq_u16(*this, other); - } - - // Order-specific operations - simdutf_really_inline simd16 - max_val(const simd16 other) const { - return vmaxq_u16(*this, other); - } - simdutf_really_inline simd16 - min_val(const simd16 other) const { - return vminq_u16(*this, other); - } - // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdutf_really_inline simd16 - gt_bits(const simd16 other) const { - return this->saturating_sub(other); - } - // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - simdutf_really_inline simd16 - lt_bits(const simd16 other) const { - return other.saturating_sub(*this); - } - simdutf_really_inline simd16 - operator<=(const simd16 other) const { - return vcleq_u16(*this, other); - } - simdutf_really_inline simd16 - operator>=(const simd16 other) const { - return vcgeq_u16(*this, other); - } - simdutf_really_inline simd16 - operator>(const simd16 other) const { - return vcgtq_u16(*this, other); - } - simdutf_really_inline simd16 - operator<(const simd16 other) const { - return vcltq_u16(*this, other); - } - - // Bit-specific operations - simdutf_really_inline simd16 bits_not_set() const { - return *this == uint16_t(0); - } - template simdutf_really_inline simd16 shr() const { - return simd16(vshrq_n_u16(*this, N)); - } - template simdutf_really_inline simd16 shl() const { - return simd16(vshlq_n_u16(*this, N)); - } - - // Pack with the unsigned saturation of two uint16_t code units into single - // uint8_t vector - static simdutf_really_inline simd8 pack(const simd16 &v0, - const simd16 &v1) { - return vqmovn_high_u16(vqmovn_u16(v0), v1); - } - - // Change the endianness - simdutf_really_inline simd16 swap_bytes() const { - return vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(*this))); - } - - void dump() const { - uint16_t temp[8]; - vst1q_u16(temp, *this); - printf("[%04x, %04x, %04x, %04x, %04x, %04x, %04x, %04x]\n", temp[0], - temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], temp[7]); - } - - simdutf_really_inline uint32_t sum() const { return vaddlvq_u16(value); } -}; - -simdutf_really_inline simd16::operator simd16() const { - return this->value; -} - -template struct simd16x32 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd16); - static_assert(NUM_CHUNKS == 4, - "ARM kernel should use four registers per 64-byte block."); - simd16 chunks[NUM_CHUNKS]; - - simd16x32(const simd16x32 &o) = delete; // no copy allowed - simd16x32 & - operator=(const simd16 other) = delete; // no assignment allowed - simd16x32() = delete; // no default constructor allowed - - simdutf_really_inline - simd16x32(const simd16 chunk0, const simd16 chunk1, - const simd16 chunk2, const simd16 chunk3) - : chunks{chunk0, chunk1, chunk2, chunk3} {} - simdutf_really_inline simd16x32(const T *ptr) - : chunks{simd16::load(ptr), - simd16::load(ptr + sizeof(simd16) / sizeof(T)), - simd16::load(ptr + 2 * sizeof(simd16) / sizeof(T)), - simd16::load(ptr + 3 * sizeof(simd16) / sizeof(T))} {} - - simdutf_really_inline void store(T *ptr) const { - this->chunks[0].store(ptr + sizeof(simd16) * 0 / sizeof(T)); - this->chunks[1].store(ptr + sizeof(simd16) * 1 / sizeof(T)); - this->chunks[2].store(ptr + sizeof(simd16) * 2 / sizeof(T)); - this->chunks[3].store(ptr + sizeof(simd16) * 3 / sizeof(T)); - } - - simdutf_really_inline simd16 reduce_or() const { - return (this->chunks[0] | this->chunks[1]) | - (this->chunks[2] | this->chunks[3]); - } - - simdutf_really_inline bool is_ascii() const { return reduce_or().is_ascii(); } - - simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { - this->chunks[0].store_ascii_as_utf16(ptr + sizeof(simd16) * 0); - this->chunks[1].store_ascii_as_utf16(ptr + sizeof(simd16) * 1); - this->chunks[2].store_ascii_as_utf16(ptr + sizeof(simd16) * 2); - this->chunks[3].store_ascii_as_utf16(ptr + sizeof(simd16) * 3); - } - - simdutf_really_inline uint64_t to_bitmask() const { -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - const uint8x16_t bit_mask = - simdutf_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); -#else - const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; -#endif - // Add each of the elements next to each other, successively, to stuff each - // 8 byte mask into one. - uint8x16_t sum0 = vpaddq_u8( - vreinterpretq_u8_u16(this->chunks[0] & vreinterpretq_u16_u8(bit_mask)), - vreinterpretq_u8_u16(this->chunks[1] & vreinterpretq_u16_u8(bit_mask))); - uint8x16_t sum1 = vpaddq_u8( - vreinterpretq_u8_u16(this->chunks[2] & vreinterpretq_u16_u8(bit_mask)), - vreinterpretq_u8_u16(this->chunks[3] & vreinterpretq_u16_u8(bit_mask))); - sum0 = vpaddq_u8(sum0, sum1); - sum0 = vpaddq_u8(sum0, sum0); - return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); - } - - simdutf_really_inline void swap_bytes() { - this->chunks[0] = this->chunks[0].swap_bytes(); - this->chunks[1] = this->chunks[1].swap_bytes(); - this->chunks[2] = this->chunks[2].swap_bytes(); - this->chunks[3] = this->chunks[3].swap_bytes(); - } - - simdutf_really_inline uint64_t lteq(const T m) const { - const simd16 mask = simd16::splat(m); - return simd16x32(this->chunks[0] <= mask, this->chunks[1] <= mask, - this->chunks[2] <= mask, this->chunks[3] <= mask) - .to_bitmask(); - } - - simdutf_really_inline uint64_t not_in_range(const T low, const T high) const { - const simd16 mask_low = simd16::splat(low); - const simd16 mask_high = simd16::splat(high); - return simd16x32( - (this->chunks[0] > mask_high) | (this->chunks[0] < mask_low), - (this->chunks[1] > mask_high) | (this->chunks[1] < mask_low), - (this->chunks[2] > mask_high) | (this->chunks[2] < mask_low), - (this->chunks[3] > mask_high) | (this->chunks[3] < mask_low)) - .to_bitmask(); - } -}; // struct simd16x32 -template <> -simdutf_really_inline uint64_t simd16x32::not_in_range( - const uint16_t low, const uint16_t high) const { - const simd16 mask_low = simd16::splat(low); - const simd16 mask_high = simd16::splat(high); - simd16x32 x(simd16((this->chunks[0] > mask_high) | - (this->chunks[0] < mask_low)), - simd16((this->chunks[1] > mask_high) | - (this->chunks[1] < mask_low)), - simd16((this->chunks[2] > mask_high) | - (this->chunks[2] < mask_low)), - simd16((this->chunks[3] > mask_high) | - (this->chunks[3] < mask_low))); - return x.to_bitmask(); -} - -simdutf_really_inline simd16 min(const simd16 a, - simd16 b) { - return vminq_u16(a.value, b.value); -} -/* end file src/simdutf/arm64/simd16-inl.h */ -/* begin file src/simdutf/arm64/simd32-inl.h */ -template struct simd32; - -template <> struct simd32 { - static const size_t SIZE = sizeof(uint32x4_t); - static const size_t ELEMENTS = SIZE / sizeof(uint32_t); - - uint32x4_t value; - - simdutf_really_inline simd32(const uint32x4_t v) : value(v) {} - - template - simdutf_really_inline simd32(const Pointer *ptr) - : value(vld1q_u32(reinterpret_cast(ptr))) {} - - simdutf_really_inline uint64_t sum() const { return vaddvq_u32(value); } - - simdutf_really_inline simd32 swap_bytes() const { - return vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(value))); - } - - template simdutf_really_inline simd32 shr() const { - return vshrq_n_u32(value, N); - } - - template simdutf_really_inline simd32 shl() const { - return vshlq_n_u32(value, N); - } - - void dump() const { - uint32_t temp[4]; - vst1q_u32(temp, value); - printf("[%08x, %08x, %08x, %08x]\n", temp[0], temp[1], temp[2], temp[3]); - } - - // operators - simdutf_really_inline simd32 &operator+=(const simd32 other) { - value = vaddq_u32(value, other.value); - return *this; - } - - // static members - simdutf_really_inline static simd32 zero() { - return vdupq_n_u32(0); - } - - simdutf_really_inline static simd32 splat(uint32_t v) { - return vdupq_n_u32(v); - } -}; - -//---------------------------------------------------------------------- - -template <> struct simd32 { - uint32x4_t value; - - simdutf_really_inline simd32(const uint32x4_t v) : value(v) {} - - simdutf_really_inline bool any() const { return vmaxvq_u32(value) != 0; } -}; - -//---------------------------------------------------------------------- - -template -simdutf_really_inline simd32 operator|(const simd32 a, - const simd32 b) { - return vorrq_u32(a.value, b.value); -} - -simdutf_really_inline simd32 min(const simd32 a, - const simd32 b) { - return vminq_u32(a.value, b.value); -} - -simdutf_really_inline simd32 max(const simd32 a, - const simd32 b) { - return vmaxq_u32(a.value, b.value); -} - -simdutf_really_inline simd32 operator==(const simd32 a, - uint32_t b) { - return vceqq_u32(a.value, vdupq_n_u32(b)); -} - -simdutf_really_inline simd32 operator&(const simd32 a, - const simd32 b) { - return vandq_u32(a.value, b.value); -} - -simdutf_really_inline simd32 operator&(const simd32 a, - uint32_t b) { - return vandq_u32(a.value, vdupq_n_u32(b)); -} - -simdutf_really_inline simd32 operator|(const simd32 a, - uint32_t b) { - return vorrq_u32(a.value, vdupq_n_u32(b)); -} - -simdutf_really_inline simd32 operator+(const simd32 a, - const simd32 b) { - return vaddq_u32(a.value, b.value); -} - -simdutf_really_inline simd32 operator-(const simd32 a, - uint32_t b) { - return vsubq_u32(a.value, vdupq_n_u32(b)); -} - -simdutf_really_inline simd32 operator>=(const simd32 a, - const simd32 b) { - return vcgeq_u32(a.value, b.value); -} - -simdutf_really_inline simd32 operator!(const simd32 v) { - return vmvnq_u32(v.value); -} - -simdutf_really_inline simd32 operator>(const simd32 a, - const simd32 b) { - return vcgtq_u32(a.value, b.value); -} - -simdutf_really_inline simd32 select(const simd32 cond, - const simd32 v_true, - const simd32 v_false) { - return vbslq_u32(cond.value, v_true.value, v_false.value); -} -/* end file src/simdutf/arm64/simd32-inl.h */ -/* begin file src/simdutf/arm64/simd64-inl.h */ -template struct simd64; - -template <> struct simd64 { - uint64x2_t value; - - simdutf_really_inline simd64(const uint64x2_t v) : value(v) {} - - template - simdutf_really_inline simd64(const Pointer *ptr) - : value(vld1q_u64(reinterpret_cast(ptr))) {} - - simdutf_really_inline uint64_t sum() const { return vaddvq_u64(value); } - - // operators - simdutf_really_inline simd64 &operator+=(const simd64 other) { - value = vaddq_u64(value, other.value); - return *this; - } - - // static members - simdutf_really_inline static simd64 zero() { - return vdupq_n_u64(0); - } - - simdutf_really_inline static simd64 splat(uint64_t v) { - return vdupq_n_u64(v); - } -}; -/* end file src/simdutf/arm64/simd64-inl.h */ - -simdutf_really_inline simd64 sum_8bytes(const simd8 v) { - // We do it as 3 instructions. There might be a faster way. - // We hope that these 3 instructions are cheap. - uint16x8_t first_sum = vpaddlq_u8(v); - uint32x4_t second_sum = vpaddlq_u16(first_sum); - return vpaddlq_u32(second_sum); -} - -} // namespace simd -} // unnamed namespace -} // namespace arm64 -} // namespace simdutf - -#endif // SIMDUTF_ARM64_SIMD_H -/* end file src/simdutf/arm64/simd.h */ - -/* begin file src/simdutf/arm64/end.h */ -#undef SIMDUTF_SIMD_HAS_BYTEMASK -/* end file src/simdutf/arm64/end.h */ - -#endif // SIMDUTF_IMPLEMENTATION_ARM64 - -#endif // SIMDUTF_ARM64_H -/* end file src/simdutf/arm64.h */ -/* begin file src/simdutf/icelake.h */ -#ifndef SIMDUTF_ICELAKE_H -#define SIMDUTF_ICELAKE_H - - -#ifdef __has_include - // How do we detect that a compiler supports vbmi2? - // For sure if the following header is found, we are ok? - #if __has_include() - #define SIMDUTF_COMPILER_SUPPORTS_VBMI2 1 - #endif -#endif - -#ifdef _MSC_VER - #if _MSC_VER >= 1930 - // Visual Studio 2022 and up support VBMI2 under x64 even if the header - // avx512vbmi2intrin.h is not found. - // Visual Studio 2019 technically supports VBMI2, but the implementation - // might be unreliable. Search for visualstudio2019icelakeissue in our - // tests. - #define SIMDUTF_COMPILER_SUPPORTS_VBMI2 1 - #endif -#endif - -// We allow icelake on x64 as long as the compiler is known to support VBMI2. -#ifndef SIMDUTF_IMPLEMENTATION_ICELAKE - #define SIMDUTF_IMPLEMENTATION_ICELAKE \ - ((SIMDUTF_IS_X86_64) && (SIMDUTF_COMPILER_SUPPORTS_VBMI2)) -#endif - -// To see why (__BMI__) && (__LZCNT__) are not part of this next line, see -// https://github.com/simdutf/simdutf/issues/1247 -#if ((SIMDUTF_IMPLEMENTATION_ICELAKE) && (SIMDUTF_IS_X86_64) && (__AVX2__) && \ - (SIMDUTF_HAS_AVX512F && SIMDUTF_HAS_AVX512DQ && SIMDUTF_HAS_AVX512VL && \ - SIMDUTF_HAS_AVX512VBMI2) && \ - (!SIMDUTF_IS_32BITS)) - #define SIMDUTF_CAN_ALWAYS_RUN_ICELAKE 1 -#else - #define SIMDUTF_CAN_ALWAYS_RUN_ICELAKE 0 -#endif - -#if SIMDUTF_IMPLEMENTATION_ICELAKE - #if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE - #define SIMDUTF_TARGET_ICELAKE - #else - #define SIMDUTF_TARGET_ICELAKE \ - SIMDUTF_TARGET_REGION( \ - "avx512f,avx512dq,avx512cd,avx512bw,avx512vbmi,avx512vbmi2," \ - "avx512vl,avx2,bmi,bmi2,pclmul,lzcnt,popcnt,avx512vpopcntdq") - #endif - -namespace simdutf { -namespace icelake {} // namespace icelake -} // namespace simdutf - - // - // These two need to be included outside SIMDUTF_TARGET_REGION - // -/* begin file src/simdutf/icelake/intrinsics.h */ -#ifndef SIMDUTF_ICELAKE_INTRINSICS_H -#define SIMDUTF_ICELAKE_INTRINSICS_H - - -#ifdef SIMDUTF_VISUAL_STUDIO - // under clang within visual studio, this will include - #include // visual studio or clang - #include -#else - - #if SIMDUTF_GCC11ORMORE -// We should not get warnings while including yet we do -// under some versions of GCC. -// If the x86intrin.h header has uninitialized values that are problematic, -// it is a GCC issue, we want to ignore these warnings. -SIMDUTF_DISABLE_GCC_WARNING(-Wuninitialized) - #endif - - #include // elsewhere - - #if SIMDUTF_GCC11ORMORE -// cancels the suppression of the -Wuninitialized -SIMDUTF_POP_DISABLE_WARNINGS - #endif - - #ifndef _tzcnt_u64 - #define _tzcnt_u64(x) __tzcnt_u64(x) - #endif // _tzcnt_u64 -#endif // SIMDUTF_VISUAL_STUDIO - -#ifdef SIMDUTF_CLANG_VISUAL_STUDIO - /** - * You are not supposed, normally, to include these - * headers directly. Instead you should either include intrin.h - * or x86intrin.h. However, when compiling with clang - * under Windows (i.e., when _MSC_VER is set), these headers - * only get included *if* the corresponding features are detected - * from macros: - * e.g., if __AVX2__ is set... in turn, we normally set these - * macros by compiling against the corresponding architecture - * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole - * software with these advanced instructions. In simdutf, we - * want to compile the whole program for a generic target, - * and only target our specific kernels. As a workaround, - * we directly include the needed headers. These headers would - * normally guard against such usage, but we carefully included - * (or ) before, so the headers - * are fooled. - */ - #include // for _blsr_u64 - #include // for _pext_u64, _pdep_u64 - #include // for __lzcnt64 - #include // for most things (AVX2, AVX512, _popcnt64) - #include - #include - #include - #include - // Important: we need the AVX-512 headers: - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - // unfortunately, we may not get _blsr_u64, but, thankfully, clang - // has it as a macro. - #ifndef _blsr_u64 - // we roll our own - #define _blsr_u64(n) ((n - 1) & n) - #endif // _blsr_u64 -#endif // SIMDUTF_CLANG_VISUAL_STUDIO - -#if defined(__GNUC__) && !defined(__clang__) - - #if __GNUC__ == 8 - #define SIMDUTF_GCC8 1 - #elif __GNUC__ == 9 - #define SIMDUTF_GCC9 1 - #endif // __GNUC__ == 8 || __GNUC__ == 9 - -#endif // defined(__GNUC__) && !defined(__clang__) - -#if SIMDUTF_GCC8 - #pragma GCC push_options - #pragma GCC target("avx512f") -/** - * GCC 8 fails to provide _mm512_set_epi8. We roll our own. - */ -inline __m512i -_mm512_set_epi8(uint8_t a0, uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4, - uint8_t a5, uint8_t a6, uint8_t a7, uint8_t a8, uint8_t a9, - uint8_t a10, uint8_t a11, uint8_t a12, uint8_t a13, uint8_t a14, - uint8_t a15, uint8_t a16, uint8_t a17, uint8_t a18, uint8_t a19, - uint8_t a20, uint8_t a21, uint8_t a22, uint8_t a23, uint8_t a24, - uint8_t a25, uint8_t a26, uint8_t a27, uint8_t a28, uint8_t a29, - uint8_t a30, uint8_t a31, uint8_t a32, uint8_t a33, uint8_t a34, - uint8_t a35, uint8_t a36, uint8_t a37, uint8_t a38, uint8_t a39, - uint8_t a40, uint8_t a41, uint8_t a42, uint8_t a43, uint8_t a44, - uint8_t a45, uint8_t a46, uint8_t a47, uint8_t a48, uint8_t a49, - uint8_t a50, uint8_t a51, uint8_t a52, uint8_t a53, uint8_t a54, - uint8_t a55, uint8_t a56, uint8_t a57, uint8_t a58, uint8_t a59, - uint8_t a60, uint8_t a61, uint8_t a62, uint8_t a63) { - return _mm512_set_epi64( - uint64_t(a7) + (uint64_t(a6) << 8) + (uint64_t(a5) << 16) + - (uint64_t(a4) << 24) + (uint64_t(a3) << 32) + (uint64_t(a2) << 40) + - (uint64_t(a1) << 48) + (uint64_t(a0) << 56), - uint64_t(a15) + (uint64_t(a14) << 8) + (uint64_t(a13) << 16) + - (uint64_t(a12) << 24) + (uint64_t(a11) << 32) + - (uint64_t(a10) << 40) + (uint64_t(a9) << 48) + (uint64_t(a8) << 56), - uint64_t(a23) + (uint64_t(a22) << 8) + (uint64_t(a21) << 16) + - (uint64_t(a20) << 24) + (uint64_t(a19) << 32) + - (uint64_t(a18) << 40) + (uint64_t(a17) << 48) + (uint64_t(a16) << 56), - uint64_t(a31) + (uint64_t(a30) << 8) + (uint64_t(a29) << 16) + - (uint64_t(a28) << 24) + (uint64_t(a27) << 32) + - (uint64_t(a26) << 40) + (uint64_t(a25) << 48) + (uint64_t(a24) << 56), - uint64_t(a39) + (uint64_t(a38) << 8) + (uint64_t(a37) << 16) + - (uint64_t(a36) << 24) + (uint64_t(a35) << 32) + - (uint64_t(a34) << 40) + (uint64_t(a33) << 48) + (uint64_t(a32) << 56), - uint64_t(a47) + (uint64_t(a46) << 8) + (uint64_t(a45) << 16) + - (uint64_t(a44) << 24) + (uint64_t(a43) << 32) + - (uint64_t(a42) << 40) + (uint64_t(a41) << 48) + (uint64_t(a40) << 56), - uint64_t(a55) + (uint64_t(a54) << 8) + (uint64_t(a53) << 16) + - (uint64_t(a52) << 24) + (uint64_t(a51) << 32) + - (uint64_t(a50) << 40) + (uint64_t(a49) << 48) + (uint64_t(a48) << 56), - uint64_t(a63) + (uint64_t(a62) << 8) + (uint64_t(a61) << 16) + - (uint64_t(a60) << 24) + (uint64_t(a59) << 32) + - (uint64_t(a58) << 40) + (uint64_t(a57) << 48) + - (uint64_t(a56) << 56)); -} - #pragma GCC pop_options -#endif // SIMDUTF_GCC8 - -#endif // SIMDUTF_HASWELL_INTRINSICS_H -/* end file src/simdutf/icelake/intrinsics.h */ -/* begin file src/simdutf/icelake/implementation.h */ -#ifndef SIMDUTF_ICELAKE_IMPLEMENTATION_H -#define SIMDUTF_ICELAKE_IMPLEMENTATION_H - - -namespace simdutf { -namespace icelake { - -namespace { -using namespace simdutf; -} - -class implementation final : public simdutf::implementation { -public: - simdutf_really_inline implementation() - : simdutf::implementation( - "icelake", - "Intel AVX512 (AVX-512BW, AVX-512CD, AVX-512VL, AVX-512VBMI2 " - "extensions)", - internal::instruction_set::AVX2 | internal::instruction_set::BMI1 | - internal::instruction_set::BMI2 | - internal::instruction_set::AVX512BW | - internal::instruction_set::AVX512CD | - internal::instruction_set::AVX512VL | - internal::instruction_set::AVX512VBMI2 | - internal::instruction_set::AVX512VPOPCNTDQ) {} - -#if SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused int detect_encodings(const char *input, - size_t length) const noexcept final; -#endif // SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused bool validate_utf8(const char *buf, - size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 - simdutf_warn_unused result - validate_utf8_with_errors(const char *buf, size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_ASCII - simdutf_warn_unused bool validate_ascii(const char *buf, - size_t len) const noexcept final; - simdutf_warn_unused result - validate_ascii_with_errors(const char *buf, size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_ASCII - -#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused bool validate_utf16le(const char16_t *buf, - size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused bool validate_utf16be(const char16_t *buf, - size_t len) const noexcept final; - simdutf_warn_unused result validate_utf16le_with_errors( - const char16_t *buf, size_t len) const noexcept final; - simdutf_warn_unused result validate_utf16be_with_errors( - const char16_t *buf, size_t len) const noexcept final; - void to_well_formed_utf16be(const char16_t *input, size_t len, - char16_t *output) const noexcept final; - void to_well_formed_utf16le(const char16_t *input, size_t len, - char16_t *output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused bool validate_utf32(const char32_t *buf, - size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused result validate_utf32_with_errors( - const char32_t *buf, size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_latin1_to_utf8( - const char *buf, size_t len, char *utf8_output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_latin1_to_utf16le( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_latin1_to_utf16be( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_latin1_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_latin1_with_errors( - const char *buf, size_t len, char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t convert_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t convert_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf32_with_errors( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - convert_utf16le_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16be_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( - const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( - const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16le_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16be_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t convert_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t convert_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf8_with_errors( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - convert_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; - simdutf_warn_unused result - convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - convert_utf32_to_utf16le(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf32_to_utf16be(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_utf16le(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_utf16be(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16le_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16be_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16le_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16be_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 - void change_endianness_utf16(const char16_t *buf, size_t length, - char16_t *output) const noexcept final; - simdutf_warn_unused size_t count_utf16le(const char16_t *buf, - size_t length) const noexcept; - simdutf_warn_unused size_t count_utf16be(const char16_t *buf, - size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 - simdutf_warn_unused size_t count_utf8(const char *buf, - size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t - utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t utf32_length_from_utf16le( - const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t utf32_length_from_utf16be( - const char16_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t - utf16_length_from_utf8(const char *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf32_length_from_utf8(const char *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - latin1_length_from_utf8(const char *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - utf8_length_from_latin1(const char *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_BASE64 - simdutf_warn_unused result base64_to_binary( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused full_result base64_to_binary_details( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused result - base64_to_binary(const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused full_result base64_to_binary_details( - const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - size_t binary_to_base64(const char *input, size_t length, char *output, - base64_options options) const noexcept; -#endif // SIMDUTF_FEATURE_BASE64 -}; - -} // namespace icelake -} // namespace simdutf - -#endif // SIMDUTF_ICELAKE_IMPLEMENTATION_H -/* end file src/simdutf/icelake/implementation.h */ - - // - // The rest need to be inside the region - // -/* begin file src/simdutf/icelake/begin.h */ -// redefining SIMDUTF_IMPLEMENTATION to "icelake" -// #define SIMDUTF_IMPLEMENTATION icelake - -#if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE -// nothing needed. -#else -SIMDUTF_TARGET_ICELAKE -#endif - -#if SIMDUTF_GCC11ORMORE // workaround for - // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 -// clang-format off -SIMDUTF_DISABLE_GCC_WARNING(-Wmaybe-uninitialized) -// clang-format on -#endif // end of workaround -/* end file src/simdutf/icelake/begin.h */ - // Declarations -/* begin file src/simdutf/icelake/bitmanipulation.h */ -#ifndef SIMDUTF_ICELAKE_BITMANIPULATION_H -#define SIMDUTF_ICELAKE_BITMANIPULATION_H - -namespace simdutf { -namespace icelake { -namespace { - -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO -simdutf_really_inline unsigned __int64 count_ones(uint64_t input_num) { - // note: we do not support legacy 32-bit Windows - return __popcnt64(input_num); // Visual Studio wants two underscores -} -#else -simdutf_really_inline long long int count_ones(uint64_t input_num) { - return _popcnt64(input_num); -} -#endif - -#if SIMDUTF_NEED_TRAILING_ZEROES -// simdutf_really_inline int trailing_zeroes(uint64_t input_num) { -// #if SIMDUTF_REGULAR_VISUAL_STUDIO -// return (int)_tzcnt_u64(input_num); -// #else // SIMDUTF_REGULAR_VISUAL_STUDIO -// return __builtin_ctzll(input_num); -// #endif // SIMDUTF_REGULAR_VISUAL_STUDIO -// } -#endif - -} // unnamed namespace -} // namespace icelake -} // namespace simdutf - -#endif // SIMDUTF_ICELAKE_BITMANIPULATION_H -/* end file src/simdutf/icelake/bitmanipulation.h */ -/* begin file src/simdutf/icelake/simd.h */ -#ifndef SIMDUTF_ICELAKE_SIMD_H -#define SIMDUTF_ICELAKE_SIMD_H - -namespace simdutf { -namespace icelake { -namespace { -namespace simd { - -/* begin file src/simdutf/icelake/simd16-inl.h */ -template struct simd16; - -template <> struct simd16 { - static const size_t SIZE = sizeof(__m512i); - static const size_t ELEMENTS = SIZE / sizeof(uint16_t); - - template - static simdutf_really_inline simd16 load(const Pointer *ptr) { - return simd16(ptr); - } - - __m512i value; - - simdutf_really_inline simd16(const __m512i v) : value(v) {} - - template - simdutf_really_inline simd16(const Pointer *ptr) - : value(_mm512_loadu_si512(reinterpret_cast(ptr))) {} - - // operators - simdutf_really_inline simd16 &operator+=(const simd16 other) { - value = _mm512_add_epi32(value, other.value); - return *this; - } - - simdutf_really_inline simd16 &operator-=(const simd16 other) { - value = _mm512_sub_epi32(value, other.value); - return *this; - } - - // methods - simdutf_really_inline simd16 swap_bytes() const { - const __m512i byteflip = _mm512_setr_epi64( - 0x0607040502030001, 0x0e0f0c0d0a0b0809, 0x0607040502030001, - 0x0e0f0c0d0a0b0809, 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809); - - return _mm512_shuffle_epi8(value, byteflip); - } - - simdutf_really_inline uint64_t sum() const { - const auto lo = _mm512_and_si512(value, _mm512_set1_epi32(0xffff)); - const auto hi = _mm512_srli_epi32(value, 16); - const auto sum32 = _mm512_add_epi32(lo, hi); - - return _mm512_reduce_add_epi32(sum32); - } - - // static members - simdutf_really_inline static simd16 zero() { - return _mm512_setzero_si512(); - } - - simdutf_really_inline static simd16 splat(uint16_t v) { - return _mm512_set1_epi16(v); - } -}; - -template <> struct simd16 { - __mmask32 value; - - simdutf_really_inline simd16(const __mmask32 v) : value(v) {} -}; - -// ------------------------------------------------------------ - -simdutf_really_inline simd16 min(const simd16 b, - const simd16 a) { - return _mm512_min_epu16(a.value, b.value); -} - -simdutf_really_inline simd16 operator&(const simd16 a, - uint16_t b) { - return _mm512_and_si512(a.value, _mm512_set1_epi16(b)); -} - -simdutf_really_inline simd16 operator^(const simd16 a, - uint16_t b) { - return _mm512_xor_si512(a.value, _mm512_set1_epi16(b)); -} - -simdutf_really_inline simd16 operator^(const simd16 a, - const simd16 b) { - return _mm512_xor_si512(a.value, b.value); -} - -simdutf_really_inline simd16 operator==(const simd16 a, - uint16_t b) { - return _mm512_cmpeq_epi16_mask(a.value, _mm512_set1_epi16(b)); -} -/* end file src/simdutf/icelake/simd16-inl.h */ -/* begin file src/simdutf/icelake/simd32-inl.h */ -template struct simd32; - -template <> struct simd32 { - static const size_t SIZE = sizeof(__m512i); - static const size_t ELEMENTS = SIZE / sizeof(uint32_t); - - __m512i value; - - simdutf_really_inline simd32(const __m512i v) : value(v) {} - - template - simdutf_really_inline simd32(const Pointer *ptr) - : value(_mm512_loadu_si512(reinterpret_cast(ptr))) {} - - uint64_t sum() const { - const __m512i mask = _mm512_set1_epi64(0xffffffff); - const __m512i t0 = _mm512_and_si512(value, mask); - const __m512i t1 = _mm512_srli_epi64(value, 32); - const __m512i t2 = _mm512_add_epi64(t0, t1); - return _mm512_reduce_add_epi64(t2); - } - - // operators - simdutf_really_inline simd32 &operator+=(const simd32 other) { - value = _mm512_add_epi32(value, other.value); - return *this; - } - - // static members - simdutf_really_inline static simd32 zero() { - return _mm512_setzero_si512(); - } - - simdutf_really_inline static simd32 splat(uint32_t v) { - return _mm512_set1_epi32(v); - } -}; - -simdutf_really_inline simd32 min(const simd32 b, - const simd32 a) { - return _mm512_min_epu32(a.value, b.value); -} - -simdutf_really_inline simd32 operator&(const simd32 b, - const simd32 a) { - return _mm512_and_si512(a.value, b.value); -} -/* end file src/simdutf/icelake/simd32-inl.h */ - -} // namespace simd -} // unnamed namespace -} // namespace icelake -} // namespace simdutf - -#endif // SIMDUTF_ICELAKE_SIMD_H -/* end file src/simdutf/icelake/simd.h */ - -/* begin file src/simdutf/icelake/end.h */ -#if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE -// nothing needed. -#else -SIMDUTF_UNTARGET_REGION -#endif - - -#if SIMDUTF_GCC11ORMORE // workaround for - // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 -SIMDUTF_POP_DISABLE_WARNINGS -#endif // end of workaround -/* end file src/simdutf/icelake/end.h */ - -#endif // SIMDUTF_IMPLEMENTATION_ICELAKE -#endif // SIMDUTF_ICELAKE_H -/* end file src/simdutf/icelake.h */ -/* begin file src/simdutf/haswell.h */ -#ifndef SIMDUTF_HASWELL_H -#define SIMDUTF_HASWELL_H - -#ifdef SIMDUTF_WESTMERE_H - #error "haswell.h must be included before westmere.h" -#endif -#ifdef SIMDUTF_FALLBACK_H - #error "haswell.h must be included before fallback.h" -#endif - - -// Default Haswell to on if this is x86-64. Even if we are not compiled for it, -// it could be selected at runtime. -#ifndef SIMDUTF_IMPLEMENTATION_HASWELL - // - // You do not want to restrict it like so: SIMDUTF_IS_X86_64 && __AVX2__ - // because we want to rely on *runtime dispatch*. - // - #if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE - #define SIMDUTF_IMPLEMENTATION_HASWELL 0 - #else - #define SIMDUTF_IMPLEMENTATION_HASWELL (SIMDUTF_IS_X86_64) - #endif - -#endif -// To see why (__BMI__) && (__LZCNT__) are not part of this next line, see -// https://github.com/simdutf/simdutf/issues/1247 -#if ((SIMDUTF_IMPLEMENTATION_HASWELL) && (SIMDUTF_IS_X86_64) && (__AVX2__)) - #define SIMDUTF_CAN_ALWAYS_RUN_HASWELL 1 -#else - #define SIMDUTF_CAN_ALWAYS_RUN_HASWELL 0 -#endif - -#if SIMDUTF_IMPLEMENTATION_HASWELL - - #define SIMDUTF_TARGET_HASWELL SIMDUTF_TARGET_REGION("avx2,bmi,lzcnt,popcnt") - -namespace simdutf { -/** - * Implementation for Haswell (Intel AVX2). - */ -namespace haswell {} // namespace haswell -} // namespace simdutf - - // - // These two need to be included outside SIMDUTF_TARGET_REGION - // -/* begin file src/simdutf/haswell/implementation.h */ -#ifndef SIMDUTF_HASWELL_IMPLEMENTATION_H -#define SIMDUTF_HASWELL_IMPLEMENTATION_H - - -// The constructor may be executed on any host, so we take care not to use -// SIMDUTF_TARGET_REGION -namespace simdutf { -namespace haswell { - -using namespace simdutf; - -class implementation final : public simdutf::implementation { -public: - simdutf_really_inline implementation() - : simdutf::implementation("haswell", "Intel/AMD AVX2", - internal::instruction_set::AVX2 | - internal::instruction_set::BMI1 | - internal::instruction_set::BMI2) {} - -#if SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused int detect_encodings(const char *input, - size_t length) const noexcept final; -#endif // SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused bool validate_utf8(const char *buf, - size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 - simdutf_warn_unused result - validate_utf8_with_errors(const char *buf, size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_ASCII - simdutf_warn_unused bool validate_ascii(const char *buf, - size_t len) const noexcept final; - simdutf_warn_unused result - validate_ascii_with_errors(const char *buf, size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_ASCII - -#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused bool validate_utf16le(const char16_t *buf, - size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused bool validate_utf16be(const char16_t *buf, - size_t len) const noexcept final; - simdutf_warn_unused result validate_utf16le_with_errors( - const char16_t *buf, size_t len) const noexcept final; - simdutf_warn_unused result validate_utf16be_with_errors( - const char16_t *buf, size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused bool validate_utf32(const char32_t *buf, - size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused result validate_utf32_with_errors( - const char32_t *buf, size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_latin1_to_utf8( - const char *buf, size_t len, char *utf8_output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_latin1_to_utf16le( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_latin1_to_utf16be( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_latin1_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_latin1_with_errors( - const char *buf, size_t len, char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t convert_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t convert_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf32_with_errors( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - convert_utf16le_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16be_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( - const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( - const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16le_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16be_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t convert_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t convert_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf8_with_errors( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - convert_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; - simdutf_warn_unused result - convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - convert_utf32_to_utf16le(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf32_to_utf16be(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_utf16le(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_utf16be(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16le_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16be_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16le_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16be_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 - void change_endianness_utf16(const char16_t *buf, size_t length, - char16_t *output) const noexcept final; - simdutf_warn_unused size_t count_utf16le(const char16_t *buf, - size_t length) const noexcept; - simdutf_warn_unused size_t count_utf16be(const char16_t *buf, - size_t length) const noexcept; - void to_well_formed_utf16be(const char16_t *input, size_t len, - char16_t *output) const noexcept final; - void to_well_formed_utf16le(const char16_t *input, size_t len, - char16_t *output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 - simdutf_warn_unused size_t count_utf8(const char *buf, - size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t - utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t utf32_length_from_utf16le( - const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t utf32_length_from_utf16be( - const char16_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t - utf16_length_from_utf8(const char *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf32_length_from_utf8(const char *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - latin1_length_from_utf8(const char *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - utf8_length_from_latin1(const char *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_BASE64 - simdutf_warn_unused result base64_to_binary( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused full_result base64_to_binary_details( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused result - base64_to_binary(const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused full_result base64_to_binary_details( - const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - size_t binary_to_base64(const char *input, size_t length, char *output, - base64_options options) const noexcept; -#endif // SIMDUTF_FEATURE_BASE64 -}; - -} // namespace haswell -} // namespace simdutf - -#endif // SIMDUTF_HASWELL_IMPLEMENTATION_H -/* end file src/simdutf/haswell/implementation.h */ -/* begin file src/simdutf/haswell/intrinsics.h */ -#ifndef SIMDUTF_HASWELL_INTRINSICS_H -#define SIMDUTF_HASWELL_INTRINSICS_H - - -#ifdef SIMDUTF_VISUAL_STUDIO - // under clang within visual studio, this will include - #include // visual studio or clang -#else - - #if SIMDUTF_GCC11ORMORE -// We should not get warnings while including yet we do -// under some versions of GCC. -// If the x86intrin.h header has uninitialized values that are problematic, -// it is a GCC issue, we want to ignore these warnings. -SIMDUTF_DISABLE_GCC_WARNING(-Wuninitialized) - #endif - - #include // elsewhere - - #if SIMDUTF_GCC11ORMORE -// cancels the suppression of the -Wuninitialized -SIMDUTF_POP_DISABLE_WARNINGS - #endif - -#endif // SIMDUTF_VISUAL_STUDIO - -#ifdef SIMDUTF_CLANG_VISUAL_STUDIO - /** - * You are not supposed, normally, to include these - * headers directly. Instead you should either include intrin.h - * or x86intrin.h. However, when compiling with clang - * under Windows (i.e., when _MSC_VER is set), these headers - * only get included *if* the corresponding features are detected - * from macros: - * e.g., if __AVX2__ is set... in turn, we normally set these - * macros by compiling against the corresponding architecture - * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole - * software with these advanced instructions. In simdutf, we - * want to compile the whole program for a generic target, - * and only target our specific kernels. As a workaround, - * we directly include the needed headers. These headers would - * normally guard against such usage, but we carefully included - * (or ) before, so the headers - * are fooled. - */ - #include // for _blsr_u64 - #include // for __lzcnt64 - #include // for most things (AVX2, AVX512, _popcnt64) - #include - #include - #include - #include - // unfortunately, we may not get _blsr_u64, but, thankfully, clang - // has it as a macro. - #ifndef _blsr_u64 - // we roll our own - #define _blsr_u64(n) (((n) - 1) & (n)) - #endif // _blsr_u64 - // Same issue with _blsmsk_u32: - #ifndef _blsmsk_u32 - // we roll our own - #define _blsmsk_u32(n) (((n) - 1) ^ (n)) - #endif // _blsmsk_u32 -#endif // SIMDUTF_CLANG_VISUAL_STUDIO - -#endif // SIMDUTF_HASWELL_INTRINSICS_H -/* end file src/simdutf/haswell/intrinsics.h */ - - // - // The rest need to be inside the region - // -/* begin file src/simdutf/haswell/begin.h */ -// redefining SIMDUTF_IMPLEMENTATION to "haswell" -// #define SIMDUTF_IMPLEMENTATION haswell -#define SIMDUTF_SIMD_HAS_BYTEMASK 1 - -#if SIMDUTF_CAN_ALWAYS_RUN_HASWELL -// nothing needed. -#else -SIMDUTF_TARGET_HASWELL -#endif - -#if SIMDUTF_GCC11ORMORE // workaround for - // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 -// clang-format off -SIMDUTF_DISABLE_GCC_WARNING(-Wmaybe-uninitialized) -// clang-format on -#endif // end of workaround -/* end file src/simdutf/haswell/begin.h */ - // Declarations -/* begin file src/simdutf/haswell/bitmanipulation.h */ -#ifndef SIMDUTF_HASWELL_BITMANIPULATION_H -#define SIMDUTF_HASWELL_BITMANIPULATION_H - -namespace simdutf { -namespace haswell { -namespace { - -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO -simdutf_really_inline unsigned __int64 count_ones(uint64_t input_num) { - // note: we do not support legacy 32-bit Windows - return __popcnt64(input_num); // Visual Studio wants two underscores -} -#else -simdutf_really_inline long long int count_ones(uint64_t input_num) { - return _popcnt64(input_num); -} -#endif - -#if SIMDUTF_NEED_TRAILING_ZEROES -simdutf_really_inline int trailing_zeroes(uint64_t input_num) { - #if SIMDUTF_REGULAR_VISUAL_STUDIO - return (int)_tzcnt_u64(input_num); - #else // SIMDUTF_REGULAR_VISUAL_STUDIO - return __builtin_ctzll(input_num); - #endif // SIMDUTF_REGULAR_VISUAL_STUDIO -} -#endif - -template bool is_power_of_two(T x) { return (x & (x - 1)) == 0; } - -} // unnamed namespace -} // namespace haswell -} // namespace simdutf - -#endif // SIMDUTF_HASWELL_BITMANIPULATION_H -/* end file src/simdutf/haswell/bitmanipulation.h */ -/* begin file src/simdutf/haswell/simd.h */ -#ifndef SIMDUTF_HASWELL_SIMD_H -#define SIMDUTF_HASWELL_SIMD_H - -namespace simdutf { -namespace haswell { -namespace { -namespace simd { - -// Forward-declared so they can be used by splat and friends. -template struct base { - __m256i value; - - // Zero constructor - simdutf_really_inline base() : value{__m256i()} {} - - // Conversion from SIMD register - simdutf_really_inline base(const __m256i _value) : value(_value) {} - - simdutf_really_inline operator const __m256i &() const { return this->value; } - - template - simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { - __m256i first = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(*this)); - __m256i second = _mm256_cvtepu8_epi16(_mm256_extractf128_si256(*this, 1)); - if (big_endian) { - const __m256i swap = _mm256_setr_epi8( - 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18, - 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30); - first = _mm256_shuffle_epi8(first, swap); - second = _mm256_shuffle_epi8(second, swap); - } - _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr), first); - _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr + 16), second); - } - - simdutf_really_inline void store_ascii_as_utf32(char32_t *ptr) const { - _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr), - _mm256_cvtepu8_epi32(_mm256_castsi256_si128(*this))); - _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr + 8), - _mm256_cvtepu8_epi32(_mm256_castsi256_si128( - _mm256_srli_si256(*this, 8)))); - _mm256_storeu_si256( - reinterpret_cast<__m256i *>(ptr + 16), - _mm256_cvtepu8_epi32(_mm256_extractf128_si256(*this, 1))); - _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr + 24), - _mm256_cvtepu8_epi32(_mm_srli_si128( - _mm256_extractf128_si256(*this, 1), 8))); - } - // Bit operations - simdutf_really_inline Child operator|(const Child other) const { - return _mm256_or_si256(*this, other); - } - simdutf_really_inline Child operator&(const Child other) const { - return _mm256_and_si256(*this, other); - } - simdutf_really_inline Child operator^(const Child other) const { - return _mm256_xor_si256(*this, other); - } - simdutf_really_inline Child &operator|=(const Child other) { - auto this_cast = static_cast(this); - *this_cast = *this_cast | other; - return *this_cast; - } -}; - -// Forward-declared so they can be used by splat and friends. -template struct simd8; - -template > -struct base8 : base> { - simdutf_really_inline base8() : base>() {} - - simdutf_really_inline base8(const __m256i _value) : base>(_value) {} - - friend simdutf_always_inline Mask operator==(const simd8 lhs, - const simd8 rhs) { - return _mm256_cmpeq_epi8(lhs, rhs); - } - - static const int SIZE = sizeof(base::value); - - template - simdutf_really_inline simd8 prev(const simd8 prev_chunk) const { - return _mm256_alignr_epi8( - *this, _mm256_permute2x128_si256(prev_chunk, *this, 0x21), 16 - N); - } -}; - -// SIMD byte mask type (returned by things like eq and gt) -template <> struct simd8 : base8 { - static simdutf_really_inline simd8 splat(bool _value) { - return _mm256_set1_epi8(uint8_t(-(!!_value))); - } - - simdutf_really_inline simd8(const __m256i _value) : base8(_value) {} - - simdutf_really_inline simd8(bool _value) : base8(splat(_value)) {} - - simdutf_really_inline uint32_t to_bitmask() const { - return uint32_t(_mm256_movemask_epi8(value)); - } -}; - -template struct base8_numeric : base8 { - static simdutf_really_inline simd8 splat(T _value) { - return _mm256_set1_epi8(_value); - } - static simdutf_really_inline simd8 zero() { - return _mm256_setzero_si256(); - } - static simdutf_really_inline simd8 load(const T values[32]) { - return _mm256_loadu_si256(reinterpret_cast(values)); - } - // Repeat 16 values as many times as necessary (usually for lookup tables) - static simdutf_really_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, - T v5, T v6, T v7, T v8, T v9, - T v10, T v11, T v12, T v13, - T v14, T v15) { - return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, - v14, v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, - v12, v13, v14, v15); - } - - simdutf_really_inline base8_numeric() : base8() {} - simdutf_really_inline base8_numeric(const __m256i _value) - : base8(_value) {} - - // Store to array - simdutf_really_inline void store(T dst[32]) const { - return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), *this); - } - - // Addition/subtraction are the same for signed and unsigned - simdutf_really_inline simd8 operator-(const simd8 other) const { - return _mm256_sub_epi8(*this, other); - } - simdutf_really_inline simd8 &operator-=(const simd8 other) { - *this = *this - other; - return *static_cast *>(this); - } - - // Override to distinguish from bool version - simdutf_really_inline simd8 operator~() const { return *this ^ 0xFFu; } - - // Perform a lookup assuming the value is between 0 and 16 (undefined behavior - // for out of range values) - template - simdutf_really_inline simd8 lookup_16(simd8 lookup_table) const { - return _mm256_shuffle_epi8(lookup_table, *this); - } - - template - simdutf_really_inline simd8 - lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, - L replace5, L replace6, L replace7, L replace8, L replace9, - L replace10, L replace11, L replace12, L replace13, L replace14, - L replace15) const { - return lookup_16(simd8::repeat_16( - replace0, replace1, replace2, replace3, replace4, replace5, replace6, - replace7, replace8, replace9, replace10, replace11, replace12, - replace13, replace14, replace15)); - } -}; - -// Signed bytes -template <> struct simd8 : base8_numeric { - simdutf_really_inline simd8() : base8_numeric() {} - simdutf_really_inline simd8(const __m256i _value) - : base8_numeric(_value) {} - - // Splat constructor - simdutf_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdutf_really_inline simd8(const int8_t values[32]) : simd8(load(values)) {} - simdutf_really_inline operator simd8() const; - - simdutf_really_inline bool is_ascii() const { - return _mm256_movemask_epi8(*this) == 0; - } - // Order-sensitive comparisons - simdutf_really_inline simd8 operator>(const simd8 other) const { - return _mm256_cmpgt_epi8(*this, other); - } - simdutf_really_inline simd8 operator<(const simd8 other) const { - return _mm256_cmpgt_epi8(other, *this); - } -}; - -// Unsigned bytes -template <> struct simd8 : base8_numeric { - simdutf_really_inline simd8() : base8_numeric() {} - simdutf_really_inline simd8(const __m256i _value) - : base8_numeric(_value) {} - // Splat constructor - simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdutf_really_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} - // Member-by-member initialization - simdutf_really_inline - simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, - uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, - uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, - uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, - uint8_t v21, uint8_t v22, uint8_t v23, uint8_t v24, uint8_t v25, - uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, - uint8_t v31) - : simd8(_mm256_setr_epi8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, - v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, - v22, v23, v24, v25, v26, v27, v28, v29, v30, - v31)) {} - - // Saturated math - simdutf_really_inline simd8 - saturating_sub(const simd8 other) const { - return _mm256_subs_epu8(*this, other); - } - - // Order-specific operations - simdutf_really_inline simd8 - min_val(const simd8 other) const { - return _mm256_min_epu8(other, *this); - } - // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdutf_really_inline simd8 - gt_bits(const simd8 other) const { - return this->saturating_sub(other); - } - simdutf_really_inline simd8 - operator>=(const simd8 other) const { - return other.min_val(*this) == other; - } - - // Bit-specific operations - simdutf_really_inline bool is_ascii() const { - return _mm256_movemask_epi8(*this) == 0; - } - simdutf_really_inline bool bits_not_set_anywhere() const { - return _mm256_testz_si256(*this, *this); - } - - simdutf_really_inline bool any_bits_set_anywhere() const { - return !bits_not_set_anywhere(); - } - - template simdutf_really_inline simd8 shr() const { - return simd8(_mm256_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); - } - - simdutf_really_inline uint64_t sum_bytes() const { - const auto tmp = _mm256_sad_epu8(value, _mm256_setzero_si256()); - - return _mm256_extract_epi64(tmp, 0) + _mm256_extract_epi64(tmp, 1) + - _mm256_extract_epi64(tmp, 2) + _mm256_extract_epi64(tmp, 3); - } -}; -simdutf_really_inline simd8::operator simd8() const { - return this->value; -} - -template struct simd8x64 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); - static_assert(NUM_CHUNKS == 2, - "Haswell kernel should use two registers per 64-byte block."); - simd8 chunks[NUM_CHUNKS]; - - simd8x64(const simd8x64 &o) = delete; // no copy allowed - simd8x64 & - operator=(const simd8 other) = delete; // no assignment allowed - simd8x64() = delete; // no default constructor allowed - - simdutf_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1) - : chunks{chunk0, chunk1} {} - simdutf_really_inline simd8x64(const T *ptr) - : chunks{simd8::load(ptr), - simd8::load(ptr + sizeof(simd8) / sizeof(T))} {} - - simdutf_really_inline void store(T *ptr) const { - this->chunks[0].store(ptr + sizeof(simd8) * 0 / sizeof(T)); - this->chunks[1].store(ptr + sizeof(simd8) * 1 / sizeof(T)); - } - - simdutf_really_inline uint64_t to_bitmask() const { - uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); - uint64_t r_hi = this->chunks[1].to_bitmask(); - return r_lo | (r_hi << 32); - } - - simdutf_really_inline simd8x64 &operator|=(const simd8x64 &other) { - this->chunks[0] |= other.chunks[0]; - this->chunks[1] |= other.chunks[1]; - return *this; - } - - simdutf_really_inline simd8 reduce_or() const { - return this->chunks[0] | this->chunks[1]; - } - - simdutf_really_inline bool is_ascii() const { - return this->reduce_or().is_ascii(); - } - - template - simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { - this->chunks[0].template store_ascii_as_utf16(ptr + - sizeof(simd8) * 0); - this->chunks[1].template store_ascii_as_utf16(ptr + - sizeof(simd8) * 1); - } - - simdutf_really_inline void store_ascii_as_utf32(char32_t *ptr) const { - this->chunks[0].store_ascii_as_utf32(ptr + sizeof(simd8) * 0); - this->chunks[1].store_ascii_as_utf32(ptr + sizeof(simd8) * 1); - } - - simdutf_really_inline uint64_t in_range(const T low, const T high) const { - const simd8 mask_low = simd8::splat(low); - const simd8 mask_high = simd8::splat(high); - - return simd8x64( - (this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low), - (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low)) - .to_bitmask(); - } - - simdutf_really_inline uint64_t lt(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] < mask, this->chunks[1] < mask) - .to_bitmask(); - } - - simdutf_really_inline uint64_t gt(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] > mask, this->chunks[1] > mask) - .to_bitmask(); - } - - simdutf_really_inline uint64_t gteq_unsigned(const uint8_t m) const { - const simd8 mask = simd8::splat(m); - return simd8x64((simd8(__m256i(this->chunks[0])) >= mask), - (simd8(__m256i(this->chunks[1])) >= mask)) - .to_bitmask(); - } -}; // struct simd8x64 - -/* begin file src/simdutf/haswell/simd16-inl.h */ -#ifdef __GNUC__ - #if __GNUC__ < 8 - #define _mm256_set_m128i(xmm1, xmm2) \ - _mm256_permute2f128_si256(_mm256_castsi128_si256(xmm1), \ - _mm256_castsi128_si256(xmm2), 2) - #define _mm256_setr_m128i(xmm2, xmm1) \ - _mm256_permute2f128_si256(_mm256_castsi128_si256(xmm1), \ - _mm256_castsi128_si256(xmm2), 2) - #endif -#endif - -template struct simd16; - -template > -struct base16 : base> { - using bitmask_type = uint32_t; - - simdutf_really_inline base16() : base>() {} - simdutf_really_inline base16(const __m256i _value) - : base>(_value) {} - template - simdutf_really_inline base16(const Pointer *ptr) - : base16(_mm256_loadu_si256(reinterpret_cast(ptr))) {} - - friend simdutf_always_inline Mask operator==(const simd16 lhs, - const simd16 rhs) { - return _mm256_cmpeq_epi16(lhs, rhs); - } - - /// the size of vector in bytes - static const int SIZE = sizeof(base>::value); - - /// the number of elements of type T a vector can hold - static const int ELEMENTS = SIZE / sizeof(T); -}; - -// SIMD byte mask type (returned by things like eq and gt) -template <> struct simd16 : base16 { - static simdutf_really_inline simd16 splat(bool _value) { - return _mm256_set1_epi16(uint16_t(-(!!_value))); - } - - simdutf_really_inline simd16() : base16() {} - - simdutf_really_inline simd16(const __m256i _value) : base16(_value) {} - - // Splat constructor - simdutf_really_inline simd16(bool _value) : base16(splat(_value)) {} - - simdutf_really_inline bitmask_type to_bitmask() const { - return _mm256_movemask_epi8(*this); - } - - simdutf_really_inline simd16 operator~() const { return *this ^ true; } -}; - -template struct base16_numeric : base16 { - static simdutf_really_inline simd16 splat(T _value) { - return _mm256_set1_epi16(_value); - } - - static simdutf_really_inline simd16 zero() { - return _mm256_setzero_si256(); - } - - static simdutf_really_inline simd16 load(const T values[8]) { - return _mm256_loadu_si256(reinterpret_cast(values)); - } - - simdutf_really_inline base16_numeric() : base16() {} - - simdutf_really_inline base16_numeric(const __m256i _value) - : base16(_value) {} - - // Store to array - simdutf_really_inline void store(T dst[8]) const { - return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), *this); - } - - // Override to distinguish from bool version - simdutf_really_inline simd16 operator~() const { return *this ^ 0xFFFFu; } - - // Addition/subtraction are the same for signed and unsigned - simdutf_really_inline simd16 operator+(const simd16 other) const { - return _mm256_add_epi16(*this, other); - } - simdutf_really_inline simd16 &operator+=(const simd16 other) { - *this = *this + other; - return *static_cast *>(this); - } -}; - -// Unsigned code units -template <> struct simd16 : base16_numeric { - simdutf_really_inline simd16() : base16_numeric() {} - simdutf_really_inline simd16(const __m256i _value) - : base16_numeric(_value) {} - - // Splat constructor - simdutf_really_inline simd16(uint16_t _value) : simd16(splat(_value)) {} - // Array constructor - simdutf_really_inline simd16(const uint16_t *values) : simd16(load(values)) {} - simdutf_really_inline simd16(const char16_t *values) - : simd16(load(reinterpret_cast(values))) {} - - // Order-specific operations - simdutf_really_inline simd16 - max_val(const simd16 other) const { - return _mm256_max_epu16(*this, other); - } - simdutf_really_inline simd16 - min_val(const simd16 other) const { - return _mm256_min_epu16(*this, other); - } - // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - simdutf_really_inline simd16 - operator<=(const simd16 other) const { - return other.max_val(*this) == other; - } - simdutf_really_inline simd16 - operator>=(const simd16 other) const { - return other.min_val(*this) == other; - } - - // Bit-specific operations - simdutf_really_inline simd16 bits_not_set() const { - return *this == uint16_t(0); - } - - simdutf_really_inline simd16 any_bits_set() const { - return ~this->bits_not_set(); - } - - template simdutf_really_inline simd16 shr() const { - return simd16(_mm256_srli_epi16(*this, N)); - } - - // Change the endianness - simdutf_really_inline simd16 swap_bytes() const { - const __m256i swap = _mm256_setr_epi8( - 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18, - 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30); - return _mm256_shuffle_epi8(*this, swap); - } - - // Pack with the unsigned saturation of two uint16_t code units into single - // uint8_t vector - static simdutf_really_inline simd8 pack(const simd16 &v0, - const simd16 &v1) { - // Note: the AVX2 variant of pack operates on 128-bit lanes, thus - // we have to shuffle lanes in order to produce bytes in the - // correct order. - - // get the 0th lanes - const __m128i lo_0 = _mm256_extracti128_si256(v0, 0); - const __m128i lo_1 = _mm256_extracti128_si256(v1, 0); - - // get the 1st lanes - const __m128i hi_0 = _mm256_extracti128_si256(v0, 1); - const __m128i hi_1 = _mm256_extracti128_si256(v1, 1); - - // build new vectors (shuffle lanes) - const __m256i t0 = _mm256_set_m128i(lo_1, lo_0); - const __m256i t1 = _mm256_set_m128i(hi_1, hi_0); - - // pack code units in linear order from v0 and v1 - return _mm256_packus_epi16(t0, t1); - } - - simdutf_really_inline uint64_t sum() const { - const auto lo_u16 = _mm256_and_si256(value, _mm256_set1_epi32(0x0000ffff)); - const auto hi_u16 = _mm256_srli_epi32(value, 16); - const auto sum_u32 = _mm256_add_epi32(lo_u16, hi_u16); - - const auto lo_u32 = - _mm256_and_si256(sum_u32, _mm256_set1_epi64x(0xffffffff)); - const auto hi_u32 = _mm256_srli_epi64(sum_u32, 32); - const auto sum_u64 = _mm256_add_epi64(lo_u32, hi_u32); - - return uint64_t(_mm256_extract_epi64(sum_u64, 0)) + - uint64_t(_mm256_extract_epi64(sum_u64, 1)) + - uint64_t(_mm256_extract_epi64(sum_u64, 2)) + - uint64_t(_mm256_extract_epi64(sum_u64, 3)); - } -}; - -template struct simd16x32 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd16); - static_assert(NUM_CHUNKS == 2, - "Haswell kernel should use two registers per 64-byte block."); - simd16 chunks[NUM_CHUNKS]; - - simd16x32(const simd16x32 &o) = delete; // no copy allowed - simd16x32 & - operator=(const simd16 other) = delete; // no assignment allowed - simd16x32() = delete; // no default constructor allowed - - simdutf_really_inline simd16x32(const simd16 chunk0, - const simd16 chunk1) - : chunks{chunk0, chunk1} {} - simdutf_really_inline simd16x32(const T *ptr) - : chunks{simd16::load(ptr), - simd16::load(ptr + sizeof(simd16) / sizeof(T))} {} - - simdutf_really_inline void store(T *ptr) const { - this->chunks[0].store(ptr + sizeof(simd16) * 0 / sizeof(T)); - this->chunks[1].store(ptr + sizeof(simd16) * 1 / sizeof(T)); - } - - simdutf_really_inline uint64_t to_bitmask() const { - uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); - uint64_t r_hi = this->chunks[1].to_bitmask(); - return r_lo | (r_hi << 32); - } - - simdutf_really_inline simd16 reduce_or() const { - return this->chunks[0] | this->chunks[1]; - } - - simdutf_really_inline bool is_ascii() const { - return this->reduce_or().is_ascii(); - } - - simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { - this->chunks[0].store_ascii_as_utf16(ptr + sizeof(simd16) * 0); - this->chunks[1].store_ascii_as_utf16(ptr + sizeof(simd16)); - } - - simdutf_really_inline void swap_bytes() { - this->chunks[0] = this->chunks[0].swap_bytes(); - this->chunks[1] = this->chunks[1].swap_bytes(); - } - - simdutf_really_inline uint64_t lteq(const T m) const { - const simd16 mask = simd16::splat(m); - return simd16x32(this->chunks[0] <= mask, this->chunks[1] <= mask) - .to_bitmask(); - } - - simdutf_really_inline uint64_t not_in_range(const T low, const T high) const { - const simd16 mask_low = simd16::splat(static_cast(low - 1)); - const simd16 mask_high = simd16::splat(static_cast(high + 1)); - return simd16x32( - (this->chunks[0] >= mask_high) | (this->chunks[0] <= mask_low), - (this->chunks[1] >= mask_high) | (this->chunks[1] <= mask_low)) - .to_bitmask(); - } -}; // struct simd16x32 - -simd16 min(const simd16 a, simd16 b) { - return _mm256_min_epu16(a.value, b.value); -} -/* end file src/simdutf/haswell/simd16-inl.h */ -/* begin file src/simdutf/haswell/simd32-inl.h */ -template struct simd32; - -template <> struct simd32 { - static const size_t SIZE = sizeof(__m256i); - static const size_t ELEMENTS = SIZE / sizeof(uint32_t); - - __m256i value; - - simdutf_really_inline simd32(const __m256i v) : value(v) {} - - template - simdutf_really_inline simd32(const Pointer *ptr) - : value(_mm256_loadu_si256(reinterpret_cast(ptr))) {} - - simdutf_really_inline uint64_t sum() const { - const __m256i mask = _mm256_set1_epi64x(0xffffffff); - const __m256i t0 = _mm256_and_si256(value, mask); - const __m256i t1 = _mm256_srli_epi64(value, 32); - const __m256i t2 = _mm256_add_epi64(t0, t1); - - return uint64_t(_mm256_extract_epi64(t2, 0)) + - uint64_t(_mm256_extract_epi64(t2, 1)) + - uint64_t(_mm256_extract_epi64(t2, 2)) + - uint64_t(_mm256_extract_epi64(t2, 3)); - } - - simdutf_really_inline simd32 swap_bytes() const { - const __m256i shuffle = - _mm256_setr_epi8(3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 15, 14, 13, 12, - 3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 15, 14, 13, 12); - - return _mm256_shuffle_epi8(value, shuffle); - } - - // operators - simdutf_really_inline simd32 &operator+=(const simd32 other) { - value = _mm256_add_epi32(value, other.value); - return *this; - } - - // static members - simdutf_really_inline static simd32 zero() { - return _mm256_setzero_si256(); - } - - simdutf_really_inline static simd32 splat(uint32_t v) { - return _mm256_set1_epi32(v); - } -}; - -//---------------------------------------------------------------------- - -template <> struct simd32 { - // static const size_t SIZE = sizeof(__m128i); - // static const size_t ELEMENTS = SIZE / sizeof(uint32_t); - - __m256i value; - - simdutf_really_inline simd32(const __m256i v) : value(v) {} - - simdutf_really_inline bool any() const { - return _mm256_movemask_epi8(value) != 0; - } -}; - -//---------------------------------------------------------------------- - -template -simdutf_really_inline simd32 operator|(const simd32 a, - const simd32 b) { - return _mm256_or_si256(a.value, b.value); -} - -simdutf_really_inline simd32 min(const simd32 b, - const simd32 a) { - return _mm256_min_epu32(a.value, b.value); -} - -simdutf_really_inline simd32 max(const simd32 a, - const simd32 b) { - return _mm256_max_epu32(a.value, b.value); -} - -simdutf_really_inline simd32 operator&(const simd32 b, - const simd32 a) { - return _mm256_and_si256(a.value, b.value); -} - -simdutf_really_inline simd32 operator+(const simd32 a, - const simd32 b) { - return _mm256_add_epi32(a.value, b.value); -} - -simdutf_really_inline simd32 operator==(const simd32 a, - const simd32 b) { - return _mm256_cmpeq_epi32(a.value, b.value); -} - -simdutf_really_inline simd32 operator>=(const simd32 a, - const simd32 b) { - return _mm256_cmpeq_epi32(_mm256_max_epu32(a.value, b.value), a.value); -} - -simdutf_really_inline simd32 operator!(const simd32 v) { - return _mm256_xor_si256(v.value, _mm256_set1_epi8(-1)); -} - -simdutf_really_inline simd32 operator>(const simd32 a, - const simd32 b) { - return !(b >= a); -} -/* end file src/simdutf/haswell/simd32-inl.h */ -/* begin file src/simdutf/haswell/simd64-inl.h */ -template struct simd64; - -template <> struct simd64 { - // static const size_t SIZE = sizeof(__m256i); - // static const size_t ELEMENTS = SIZE / sizeof(uint64_t); - - __m256i value; - - simdutf_really_inline simd64(const __m256i v) : value(v) {} - - template - simdutf_really_inline simd64(const Pointer *ptr) - : value(_mm256_loadu_si256(reinterpret_cast(ptr))) {} - - simdutf_really_inline uint64_t sum() const { - return _mm256_extract_epi64(value, 0) + _mm256_extract_epi64(value, 1) + - _mm256_extract_epi64(value, 2) + _mm256_extract_epi64(value, 3); - } - - // operators - simdutf_really_inline simd64 &operator+=(const simd64 other) { - value = _mm256_add_epi64(value, other.value); - return *this; - } - - // static members - simdutf_really_inline static simd64 zero() { - return _mm256_setzero_si256(); - } - - simdutf_really_inline static simd64 splat(uint64_t v) { - return _mm256_set1_epi64x(v); - } -}; -/* end file src/simdutf/haswell/simd64-inl.h */ - -simdutf_really_inline simd64 sum_8bytes(const simd8 v) { - return _mm256_sad_epu8(v.value, simd8::zero()); -} - -} // namespace simd - -} // unnamed namespace -} // namespace haswell -} // namespace simdutf - -#endif // SIMDUTF_HASWELL_SIMD_H -/* end file src/simdutf/haswell/simd.h */ - -/* begin file src/simdutf/haswell/end.h */ -#if SIMDUTF_CAN_ALWAYS_RUN_HASWELL -// nothing needed. -#else -SIMDUTF_UNTARGET_REGION -#endif - -#undef SIMDUTF_SIMD_HAS_BYTEMASK - -#if SIMDUTF_GCC11ORMORE // workaround for - // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 -SIMDUTF_POP_DISABLE_WARNINGS -#endif // end of workaround -/* end file src/simdutf/haswell/end.h */ - -#endif // SIMDUTF_IMPLEMENTATION_HASWELL -#endif // SIMDUTF_HASWELL_COMMON_H -/* end file src/simdutf/haswell.h */ -/* begin file src/simdutf/westmere.h */ -#ifndef SIMDUTF_WESTMERE_H -#define SIMDUTF_WESTMERE_H - -#ifdef SIMDUTF_FALLBACK_H - #error "westmere.h must be included before fallback.h" -#endif - - -// Default Westmere to on if this is x86-64, unless we'll always select Haswell. -#ifndef SIMDUTF_IMPLEMENTATION_WESTMERE - // - // You do not want to set it to (SIMDUTF_IS_X86_64 && - // !SIMDUTF_REQUIRES_HASWELL) because you want to rely on runtime dispatch! - // - #if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE || SIMDUTF_CAN_ALWAYS_RUN_HASWELL - #define SIMDUTF_IMPLEMENTATION_WESTMERE 0 - #else - #define SIMDUTF_IMPLEMENTATION_WESTMERE (SIMDUTF_IS_X86_64) - #endif - -#endif - -#if (SIMDUTF_IMPLEMENTATION_WESTMERE && SIMDUTF_IS_X86_64 && __SSE4_2__) - #define SIMDUTF_CAN_ALWAYS_RUN_WESTMERE 1 -#else - #define SIMDUTF_CAN_ALWAYS_RUN_WESTMERE 0 -#endif - -#if SIMDUTF_IMPLEMENTATION_WESTMERE - - #define SIMDUTF_TARGET_WESTMERE SIMDUTF_TARGET_REGION("sse4.2,popcnt") - -namespace simdutf { -/** - * Implementation for Westmere (Intel SSE4.2). - */ -namespace westmere {} // namespace westmere -} // namespace simdutf - - // - // These two need to be included outside SIMDUTF_TARGET_REGION - // -/* begin file src/simdutf/westmere/implementation.h */ -#ifndef SIMDUTF_WESTMERE_IMPLEMENTATION_H -#define SIMDUTF_WESTMERE_IMPLEMENTATION_H - - -// The constructor may be executed on any host, so we take care not to use -// SIMDUTF_TARGET_REGION -namespace simdutf { -namespace westmere { - -namespace { -using namespace simdutf; -} - -class implementation final : public simdutf::implementation { -public: - simdutf_really_inline implementation() - : simdutf::implementation("westmere", "Intel/AMD SSE4.2", - internal::instruction_set::SSE42) {} - -#if SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused int detect_encodings(const char *input, - size_t length) const noexcept final; -#endif // SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused bool validate_utf8(const char *buf, - size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 - simdutf_warn_unused result - validate_utf8_with_errors(const char *buf, size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_ASCII - simdutf_warn_unused bool validate_ascii(const char *buf, - size_t len) const noexcept final; - simdutf_warn_unused result - validate_ascii_with_errors(const char *buf, size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_ASCII - -#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused bool validate_utf16le(const char16_t *buf, - size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused bool validate_utf16be(const char16_t *buf, - size_t len) const noexcept final; - simdutf_warn_unused result validate_utf16le_with_errors( - const char16_t *buf, size_t len) const noexcept final; - simdutf_warn_unused result validate_utf16be_with_errors( - const char16_t *buf, size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused bool validate_utf32(const char32_t *buf, - size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused result validate_utf32_with_errors( - const char32_t *buf, size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_latin1_to_utf8( - const char *buf, size_t len, char *utf8_output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_latin1_to_utf16le( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_latin1_to_utf16be( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_latin1_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_latin1_with_errors( - const char *buf, size_t len, char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t convert_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t convert_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf32_with_errors( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - convert_utf16le_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16be_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( - const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( - const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16le_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16be_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t convert_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t convert_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf8_with_errors( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - convert_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; - simdutf_warn_unused result - convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - convert_utf32_to_utf16le(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf32_to_utf16be(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_utf16le(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_utf16be(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16le_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16be_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16le_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16be_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 - void change_endianness_utf16(const char16_t *buf, size_t length, - char16_t *output) const noexcept final; - simdutf_warn_unused size_t count_utf16le(const char16_t *buf, - size_t length) const noexcept; - simdutf_warn_unused size_t count_utf16be(const char16_t *buf, - size_t length) const noexcept; - void to_well_formed_utf16be(const char16_t *input, size_t len, - char16_t *output) const noexcept final; - void to_well_formed_utf16le(const char16_t *input, size_t len, - char16_t *output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 - simdutf_warn_unused size_t count_utf8(const char *buf, - size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t - utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t utf32_length_from_utf16le( - const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t utf32_length_from_utf16be( - const char16_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t - utf16_length_from_utf8(const char *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf32_length_from_utf8(const char *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - latin1_length_from_utf8(const char *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - utf8_length_from_latin1(const char *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_BASE64 - simdutf_warn_unused result base64_to_binary( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused full_result base64_to_binary_details( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused result - base64_to_binary(const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused full_result base64_to_binary_details( - const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - size_t binary_to_base64(const char *input, size_t length, char *output, - base64_options options) const noexcept; -#endif // SIMDUTF_FEATURE_BASE64 -}; - -} // namespace westmere -} // namespace simdutf - -#endif // SIMDUTF_WESTMERE_IMPLEMENTATION_H -/* end file src/simdutf/westmere/implementation.h */ -/* begin file src/simdutf/westmere/intrinsics.h */ -#ifndef SIMDUTF_WESTMERE_INTRINSICS_H -#define SIMDUTF_WESTMERE_INTRINSICS_H - -#ifdef SIMDUTF_VISUAL_STUDIO - // under clang within visual studio, this will include - #include // visual studio or clang -#else - - #if SIMDUTF_GCC11ORMORE -// We should not get warnings while including yet we do -// under some versions of GCC. -// If the x86intrin.h header has uninitialized values that are problematic, -// it is a GCC issue, we want to ignore these warnings. -SIMDUTF_DISABLE_GCC_WARNING(-Wuninitialized) - #endif - - #include // elsewhere - - #if SIMDUTF_GCC11ORMORE -// cancels the suppression of the -Wuninitialized -SIMDUTF_POP_DISABLE_WARNINGS - #endif - -#endif // SIMDUTF_VISUAL_STUDIO - -#ifdef SIMDUTF_CLANG_VISUAL_STUDIO - /** - * You are not supposed, normally, to include these - * headers directly. Instead you should either include intrin.h - * or x86intrin.h. However, when compiling with clang - * under Windows (i.e., when _MSC_VER is set), these headers - * only get included *if* the corresponding features are detected - * from macros: - */ - #include // for _mm_alignr_epi8 -#endif - -#endif // SIMDUTF_WESTMERE_INTRINSICS_H -/* end file src/simdutf/westmere/intrinsics.h */ - - // - // The rest need to be inside the region - // -/* begin file src/simdutf/westmere/begin.h */ -// redefining SIMDUTF_IMPLEMENTATION to "westmere" -// #define SIMDUTF_IMPLEMENTATION westmere -#define SIMDUTF_SIMD_HAS_BYTEMASK 1 - -#if SIMDUTF_CAN_ALWAYS_RUN_WESTMERE -// nothing needed. -#else -SIMDUTF_TARGET_WESTMERE -#endif -/* end file src/simdutf/westmere/begin.h */ - - // Declarations -/* begin file src/simdutf/westmere/bitmanipulation.h */ -#ifndef SIMDUTF_WESTMERE_BITMANIPULATION_H -#define SIMDUTF_WESTMERE_BITMANIPULATION_H - -namespace simdutf { -namespace westmere { -namespace { - -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO -simdutf_really_inline unsigned __int64 count_ones(uint64_t input_num) { - // note: we do not support legacy 32-bit Windows - return __popcnt64(input_num); // Visual Studio wants two underscores -} -#else -simdutf_really_inline long long int count_ones(uint64_t input_num) { - return _popcnt64(input_num); -} -#endif - -#if SIMDUTF_NEED_TRAILING_ZEROES -simdutf_really_inline int trailing_zeroes(uint64_t input_num) { - #if SIMDUTF_REGULAR_VISUAL_STUDIO - unsigned long ret; - _BitScanForward64(&ret, input_num); - return (int)ret; - #else // SIMDUTF_REGULAR_VISUAL_STUDIO - return __builtin_ctzll(input_num); - #endif // SIMDUTF_REGULAR_VISUAL_STUDIO -} -#endif - -template bool is_power_of_two(T x) { return (x & (x - 1)) == 0; } - -} // unnamed namespace -} // namespace westmere -} // namespace simdutf - -#endif // SIMDUTF_WESTMERE_BITMANIPULATION_H -/* end file src/simdutf/westmere/bitmanipulation.h */ -/* begin file src/simdutf/westmere/simd.h */ -#ifndef SIMDUTF_WESTMERE_SIMD_H -#define SIMDUTF_WESTMERE_SIMD_H - -namespace simdutf { -namespace westmere { -namespace { -namespace simd { - -template struct base { - __m128i value; - - // Zero constructor - simdutf_really_inline base() : value{__m128i()} {} - - // Conversion from SIMD register - simdutf_really_inline base(const __m128i _value) : value(_value) {} - // Conversion to SIMD register - simdutf_really_inline operator const __m128i &() const { return this->value; } - template - simdutf_really_inline void store_ascii_as_utf16(char16_t *p) const { - __m128i first = _mm_cvtepu8_epi16(*this); - __m128i second = _mm_cvtepu8_epi16(_mm_srli_si128(*this, 8)); - if (big_endian) { - const __m128i swap = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - first = _mm_shuffle_epi8(first, swap); - second = _mm_shuffle_epi8(second, swap); - } - _mm_storeu_si128(reinterpret_cast<__m128i *>(p), first); - _mm_storeu_si128(reinterpret_cast<__m128i *>(p + 8), second); - } - simdutf_really_inline void store_ascii_as_utf32(char32_t *p) const { - _mm_storeu_si128(reinterpret_cast<__m128i *>(p), _mm_cvtepu8_epi32(*this)); - _mm_storeu_si128(reinterpret_cast<__m128i *>(p + 4), - _mm_cvtepu8_epi32(_mm_srli_si128(*this, 4))); - _mm_storeu_si128(reinterpret_cast<__m128i *>(p + 8), - _mm_cvtepu8_epi32(_mm_srli_si128(*this, 8))); - _mm_storeu_si128(reinterpret_cast<__m128i *>(p + 12), - _mm_cvtepu8_epi32(_mm_srli_si128(*this, 12))); - } - // Bit operations - simdutf_really_inline Child operator|(const Child other) const { - return _mm_or_si128(*this, other); - } - simdutf_really_inline Child operator&(const Child other) const { - return _mm_and_si128(*this, other); - } - simdutf_really_inline Child operator^(const Child other) const { - return _mm_xor_si128(*this, other); - } - simdutf_really_inline Child &operator|=(const Child other) { - auto this_cast = static_cast(this); - *this_cast = *this_cast | other; - return *this_cast; - } -}; - -// Forward-declared so they can be used by splat and friends. -template struct simd8; - -template > -struct base8 : base> { - typedef uint16_t bitmask_t; - typedef uint32_t bitmask2_t; - - simdutf_really_inline T first() const { return _mm_extract_epi8(*this, 0); } - simdutf_really_inline T last() const { return _mm_extract_epi8(*this, 15); } - simdutf_really_inline base8() : base>() {} - simdutf_really_inline base8(const __m128i _value) : base>(_value) {} - - friend simdutf_really_inline Mask operator==(const simd8 lhs, - const simd8 rhs) { - return _mm_cmpeq_epi8(lhs, rhs); - } - - static const int SIZE = sizeof(base>::value); - - template - simdutf_really_inline simd8 prev(const simd8 prev_chunk) const { - return _mm_alignr_epi8(*this, prev_chunk, 16 - N); - } -}; - -// SIMD byte mask type (returned by things like eq and gt) -template <> struct simd8 : base8 { - static simdutf_really_inline simd8 splat(bool _value) { - return _mm_set1_epi8(uint8_t(-(!!_value))); - } - - simdutf_really_inline simd8() : base8() {} - simdutf_really_inline simd8(const __m128i _value) : base8(_value) {} - // Splat constructor - simdutf_really_inline simd8(bool _value) : base8(splat(_value)) {} - - simdutf_really_inline int to_bitmask() const { - return _mm_movemask_epi8(*this); - } - simdutf_really_inline simd8 operator~() const { return *this ^ true; } -}; - -template struct base8_numeric : base8 { - static simdutf_really_inline simd8 splat(T _value) { - return _mm_set1_epi8(_value); - } - static simdutf_really_inline simd8 zero() { return _mm_setzero_si128(); } - static simdutf_really_inline simd8 load(const T values[16]) { - return _mm_loadu_si128(reinterpret_cast(values)); - } - // Repeat 16 values as many times as necessary (usually for lookup tables) - static simdutf_really_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, - T v5, T v6, T v7, T v8, T v9, - T v10, T v11, T v12, T v13, - T v14, T v15) { - return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, - v14, v15); - } - - simdutf_really_inline base8_numeric() : base8() {} - simdutf_really_inline base8_numeric(const __m128i _value) - : base8(_value) {} - - // Store to array - simdutf_really_inline void store(T dst[16]) const { - return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); - } - - // Override to distinguish from bool version - simdutf_really_inline simd8 operator~() const { return *this ^ 0xFFu; } - - // Addition/subtraction are the same for signed and unsigned - simdutf_really_inline simd8 operator-(const simd8 other) const { - return _mm_sub_epi8(*this, other); - } - simdutf_really_inline simd8 &operator-=(const simd8 other) { - *this = *this - other; - return *static_cast *>(this); - } - - // Perform a lookup assuming the value is between 0 and 16 (undefined behavior - // for out of range values) - template - simdutf_really_inline simd8 lookup_16(simd8 lookup_table) const { - return _mm_shuffle_epi8(lookup_table, *this); - } - - template - simdutf_really_inline simd8 - lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, - L replace5, L replace6, L replace7, L replace8, L replace9, - L replace10, L replace11, L replace12, L replace13, L replace14, - L replace15) const { - return lookup_16(simd8::repeat_16( - replace0, replace1, replace2, replace3, replace4, replace5, replace6, - replace7, replace8, replace9, replace10, replace11, replace12, - replace13, replace14, replace15)); - } -}; - -// Signed bytes -template <> struct simd8 : base8_numeric { - simdutf_really_inline simd8() : base8_numeric() {} - simdutf_really_inline simd8(const __m128i _value) - : base8_numeric(_value) {} - // Splat constructor - simdutf_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} - // Member-by-member initialization - simdutf_really_inline operator simd8() const; - simdutf_really_inline bool is_ascii() const { - return _mm_movemask_epi8(*this) == 0; - } - - // Order-sensitive comparisons - simdutf_really_inline simd8 operator>(const simd8 other) const { - return _mm_cmpgt_epi8(*this, other); - } - simdutf_really_inline simd8 operator<(const simd8 other) const { - return _mm_cmpgt_epi8(other, *this); - } -}; - -// Unsigned bytes -template <> struct simd8 : base8_numeric { - simdutf_really_inline simd8() : base8_numeric() {} - simdutf_really_inline simd8(const __m128i _value) - : base8_numeric(_value) {} - - // Splat constructor - simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdutf_really_inline simd8(const uint8_t *values) : simd8(load(values)) {} - // Member-by-member initialization - simdutf_really_inline - simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, - uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, - uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) - : simd8(_mm_setr_epi8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, - v12, v13, v14, v15)) {} - - // Saturated math - simdutf_really_inline simd8 - saturating_sub(const simd8 other) const { - return _mm_subs_epu8(*this, other); - } - - // Order-specific operations - simdutf_really_inline simd8 - min_val(const simd8 other) const { - return _mm_min_epu8(*this, other); - } - // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdutf_really_inline simd8 - gt_bits(const simd8 other) const { - return this->saturating_sub(other); - } - // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - simdutf_really_inline simd8 - operator>=(const simd8 other) const { - return other.min_val(*this) == other; - } - - // Bit-specific operations - simdutf_really_inline simd8 bits_not_set() const { - return *this == uint8_t(0); - } - simdutf_really_inline simd8 any_bits_set() const { - return ~this->bits_not_set(); - } - simdutf_really_inline bool is_ascii() const { - return _mm_movemask_epi8(*this) == 0; - } - - simdutf_really_inline bool bits_not_set_anywhere() const { - return _mm_testz_si128(*this, *this); - } - simdutf_really_inline bool any_bits_set_anywhere() const { - return !bits_not_set_anywhere(); - } - template simdutf_really_inline simd8 shr() const { - return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); - } - template simdutf_really_inline simd8 shl() const { - return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); - } - - simdutf_really_inline uint64_t sum_bytes() const { - const auto tmp = _mm_sad_epu8(value, _mm_setzero_si128()); - return _mm_extract_epi64(tmp, 0) + _mm_extract_epi64(tmp, 1); - } -}; - -simdutf_really_inline simd8::operator simd8() const { - return this->value; -} - -template struct simd8x64 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); - static_assert(NUM_CHUNKS == 4, - "Westmere kernel should use four registers per 64-byte block."); - simd8 chunks[NUM_CHUNKS]; - - simd8x64(const simd8x64 &o) = delete; // no copy allowed - simd8x64 & - operator=(const simd8 other) = delete; // no assignment allowed - simd8x64() = delete; // no default constructor allowed - - simdutf_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1, - const simd8 chunk2, const simd8 chunk3) - : chunks{chunk0, chunk1, chunk2, chunk3} {} - simdutf_really_inline simd8x64(const T *ptr) - : chunks{simd8::load(ptr), - simd8::load(ptr + sizeof(simd8) / sizeof(T)), - simd8::load(ptr + 2 * sizeof(simd8) / sizeof(T)), - simd8::load(ptr + 3 * sizeof(simd8) / sizeof(T))} {} - - simdutf_really_inline void store(T *ptr) const { - this->chunks[0].store(ptr + sizeof(simd8) * 0 / sizeof(T)); - this->chunks[1].store(ptr + sizeof(simd8) * 1 / sizeof(T)); - this->chunks[2].store(ptr + sizeof(simd8) * 2 / sizeof(T)); - this->chunks[3].store(ptr + sizeof(simd8) * 3 / sizeof(T)); - } - - simdutf_really_inline simd8x64 &operator|=(const simd8x64 &other) { - this->chunks[0] |= other.chunks[0]; - this->chunks[1] |= other.chunks[1]; - this->chunks[2] |= other.chunks[2]; - this->chunks[3] |= other.chunks[3]; - return *this; - } - - simdutf_really_inline simd8 reduce_or() const { - return (this->chunks[0] | this->chunks[1]) | - (this->chunks[2] | this->chunks[3]); - } - - simdutf_really_inline bool is_ascii() const { - return this->reduce_or().is_ascii(); - } - - template - simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { - this->chunks[0].template store_ascii_as_utf16(ptr + - sizeof(simd8) * 0); - this->chunks[1].template store_ascii_as_utf16(ptr + - sizeof(simd8) * 1); - this->chunks[2].template store_ascii_as_utf16(ptr + - sizeof(simd8) * 2); - this->chunks[3].template store_ascii_as_utf16(ptr + - sizeof(simd8) * 3); - } - - simdutf_really_inline void store_ascii_as_utf32(char32_t *ptr) const { - this->chunks[0].store_ascii_as_utf32(ptr + sizeof(simd8) * 0); - this->chunks[1].store_ascii_as_utf32(ptr + sizeof(simd8) * 1); - this->chunks[2].store_ascii_as_utf32(ptr + sizeof(simd8) * 2); - this->chunks[3].store_ascii_as_utf32(ptr + sizeof(simd8) * 3); - } - - simdutf_really_inline uint64_t to_bitmask() const { - uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); - uint64_t r1 = this->chunks[1].to_bitmask(); - uint64_t r2 = this->chunks[2].to_bitmask(); - uint64_t r3 = this->chunks[3].to_bitmask(); - return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); - } - - simdutf_really_inline uint64_t lt(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] < mask, this->chunks[1] < mask, - this->chunks[2] < mask, this->chunks[3] < mask) - .to_bitmask(); - } - - simdutf_really_inline uint64_t gt(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] > mask, this->chunks[1] > mask, - this->chunks[2] > mask, this->chunks[3] > mask) - .to_bitmask(); - } - simdutf_really_inline uint64_t gteq_unsigned(const uint8_t m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(simd8(__m128i(this->chunks[0])) >= mask, - simd8(__m128i(this->chunks[1])) >= mask, - simd8(__m128i(this->chunks[2])) >= mask, - simd8(__m128i(this->chunks[3])) >= mask) - .to_bitmask(); - } -}; // struct simd8x64 - -/* begin file src/simdutf/westmere/simd16-inl.h */ -template struct simd16; - -template > -struct base16 : base> { - simdutf_really_inline base16() : base>() {} - - simdutf_really_inline base16(const __m128i _value) - : base>(_value) {} - - friend simdutf_really_inline Mask operator==(const simd16 lhs, - const simd16 rhs) { - return _mm_cmpeq_epi16(lhs, rhs); - } - - /// the size of vector in bytes - static const int SIZE = sizeof(base>::value); - - /// the number of elements of type T a vector can hold - static const int ELEMENTS = SIZE / sizeof(T); -}; - -// SIMD byte mask type (returned by things like eq and gt) -template <> struct simd16 : base16 { - static simdutf_really_inline simd16 splat(bool _value) { - return _mm_set1_epi16(uint16_t(-(!!_value))); - } - - simdutf_really_inline simd16(const __m128i _value) : base16(_value) {} - - // Splat constructor - simdutf_really_inline simd16(bool _value) : base16(splat(_value)) {} - - simdutf_really_inline int to_bitmask() const { - return _mm_movemask_epi8(*this); - } - - simdutf_really_inline simd16 operator~() const { return *this ^ true; } -}; - -template struct base16_numeric : base16 { - static simdutf_really_inline simd16 splat(T _value) { - return _mm_set1_epi16(_value); - } - - static simdutf_really_inline simd16 zero() { return _mm_setzero_si128(); } - - static simdutf_really_inline simd16 load(const T values[8]) { - return _mm_loadu_si128(reinterpret_cast(values)); - } - - simdutf_really_inline base16_numeric() : base16() {} - - simdutf_really_inline base16_numeric(const __m128i _value) - : base16(_value) {} - - // Store to array - simdutf_really_inline void store(T dst[8]) const { - return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); - } - - // Override to distinguish from bool version - simdutf_really_inline simd16 operator~() const { return *this ^ 0xFFu; } - - // Addition/subtraction are the same for signed and unsigned - simdutf_really_inline simd16 operator+(const simd16 other) const { - return _mm_add_epi16(*this, other); - } - simdutf_really_inline simd16 &operator+=(const simd16 other) { - *this = *this + other; - return *static_cast *>(this); - } -}; - -// Unsigned code units -template <> struct simd16 : base16_numeric { - simdutf_really_inline simd16() : base16_numeric() {} - - simdutf_really_inline simd16(const __m128i _value) - : base16_numeric(_value) {} - - // Splat constructor - simdutf_really_inline simd16(uint16_t _value) : simd16(splat(_value)) {} - - // Array constructor - simdutf_really_inline simd16(const char16_t *values) - : simd16(load(reinterpret_cast(values))) {} - - // Order-specific operations - simdutf_really_inline simd16 - max_val(const simd16 other) const { - return _mm_max_epu16(*this, other); - } - - simdutf_really_inline simd16 - min_val(const simd16 other) const { - return _mm_min_epu16(*this, other); - } - - simdutf_really_inline simd16 - operator<=(const simd16 other) const { - return other.max_val(*this) == other; - } - simdutf_really_inline simd16 - operator>=(const simd16 other) const { - return other.min_val(*this) == other; - } - - // Bit-specific operations - simdutf_really_inline simd16 bits_not_set() const { - return *this == uint16_t(0); - } - - simdutf_really_inline simd16 any_bits_set() const { - return ~this->bits_not_set(); - } - - template simdutf_really_inline simd16 shr() const { - return simd16(_mm_srli_epi16(*this, N)); - } - - // Change the endianness - simdutf_really_inline simd16 swap_bytes() const { - const __m128i swap = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - return _mm_shuffle_epi8(*this, swap); - } - - // Pack with the unsigned saturation of two uint16_t code units into single - // uint8_t vector - static simdutf_really_inline simd8 pack(const simd16 &v0, - const simd16 &v1) { - return _mm_packus_epi16(v0, v1); - } - - simdutf_really_inline uint64_t sum() const { - const auto lo_u16 = _mm_and_si128(value, _mm_set1_epi32(0x0000ffff)); - const auto hi_u16 = _mm_srli_epi32(value, 16); - const auto sum_u32 = _mm_add_epi32(lo_u16, hi_u16); - - const auto lo_u32 = _mm_and_si128(sum_u32, _mm_set1_epi64x(0xffffffff)); - const auto hi_u32 = _mm_srli_epi64(sum_u32, 32); - const auto sum_u64 = _mm_add_epi64(lo_u32, hi_u32); - - return uint64_t(_mm_extract_epi64(sum_u64, 0)) + - uint64_t(_mm_extract_epi64(sum_u64, 1)); - } -}; - -template struct simd16x32 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd16); - static_assert(NUM_CHUNKS == 4, - "Westmere kernel should use four registers per 64-byte block."); - simd16 chunks[NUM_CHUNKS]; - - simd16x32(const simd16x32 &o) = delete; // no copy allowed - simd16x32 & - operator=(const simd16 other) = delete; // no assignment allowed - simd16x32() = delete; // no default constructor allowed - - simdutf_really_inline - simd16x32(const simd16 chunk0, const simd16 chunk1, - const simd16 chunk2, const simd16 chunk3) - : chunks{chunk0, chunk1, chunk2, chunk3} {} - simdutf_really_inline simd16x32(const T *ptr) - : chunks{simd16::load(ptr), - simd16::load(ptr + sizeof(simd16) / sizeof(T)), - simd16::load(ptr + 2 * sizeof(simd16) / sizeof(T)), - simd16::load(ptr + 3 * sizeof(simd16) / sizeof(T))} {} - - simdutf_really_inline void store(T *ptr) const { - this->chunks[0].store(ptr + sizeof(simd16) * 0 / sizeof(T)); - this->chunks[1].store(ptr + sizeof(simd16) * 1 / sizeof(T)); - this->chunks[2].store(ptr + sizeof(simd16) * 2 / sizeof(T)); - this->chunks[3].store(ptr + sizeof(simd16) * 3 / sizeof(T)); - } - - simdutf_really_inline simd16 reduce_or() const { - return (this->chunks[0] | this->chunks[1]) | - (this->chunks[2] | this->chunks[3]); - } - - simdutf_really_inline bool is_ascii() const { - return this->reduce_or().is_ascii(); - } - - simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { - this->chunks[0].store_ascii_as_utf16(ptr + sizeof(simd16) * 0); - this->chunks[1].store_ascii_as_utf16(ptr + sizeof(simd16) * 1); - this->chunks[2].store_ascii_as_utf16(ptr + sizeof(simd16) * 2); - this->chunks[3].store_ascii_as_utf16(ptr + sizeof(simd16) * 3); - } - - simdutf_really_inline uint64_t to_bitmask() const { - uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); - uint64_t r1 = this->chunks[1].to_bitmask(); - uint64_t r2 = this->chunks[2].to_bitmask(); - uint64_t r3 = this->chunks[3].to_bitmask(); - return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); - } - - simdutf_really_inline void swap_bytes() { - this->chunks[0] = this->chunks[0].swap_bytes(); - this->chunks[1] = this->chunks[1].swap_bytes(); - this->chunks[2] = this->chunks[2].swap_bytes(); - this->chunks[3] = this->chunks[3].swap_bytes(); - } - - simdutf_really_inline uint64_t lteq(const T m) const { - const simd16 mask = simd16::splat(m); - return simd16x32(this->chunks[0] <= mask, this->chunks[1] <= mask, - this->chunks[2] <= mask, this->chunks[3] <= mask) - .to_bitmask(); - } - - simdutf_really_inline uint64_t not_in_range(const T low, const T high) const { - const simd16 mask_low = simd16::splat(static_cast(low - 1)); - const simd16 mask_high = simd16::splat(static_cast(high + 1)); - return simd16x32( - (this->chunks[0] >= mask_high) | (this->chunks[0] <= mask_low), - (this->chunks[1] >= mask_high) | (this->chunks[1] <= mask_low), - (this->chunks[2] >= mask_high) | (this->chunks[2] <= mask_low), - (this->chunks[3] >= mask_high) | (this->chunks[3] <= mask_low)) - .to_bitmask(); - } -}; // struct simd16x32 - -simd16 min(const simd16 a, simd16 b) { - return _mm_min_epu16(a.value, b.value); -} -/* end file src/simdutf/westmere/simd16-inl.h */ -/* begin file src/simdutf/westmere/simd32-inl.h */ -template struct simd32; - -template <> struct simd32 { - static const size_t SIZE = sizeof(__m128i); - static const size_t ELEMENTS = SIZE / sizeof(uint32_t); - - __m128i value; - - simdutf_really_inline simd32(const __m128i v) : value(v) {} - - template - simdutf_really_inline simd32(const Pointer *ptr) - : value(_mm_loadu_si128(reinterpret_cast(ptr))) {} - - simdutf_really_inline uint64_t sum() const { - return uint64_t(_mm_extract_epi32(value, 0)) + - uint64_t(_mm_extract_epi32(value, 1)) + - uint64_t(_mm_extract_epi32(value, 2)) + - uint64_t(_mm_extract_epi32(value, 3)); - } - - simdutf_really_inline simd32 swap_bytes() const { - const __m128i shuffle = - _mm_setr_epi8(3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 15, 14, 13, 12); - - return _mm_shuffle_epi8(value, shuffle); - } - - template simdutf_really_inline simd32 shr() const { - return _mm_srli_epi32(value, N); - } - - template simdutf_really_inline simd32 shl() const { - return _mm_slli_epi32(value, N); - } - - void dump() const { - printf("[%08x, %08x, %08x, %08x]\n", uint32_t(_mm_extract_epi32(value, 0)), - uint32_t(_mm_extract_epi32(value, 1)), - uint32_t(_mm_extract_epi32(value, 2)), - uint32_t(_mm_extract_epi32(value, 3))); - } - - // operators - simdutf_really_inline simd32 &operator+=(const simd32 other) { - value = _mm_add_epi32(value, other.value); - return *this; - } - - // static members - simdutf_really_inline static simd32 zero() { - return _mm_setzero_si128(); - } - - simdutf_really_inline static simd32 splat(uint32_t v) { - return _mm_set1_epi32(v); - } -}; - -//---------------------------------------------------------------------- - -template <> struct simd32 { - // static const size_t SIZE = sizeof(__m128i); - // static const size_t ELEMENTS = SIZE / sizeof(uint32_t); - - __m128i value; - - simdutf_really_inline simd32(const __m128i v) : value(v) {} - - simdutf_really_inline bool any() const { - return _mm_movemask_epi8(value) != 0; - } - - simdutf_really_inline uint8_t to_4bit_bitmask() const { - return uint8_t(_mm_movemask_ps(_mm_castsi128_ps(value))); - } -}; - -//---------------------------------------------------------------------- - -template -simdutf_really_inline simd32 operator|(const simd32 a, - const simd32 b) { - return _mm_or_si128(a.value, b.value); -} - -simdutf_really_inline simd32 min(const simd32 a, - const simd32 b) { - return _mm_min_epu32(a.value, b.value); -} - -simdutf_really_inline simd32 max(const simd32 a, - const simd32 b) { - return _mm_max_epu32(a.value, b.value); -} - -simdutf_really_inline simd32 operator==(const simd32 a, - uint32_t b) { - return _mm_cmpeq_epi32(a.value, _mm_set1_epi32(b)); -} - -simdutf_really_inline simd32 operator&(const simd32 a, - const simd32 b) { - return _mm_and_si128(a.value, b.value); -} - -simdutf_really_inline simd32 operator&(const simd32 a, - uint32_t b) { - return _mm_and_si128(a.value, _mm_set1_epi32(b)); -} - -simdutf_really_inline simd32 operator|(const simd32 a, - uint32_t b) { - return _mm_or_si128(a.value, _mm_set1_epi32(b)); -} - -simdutf_really_inline simd32 operator+(const simd32 a, - const simd32 b) { - return _mm_add_epi32(a.value, b.value); -} - -simdutf_really_inline simd32 operator-(const simd32 a, - uint32_t b) { - return _mm_sub_epi32(a.value, _mm_set1_epi32(b)); -} - -simdutf_really_inline simd32 operator==(const simd32 a, - const simd32 b) { - return _mm_cmpeq_epi32(a.value, b.value); -} - -simdutf_really_inline simd32 operator>=(const simd32 a, - const simd32 b) { - return _mm_cmpeq_epi32(_mm_max_epu32(a.value, b.value), a.value); -} - -simdutf_really_inline simd32 operator!(const simd32 v) { - return _mm_xor_si128(v.value, _mm_set1_epi8(-1)); -} - -simdutf_really_inline simd32 operator>(const simd32 a, - const simd32 b) { - return !(b >= a); -} - -simdutf_really_inline simd32 select(const simd32 cond, - const simd32 v_true, - const simd32 v_false) { - return _mm_blendv_epi8(v_false.value, v_true.value, cond.value); -} -/* end file src/simdutf/westmere/simd32-inl.h */ -/* begin file src/simdutf/westmere/simd64-inl.h */ -template struct simd64; - -template <> struct simd64 { - // static const size_t SIZE = sizeof(__m128i); - // static const size_t ELEMENTS = SIZE / sizeof(uint64_t); - - __m128i value; - - simdutf_really_inline simd64(const __m128i v) : value(v) {} - - template - simdutf_really_inline simd64(const Pointer *ptr) - : value(_mm_loadu_si128(reinterpret_cast(ptr))) {} - - simdutf_really_inline uint64_t sum() const { - return _mm_extract_epi64(value, 0) + _mm_extract_epi64(value, 1); - } - - // operators - simdutf_really_inline simd64 &operator+=(const simd64 other) { - value = _mm_add_epi64(value, other.value); - return *this; - } - - // static members - simdutf_really_inline static simd64 zero() { - return _mm_setzero_si128(); - } - - simdutf_really_inline static simd64 splat(uint64_t v) { - return _mm_set1_epi64x(v); - } -}; -/* end file src/simdutf/westmere/simd64-inl.h */ - -simdutf_really_inline simd64 sum_8bytes(const simd8 v) { - return _mm_sad_epu8(v.value, simd8::zero()); -} - -simdutf_really_inline simd8 as_vector_u8(const simd32 v) { - return simd8(v.value); -} - -} // namespace simd -} // unnamed namespace -} // namespace westmere -} // namespace simdutf - -#endif // SIMDUTF_WESTMERE_SIMD_INPUT_H -/* end file src/simdutf/westmere/simd.h */ - -/* begin file src/simdutf/westmere/end.h */ -#if SIMDUTF_CAN_ALWAYS_RUN_WESTMERE -// nothing needed. -#else -SIMDUTF_UNTARGET_REGION -#endif - -#undef SIMDUTF_SIMD_HAS_BYTEMASK -/* end file src/simdutf/westmere/end.h */ - -#endif // SIMDUTF_IMPLEMENTATION_WESTMERE -#endif // SIMDUTF_WESTMERE_COMMON_H -/* end file src/simdutf/westmere.h */ -/* begin file src/simdutf/ppc64.h */ -#ifndef SIMDUTF_PPC64_H -#define SIMDUTF_PPC64_H - -#ifdef SIMDUTF_FALLBACK_H - #error "ppc64.h must be included before fallback.h" -#endif - - -#ifndef SIMDUTF_IMPLEMENTATION_PPC64 - #define SIMDUTF_IMPLEMENTATION_PPC64 (SIMDUTF_IS_PPC64) -#endif -#define SIMDUTF_CAN_ALWAYS_RUN_PPC64 \ - SIMDUTF_IMPLEMENTATION_PPC64 &&SIMDUTF_IS_PPC64 - - -#if SIMDUTF_IMPLEMENTATION_PPC64 - -namespace simdutf { -/** - * Implementation for ALTIVEC (PPC64). - */ -namespace ppc64 {} // namespace ppc64 -} // namespace simdutf - -/* begin file src/simdutf/ppc64/implementation.h */ -#ifndef SIMDUTF_PPC64_IMPLEMENTATION_H -#define SIMDUTF_PPC64_IMPLEMENTATION_H - - -namespace simdutf { -namespace ppc64 { - -namespace { -using namespace simdutf; - -template simdutf_really_inline size_t align_down(size_t size) { - return N * (size / N); -} -} // namespace - -class implementation final : public simdutf::implementation { -public: - simdutf_really_inline implementation() - : simdutf::implementation("ppc64", "PPC64 ALTIVEC", - internal::instruction_set::ALTIVEC) {} - -#if SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused int detect_encodings(const char *input, - size_t length) const noexcept final; -#endif // SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused bool validate_utf8(const char *buf, - size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 - simdutf_warn_unused result - validate_utf8_with_errors(const char *buf, size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_ASCII - simdutf_warn_unused bool validate_ascii(const char *buf, - size_t len) const noexcept final; - simdutf_warn_unused result - validate_ascii_with_errors(const char *buf, size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_ASCII - -#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused bool validate_utf16le(const char16_t *buf, - size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused bool validate_utf16be(const char16_t *buf, - size_t len) const noexcept final; - simdutf_warn_unused result validate_utf16le_with_errors( - const char16_t *buf, size_t len) const noexcept final; - simdutf_warn_unused result validate_utf16be_with_errors( - const char16_t *buf, size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused bool validate_utf32(const char32_t *buf, - size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused result validate_utf32_with_errors( - const char32_t *buf, size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_latin1_to_utf8( - const char *buf, size_t len, char *utf8_output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_latin1_to_utf16le( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_latin1_to_utf16be( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_latin1_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_latin1_with_errors( - const char *buf, size_t len, char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t convert_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t convert_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf32_with_errors( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - convert_utf16le_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16be_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( - const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( - const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16le_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16be_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t convert_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t convert_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf8_with_errors( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - convert_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; - simdutf_warn_unused result - convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - convert_utf32_to_utf16le(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf32_to_utf16be(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_utf16le(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_utf16be(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16le_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16be_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16le_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16be_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 - void change_endianness_utf16(const char16_t *buf, size_t length, - char16_t *output) const noexcept final; - simdutf_warn_unused size_t count_utf16le(const char16_t *buf, - size_t length) const noexcept; - simdutf_warn_unused size_t count_utf16be(const char16_t *buf, - size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 - simdutf_warn_unused size_t count_utf8(const char *buf, - size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t - utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t utf32_length_from_utf16le( - const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t utf32_length_from_utf16be( - const char16_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t - utf16_length_from_utf8(const char *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf32_length_from_utf8(const char *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - latin1_length_from_utf8(const char *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - utf8_length_from_latin1(const char *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_BASE64 - simdutf_warn_unused size_t maximal_binary_length_from_base64( - const char *input, size_t length) const noexcept; - simdutf_warn_unused result base64_to_binary( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused full_result base64_to_binary_details( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused result - base64_to_binary(const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused full_result base64_to_binary_details( - const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - size_t binary_to_base64(const char *input, size_t length, char *output, - base64_options options) const noexcept; -#endif // SIMDUTF_FEATURE_BASE64 - -#ifdef SIMDUTF_INTERNAL_TESTS - virtual std::vector internal_tests() const override; -#endif - void to_well_formed_utf16be(const char16_t *input, size_t len, - char16_t *output) const noexcept final; - void to_well_formed_utf16le(const char16_t *input, size_t len, - char16_t *output) const noexcept final; -}; - -} // namespace ppc64 -} // namespace simdutf - -#endif // SIMDUTF_PPC64_IMPLEMENTATION_H -/* end file src/simdutf/ppc64/implementation.h */ - -/* begin file src/simdutf/ppc64/begin.h */ -// redefining SIMDUTF_IMPLEMENTATION to "ppc64" -// #define SIMDUTF_IMPLEMENTATION ppc64 -/* end file src/simdutf/ppc64/begin.h */ - - // Declarations -/* begin file src/simdutf/ppc64/intrinsics.h */ -#ifndef SIMDUTF_PPC64_INTRINSICS_H -#define SIMDUTF_PPC64_INTRINSICS_H - - -// This should be the correct header whether -// you use visual studio or other compilers. -#include - -// These are defined by altivec.h in GCC toolchain, it is safe to undef them. -#ifdef bool - #undef bool -#endif - -#ifdef vector - #undef vector -#endif - -#endif // SIMDUTF_PPC64_INTRINSICS_H -/* end file src/simdutf/ppc64/intrinsics.h */ -/* begin file src/simdutf/ppc64/bitmanipulation.h */ -#ifndef SIMDUTF_PPC64_BITMANIPULATION_H -#define SIMDUTF_PPC64_BITMANIPULATION_H - -namespace simdutf { -namespace ppc64 { -namespace { - -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO -simdutf_really_inline int count_ones(uint64_t input_num) { - // note: we do not support legacy 32-bit Windows - return __popcnt64(input_num); // Visual Studio wants two underscores -} -#else -simdutf_really_inline int count_ones(uint64_t input_num) { - return __builtin_popcountll(input_num); -} -#endif - -#if SIMDUTF_NEED_TRAILING_ZEROES -simdutf_really_inline int trailing_zeroes(uint64_t input_num) { - return __builtin_ctzll(input_num); -} -#endif - -} // unnamed namespace -} // namespace ppc64 -} // namespace simdutf - -#endif // SIMDUTF_PPC64_BITMANIPULATION_H -/* end file src/simdutf/ppc64/bitmanipulation.h */ -/* begin file src/simdutf/ppc64/simd.h */ -#ifndef SIMDUTF_PPC64_SIMD_H -#define SIMDUTF_PPC64_SIMD_H - -#include - -namespace simdutf { -namespace ppc64 { -namespace { -namespace simd { - -using vec_bool_t = __vector __bool char; -using vec_bool16_t = __vector __bool short; -using vec_bool32_t = __vector __bool int; -using vec_u8_t = __vector unsigned char; -using vec_i8_t = __vector signed char; -using vec_u16_t = __vector unsigned short; -using vec_i16_t = __vector signed short; -using vec_u32_t = __vector unsigned int; -using vec_i32_t = __vector signed int; -using vec_u64_t = __vector unsigned long long; -using vec_i64_t = __vector signed long long; - -// clang-format off -template struct vector_u8_type_for_element_aux { - using type = typename std::conditional::value, vec_bool_t, - typename std::conditional::value, vec_u8_t, - typename std::conditional::value, vec_i8_t, void>::type>::type>::type; - - static_assert(not std::is_same::value, - "accepted element types are 8 bit integers or bool"); -}; - -template struct vector_u16_type_for_element_aux { - using type = typename std::conditional::value, vec_bool16_t, - typename std::conditional::value, vec_u16_t, - typename std::conditional::value, vec_i16_t, void>::type>::type>::type; - - static_assert(not std::is_same::value, - "accepted element types are 16 bit integers or bool"); -}; - -template struct vector_u32_type_for_element_aux { - using type = typename std::conditional::value, vec_bool32_t, - typename std::conditional::value, vec_u32_t, - typename std::conditional::value, vec_i32_t, void>::type>::type>::type; - - static_assert(not std::is_same::value, - "accepted element types are 32 bit integers or bool"); -}; -// clang-format on - -template -using vector_u8_type_for_element = - typename vector_u8_type_for_element_aux::type; - -template -using vector_u16_type_for_element = - typename vector_u16_type_for_element_aux::type; - -template -using vector_u32_type_for_element = - typename vector_u32_type_for_element_aux::type; - -template uint16_t move_mask_u8(T vec) { - const vec_u8_t perm_mask = {15 * 8, 14 * 8, 13 * 8, 12 * 8, 11 * 8, 10 * 8, - 9 * 8, 8 * 8, 7 * 8, 6 * 8, 5 * 8, 4 * 8, - 3 * 8, 2 * 8, 1 * 8, 0 * 8}; - - const auto result = (vec_u64_t)vec_vbpermq((vec_u8_t)vec, perm_mask); -#if SIMDUTF_IS_BIG_ENDIAN - return static_cast(result[0]); -#else - return static_cast(result[1]); -#endif -} - -/* begin file src/simdutf/ppc64/simd8-inl.h */ -// file included directly - -template struct base8 { - using vector_type = vector_u8_type_for_element; - vector_type value; - static const int SIZE = sizeof(vector_type); - static const int ELEMENTS = sizeof(vector_type) / sizeof(T); - - // Zero constructor - simdutf_really_inline base8() : value{vec_splats(T(0))} {} - - // Conversion from SIMD register - simdutf_really_inline base8(const vector_type _value) : value{_value} {} - - // Splat scalar - simdutf_really_inline base8(T v) : value{vec_splats(v)} {} - - // Conversion to SIMD register - simdutf_really_inline operator const vector_type &() const { - return this->value; - } - - template simdutf_really_inline void store(U *ptr) const { - vec_xst(value, 0, reinterpret_cast(ptr)); - } - - template void operator|=(const SIMD8 other) { - this->value = vec_or(this->value, other.value); - } - - template vector_type prev_aux(vector_type prev_chunk) const { - vector_type chunk = this->value; -#if !SIMDUTF_IS_BIG_ENDIAN - chunk = (vector_type)vec_reve(this->value); - prev_chunk = (vector_type)vec_reve((vector_type)prev_chunk); -#endif - chunk = (vector_type)vec_sld((vector_type)prev_chunk, (vector_type)chunk, - 16 - N); -#if !SIMDUTF_IS_BIG_ENDIAN - chunk = (vector_type)vec_reve((vector_type)chunk); -#endif - return chunk; - } - - simdutf_really_inline bool is_ascii() const { - return move_mask_u8(this->value) == 0; - } - - simdutf_really_inline uint16_t to_bitmask() const { - return move_mask_u8(value); - } - - template - simdutf_really_inline void store_bytes_as_utf16(char16_t *p) const { - const vector_type zero = vec_splats(T(0)); - - if (big_endian) { - const vec_u8_t perm_lo = {16, 0, 16, 1, 16, 2, 16, 3, - 16, 4, 16, 5, 16, 6, 16, 7}; - const vec_u8_t perm_hi = {16, 8, 16, 9, 16, 10, 16, 11, - 16, 12, 16, 13, 16, 14, 16, 15}; - - const vector_type v0 = vec_perm(value, zero, perm_lo); - const vector_type v1 = vec_perm(value, zero, perm_hi); - -#if defined(__clang__) - vec_xst(v0, 0, reinterpret_cast(p)); - vec_xst(v1, 16, reinterpret_cast(p)); -#else - vec_xst(v0, 0, reinterpret_cast(p)); - vec_xst(v1, 16, reinterpret_cast(p)); -#endif // defined(__clang__) - } else { - const vec_u8_t perm_lo = {0, 16, 1, 16, 2, 16, 3, 16, - 4, 16, 5, 16, 6, 16, 7, 16}; - const vec_u8_t perm_hi = {8, 16, 9, 16, 10, 16, 11, 16, - 12, 16, 13, 16, 14, 16, 15, 16}; - - const vector_type v0 = vec_perm(value, zero, perm_lo); - const vector_type v1 = vec_perm(value, zero, perm_hi); - -#if defined(__clang__) - vec_xst(v0, 0, reinterpret_cast(p)); - vec_xst(v1, 16, reinterpret_cast(p)); -#else - vec_xst(v0, 0, reinterpret_cast(p)); - vec_xst(v1, 16, reinterpret_cast(p)); -#endif // defined(__clang__) - } - } - - template - simdutf_really_inline void store_ascii_as_utf16(char16_t *p) const { - store_bytes_as_utf16(p); - } - - simdutf_really_inline void store_bytes_as_utf32(char32_t *p) const { - const vector_type zero = vec_splats(T(0)); - -#if SIMDUTF_IS_BIG_ENDIAN - const vec_u8_t perm0 = {16, 16, 16, 0, 16, 16, 16, 1, - 16, 16, 16, 2, 16, 16, 16, 3}; - - const vec_u8_t perm1 = {16, 16, 16, 4, 16, 16, 16, 5, - 16, 16, 16, 6, 16, 16, 16, 7}; - - const vec_u8_t perm2 = {16, 16, 16, 8, 16, 16, 16, 9, - 16, 16, 16, 10, 16, 16, 16, 11}; - - const vec_u8_t perm3 = {16, 16, 16, 12, 16, 16, 16, 13, - 16, 16, 16, 14, 16, 16, 16, 15}; -#else - const vec_u8_t perm0 = {0, 16, 16, 16, 1, 16, 16, 16, - 2, 16, 16, 16, 3, 16, 16, 16}; - - const vec_u8_t perm1 = {4, 16, 16, 16, 5, 16, 16, 16, - 6, 16, 16, 16, 7, 16, 16, 16}; - - const vec_u8_t perm2 = {8, 16, 16, 16, 9, 16, 16, 16, - 10, 16, 16, 16, 11, 16, 16, 16}; - - const vec_u8_t perm3 = {12, 16, 16, 16, 13, 16, 16, 16, - 14, 16, 16, 16, 15, 16, 16, 16}; -#endif // SIMDUTF_IS_BIG_ENDIAN - - const vector_type v0 = vec_perm(value, zero, perm0); - const vector_type v1 = vec_perm(value, zero, perm1); - const vector_type v2 = vec_perm(value, zero, perm2); - const vector_type v3 = vec_perm(value, zero, perm3); - - constexpr size_t n = base8::SIZE; - -#if defined(__clang__) - vec_xst(v0, 0 * n, reinterpret_cast(p)); - vec_xst(v1, 1 * n, reinterpret_cast(p)); - vec_xst(v2, 2 * n, reinterpret_cast(p)); - vec_xst(v3, 3 * n, reinterpret_cast(p)); -#else - vec_xst(v0, 0 * n, reinterpret_cast(p)); - vec_xst(v1, 1 * n, reinterpret_cast(p)); - vec_xst(v2, 2 * n, reinterpret_cast(p)); - vec_xst(v3, 3 * n, reinterpret_cast(p)); -#endif // defined(__clang__) - } - - simdutf_really_inline void store_words_as_utf32(char32_t *p) const { - const vector_type zero = vec_splats(T(0)); - -#if SIMDUTF_IS_BIG_ENDIAN - const vec_u8_t perm0 = {16, 16, 0, 1, 16, 16, 2, 3, - 16, 16, 4, 5, 16, 16, 6, 7}; - const vec_u8_t perm1 = {16, 16, 8, 9, 16, 16, 10, 11, - 16, 16, 12, 13, 16, 16, 14, 15}; -#else - const vec_u8_t perm0 = {0, 1, 16, 16, 2, 3, 16, 16, - 4, 5, 16, 16, 6, 7, 16, 16}; - const vec_u8_t perm1 = {8, 9, 16, 16, 10, 11, 16, 16, - 12, 13, 16, 16, 14, 15, 16, 16}; -#endif // SIMDUTF_IS_BIG_ENDIAN - - const vector_type v0 = vec_perm(value, zero, perm0); - const vector_type v1 = vec_perm(value, zero, perm1); - - constexpr size_t n = base8::SIZE; - -#if defined(__clang__) - vec_xst(v0, 0 * n, reinterpret_cast(p)); - vec_xst(v1, 1 * n, reinterpret_cast(p)); -#else - vec_xst(v0, 0 * n, reinterpret_cast(p)); - vec_xst(v1, 1 * n, reinterpret_cast(p)); -#endif // defined(__clang__) - } - - simdutf_really_inline void store_ascii_as_utf32(char32_t *p) const { - store_bytes_as_utf32(p); - } -}; - -// Forward declaration -template struct simd8; - -template -simd8 operator==(const simd8 a, const simd8 b); - -template -simd8 operator!=(const simd8 a, const simd8 b); - -template simd8 operator&(const simd8 a, const simd8 b); - -template simd8 operator|(const simd8 a, const simd8 b); - -template simd8 operator^(const simd8 a, const simd8 b); - -template simd8 operator+(const simd8 a, const simd8 b); - -template simd8 operator<(const simd8 a, const simd8 b); - -// SIMD byte mask type (returned by things like eq and gt) -template <> struct simd8 : base8 { - using super = base8; - - static simdutf_really_inline simd8 splat(bool _value) { - return (vector_type)vec_splats((unsigned char)(-(!!_value))); - } - - simdutf_really_inline simd8() : super(vector_type()) {} - simdutf_really_inline simd8(const vector_type _value) : super(_value) {} - // Splat constructor - simdutf_really_inline simd8(bool _value) : base8(splat(_value)) {} - - template - simdutf_really_inline simd8(simd8 other) - : simd8(vector_type(other.value)) {} - - simdutf_really_inline uint16_t to_bitmask() const { - return move_mask_u8(value); - } - - simdutf_really_inline bool any() const { - return !vec_all_eq(this->value, (vector_type)vec_splats(0)); - } - - simdutf_really_inline bool all() const { return to_bitmask() == 0xffff; } - - simdutf_really_inline simd8 operator~() const { - return this->value ^ (vector_type)splat(true); - } -}; - -template struct base8_numeric : base8 { - using super = base8; - using vector_type = typename super::vector_type; - - static simdutf_really_inline simd8 splat(T value) { - return (vector_type)vec_splats(value); - } - - static simdutf_really_inline simd8 zero() { return splat(0); } - - template - static simdutf_really_inline simd8 load(const U *values) { - return vec_xl(0, reinterpret_cast(values)); - } - - // Repeat 16 values as many times as necessary (usually for lookup tables) - static simdutf_really_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, - T v5, T v6, T v7, T v8, T v9, - T v10, T v11, T v12, T v13, - T v14, T v15) { - return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, - v14, v15); - } - - simdutf_really_inline base8_numeric() : base8() {} - simdutf_really_inline base8_numeric(const vector_type _value) - : base8(_value) {} - - // Override to distinguish from bool version - simdutf_really_inline simd8 operator~() const { return *this ^ 0xFFu; } - - simdutf_really_inline simd8 &operator-=(const simd8 other) { - this->value = vec_sub(this->value, other.value); - return *static_cast *>(this); - } - - // Perform a lookup assuming the value is between 0 and 16 (undefined behavior - // for out of range values) - template - simdutf_really_inline simd8 lookup_16(simd8 lookup_table) const { - return (vector_type)vec_perm((vector_type)lookup_table, - (vector_type)lookup_table, this->value); - } - - template - simdutf_really_inline simd8 - lookup_32(const simd8 lookup_table_lo, - const simd8 lookup_table_hi) const { - return (vector_type)vec_perm(lookup_table_lo.value, lookup_table_hi.value, - this->value); - } - - template - simdutf_really_inline simd8 - lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, - L replace5, L replace6, L replace7, L replace8, L replace9, - L replace10, L replace11, L replace12, L replace13, L replace14, - L replace15) const { - return lookup_16(simd8::repeat_16( - replace0, replace1, replace2, replace3, replace4, replace5, replace6, - replace7, replace8, replace9, replace10, replace11, replace12, - replace13, replace14, replace15)); - } -}; - -// Unsigned bytes -template <> struct simd8 : base8_numeric { - using Self = simd8; - - simdutf_really_inline simd8() : base8_numeric() {} - simdutf_really_inline simd8(const vector_type _value) - : base8_numeric(_value) {} - // Splat constructor - simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdutf_really_inline simd8(const uint8_t *values) : simd8(load(values)) {} - // Member-by-member initialization - simdutf_really_inline - simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, - uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, - uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) - : simd8((vector_type){v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, - v12, v13, v14, v15}) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdutf_really_inline static simd8 - repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, - uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, - uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, - uint8_t v15) { - return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, - v13, v14, v15); - } - - simdutf_really_inline bool is_ascii() const { - return move_mask_u8(this->value) == 0; - } - - template - simdutf_really_inline simd8(simd8 other) - : simd8(vector_type(other.value)) {} - - template - simdutf_really_inline Self prev(const Self prev_chunk) const { - return prev_aux(prev_chunk.value); - } - - // Saturated math - simdutf_really_inline simd8 - saturating_sub(const simd8 other) const { - return (vector_type)vec_subs(this->value, (vector_type)other); - } - - // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdutf_really_inline simd8 - gt_bits(const simd8 other) const { - return this->saturating_sub(other); - } - - // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - simdutf_really_inline simd8 - lt_bits(const simd8 other) const { - return other.saturating_sub(*this); - } - - // Bit-specific operations - simdutf_really_inline bool bits_not_set_anywhere() const { - return vec_all_eq(this->value, (vector_type)vec_splats(0)); - } - - simdutf_really_inline bool any_bits_set_anywhere() const { - return !bits_not_set_anywhere(); - } - - template simdutf_really_inline simd8 shr() const { - return simd8( - (vector_type)vec_sr(this->value, (vector_type)vec_splat_u8(N))); - } - - template simdutf_really_inline simd8 shl() const { - return simd8( - (vector_type)vec_sl(this->value, (vector_type)vec_splat_u8(N))); - } - - void dump() const { - uint8_t tmp[16]; - store(tmp); - for (int i = 0; i < 16; i++) { - if (i == 0) { - printf("[%02x", tmp[i]); - } else if (i == 15) { - printf(" %02x]", tmp[i]); - } else { - printf(" %02x", tmp[i]); - } - } - putchar('\n'); - } - - void dump_ascii() const { - uint8_t tmp[16]; - store(tmp); - for (int i = 0; i < 16; i++) { - if (i == 0) { - printf("[%c", tmp[i]); - } else if (i == 15) { - printf("%c]", tmp[i]); - } else { - printf("%c", tmp[i]); - } - } - putchar('\n'); - } -}; - -// Signed bytes -template <> struct simd8 : base8_numeric { - simdutf_really_inline simd8() : base8_numeric() {} - simdutf_really_inline simd8(const vector_type _value) - : base8_numeric(_value) {} - - template - simdutf_really_inline simd8(simd8 other) - : simd8(vector_type(other.value)) {} - - // Splat constructor - simdutf_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdutf_really_inline simd8(const int8_t *values) : simd8(load(values)) {} - - simdutf_really_inline operator simd8() const; - - // Saturated math - simdutf_really_inline simd8 - saturating_add(const simd8 other) const { - return (vector_type)vec_adds(this->value, other.value); - } - - void dump() const { - int8_t tmp[16]; - store(tmp); - for (int i = 0; i < 16; i++) { - if (i == 0) { - printf("[%02x", tmp[i]); - } else if (i == 15) { - printf("%02x]", tmp[i]); - } else { - printf("%02x", tmp[i]); - } - } - putchar('\n'); - } -}; - -template -simd8 operator==(const simd8 a, const simd8 b) { - return vec_cmpeq(a.value, b.value); -} - -template -simd8 operator!=(const simd8 a, const simd8 b) { - return vec_cmpne(a.value, b.value); -} - -template simd8 operator&(const simd8 a, const simd8 b) { - return vec_and(a.value, b.value); -} - -template simd8 operator&(const simd8 a, U b) { - return vec_and(a.value, vec_splats(T(b))); -} - -template simd8 operator|(const simd8 a, const simd8 b) { - return vec_or(a.value, b.value); -} - -template simd8 operator^(const simd8 a, const simd8 b) { - return vec_xor(a.value, b.value); -} - -template simd8 operator^(const simd8 a, U b) { - return vec_xor(a.value, vec_splats(T(b))); -} - -template simd8 operator+(const simd8 a, const simd8 b) { - return vec_add(a.value, b.value); -} - -template simd8 operator+(const simd8 a, U b) { - return vec_add(a.value, vec_splats(T(b))); -} - -simdutf_really_inline simd8::operator simd8() const { - return (simd8::vector_type)value; -} - -template -simd8 operator<(const simd8 a, const simd8 b) { - return vec_cmplt(a.value, b.value); -} - -template -simd8 operator>(const simd8 a, const simd8 b) { - return vec_cmpgt(a.value, b.value); -} - -template -simd8 operator>=(const simd8 a, const simd8 b) { - return vec_cmpge(a.value, b.value); -} - -template struct simd8x64 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); - static constexpr size_t ELEMENTS = simd8::ELEMENTS; - - static_assert(NUM_CHUNKS == 4, - "PPC64 kernel should use four registers per 64-byte block."); - simd8 chunks[NUM_CHUNKS]; - - simd8x64(const simd8x64 &o) = delete; // no copy allowed - simd8x64 & - operator=(const simd8 other) = delete; // no assignment allowed - simd8x64() = delete; // no default constructor allowed - simd8x64(simd8x64 &&) = default; - - simdutf_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1, - const simd8 chunk2, const simd8 chunk3) - : chunks{chunk0, chunk1, chunk2, chunk3} {} - simdutf_really_inline simd8x64(const T *ptr) - : chunks{simd8::load(ptr), - simd8::load(ptr + sizeof(simd8) / sizeof(T)), - simd8::load(ptr + 2 * sizeof(simd8) / sizeof(T)), - simd8::load(ptr + 3 * sizeof(simd8) / sizeof(T))} {} - - simdutf_really_inline void store(T *ptr) const { - this->chunks[0].store(ptr + ELEMENTS * 0); - this->chunks[1].store(ptr + ELEMENTS * 1); - this->chunks[2].store(ptr + ELEMENTS * 2); - this->chunks[3].store(ptr + ELEMENTS * 3); - } - - simdutf_really_inline simd8x64 &operator|=(const simd8x64 &other) { - this->chunks[0] |= other.chunks[0]; - this->chunks[1] |= other.chunks[1]; - this->chunks[2] |= other.chunks[2]; - this->chunks[3] |= other.chunks[3]; - return *this; - } - - simdutf_really_inline simd8 reduce_or() const { - return (this->chunks[0] | this->chunks[1]) | - (this->chunks[2] | this->chunks[3]); - } - - simdutf_really_inline bool is_ascii() const { - return this->reduce_or().is_ascii(); - } - - template - simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { - this->chunks[0].template store_ascii_as_utf16(ptr + - sizeof(simd8) * 0); - this->chunks[1].template store_ascii_as_utf16(ptr + - sizeof(simd8) * 1); - this->chunks[2].template store_ascii_as_utf16(ptr + - sizeof(simd8) * 2); - this->chunks[3].template store_ascii_as_utf16(ptr + - sizeof(simd8) * 3); - } - - simdutf_really_inline void store_ascii_as_utf32(char32_t *ptr) const { - this->chunks[0].store_ascii_as_utf32(ptr + sizeof(simd8) * 0); - this->chunks[1].store_ascii_as_utf32(ptr + sizeof(simd8) * 1); - this->chunks[2].store_ascii_as_utf32(ptr + sizeof(simd8) * 2); - this->chunks[3].store_ascii_as_utf32(ptr + sizeof(simd8) * 3); - } - - simdutf_really_inline uint64_t to_bitmask() const { - uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); - uint64_t r1 = this->chunks[1].to_bitmask(); - uint64_t r2 = this->chunks[2].to_bitmask(); - uint64_t r3 = this->chunks[3].to_bitmask(); - return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); - } - - simdutf_really_inline uint64_t lt(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] < mask, this->chunks[1] < mask, - this->chunks[2] < mask, this->chunks[3] < mask) - .to_bitmask(); - } - - simdutf_really_inline uint64_t gt(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] > mask, this->chunks[1] > mask, - this->chunks[2] > mask, this->chunks[3] > mask) - .to_bitmask(); - } - - simdutf_really_inline uint64_t gteq_unsigned(const uint8_t m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(simd8(this->chunks[0]) >= mask, - simd8(this->chunks[1]) >= mask, - simd8(this->chunks[2]) >= mask, - simd8(this->chunks[3]) >= mask) - .to_bitmask(); - } - - void dump() const { - puts(""); - for (int i = 0; i < 4; i++) { - printf("chunk[%d] = ", i); - this->chunks[i].dump(); - } - } -}; // struct simd8x64 - -simdutf_really_inline simd8 avg(const simd8 a, - const simd8 b) { - return vec_avg(a.value, b.value); -} -/* end file src/simdutf/ppc64/simd8-inl.h */ -/* begin file src/simdutf/ppc64/simd16-inl.h */ -// file included directly - -template struct simd16; - -template struct base16 { - using vector_type = vector_u16_type_for_element; - static const int SIZE = sizeof(vector_type); - static const int ELEMENTS = sizeof(vector_type) / sizeof(T); - - vector_type value; - - // Zero constructor - simdutf_really_inline base16() : value{vector_type()} {} - - // Conversion from SIMD register - simdutf_really_inline base16(const vector_type _value) : value{_value} {} - - void dump() const { - uint16_t tmp[8]; - vec_xst(value, 0, reinterpret_cast(tmp)); - for (int i = 0; i < 8; i++) { - if (i == 0) { - printf("[%04x", tmp[i]); - } else if (i == 8 - 1) { - printf(" %04x]", tmp[i]); - } else { - printf(" %04x", tmp[i]); - } - } - putchar('\n'); - } -}; - -// Forward declaration -template struct simd16; - -template -simd16 operator==(const simd16 a, const simd16 b); - -template -simd16 operator==(const simd16 a, U b); - -template simd16 operator&(const simd16 a, const simd16 b); - -template simd16 operator|(const simd16 a, const simd16 b); - -template simd16 operator|(const simd16 a, U b); - -template simd16 operator^(const simd16 a, U b); - -// SIMD byte mask type (returned by things like eq and gt) -template <> struct simd16 : base16 { - static simdutf_really_inline simd16 splat(bool _value) { - return (vector_type)vec_splats(uint16_t(-(!!_value))); - } - - simdutf_really_inline simd16() : base16() {} - - simdutf_really_inline simd16(const vector_type _value) - : base16(_value) {} - - // Splat constructor - simdutf_really_inline simd16(bool _value) : base16(splat(_value)) {} - - simdutf_really_inline uint16_t to_bitmask() const { - return move_mask_u8(value); - } - - simdutf_really_inline bool any() const { - const auto tmp = vec_u64_t(value); - - return tmp[0] || tmp[1]; // Note: logical or, not binary one - } - - simdutf_really_inline bool is_zero() const { - const auto tmp = vec_u64_t(value); - - return (tmp[0] | tmp[1]) == 0; - } - - simdutf_really_inline simd16 &operator|=(const simd16 rhs) { - value = vec_or(this->value, rhs.value); - return *this; - } -}; - -template struct base16_numeric : base16 { - using vector_type = typename base16::vector_type; - - static simdutf_really_inline simd16 splat(T _value) { - return vec_splats(_value); - } - - static simdutf_really_inline simd16 zero() { return splat(0); } - - template - static simdutf_really_inline simd16 load(const U *ptr) { - return vec_xl(0, reinterpret_cast(ptr)); - } - - simdutf_really_inline base16_numeric() : base16() {} - simdutf_really_inline base16_numeric(const vector_type _value) - : base16(_value) {} - - // Store to array - template simdutf_really_inline void store(U *dst) const { -#if defined(__clang__) - return vec_xst(this->value, 0, reinterpret_cast(dst)); -#else - return vec_xst(this->value, 0, reinterpret_cast(dst)); -#endif // defined(__clang__) - } - - // Override to distinguish from bool version - simdutf_really_inline simd16 operator~() const { - return vec_xor(this->value, vec_splats(T(0xffff))); - } -}; - -// Signed code units -template <> struct simd16 : base16_numeric { - simdutf_really_inline simd16() : base16_numeric() {} - simdutf_really_inline simd16(const vector_type _value) - : base16_numeric(_value) {} - // Splat constructor - simdutf_really_inline simd16(int16_t _value) : simd16(splat(_value)) {} - // Array constructor - simdutf_really_inline operator simd16() const; -}; - -// Unsigned code units -template <> struct simd16 : base16_numeric { - simdutf_really_inline simd16() : base16_numeric() {} - simdutf_really_inline simd16(const vector_type _value) - : base16_numeric(_value) {} - - // Splat constructor - simdutf_really_inline simd16(uint16_t _value) : simd16(splat(_value)) {} - - // Array constructor - simdutf_really_inline simd16(const char16_t *values) - : simd16(load(reinterpret_cast(values))) {} - - simdutf_really_inline bool is_ascii() const { - return vec_all_lt(value, vec_splats(uint16_t(128))); - } - - // Order-specific operations - simdutf_really_inline simd16 - max_val(const simd16 other) const { - return vec_max(this->value, other.value); - } - simdutf_really_inline simd16 - min_val(const simd16 other) const { - return vec_min(this->value, other.value); - } - // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - simdutf_really_inline simd16 - operator<=(const simd16 other) const { - return other.max_val(*this) == other; - } - - simdutf_really_inline simd16 - operator>=(const simd16 other) const { - return other.min_val(*this) == other; - } - - simdutf_really_inline simd16 - operator<(const simd16 other) const { - return vec_cmplt(value, other.value); - } - - // Bit-specific operations - template simdutf_really_inline simd16 shr() const { - return vec_sr(value, vec_splats(uint16_t(N))); - } - - template simdutf_really_inline simd16 shl() const { - return vec_sl(value, vec_splats(uint16_t(N))); - } - - // Change the endianness - simdutf_really_inline simd16 swap_bytes() const { - return vec_revb(value); - } - - // Pack with the unsigned saturation of two uint16_t code units into single - // uint8_t vector - static simdutf_really_inline simd8 pack(const simd16 &v0, - const simd16 &v1) { - return vec_packs(v0.value, v1.value); - } -}; - -template -simd16 operator==(const simd16 a, const simd16 b) { - return vec_cmpeq(a.value, b.value); -} - -template -simd16 operator==(const simd16 a, U b) { - return vec_cmpeq(a.value, vec_splats(T(b))); -} - -template -simd16 operator&(const simd16 a, const simd16 b) { - return vec_and(a.value, b.value); -} - -template simd16 operator&(const simd16 a, U b) { - return vec_and(a.value, vec_splats(T(b))); -} - -template -simd16 operator|(const simd16 a, const simd16 b) { - return vec_or(a.value, b.value); -} - -template simd16 operator|(const simd16 a, U b) { - return vec_or(a.value, vec_splats(T(b))); -} - -template -simd16 operator^(const simd16 a, const simd16 b) { - return vec_xor(a.value, b.value); -} - -template simd16 operator^(const simd16 a, U b) { - return vec_xor(a.value, vec_splats(T(b))); -} - -simdutf_really_inline simd16::operator simd16() const { - return (vec_u16_t)(value); -} - -template struct simd16x32 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd16); - static_assert(NUM_CHUNKS == 4, - "AltiVec kernel should use four registers per 64-byte block."); - simd16 chunks[NUM_CHUNKS]; - - simd16x32(const simd16x32 &o) = delete; // no copy allowed - simd16x32 & - operator=(const simd16 other) = delete; // no assignment allowed - simd16x32() = delete; // no default constructor allowed - - simdutf_really_inline - simd16x32(const simd16 chunk0, const simd16 chunk1, - const simd16 chunk2, const simd16 chunk3) - : chunks{chunk0, chunk1, chunk2, chunk3} {} - simdutf_really_inline simd16x32(const T *ptr) - : chunks{simd16::load(ptr), - simd16::load(ptr + sizeof(simd16) / sizeof(T)), - simd16::load(ptr + 2 * sizeof(simd16) / sizeof(T)), - simd16::load(ptr + 3 * sizeof(simd16) / sizeof(T))} {} - - simdutf_really_inline void store(T *ptr) const { - this->chunks[0].store(ptr + sizeof(simd16) * 0 / sizeof(T)); - this->chunks[1].store(ptr + sizeof(simd16) * 1 / sizeof(T)); - this->chunks[2].store(ptr + sizeof(simd16) * 2 / sizeof(T)); - this->chunks[3].store(ptr + sizeof(simd16) * 3 / sizeof(T)); - } - - simdutf_really_inline simd16 reduce_or() const { - return (this->chunks[0] | this->chunks[1]) | - (this->chunks[2] | this->chunks[3]); - } - - simdutf_really_inline bool is_ascii() const { - return this->reduce_or().is_ascii(); - } - - simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { - this->chunks[0].store_ascii_as_utf16(ptr + sizeof(simd16) * 0); - this->chunks[1].store_ascii_as_utf16(ptr + sizeof(simd16) * 1); - this->chunks[2].store_ascii_as_utf16(ptr + sizeof(simd16) * 2); - this->chunks[3].store_ascii_as_utf16(ptr + sizeof(simd16) * 3); - } - - simdutf_really_inline uint64_t to_bitmask() const { - uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); - uint64_t r1 = this->chunks[1].to_bitmask(); - uint64_t r2 = this->chunks[2].to_bitmask(); - uint64_t r3 = this->chunks[3].to_bitmask(); - return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); - } - - simdutf_really_inline void swap_bytes() { - this->chunks[0] = this->chunks[0].swap_bytes(); - this->chunks[1] = this->chunks[1].swap_bytes(); - this->chunks[2] = this->chunks[2].swap_bytes(); - this->chunks[3] = this->chunks[3].swap_bytes(); - } - - simdutf_really_inline uint64_t lteq(const T m) const { - const simd16 mask = simd16::splat(m); - return simd16x32(this->chunks[0] <= mask, this->chunks[1] <= mask, - this->chunks[2] <= mask, this->chunks[3] <= mask) - .to_bitmask(); - } - - simdutf_really_inline uint64_t not_in_range(const T low, const T high) const { - const simd16 mask_low = simd16::splat(static_cast(low - 1)); - const simd16 mask_high = simd16::splat(static_cast(high + 1)); - return simd16x32( - (this->chunks[0] >= mask_high) | (this->chunks[0] <= mask_low), - (this->chunks[1] >= mask_high) | (this->chunks[1] <= mask_low), - (this->chunks[2] >= mask_high) | (this->chunks[2] <= mask_low), - (this->chunks[3] >= mask_high) | (this->chunks[3] <= mask_low)) - .to_bitmask(); - } -}; // struct simd16x32 -/* end file src/simdutf/ppc64/simd16-inl.h */ -/* begin file src/simdutf/ppc64/simd32-inl.h */ -// file included directly - -template struct simd32; - -template struct base32 { - using vector_type = vector_u32_type_for_element; - static const int SIZE = sizeof(vector_type); - static const int ELEMENTS = sizeof(vector_type) / sizeof(T); - - vector_type value; - - // Zero constructor - simdutf_really_inline base32() : value{vector_type()} {} - - // Conversion from SIMD register - simdutf_really_inline base32(const vector_type _value) : value{_value} {} - - // Splat for scalar - simdutf_really_inline base32(T scalar) : value{vec_splats(scalar)} {} - - template - simdutf_really_inline base32(const Pointer *ptr) - : base32(vec_xl(0, reinterpret_cast(ptr))) {} - - // Store to array - template simdutf_really_inline void store(U *dst) const { -#if defined(__clang__) - return vec_xst(this->value, 0, reinterpret_cast(dst)); -#else - return vec_xst(this->value, 0, reinterpret_cast(dst)); -#endif // defined(__clang__) - } - - void dump(const char *name = nullptr) const { - if (name != nullptr) { - printf("%-10s = ", name); - } - - uint32_t tmp[4]; - vec_xst(value, 0, reinterpret_cast(tmp)); - for (int i = 0; i < 4; i++) { - if (i == 0) { - printf("[%08x", tmp[i]); - } else if (i == 4 - 1) { - printf(" %08x]", tmp[i]); - } else { - printf(" %08x", tmp[i]); - } - } - putchar('\n'); - } -}; - -template struct base32_numeric : base32 { - using super = base32; - using vector_type = typename super::vector_type; - - static simdutf_really_inline simd32 splat(T _value) { - return vec_splats(_value); - } - - static simdutf_really_inline simd32 zero() { return splat(0); } - - template - static simdutf_really_inline simd32 load(const U *values) { - return vec_xl(0, reinterpret_cast(values)); - } - - simdutf_really_inline base32_numeric() : base32() {} - - simdutf_really_inline base32_numeric(const vector_type _value) - : base32(_value) {} - - // Addition/subtraction are the same for signed and unsigned - simdutf_really_inline simd32 operator+(const simd32 other) const { - return vec_add(this->value, other.value); - } - - simdutf_really_inline simd32 operator-(const simd32 other) const { - return vec_sub(this->value, other.value); - } - - simdutf_really_inline simd32 &operator+=(const simd32 other) { - *this = *this + other; - return *static_cast *>(this); - } - - simdutf_really_inline simd32 &operator-=(const simd32 other) { - *this = *this - other; - return *static_cast *>(this); - } -}; - -// Forward declaration -template struct simd32; - -template -simd32 operator==(const simd32 a, const simd32 b); - -template -simd32 operator!=(const simd32 a, const simd32 b); - -template -simd32 operator>(const simd32 a, const simd32 b); - -template simd32 operator==(const simd32 a, T b); - -template simd32 operator!=(const simd32 a, T b); - -template simd32 operator&(const simd32 a, const simd32 b); - -template simd32 operator|(const simd32 a, const simd32 b); - -template simd32 operator^(const simd32 a, const simd32 b); - -// SIMD byte mask type (returned by things like eq and gt) -template <> struct simd32 : base32 { - static simdutf_really_inline simd32 splat(bool _value) { - return (vector_type)vec_splats(uint32_t(-(!!_value))); - } - - simdutf_really_inline simd32(const vector_type _value) - : base32(_value) {} - - // Splat constructor - simdutf_really_inline simd32(bool _value) : base32(splat(_value)) {} - - simdutf_really_inline uint16_t to_bitmask() const { - return move_mask_u8(value); - } - - simdutf_really_inline bool any() const { - const vec_u64_t tmp = (vec_u64_t)value; - - return tmp[0] || tmp[1]; // Note: logical or, not binary one - } - - simdutf_really_inline bool is_zero() const { - const vec_u64_t tmp = (vec_u64_t)value; - - return (tmp[0] | tmp[1]) == 0; - } - - simdutf_really_inline simd32 operator~() const { - return (vec_bool32_t)vec_xor(this->value, vec_splats(uint32_t(0xffffffff))); - } -}; - -// Unsigned code units -template <> struct simd32 : base32_numeric { - simdutf_really_inline simd32() : base32_numeric() {} - - simdutf_really_inline simd32(const vector_type _value) - : base32_numeric(_value) {} - - // Splat constructor - simdutf_really_inline simd32(uint32_t _value) : simd32(splat(_value)) {} - - // Array constructor - simdutf_really_inline simd32(const char32_t *values) - : simd32(load(reinterpret_cast(values))) {} - - // Bit-specific operations - template simdutf_really_inline simd32 shr() const { - return vec_sr(value, vec_splats(uint32_t(N))); - } - - template simdutf_really_inline simd32 shl() const { - return vec_sl(value, vec_splats(uint32_t(N))); - } - - // Change the endianness - simdutf_really_inline simd32 swap_bytes() const { - return vec_revb(value); - } - - simdutf_really_inline uint64_t sum() const { - return uint64_t(value[0]) + uint64_t(value[1]) + uint64_t(value[2]) + - uint64_t(value[3]); - } - - static simdutf_really_inline simd16 - pack(const simd32 &v0, const simd32 &v1) { - return vec_packs(v0.value, v1.value); - } -}; - -template -simd32 operator==(const simd32 a, const simd32 b) { - return vec_cmpeq(a.value, b.value); -} - -template -simd32 operator!=(const simd32 a, const simd32 b) { - return vec_cmpne(a.value, b.value); -} - -template simd32 operator==(const simd32 a, T b) { - return vec_cmpeq(a.value, vec_splats(b)); -} - -template simd32 operator!=(const simd32 a, T b) { - return vec_cmpne(a.value, vec_splats(b)); -} - -template -simd32 operator>(const simd32 a, const simd32 b) { - return vec_cmpgt(a.value, b.value); -} - -template -simd32 operator>=(const simd32 a, const simd32 b) { - return vec_cmpge(a.value, b.value); -} - -template -simd32 operator&(const simd32 a, const simd32 b) { - return vec_and(a.value, b.value); -} - -template simd32 operator&(const simd32 a, U b) { - return vec_and(a.value, vec_splats(T(b))); -} - -template -simd32 operator|(const simd32 a, const simd32 b) { - return vec_or(a.value, b.value); -} - -template -simd32 operator^(const simd32 a, const simd32 b) { - return vec_xor(a.value, b.value); -} - -template simd32 operator^(const simd32 a, U b) { - return vec_xor(a.value, vec_splats(T(b))); -} - -template simd32 max_val(const simd32 a, const simd32 b) { - return vec_max(a.value, b.value); -} - -template -simdutf_really_inline simd32 min(const simd32 b, const simd32 a) { - return vec_min(a.value, b.value); -} -/* end file src/simdutf/ppc64/simd32-inl.h */ - -template -simd8 select(const simd8 cond, const simd8 val_true, - const simd8 val_false) { - return vec_sel(val_false.value, val_true.value, cond.value); -} - -template -simd8 select(const T cond, const simd8 val_true, - const simd8 val_false) { - return vec_sel(val_false.value, val_true.value, vec_splats(cond)); -} - -template -simd16 select(const simd16 cond, const simd16 val_true, - const simd16 val_false) { - return vec_sel(val_false.value, val_true.value, cond.value); -} - -template -simd16 select(const T cond, const simd16 val_true, - const simd16 val_false) { - return vec_sel(val_false.value, val_true.value, vec_splats(cond)); -} - -template -simd32 select(const simd32 cond, const simd32 val_true, - const simd32 val_false) { - return vec_sel(val_false.value, val_true.value, cond.value); -} - -template -simd32 select(const T cond, const simd32 val_true, - const simd32 val_false) { - return vec_sel(val_false.value, val_true.value, vec_splats(cond)); -} - -using vector_u8 = simd8; -using vector_u16 = simd16; -using vector_u32 = simd32; -using vector_i8 = simd8; - -simdutf_really_inline vector_u8 as_vector_u8(const vector_u16 v) { - return vector_u8::vector_type(v.value); -} - -simdutf_really_inline vector_u8 as_vector_u8(const vector_u32 v) { - return vector_u8::vector_type(v.value); -} - -simdutf_really_inline vector_u8 as_vector_u8(const vector_i8 v) { - return vector_u8::vector_type(v.value); -} - -simdutf_really_inline vector_u8 as_vector_u8(const simd16 v) { - return vector_u8::vector_type(v.value); -} - -simdutf_really_inline vector_i8 as_vector_i8(const vector_u8 v) { - return vector_i8::vector_type(v.value); -} - -simdutf_really_inline vector_u16 as_vector_u16(const vector_u8 v) { - return vector_u16::vector_type(v.value); -} - -simdutf_really_inline vector_u16 as_vector_u16(const simd16 v) { - return vector_u16::vector_type(v.value); -} - -simdutf_really_inline vector_u32 as_vector_u32(const vector_u8 v) { - return vector_u32::vector_type(v.value); -} - -simdutf_really_inline vector_u32 as_vector_u32(const vector_u16 v) { - return vector_u32::vector_type(v.value); -} - -simdutf_really_inline vector_u32 max(vector_u32 a, vector_u32 b) { - return vec_max(a.value, b.value); -} - -simdutf_really_inline vector_u32 max(vector_u32 a, vector_u32 b, vector_u32 c) { - return max(max(a, b), c); -} - -simdutf_really_inline vector_u32 sum4bytes(vector_u8 bytes, vector_u32 acc) { - return vec_sum4s(bytes.value, acc.value); -} - -} // namespace simd -} // unnamed namespace -} // namespace ppc64 -} // namespace simdutf - -#endif // SIMDUTF_PPC64_SIMD_INPUT_H -/* end file src/simdutf/ppc64/simd.h */ - -/* begin file src/simdutf/ppc64/end.h */ -/* end file src/simdutf/ppc64/end.h */ - -#endif // SIMDUTF_IMPLEMENTATION_PPC64 - -#endif // SIMDUTF_PPC64_H -/* end file src/simdutf/ppc64.h */ -/* begin file src/simdutf/rvv.h */ -#ifndef SIMDUTF_RVV_H -#define SIMDUTF_RVV_H - -#ifdef SIMDUTF_FALLBACK_H - #error "rvv.h must be included before fallback.h" -#endif - - -#define SIMDUTF_CAN_ALWAYS_RUN_RVV SIMDUTF_IS_RVV - -#ifndef SIMDUTF_IMPLEMENTATION_RVV - #define SIMDUTF_IMPLEMENTATION_RVV \ - (SIMDUTF_CAN_ALWAYS_RUN_RVV || \ - (SIMDUTF_IS_RISCV64 && SIMDUTF_HAS_RVV_INTRINSICS && \ - SIMDUTF_HAS_RVV_TARGET_REGION)) -#endif - -#if SIMDUTF_IMPLEMENTATION_RVV - - #if SIMDUTF_CAN_ALWAYS_RUN_RVV - #define SIMDUTF_TARGET_RVV - #else - #define SIMDUTF_TARGET_RVV SIMDUTF_TARGET_REGION("arch=+v") - #endif - #if !SIMDUTF_IS_ZVBB && SIMDUTF_HAS_ZVBB_INTRINSICS - #define SIMDUTF_TARGET_ZVBB SIMDUTF_TARGET_REGION("arch=+v,+zvbb") - #endif - -namespace simdutf { -namespace rvv {} // namespace rvv -} // namespace simdutf - -/* begin file src/simdutf/rvv/implementation.h */ -#ifndef SIMDUTF_RVV_IMPLEMENTATION_H -#define SIMDUTF_RVV_IMPLEMENTATION_H - - -namespace simdutf { -namespace rvv { - -namespace { -using namespace simdutf; -} // namespace - -class implementation final : public simdutf::implementation { -public: - simdutf_really_inline implementation() - : simdutf::implementation("rvv", "RISC-V Vector Extension", - internal::instruction_set::RVV), - _supports_zvbb(internal::detect_supported_architectures() & - internal::instruction_set::ZVBB) {} -#if SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused int detect_encodings(const char *input, - size_t length) const noexcept final; -#endif // SIMDUTF_FEATURE_DETECT_ENCODING -#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused bool validate_utf8(const char *buf, - size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING -#if SIMDUTF_FEATURE_UTF8 - simdutf_warn_unused result - validate_utf8_with_errors(const char *buf, size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 -#if SIMDUTF_FEATURE_ASCII - simdutf_warn_unused bool validate_ascii(const char *buf, - size_t len) const noexcept final; - simdutf_warn_unused result - validate_ascii_with_errors(const char *buf, size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_ASCII - -#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused bool validate_utf16le(const char16_t *buf, - size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING -#if SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused bool validate_utf16be(const char16_t *buf, - size_t len) const noexcept final; - void to_well_formed_utf16be(const char16_t *input, size_t len, - char16_t *output) const noexcept final; - void to_well_formed_utf16le(const char16_t *input, size_t len, - char16_t *output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused result validate_utf16le_with_errors( - const char16_t *buf, size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused result validate_utf16be_with_errors( - const char16_t *buf, size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused bool validate_utf32(const char32_t *buf, - size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING -#if SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused result validate_utf32_with_errors( - const char32_t *buf, size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF32 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_latin1_to_utf8( - const char *buf, size_t len, char *utf8_output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_latin1_to_utf16le( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_latin1_to_utf16be( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_latin1_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_latin1_with_errors( - const char *buf, size_t len, char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t convert_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t convert_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf32_with_errors( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - convert_utf16le_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16be_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( - const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( - const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16le_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16be_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t convert_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - convert_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; - simdutf_warn_unused result - convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t convert_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf8_with_errors( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - convert_utf32_to_utf16le(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf32_to_utf16be(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_utf16le(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_utf16be(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16le_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16be_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16le_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16be_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -#if SIMDUTF_FEATURE_UTF16 - void change_endianness_utf16(const char16_t *buf, size_t length, - char16_t *output) const noexcept final; - simdutf_warn_unused size_t count_utf16le(const char16_t *buf, - size_t length) const noexcept; - simdutf_warn_unused size_t count_utf16be(const char16_t *buf, - size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_UTF8 - simdutf_warn_unused size_t count_utf8(const char *buf, - size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t - utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t utf32_length_from_utf16le( - const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t utf32_length_from_utf16be( - const char16_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t - utf16_length_from_utf8(const char *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf32_length_from_utf8(const char *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_UTF32 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - latin1_length_from_utf8(const char *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - utf8_length_from_latin1(const char *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_BASE64 - simdutf_warn_unused result base64_to_binary( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused full_result base64_to_binary_details( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused result - base64_to_binary(const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused full_result base64_to_binary_details( - const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - size_t binary_to_base64(const char *input, size_t length, char *output, - base64_options options) const noexcept; -#endif // SIMDUTF_FEATURE_BASE64 -private: - const bool _supports_zvbb; - -#if SIMDUTF_IS_ZVBB - bool supports_zvbb() const { return true; } -#elif SIMDUTF_HAS_ZVBB_INTRINSICS - bool supports_zvbb() const { return _supports_zvbb; } -#else - bool supports_zvbb() const { return false; } -#endif -}; - -} // namespace rvv -} // namespace simdutf - -#endif // SIMDUTF_RVV_IMPLEMENTATION_H -/* end file src/simdutf/rvv/implementation.h */ -/* begin file src/simdutf/rvv/begin.h */ -// redefining SIMDUTF_IMPLEMENTATION to "rvv" -// #define SIMDUTF_IMPLEMENTATION rvv - -#if SIMDUTF_CAN_ALWAYS_RUN_RVV -// nothing needed. -#else -SIMDUTF_TARGET_RVV -#endif -/* end file src/simdutf/rvv/begin.h */ -/* begin file src/simdutf/rvv/intrinsics.h */ -#ifndef SIMDUTF_RVV_INTRINSICS_H -#define SIMDUTF_RVV_INTRINSICS_H - - -#include - -#if __riscv_v_intrinsic >= 1000000 || __GCC__ >= 14 - #define simdutf_vrgather_u8m1x2(tbl, idx) \ - __riscv_vcreate_v_u8m1_u8m2( \ - __riscv_vrgather_vv_u8m1(tbl, __riscv_vget_v_u8m2_u8m1(idx, 0), \ - __riscv_vsetvlmax_e8m1()), \ - __riscv_vrgather_vv_u8m1(tbl, __riscv_vget_v_u8m2_u8m1(idx, 1), \ - __riscv_vsetvlmax_e8m1())); - - #define simdutf_vrgather_u8m1x4(tbl, idx) \ - __riscv_vcreate_v_u8m1_u8m4( \ - __riscv_vrgather_vv_u8m1(tbl, __riscv_vget_v_u8m4_u8m1(idx, 0), \ - __riscv_vsetvlmax_e8m1()), \ - __riscv_vrgather_vv_u8m1(tbl, __riscv_vget_v_u8m4_u8m1(idx, 1), \ - __riscv_vsetvlmax_e8m1()), \ - __riscv_vrgather_vv_u8m1(tbl, __riscv_vget_v_u8m4_u8m1(idx, 2), \ - __riscv_vsetvlmax_e8m1()), \ - __riscv_vrgather_vv_u8m1(tbl, __riscv_vget_v_u8m4_u8m1(idx, 3), \ - __riscv_vsetvlmax_e8m1())); -#else - // This has worse codegen on gcc - #define simdutf_vrgather_u8m1x2(tbl, idx) \ - __riscv_vset_v_u8m1_u8m2( \ - __riscv_vlmul_ext_v_u8m1_u8m2(__riscv_vrgather_vv_u8m1( \ - tbl, __riscv_vget_v_u8m2_u8m1(idx, 0), __riscv_vsetvlmax_e8m1())), \ - 1, \ - __riscv_vrgather_vv_u8m1(tbl, __riscv_vget_v_u8m2_u8m1(idx, 1), \ - __riscv_vsetvlmax_e8m1())) - - #define simdutf_vrgather_u8m1x4(tbl, idx) \ - __riscv_vset_v_u8m1_u8m4( \ - __riscv_vset_v_u8m1_u8m4( \ - __riscv_vset_v_u8m1_u8m4( \ - __riscv_vlmul_ext_v_u8m1_u8m4(__riscv_vrgather_vv_u8m1( \ - tbl, __riscv_vget_v_u8m4_u8m1(idx, 0), \ - __riscv_vsetvlmax_e8m1())), \ - 1, \ - __riscv_vrgather_vv_u8m1(tbl, \ - __riscv_vget_v_u8m4_u8m1(idx, 1), \ - __riscv_vsetvlmax_e8m1())), \ - 2, \ - __riscv_vrgather_vv_u8m1(tbl, __riscv_vget_v_u8m4_u8m1(idx, 2), \ - __riscv_vsetvlmax_e8m1())), \ - 3, \ - __riscv_vrgather_vv_u8m1(tbl, __riscv_vget_v_u8m4_u8m1(idx, 3), \ - __riscv_vsetvlmax_e8m1())) -#endif - -/* Zvbb adds dedicated support for endianness swaps with vrev8, but if we can't - * use that, we have to emulate it with the standard V extension. - * Using LMUL=1 vrgathers could be faster than the srl+macc variant, but that - * would increase register pressure, and vrgather implementations performance - * varies a lot. */ -enum class simdutf_ByteFlip { NONE, V, ZVBB }; - -template -simdutf_really_inline static uint16_t simdutf_byteflip(uint16_t v) { - if (method != simdutf_ByteFlip::NONE) - return (uint16_t)((v * 1u) << 8 | (v * 1u) >> 8); - return v; -} - -#ifdef SIMDUTF_TARGET_ZVBB -SIMDUTF_UNTARGET_REGION -SIMDUTF_TARGET_ZVBB -#endif - -template -simdutf_really_inline static vuint16m1_t simdutf_byteflip(vuint16m1_t v, - size_t vl) { -#if SIMDUTF_HAS_ZVBB_INTRINSICS - if (method == simdutf_ByteFlip::ZVBB) - return __riscv_vrev8_v_u16m1(v, vl); -#endif - if (method == simdutf_ByteFlip::V) - return __riscv_vmacc_vx_u16m1(__riscv_vsrl_vx_u16m1(v, 8, vl), 0x100, v, - vl); - return v; -} - -template -simdutf_really_inline static vuint16m2_t simdutf_byteflip(vuint16m2_t v, - size_t vl) { -#if SIMDUTF_HAS_ZVBB_INTRINSICS - if (method == simdutf_ByteFlip::ZVBB) - return __riscv_vrev8_v_u16m2(v, vl); -#endif - if (method == simdutf_ByteFlip::V) - return __riscv_vmacc_vx_u16m2(__riscv_vsrl_vx_u16m2(v, 8, vl), 0x100, v, - vl); - return v; -} - -template -simdutf_really_inline static vuint16m4_t simdutf_byteflip(vuint16m4_t v, - size_t vl) { -#if SIMDUTF_HAS_ZVBB_INTRINSICS - if (method == simdutf_ByteFlip::ZVBB) - return __riscv_vrev8_v_u16m4(v, vl); -#endif - if (method == simdutf_ByteFlip::V) - return __riscv_vmacc_vx_u16m4(__riscv_vsrl_vx_u16m4(v, 8, vl), 0x100, v, - vl); - return v; -} - -template -simdutf_really_inline static vuint16m8_t simdutf_byteflip(vuint16m8_t v, - size_t vl) { -#if SIMDUTF_HAS_ZVBB_INTRINSICS - if (method == simdutf_ByteFlip::ZVBB) - return __riscv_vrev8_v_u16m8(v, vl); -#endif - if (method == simdutf_ByteFlip::V) - return __riscv_vmacc_vx_u16m8(__riscv_vsrl_vx_u16m8(v, 8, vl), 0x100, v, - vl); - return v; -} - -#ifdef SIMDUTF_TARGET_ZVBB -SIMDUTF_UNTARGET_REGION -SIMDUTF_TARGET_RVV -#endif - -#endif // SIMDUTF_RVV_INTRINSICS_H -/* end file src/simdutf/rvv/intrinsics.h */ -/* begin file src/simdutf/rvv/end.h */ -#if SIMDUTF_CAN_ALWAYS_RUN_RVV -// nothing needed. -#else -SIMDUTF_UNTARGET_REGION -#endif - -/* end file src/simdutf/rvv/end.h */ - -#endif // SIMDUTF_IMPLEMENTATION_RVV - -#endif // SIMDUTF_RVV_H -/* end file src/simdutf/rvv.h */ -/* begin file src/simdutf/lsx.h */ -#ifndef SIMDUTF_LSX_H -#define SIMDUTF_LSX_H - -#ifdef SIMDUTF_FALLBACK_H - #error "lsx.h must be included before fallback.h" -#endif - - -#ifndef SIMDUTF_IMPLEMENTATION_LSX - #define SIMDUTF_IMPLEMENTATION_LSX (SIMDUTF_IS_LSX) -#endif -#if SIMDUTF_IMPLEMENTATION_LSX && SIMDUTF_IS_LSX - #define SIMDUTF_CAN_ALWAYS_RUN_LSX 1 -#else - #define SIMDUTF_CAN_ALWAYS_RUN_LSX 0 -#endif - -#define SIMDUTF_CAN_ALWAYS_RUN_FALLBACK (SIMDUTF_IMPLEMENTATION_FALLBACK) - -#if SIMDUTF_IMPLEMENTATION_LSX - -namespace simdutf { -/** - * Implementation for LoongArch SX. - */ -namespace lsx {} // namespace lsx -} // namespace simdutf - -/* begin file src/simdutf/lsx/implementation.h */ -#ifndef SIMDUTF_LSX_IMPLEMENTATION_H -#define SIMDUTF_LSX_IMPLEMENTATION_H - - -namespace simdutf { -namespace lsx { - -namespace { -using namespace simdutf; -} - -class implementation final : public simdutf::implementation { -public: - simdutf_really_inline implementation() - : simdutf::implementation("lsx", "LOONGARCH SX", - internal::instruction_set::LSX) {} -#if SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused int detect_encodings(const char *input, - size_t length) const noexcept final; -#endif // SIMDUTF_FEATURE_DETECT_ENCODING -#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused bool validate_utf8(const char *buf, - size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING -#if SIMDUTF_FEATURE_UTF8 - simdutf_warn_unused result - validate_utf8_with_errors(const char *buf, size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 -#if SIMDUTF_FEATURE_ASCII - simdutf_warn_unused bool validate_ascii(const char *buf, - size_t len) const noexcept final; - simdutf_warn_unused result - validate_ascii_with_errors(const char *buf, size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_ASCII - -#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused bool validate_utf16le(const char16_t *buf, - size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING -#if SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused bool validate_utf16be(const char16_t *buf, - size_t len) const noexcept final; - simdutf_warn_unused result validate_utf16le_with_errors( - const char16_t *buf, size_t len) const noexcept final; - simdutf_warn_unused result validate_utf16be_with_errors( - const char16_t *buf, size_t len) const noexcept final; - void to_well_formed_utf16be(const char16_t *input, size_t len, - char16_t *output) const noexcept final; - void to_well_formed_utf16le(const char16_t *input, size_t len, - char16_t *output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused bool validate_utf32(const char32_t *buf, - size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING -#if SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused result validate_utf32_with_errors( - const char32_t *buf, size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF32 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_latin1_to_utf8( - const char *buf, size_t len, char *utf8_output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_latin1_to_utf16le( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_latin1_to_utf16be( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_latin1_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_latin1_with_errors( - const char *buf, size_t len, char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t convert_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t convert_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf32_with_errors( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - convert_utf16le_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16be_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( - const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( - const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16le_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16be_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t convert_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - convert_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; - simdutf_warn_unused result - convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t convert_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf8_with_errors( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - convert_utf32_to_utf16le(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf32_to_utf16be(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_utf16le(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_utf16be(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16le_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16be_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16le_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16be_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -#if SIMDUTF_FEATURE_UTF16 - void change_endianness_utf16(const char16_t *buf, size_t length, - char16_t *output) const noexcept final; - simdutf_warn_unused size_t count_utf16le(const char16_t *buf, - size_t length) const noexcept; - simdutf_warn_unused size_t count_utf16be(const char16_t *buf, - size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_UTF8 - simdutf_warn_unused size_t count_utf8(const char *buf, - size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t - utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t utf32_length_from_utf16le( - const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t utf32_length_from_utf16be( - const char16_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t - utf16_length_from_utf8(const char *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf32_length_from_utf8(const char *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - latin1_length_from_utf8(const char *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - utf8_length_from_latin1(const char *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_BASE64 - simdutf_warn_unused result base64_to_binary( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused full_result base64_to_binary_details( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused result - base64_to_binary(const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused full_result base64_to_binary_details( - const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - size_t binary_to_base64(const char *input, size_t length, char *output, - base64_options options) const noexcept; -#endif // SIMDUTF_FEATURE_BASE64 -}; - -} // namespace lsx -} // namespace simdutf - -#endif // SIMDUTF_LSX_IMPLEMENTATION_H -/* end file src/simdutf/lsx/implementation.h */ - -/* begin file src/simdutf/lsx/begin.h */ -// redefining SIMDUTF_IMPLEMENTATION to "lsx" -// #define SIMDUTF_IMPLEMENTATION lsx -#define SIMDUTF_SIMD_HAS_UNSIGNED_CMP 1 -/* end file src/simdutf/lsx/begin.h */ - - // Declarations -/* begin file src/simdutf/lsx/intrinsics.h */ -#ifndef SIMDUTF_LSX_INTRINSICS_H -#define SIMDUTF_LSX_INTRINSICS_H - - -// This should be the correct header whether -// you use visual studio or other compilers. -#include - -/* -Encoding of argument for LoongArch64 xvldi instruction. See: -https://jia.je/unofficial-loongarch-intrinsics-guide/lasx/misc/#__m256i-__lasx_xvldi-imm_n1024_1023-imm - -1: imm[12:8]=0b10000: broadcast imm[7:0] as 32-bit elements to all lanes - -2: imm[12:8]=0b10001: broadcast imm[7:0] << 8 as 32-bit elements to all lanes - -3: imm[12:8]=0b10010: broadcast imm[7:0] << 16 as 32-bit elements to all lanes - -4: imm[12:8]=0b10011: broadcast imm[7:0] << 24 as 32-bit elements to all lanes - -5: imm[12:8]=0b10100: broadcast imm[7:0] as 16-bit elements to all lanes - -6: imm[12:8]=0b10101: broadcast imm[7:0] << 8 as 16-bit elements to all lanes - -7: imm[12:8]=0b10110: broadcast (imm[7:0] << 8) | 0xFF as 32-bit elements to all -lanes - -8: imm[12:8]=0b10111: broadcast (imm[7:0] << 16) | 0xFFFF as 32-bit elements to -all lanes - -9: imm[12:8]=0b11000: broadcast imm[7:0] as 8-bit elements to all lanes - -10: imm[12:8]=0b11001: repeat each bit of imm[7:0] eight times, and broadcast -the result as 64-bit elements to all lanes -*/ - -namespace vldi { - -template class const_u16 { - constexpr static const uint8_t b0 = ((v >> 0 * 8) & 0xff); - constexpr static const uint8_t b1 = ((v >> 1 * 8) & 0xff); - - constexpr static bool is_case5 = uint16_t(b0) == v; - constexpr static bool is_case6 = (uint16_t(b1) << 8) == v; - constexpr static bool is_case9 = (b0 == b1); - constexpr static bool is_case10 = - ((b0 == 0xff) || (b0 == 0x00)) && ((b1 == 0xff) || (b1 == 0x00)); - -public: - constexpr static uint16_t operation = is_case5 ? 0b10100 - : is_case6 ? 0b10101 - : is_case9 ? 0b11000 - : is_case10 ? 0x11001 - : 0xffff; - - constexpr static uint16_t byte = - is_case5 ? b0 - : is_case6 ? b1 - : is_case9 ? b0 - : is_case10 ? ((b0 ? 0x55 : 0x00) | (b1 ? 0xaa : 0x00)) - : 0xffff; - - constexpr static int value = int((operation << 8) | byte) - 8192; - constexpr static bool valid = operation != 0xffff; -}; - -template class const_u32 { - constexpr static const uint8_t b0 = (v & 0xff); - constexpr static const uint8_t b1 = ((v >> 8) & 0xff); - constexpr static const uint8_t b2 = ((v >> 16) & 0xff); - constexpr static const uint8_t b3 = ((v >> 24) & 0xff); - - constexpr static bool is_case1 = (uint32_t(b0) == v); - constexpr static bool is_case2 = ((uint32_t(b1) << 8) == v); - constexpr static bool is_case3 = ((uint32_t(b2) << 16) == v); - constexpr static bool is_case4 = ((uint32_t(b3) << 24) == v); - constexpr static bool is_case5 = (b0 == b2) && (b1 == 0) && (b3 == 0); - constexpr static bool is_case6 = (b1 == b3) && (b0 == 0) && (b2 == 0); - constexpr static bool is_case7 = (b3 == 0) && (b2 == 0) && (b0 == 0xff); - constexpr static bool is_case8 = (b3 == 0) && (b1 == 0xff) && (b0 == 0xff); - constexpr static bool is_case9 = (b0 == b1) && (b0 == b2) && (b0 == b3); - constexpr static bool is_case10 = - ((b0 == 0xff) || (b0 == 0x00)) && ((b1 == 0xff) || (b1 == 0x00)) && - ((b2 == 0xff) || (b2 == 0x00)) && ((b3 == 0xff) || (b3 == 0x00)); - -public: - constexpr static uint16_t operation = is_case1 ? 0b10000 - : is_case2 ? 0b10001 - : is_case3 ? 0b10010 - : is_case4 ? 0b10011 - : is_case5 ? 0b10100 - : is_case6 ? 0b10101 - : is_case7 ? 0b10110 - : is_case8 ? 0b10111 - : is_case9 ? 0b11000 - : is_case10 ? 0b11001 - : 0xffff; - - constexpr static uint16_t byte = - is_case1 ? b0 - : is_case2 ? b1 - : is_case3 ? b2 - : is_case4 ? b3 - : is_case5 ? b0 - : is_case6 ? b1 - : is_case7 ? b1 - : is_case8 ? b2 - : is_case9 ? b0 - : is_case10 ? ((b0 ? 0x11 : 0x00) | (b1 ? 0x22 : 0x00) | - (b2 ? 0x44 : 0x00) | (b3 ? 0x88 : 0x00)) - : 0xffff; - - constexpr static int value = int((operation << 8) | byte) - 8192; - constexpr static bool valid = operation != 0xffff; -}; - -template class const_u64 { - constexpr static const uint8_t b0 = ((v >> 0 * 8) & 0xff); - constexpr static const uint8_t b1 = ((v >> 1 * 8) & 0xff); - constexpr static const uint8_t b2 = ((v >> 2 * 8) & 0xff); - constexpr static const uint8_t b3 = ((v >> 3 * 8) & 0xff); - constexpr static const uint8_t b4 = ((v >> 4 * 8) & 0xff); - constexpr static const uint8_t b5 = ((v >> 5 * 8) & 0xff); - constexpr static const uint8_t b6 = ((v >> 6 * 8) & 0xff); - constexpr static const uint8_t b7 = ((v >> 7 * 8) & 0xff); - - constexpr static bool is_case10 = - ((b0 == 0xff) || (b0 == 0x00)) && ((b1 == 0xff) || (b1 == 0x00)) && - ((b2 == 0xff) || (b2 == 0x00)) && ((b3 == 0xff) || (b3 == 0x00)) && - ((b4 == 0xff) || (b4 == 0x00)) && ((b5 == 0xff) || (b5 == 0x00)) && - ((b6 == 0xff) || (b6 == 0x00)) && ((b7 == 0xff) || (b7 == 0x00)); - -public: - constexpr static bool is_32bit = - ((v & 0xffffffff) == (v >> 32)) && const_u32<(v >> 32)>::value; - constexpr static uint8_t op_32bit = const_u32<(v >> 32)>::operation; - constexpr static uint8_t byte_32bit = const_u32<(v >> 32)>::byte; - - constexpr static uint16_t operation = is_32bit ? op_32bit - : is_case10 ? 0x11001 - : 0xffff; - - constexpr static uint16_t byte = - is_32bit ? byte_32bit - : is_case10 - ? ((b0 ? 0x01 : 0x00) | (b1 ? 0x02 : 0x00) | (b2 ? 0x04 : 0x00) | - (b3 ? 0x08 : 0x00) | (b4 ? 0x10 : 0x00) | (b5 ? 0x20 : 0x00) | - (b6 ? 0x40 : 0x00) | (b7 ? 0x80 : 0x00)) - : 0xffff; - - constexpr static int value = int((operation << 8) | byte) - 8192; - constexpr static bool valid = operation != 0xffff; -}; -} // namespace vldi - -// Uncomment when running under QEMU affected -// by bug https://gitlab.com/qemu-project/qemu/-/issues/2865 -// Versions <= 9.2.2 are affected, likely anything newer is correct. -#ifndef QEMU_VLDI_BUG -// #define QEMU_VLDI_BUG 1 -#endif - -#ifdef QEMU_VLDI_BUG - #define lsx_splat_u16(v) __lsx_vreplgr2vr_h(v) - #define lsx_splat_u32(v) __lsx_vreplgr2vr_w(v) -#else -template constexpr __m128i lsx_splat_u16_aux() { - constexpr bool is_imm10 = (int16_t(x) < 512) && (int16_t(x) > -512); - constexpr uint16_t imm10 = is_imm10 ? x : 0; - constexpr bool is_vldi = vldi::const_u16::valid; - constexpr int vldi_imm = is_vldi ? vldi::const_u16::value : 0; - - return is_imm10 ? __lsx_vrepli_h(int16_t(imm10)) - : is_vldi ? __lsx_vldi(vldi_imm) - : __lsx_vreplgr2vr_h(x); -} - -template constexpr __m128i lsx_splat_u32_aux() { - constexpr bool is_imm10 = (int32_t(x) < 512) && (int32_t(x) > -512); - constexpr uint32_t imm10 = is_imm10 ? x : 0; - constexpr bool is_vldi = vldi::const_u32::valid; - constexpr int vldi_imm = is_vldi ? vldi::const_u32::value : 0; - - return is_imm10 ? __lsx_vrepli_w(int32_t(imm10)) - : is_vldi ? __lsx_vldi(vldi_imm) - : __lsx_vreplgr2vr_w(x); -} - - #define lsx_splat_u16(v) lsx_splat_u16_aux<(v)>() - #define lsx_splat_u32(v) lsx_splat_u32_aux<(v)>() -#endif // QEMU_VLDI_BUG - -#endif // SIMDUTF_LSX_INTRINSICS_H -/* end file src/simdutf/lsx/intrinsics.h */ -/* begin file src/simdutf/lsx/bitmanipulation.h */ -#ifndef SIMDUTF_LSX_BITMANIPULATION_H -#define SIMDUTF_LSX_BITMANIPULATION_H - -#include - -namespace simdutf { -namespace lsx { -namespace { - -simdutf_really_inline int count_ones(uint64_t input_num) { - return __lsx_vpickve2gr_w(__lsx_vpcnt_d(__lsx_vreplgr2vr_d(input_num)), 0); -} - -#if SIMDUTF_NEED_TRAILING_ZEROES -// simdutf_really_inline int trailing_zeroes(uint64_t input_num) { -// return __builtin_ctzll(input_num); -// } -#endif - -} // unnamed namespace -} // namespace lsx -} // namespace simdutf - -#endif // SIMDUTF_LSX_BITMANIPULATION_H -/* end file src/simdutf/lsx/bitmanipulation.h */ -/* begin file src/simdutf/lsx/simd.h */ -#ifndef SIMDUTF_LSX_SIMD_H -#define SIMDUTF_LSX_SIMD_H - - -namespace simdutf { -namespace lsx { -namespace { -namespace simd { - -template struct simd8; - -// -// Base class of simd8 and simd8, both of which use __m128i -// internally. -// -template > struct base_u8 { - __m128i value; - static const int SIZE = sizeof(value); - - // Conversion from/to SIMD register - simdutf_really_inline base_u8(const __m128i _value) : value(_value) {} - simdutf_really_inline operator const __m128i &() const { return this->value; } - simdutf_really_inline operator __m128i &() { return this->value; } - - // Bit operations - simdutf_really_inline simd8 operator|(const simd8 other) const { - return __lsx_vor_v(this->value, other); - } - simdutf_really_inline simd8 operator&(const simd8 other) const { - return __lsx_vand_v(this->value, other); - } - simdutf_really_inline simd8 operator^(const simd8 other) const { - return __lsx_vxor_v(this->value, other); - } - simdutf_really_inline simd8 operator~() const { return *this ^ 0xFFu; } - simdutf_really_inline simd8 &operator|=(const simd8 other) { - auto this_cast = static_cast *>(this); - *this_cast = *this_cast | other; - return *this_cast; - } - - friend simdutf_really_inline Mask operator==(const simd8 lhs, - const simd8 rhs) { - return __lsx_vseq_b(lhs, rhs); - } - - template - simdutf_really_inline simd8 prev(const simd8 prev_chunk) const { - return __lsx_vor_v(__lsx_vbsll_v(this->value, N), - __lsx_vbsrl_v(prev_chunk.value, 16 - N)); - } -}; - -// SIMD byte mask type (returned by things like eq and gt) -template <> struct simd8 : base_u8 { - typedef uint16_t bitmask_t; - typedef uint32_t bitmask2_t; - - static simdutf_really_inline simd8 splat(bool _value) { - return __lsx_vreplgr2vr_b(uint8_t(-(!!_value))); - } - - simdutf_really_inline simd8(const __m128i _value) : base_u8(_value) {} - // False constructor - simdutf_really_inline simd8() : simd8(__lsx_vldi(0)) {} - // Splat constructor - simdutf_really_inline simd8(bool _value) : simd8(splat(_value)) {} - simdutf_really_inline void store(uint8_t dst[16]) const { - return __lsx_vst(this->value, dst, 0); - } - - simdutf_really_inline uint32_t to_bitmask() const { - return __lsx_vpickve2gr_wu(__lsx_vmsknz_b(*this), 0); - } -}; - -// Unsigned bytes -template <> struct simd8 : base_u8 { - static simdutf_really_inline simd8 splat(uint8_t _value) { - return __lsx_vreplgr2vr_b(_value); - } - static simdutf_really_inline simd8 zero() { return __lsx_vldi(0); } - static simdutf_really_inline simd8 load(const uint8_t *values) { - return __lsx_vld(values, 0); - } - simdutf_really_inline simd8(const __m128i _value) - : base_u8(_value) {} - // Zero constructor - simdutf_really_inline simd8() : simd8(zero()) {} - // Array constructor - simdutf_really_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} - // Splat constructor - simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} - // Member-by-member initialization - simdutf_really_inline - simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, - uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, - uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) - : simd8((__m128i)v16u8{v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, - v12, v13, v14, v15}) {} - - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdutf_really_inline static simd8 - repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, - uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, - uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, - uint8_t v15) { - return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, - v13, v14, v15); - } - - // Store to array - simdutf_really_inline void store(uint8_t dst[16]) const { - return __lsx_vst(this->value, dst, 0); - } - - // Order-specific operations - simdutf_really_inline simd8 - operator>=(const simd8 other) const { - return __lsx_vsle_bu(other, *this); - } - simdutf_really_inline simd8 - operator>(const simd8 other) const { - return __lsx_vslt_bu(other, *this); - } - simdutf_really_inline simd8 &operator-=(const simd8 other) { - value = __lsx_vsub_b(value, other.value); - return *this; - } - // Same as >, but instead of guaranteeing all 1's == true, false = 0 and true - // = nonzero. For ARM, returns all 1's. - simdutf_really_inline simd8 - gt_bits(const simd8 other) const { - return simd8(*this > other); - } - - // Bit-specific operations - simdutf_really_inline simd8 any_bits_set(simd8 bits) const { - return __lsx_vslt_bu(__lsx_vldi(0), __lsx_vand_v(this->value, bits)); - } - simdutf_really_inline bool is_ascii() const { - return __lsx_vpickve2gr_hu(__lsx_vmskgez_b(this->value), 0) == 0xFFFF; - } - - simdutf_really_inline bool any_bits_set_anywhere() const { - return __lsx_vpickve2gr_hu(__lsx_vmsknz_b(this->value), 0) > 0; - } - template simdutf_really_inline simd8 shr() const { - return __lsx_vsrli_b(this->value, N); - } - template simdutf_really_inline simd8 shl() const { - return __lsx_vslli_b(this->value, N); - } - - // Perform a lookup assuming the value is between 0 and 16 (undefined behavior - // for out of range values) - template - simdutf_really_inline simd8 lookup_16(simd8 lookup_table) const { - return lookup_table.apply_lookup_16_to(*this); - } - - template - simdutf_really_inline simd8 - lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, - L replace5, L replace6, L replace7, L replace8, L replace9, - L replace10, L replace11, L replace12, L replace13, L replace14, - L replace15) const { - return lookup_16(simd8::repeat_16( - replace0, replace1, replace2, replace3, replace4, replace5, replace6, - replace7, replace8, replace9, replace10, replace11, replace12, - replace13, replace14, replace15)); - } - - template - simdutf_really_inline simd8 - apply_lookup_16_to(const simd8 original) const { - __m128i original_tmp = __lsx_vand_v(original, __lsx_vldi(0x1f)); - return __lsx_vshuf_b(__lsx_vldi(0), *this, simd8(original_tmp)); - } - - simdutf_really_inline uint64_t sum_bytes() const { - const auto sum_u16 = __lsx_vhaddw_hu_bu(value, value); - const auto sum_u32 = __lsx_vhaddw_wu_hu(sum_u16, sum_u16); - const auto sum_u64 = __lsx_vhaddw_du_wu(sum_u32, sum_u32); - - return uint64_t(__lsx_vpickve2gr_du(sum_u64, 0)) + - uint64_t(__lsx_vpickve2gr_du(sum_u64, 1)); - } -}; - -// Signed bytes -template <> struct simd8 { - __m128i value; - - static const int SIZE = sizeof(value); - - static simdutf_really_inline simd8 splat(int8_t _value) { - return __lsx_vreplgr2vr_b(_value); - } - static simdutf_really_inline simd8 zero() { return __lsx_vldi(0); } - static simdutf_really_inline simd8 load(const int8_t values[16]) { - return __lsx_vld(values, 0); - } - - template - simdutf_really_inline void store_ascii_as_utf16(char16_t *p) const { - __m128i zero = __lsx_vldi(0); - if (match_system(big_endian)) { - __lsx_vst(__lsx_vilvl_b(zero, (__m128i)this->value), - reinterpret_cast(p), 0); - __lsx_vst(__lsx_vilvh_b(zero, (__m128i)this->value), - reinterpret_cast(p + 8), 0); - } else { - __lsx_vst(__lsx_vilvl_b((__m128i)this->value, zero), - reinterpret_cast(p), 0); - __lsx_vst(__lsx_vilvh_b((__m128i)this->value, zero), - reinterpret_cast(p + 8), 0); - } - } - - simdutf_really_inline void store_ascii_as_utf32(char32_t *p) const { - __m128i zero = __lsx_vldi(0); - __m128i in16low = __lsx_vilvl_b(zero, (__m128i)this->value); - __m128i in16high = __lsx_vilvh_b(zero, (__m128i)this->value); - __m128i in32_0 = __lsx_vilvl_h(zero, in16low); - __m128i in32_1 = __lsx_vilvh_h(zero, in16low); - __m128i in32_2 = __lsx_vilvl_h(zero, in16high); - __m128i in32_3 = __lsx_vilvh_h(zero, in16high); - __lsx_vst(in32_0, reinterpret_cast(p), 0); - __lsx_vst(in32_1, reinterpret_cast(p + 4), 0); - __lsx_vst(in32_2, reinterpret_cast(p + 8), 0); - __lsx_vst(in32_3, reinterpret_cast(p + 12), 0); - } - - // In places where the table can be reused, which is most uses in simdutf, it - // is worth it to do 4 table lookups, as there is no direct zero extension - // from u8 to u32. - simdutf_really_inline void store_ascii_as_utf32_tbl(char32_t *p) const { - const simd8 tb1{0, 255, 255, 255, 1, 255, 255, 255, - 2, 255, 255, 255, 3, 255, 255, 255}; - const simd8 tb2{4, 255, 255, 255, 5, 255, 255, 255, - 6, 255, 255, 255, 7, 255, 255, 255}; - const simd8 tb3{8, 255, 255, 255, 9, 255, 255, 255, - 10, 255, 255, 255, 11, 255, 255, 255}; - const simd8 tb4{12, 255, 255, 255, 13, 255, 255, 255, - 14, 255, 255, 255, 15, 255, 255, 255}; - - // encourage store pairing and interleaving - const auto shuf1 = this->apply_lookup_16_to(tb1); - const auto shuf2 = this->apply_lookup_16_to(tb2); - shuf1.store(reinterpret_cast(p)); - shuf2.store(reinterpret_cast(p + 4)); - - const auto shuf3 = this->apply_lookup_16_to(tb3); - const auto shuf4 = this->apply_lookup_16_to(tb4); - shuf3.store(reinterpret_cast(p + 8)); - shuf4.store(reinterpret_cast(p + 12)); - } - // Conversion from/to SIMD register - simdutf_really_inline simd8(const __m128i _value) : value(_value) {} - - // Zero constructor - simdutf_really_inline simd8() : simd8(zero()) {} - // Splat constructor - simdutf_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdutf_really_inline simd8(const int8_t *values) : simd8(load(values)) {} - - // Store to array - simdutf_really_inline void store(int8_t dst[16]) const { - return __lsx_vst(value, dst, 0); - } - - simdutf_really_inline operator simd8() const { - return ((__m128i)this->value); - } - - simdutf_really_inline simd8 - operator|(const simd8 other) const { - return __lsx_vor_v((__m128i)value, (__m128i)other.value); - } - - simdutf_really_inline bool is_ascii() const { - return (__lsx_vpickve2gr_hu(__lsx_vmskgez_b((__m128i)this->value), 0) == - 0xffff); - } - - // Order-sensitive comparisons - simdutf_really_inline simd8 operator>(const simd8 other) const { - return __lsx_vslt_b((__m128i)other.value, (__m128i)value); - } - simdutf_really_inline simd8 operator<(const simd8 other) const { - return __lsx_vslt_b((__m128i)value, (__m128i)other.value); - } - - template - simdutf_really_inline simd8 - prev(const simd8 prev_chunk) const { - return __lsx_vor_v(__lsx_vbsll_v(this->value, N), - __lsx_vbsrl_v(prev_chunk.value, 16 - N)); - } - - template - simdutf_really_inline simd8 - apply_lookup_16_to(const simd8 original) const { - __m128i original_tmp = __lsx_vand_v(original, __lsx_vldi(0x1f)); - return __lsx_vshuf_b(__lsx_vldi(0), (__m128i)this->value, - simd8(original_tmp)); - } -}; - -template struct simd8x64 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); - static_assert( - NUM_CHUNKS == 4, - "LoongArch kernel should use four registers per 64-byte block."); - simd8 chunks[NUM_CHUNKS]; - - simd8x64(const simd8x64 &o) = delete; // no copy allowed - simd8x64 & - operator=(const simd8 other) = delete; // no assignment allowed - simd8x64() = delete; // no default constructor allowed - - simdutf_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1, - const simd8 chunk2, const simd8 chunk3) - : chunks{chunk0, chunk1, chunk2, chunk3} {} - simdutf_really_inline simd8x64(const T *ptr) - : chunks{simd8::load(ptr), - simd8::load(ptr + sizeof(simd8) / sizeof(T)), - simd8::load(ptr + 2 * sizeof(simd8) / sizeof(T)), - simd8::load(ptr + 3 * sizeof(simd8) / sizeof(T))} {} - - simdutf_really_inline void store(T *ptr) const { - this->chunks[0].store(ptr + sizeof(simd8) * 0 / sizeof(T)); - this->chunks[1].store(ptr + sizeof(simd8) * 1 / sizeof(T)); - this->chunks[2].store(ptr + sizeof(simd8) * 2 / sizeof(T)); - this->chunks[3].store(ptr + sizeof(simd8) * 3 / sizeof(T)); - } - - simdutf_really_inline simd8x64 &operator|=(const simd8x64 &other) { - this->chunks[0] |= other.chunks[0]; - this->chunks[1] |= other.chunks[1]; - this->chunks[2] |= other.chunks[2]; - this->chunks[3] |= other.chunks[3]; - return *this; - } - - simdutf_really_inline simd8 reduce_or() const { - return (this->chunks[0] | this->chunks[1]) | - (this->chunks[2] | this->chunks[3]); - } - - simdutf_really_inline bool is_ascii() const { return reduce_or().is_ascii(); } - - template - simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { - this->chunks[0].template store_ascii_as_utf16(ptr + - sizeof(simd8) * 0); - this->chunks[1].template store_ascii_as_utf16(ptr + - sizeof(simd8) * 1); - this->chunks[2].template store_ascii_as_utf16(ptr + - sizeof(simd8) * 2); - this->chunks[3].template store_ascii_as_utf16(ptr + - sizeof(simd8) * 3); - } - - simdutf_really_inline void store_ascii_as_utf32(char32_t *ptr) const { - this->chunks[0].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 0); - this->chunks[1].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 1); - this->chunks[2].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 2); - this->chunks[3].store_ascii_as_utf32_tbl(ptr + sizeof(simd8) * 3); - } - - simdutf_really_inline uint64_t to_bitmask() const { - __m128i mask = __lsx_vbsll_v(__lsx_vmsknz_b(this->chunks[3]), 6); - mask = __lsx_vor_v(mask, __lsx_vbsll_v(__lsx_vmsknz_b(this->chunks[2]), 4)); - mask = __lsx_vor_v(mask, __lsx_vbsll_v(__lsx_vmsknz_b(this->chunks[1]), 2)); - mask = __lsx_vor_v(mask, __lsx_vmsknz_b(this->chunks[0])); - return __lsx_vpickve2gr_du(mask, 0); - } - - simdutf_really_inline uint64_t lt(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] < mask, this->chunks[1] < mask, - this->chunks[2] < mask, this->chunks[3] < mask) - .to_bitmask(); - } - simdutf_really_inline uint64_t gt(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] > mask, this->chunks[1] > mask, - this->chunks[2] > mask, this->chunks[3] > mask) - .to_bitmask(); - } - simdutf_really_inline uint64_t gteq_unsigned(const uint8_t m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(simd8(this->chunks[0].value) >= mask, - simd8(this->chunks[1].value) >= mask, - simd8(this->chunks[2].value) >= mask, - simd8(this->chunks[3].value) >= mask) - .to_bitmask(); - } -}; // struct simd8x64 - -/* begin file src/simdutf/lsx/simd16-inl.h */ -template struct simd16; - -template > struct base_u16 { - __m128i value; - static const size_t SIZE = sizeof(value); - static const size_t ELEMENTS = sizeof(value) / sizeof(T); - - // Conversion from/to SIMD register - simdutf_really_inline base_u16() = default; - simdutf_really_inline base_u16(const __m128i _value) : value(_value) {} - // Bit operations - simdutf_really_inline simd16 operator|(const simd16 other) const { - return __lsx_vor_v(this->value, other.value); - } - simdutf_really_inline simd16 operator&(const simd16 other) const { - return __lsx_vand_v(this->value, other.value); - } - simdutf_really_inline simd16 operator~() const { return *this ^ 0xFFu; } - - friend simdutf_really_inline Mask operator==(const simd16 lhs, - const simd16 rhs) { - return __lsx_vseq_h(lhs.value, rhs.value); - } - - template - simdutf_really_inline simd16 prev(const simd16 prev_chunk) const { - return __lsx_vor_v(__lsx_vbsll_v(*this, N * 2), - __lsx_vbsrl_v(prev_chunk, 16 - N * 2)); - } -}; - -template > -struct base16 : base_u16 { - simdutf_really_inline base16() : base_u16() {} - simdutf_really_inline base16(const __m128i _value) : base_u16(_value) {} - template - simdutf_really_inline base16(const Pointer *ptr) - : base16(__lsx_vld(ptr, 0)) {} - - static const int SIZE = sizeof(base_u16::value); - - template - simdutf_really_inline simd16 prev(const simd16 prev_chunk) const { - return __lsx_vor_v(__lsx_vbsll_v(*this, N * 2), - __lsx_vbsrl_v(prev_chunk, 16 - N * 2)); - } -}; - -// SIMD byte mask type (returned by things like eq and gt) -template <> struct simd16 : base16 { - static simdutf_really_inline simd16 splat(bool _value) { - return __lsx_vreplgr2vr_h(uint16_t(-(!!_value))); - } - - simdutf_really_inline simd16() : base16() {} - simdutf_really_inline simd16(const __m128i _value) : base16(_value) {} -}; - -template struct base16_numeric : base16 { - static simdutf_really_inline simd16 splat(T _value) { - return __lsx_vreplgr2vr_h(_value); - } - static simdutf_really_inline simd16 zero() { return __lsx_vldi(0); } - - template - static simdutf_really_inline simd16 load(const Pointer values) { - return __lsx_vld(values, 0); - } - - simdutf_really_inline base16_numeric(const __m128i _value) - : base16(_value) {} - - // Store to array - simdutf_really_inline void store(T dst[8]) const { - return __lsx_vst(this->value, dst, 0); - } - - // Override to distinguish from bool version - simdutf_really_inline simd16 operator~() const { return *this ^ 0xFFu; } -}; - -// Unsigned code unitstemplate<> -template <> struct simd16 : base16_numeric { - simdutf_really_inline simd16(const __m128i _value) - : base16_numeric((__m128i)_value) {} - - // Splat constructor - simdutf_really_inline simd16(uint16_t _value) : simd16(splat(_value)) {} - - // Array constructor - simdutf_really_inline simd16(const uint16_t *values) : simd16(load(values)) {} - simdutf_really_inline simd16(const char16_t *values) - : simd16(load(reinterpret_cast(values))) {} - - // Copy constructor - simdutf_really_inline simd16(const simd16 mask) : simd16(mask.value) {} - - // Order-specific operations - simdutf_really_inline simd16 &operator+=(const simd16 other) { - value = __lsx_vadd_h(value, other.value); - return *this; - } - - template - static simdutf_really_inline simd8 - pack_shifted_right(const simd16 &v0, const simd16 &v1) { - return __lsx_vssrlni_bu_h(v1.value, v0.value, N); - } - - // Pack with the unsigned saturation of two uint16_t code units into single - // uint8_t vector - static simdutf_really_inline simd8 pack(const simd16 &v0, - const simd16 &v1) { - return pack_shifted_right<0>(v0, v1); - } - - // Change the endianness - simdutf_really_inline simd16 swap_bytes() const { - return __lsx_vshuf4i_b(this->value, 0b10110001); - } - - simdutf_really_inline uint64_t sum() const { - const auto sum_u32 = __lsx_vhaddw_wu_hu(value, value); - const auto sum_u64 = __lsx_vhaddw_du_wu(sum_u32, sum_u32); - - return uint64_t(__lsx_vpickve2gr_du(sum_u64, 0)) + - uint64_t(__lsx_vpickve2gr_du(sum_u64, 1)); - } -}; - -template struct simd16x32 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd16); - static_assert( - NUM_CHUNKS == 4, - "LOONGARCH kernel should use four registers per 64-byte block."); - simd16 chunks[NUM_CHUNKS]; - - simd16x32(const simd16x32 &o) = delete; // no copy allowed - simd16x32 & - operator=(const simd16 other) = delete; // no assignment allowed - simd16x32() = delete; // no default constructor allowed - - simdutf_really_inline - simd16x32(const simd16 chunk0, const simd16 chunk1, - const simd16 chunk2, const simd16 chunk3) - : chunks{chunk0, chunk1, chunk2, chunk3} {} - simdutf_really_inline simd16x32(const T *ptr) - : chunks{simd16::load(ptr), - simd16::load(ptr + sizeof(simd16) / sizeof(T)), - simd16::load(ptr + 2 * sizeof(simd16) / sizeof(T)), - simd16::load(ptr + 3 * sizeof(simd16) / sizeof(T))} {} - - simdutf_really_inline void store(T *ptr) const { - this->chunks[0].store(ptr + sizeof(simd16) * 0 / sizeof(T)); - this->chunks[1].store(ptr + sizeof(simd16) * 1 / sizeof(T)); - this->chunks[2].store(ptr + sizeof(simd16) * 2 / sizeof(T)); - this->chunks[3].store(ptr + sizeof(simd16) * 3 / sizeof(T)); - } - - simdutf_really_inline void swap_bytes() { - this->chunks[0] = this->chunks[0].swap_bytes(); - this->chunks[1] = this->chunks[1].swap_bytes(); - this->chunks[2] = this->chunks[2].swap_bytes(); - this->chunks[3] = this->chunks[3].swap_bytes(); - } -}; // struct simd16x32 - -simdutf_really_inline simd16 operator^(const simd16 a, - uint16_t b) { - const auto bv = __lsx_vreplgr2vr_h(b); - return __lsx_vxor_v(a.value, bv); -} - -simdutf_really_inline simd16 min(const simd16 a, - const simd16 b) { - return __lsx_vmin_hu(a.value, b.value); -} -/* end file src/simdutf/lsx/simd16-inl.h */ -/* begin file src/simdutf/lsx/simd32-inl.h */ -template struct simd32; - -template <> struct simd32 { - __m128i value; - static const int SIZE = sizeof(value); - static const int ELEMENTS = SIZE / sizeof(uint32_t); - - // constructors - simdutf_really_inline simd32(__m128i v) : value(v) {} - - template - simdutf_really_inline simd32(Ptr *ptr) : value(__lsx_vld(ptr, 0)) {} - - // in-place operators - simdutf_really_inline simd32 &operator-=(const simd32 other) { - value = __lsx_vsub_w(value, other.value); - return *this; - } - - // members - simdutf_really_inline uint64_t sum() const { - return uint64_t(__lsx_vpickve2gr_wu(value, 0)) + - uint64_t(__lsx_vpickve2gr_wu(value, 1)) + - uint64_t(__lsx_vpickve2gr_wu(value, 2)) + - uint64_t(__lsx_vpickve2gr_wu(value, 3)); - } - - // static members - static simdutf_really_inline simd32 splat(uint32_t x) { - return __lsx_vreplgr2vr_w(x); - } - - static simdutf_really_inline simd32 zero() { - return __lsx_vrepli_w(0); - } -}; - -// ------------------------------------------------------------ - -template <> struct simd32 { - __m128i value; - static const int SIZE = sizeof(value); - - // constructors - simdutf_really_inline simd32(__m128i v) : value(v) {} -}; - -// ------------------------------------------------------------ - -simdutf_really_inline simd32 operator&(const simd32 a, - const simd32 b) { - return __lsx_vor_v(a.value, b.value); -} - -simdutf_really_inline simd32 operator<(const simd32 a, - const simd32 b) { - return __lsx_vslt_wu(a.value, b.value); -} - -simdutf_really_inline simd32 operator>(const simd32 a, - const simd32 b) { - return __lsx_vslt_wu(b.value, a.value); -} - -// ------------------------------------------------------------ - -simdutf_really_inline simd32 as_vector_u32(const simd32 v) { - return v.value; -} -/* end file src/simdutf/lsx/simd32-inl.h */ -/* begin file src/simdutf/lsx/simd64-inl.h */ -template struct simd64; - -template <> struct simd64 { - __m128i value; - static const int SIZE = sizeof(value); - static const int ELEMENTS = SIZE / sizeof(uint64_t); - - // constructors - simdutf_really_inline simd64(__m128i v) : value(v) {} - - template - simdutf_really_inline simd64(Ptr *ptr) : value(__lsx_vld(ptr, 0)) {} - - // in-place operators - simdutf_really_inline simd64 &operator+=(const simd64 other) { - value = __lsx_vadd_d(value, other.value); - return *this; - } - - // members - simdutf_really_inline uint64_t sum() const { - return uint64_t(__lsx_vpickve2gr_du(value, 0)) + - uint64_t(__lsx_vpickve2gr_du(value, 1)); - } - - // static members - static simdutf_really_inline simd64 zero() { - return __lsx_vrepli_d(0); - } -}; - -// ------------------------------------------------------------ - -template <> struct simd64 { - __m128i value; - static const int SIZE = sizeof(value); - - // constructors - simdutf_really_inline simd64(__m128i v) : value(v) {} -}; - -// ------------------------------------------------------------ - -simd64 sum_8bytes(const simd8 v) { - const auto sum_u16 = __lsx_vhaddw_hu_bu(v, v); - const auto sum_u32 = __lsx_vhaddw_wu_hu(sum_u16, sum_u16); - const auto sum_u64 = __lsx_vhaddw_du_wu(sum_u32, sum_u32); - - return simd64(sum_u64); -} -/* end file src/simdutf/lsx/simd64-inl.h */ - -} // namespace simd -} // unnamed namespace -} // namespace lsx -} // namespace simdutf - -#endif // SIMDUTF_LSX_SIMD_H -/* end file src/simdutf/lsx/simd.h */ - -/* begin file src/simdutf/lsx/end.h */ -#undef SIMDUTF_SIMD_HAS_UNSIGNED_CMP -/* end file src/simdutf/lsx/end.h */ - -#endif // SIMDUTF_IMPLEMENTATION_LSX - -#endif // SIMDUTF_LSX_H -/* end file src/simdutf/lsx.h */ -/* begin file src/simdutf/lasx.h */ -#ifndef SIMDUTF_LASX_H -#define SIMDUTF_LASX_H - -#ifdef SIMDUTF_FALLBACK_H - #error "lasx.h must be included before fallback.h" -#endif - - -#ifndef SIMDUTF_IMPLEMENTATION_LASX - #define SIMDUTF_IMPLEMENTATION_LASX (SIMDUTF_IS_LASX) -#endif -#if SIMDUTF_IMPLEMENTATION_LASX && SIMDUTF_IS_LASX - #define SIMDUTF_CAN_ALWAYS_RUN_LASX 1 -#else - #define SIMDUTF_CAN_ALWAYS_RUN_LASX 0 -#endif - -#define SIMDUTF_CAN_ALWAYS_RUN_FALLBACK (SIMDUTF_IMPLEMENTATION_FALLBACK) - -#if SIMDUTF_IMPLEMENTATION_LASX - -namespace simdutf { -/** - * Implementation for LoongArch ASX. - */ -namespace lasx {} // namespace lasx -} // namespace simdutf - -/* begin file src/simdutf/lasx/implementation.h */ -#ifndef SIMDUTF_LASX_IMPLEMENTATION_H -#define SIMDUTF_LASX_IMPLEMENTATION_H - - -namespace simdutf { -namespace lasx { - -namespace { -using namespace simdutf; -} - -class implementation final : public simdutf::implementation { -public: - simdutf_really_inline implementation() - : simdutf::implementation("lasx", "LOONGARCH ASX", - internal::instruction_set::LSX | - internal::instruction_set::LASX) {} -#if SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused int detect_encodings(const char *input, - size_t length) const noexcept final; -#endif // SIMDUTF_FEATURE_DETECT_ENCODING -#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused bool validate_utf8(const char *buf, - size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING -#if SIMDUTF_FEATURE_UTF8 - simdutf_warn_unused result - validate_utf8_with_errors(const char *buf, size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 -#if SIMDUTF_FEATURE_ASCII - simdutf_warn_unused bool validate_ascii(const char *buf, - size_t len) const noexcept final; - simdutf_warn_unused result - validate_ascii_with_errors(const char *buf, size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_ASCII - -#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused bool validate_utf16le(const char16_t *buf, - size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING -#if SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused bool validate_utf16be(const char16_t *buf, - size_t len) const noexcept final; - simdutf_warn_unused result validate_utf16le_with_errors( - const char16_t *buf, size_t len) const noexcept final; - simdutf_warn_unused result validate_utf16be_with_errors( - const char16_t *buf, size_t len) const noexcept final; - void to_well_formed_utf16be(const char16_t *input, size_t len, - char16_t *output) const noexcept final; - void to_well_formed_utf16le(const char16_t *input, size_t len, - char16_t *output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused bool validate_utf32(const char32_t *buf, - size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING -#if SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused result validate_utf32_with_errors( - const char32_t *buf, size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF32 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_latin1_to_utf8( - const char *buf, size_t len, char *utf8_output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_latin1_to_utf16le( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_latin1_to_utf16be( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_latin1_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_latin1_with_errors( - const char *buf, size_t len, char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t convert_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t convert_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf32_with_errors( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - convert_utf16le_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16be_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( - const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( - const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16le_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16be_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t convert_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - convert_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; - simdutf_warn_unused result - convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t convert_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf8_with_errors( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - convert_utf32_to_utf16le(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf32_to_utf16be(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_utf16le(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_utf16be(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16le_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16be_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16le_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16be_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -#if SIMDUTF_FEATURE_UTF16 - void change_endianness_utf16(const char16_t *buf, size_t length, - char16_t *output) const noexcept final; - simdutf_warn_unused size_t count_utf16le(const char16_t *buf, - size_t length) const noexcept; - simdutf_warn_unused size_t count_utf16be(const char16_t *buf, - size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_UTF8 - simdutf_warn_unused size_t count_utf8(const char *buf, - size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t - utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t utf32_length_from_utf16le( - const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t utf32_length_from_utf16be( - const char16_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t - utf16_length_from_utf8(const char *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf32_length_from_utf8(const char *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - latin1_length_from_utf8(const char *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - utf8_length_from_latin1(const char *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_BASE64 - simdutf_warn_unused result base64_to_binary( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused full_result base64_to_binary_details( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused result - base64_to_binary(const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused full_result base64_to_binary_details( - const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - size_t binary_to_base64(const char *input, size_t length, char *output, - base64_options options) const noexcept; -#endif // SIMDUTF_FEATURE_BASE64 -}; - -} // namespace lasx -} // namespace simdutf - -#endif // SIMDUTF_LASX_IMPLEMENTATION_H -/* end file src/simdutf/lasx/implementation.h */ - -/* begin file src/simdutf/lasx/begin.h */ -// redefining SIMDUTF_IMPLEMENTATION to "lasx" -// #define SIMDUTF_IMPLEMENTATION lasx -#define SIMDUTF_SIMD_HAS_UNSIGNED_CMP 1 -/* end file src/simdutf/lasx/begin.h */ - - // Declarations -/* begin file src/simdutf/lasx/intrinsics.h */ -#ifndef SIMDUTF_LASX_INTRINSICS_H -#define SIMDUTF_LASX_INTRINSICS_H - - -// This should be the correct header whether -// you use visual studio or other compilers. -#include -#include - -#if defined(__loongarch_asx) - #ifdef __clang__ - #define VREGS_PREFIX "$vr" - #define XREGS_PREFIX "$xr" - #else // GCC - #define VREGS_PREFIX "$f" - #define XREGS_PREFIX "$f" - #endif - #define __ALL_REGS \ - "0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26," \ - "27,28,29,30,31" -// Convert __m128i to __m256i -static inline __m256i ____m256i(__m128i in) { - __m256i out = __lasx_xvldi(0); - __asm__ volatile(".irp i," __ALL_REGS "\n\t" - " .ifc %[out], " XREGS_PREFIX "\\i \n\t" - " .irp j," __ALL_REGS "\n\t" - " .ifc %[in], " VREGS_PREFIX "\\j \n\t" - " xvpermi.q $xr\\i, $xr\\j, 0x0 \n\t" - " .endif \n\t" - " .endr \n\t" - " .endif \n\t" - ".endr \n\t" - : [out] "+f"(out) - : [in] "f"(in)); - return out; -} -// Convert two __m128i to __m256i -static inline __m256i lasx_set_q(__m128i inhi, __m128i inlo) { - __m256i out; - __asm__ volatile(".irp i," __ALL_REGS "\n\t" - " .ifc %[hi], " VREGS_PREFIX "\\i \n\t" - " .irp j," __ALL_REGS "\n\t" - " .ifc %[lo], " VREGS_PREFIX "\\j \n\t" - " xvpermi.q $xr\\i, $xr\\j, 0x20 \n\t" - " .endif \n\t" - " .endr \n\t" - " .endif \n\t" - ".endr \n\t" - ".ifnc %[out], %[hi] \n\t" - ".irp i," __ALL_REGS "\n\t" - " .ifc %[out], " XREGS_PREFIX "\\i \n\t" - " .irp j," __ALL_REGS "\n\t" - " .ifc %[hi], " VREGS_PREFIX "\\j \n\t" - " xvori.b $xr\\i, $xr\\j, 0 \n\t" - " .endif \n\t" - " .endr \n\t" - " .endif \n\t" - ".endr \n\t" - ".endif \n\t" - : [out] "=f"(out), [hi] "+f"(inhi) - : [lo] "f"(inlo)); - return out; -} -// Convert __m256i low part to __m128i -static inline __m128i lasx_extracti128_lo(__m256i in) { - __m128i out; - __asm__ volatile(".ifnc %[out], %[in] \n\t" - ".irp i," __ALL_REGS "\n\t" - " .ifc %[out], " VREGS_PREFIX "\\i \n\t" - " .irp j," __ALL_REGS "\n\t" - " .ifc %[in], " XREGS_PREFIX "\\j \n\t" - " vori.b $vr\\i, $vr\\j, 0 \n\t" - " .endif \n\t" - " .endr \n\t" - " .endif \n\t" - ".endr \n\t" - ".endif \n\t" - : [out] "=f"(out) - : [in] "f"(in)); - return out; -} -// Convert __m256i high part to __m128i -static inline __m128i lasx_extracti128_hi(__m256i in) { - __m128i out; - __asm__ volatile(".irp i," __ALL_REGS "\n\t" - " .ifc %[out], " VREGS_PREFIX "\\i \n\t" - " .irp j," __ALL_REGS "\n\t" - " .ifc %[in], " XREGS_PREFIX "\\j \n\t" - " xvpermi.q $xr\\i, $xr\\j, 0x11 \n\t" - " .endif \n\t" - " .endr \n\t" - " .endif \n\t" - ".endr \n\t" - : [out] "=f"(out) - : [in] "f"(in)); - return out; -} -#endif - -/* -Encoding of argument for LoongArch64 xvldi instruction. See: -https://jia.je/unofficial-loongarch-intrinsics-guide/lasx/misc/#__m256i-__lasx_xvldi-imm_n1024_1023-imm - -1: imm[12:8]=0b10000: broadcast imm[7:0] as 32-bit elements to all lanes - -2: imm[12:8]=0b10001: broadcast imm[7:0] << 8 as 32-bit elements to all lanes - -3: imm[12:8]=0b10010: broadcast imm[7:0] << 16 as 32-bit elements to all lanes - -4: imm[12:8]=0b10011: broadcast imm[7:0] << 24 as 32-bit elements to all lanes - -5: imm[12:8]=0b10100: broadcast imm[7:0] as 16-bit elements to all lanes - -6: imm[12:8]=0b10101: broadcast imm[7:0] << 8 as 16-bit elements to all lanes - -7: imm[12:8]=0b10110: broadcast (imm[7:0] << 8) | 0xFF as 32-bit elements to all -lanes - -8: imm[12:8]=0b10111: broadcast (imm[7:0] << 16) | 0xFFFF as 32-bit elements to -all lanes - -9: imm[12:8]=0b11000: broadcast imm[7:0] as 8-bit elements to all lanes - -10: imm[12:8]=0b11001: repeat each bit of imm[7:0] eight times, and broadcast -the result as 64-bit elements to all lanes -*/ - -namespace lasx_vldi { - -template class const_u16 { - constexpr static const uint8_t b0 = ((v >> 0 * 8) & 0xff); - constexpr static const uint8_t b1 = ((v >> 1 * 8) & 0xff); - - constexpr static bool is_case5 = uint16_t(b0) == v; - constexpr static bool is_case6 = (uint16_t(b1) << 8) == v; - constexpr static bool is_case9 = (b0 == b1); - constexpr static bool is_case10 = - ((b0 == 0xff) || (b0 == 0x00)) && ((b1 == 0xff) || (b1 == 0x00)); - -public: - constexpr static uint16_t operation = is_case5 ? 0b10100 - : is_case6 ? 0b10101 - : is_case9 ? 0b11000 - : is_case10 ? 0x11001 - : 0xffff; - - constexpr static uint16_t byte = - is_case5 ? b0 - : is_case6 ? b1 - : is_case9 ? b0 - : is_case10 ? ((b0 ? 0x55 : 0x00) | (b1 ? 0xaa : 0x00)) - : 0xffff; - - constexpr static int value = int((operation << 8) | byte) - 8192; - constexpr static bool valid = operation != 0xffff; -}; - -template class const_u32 { - constexpr static const uint8_t b0 = (v & 0xff); - constexpr static const uint8_t b1 = ((v >> 8) & 0xff); - constexpr static const uint8_t b2 = ((v >> 16) & 0xff); - constexpr static const uint8_t b3 = ((v >> 24) & 0xff); - - constexpr static bool is_case1 = (uint32_t(b0) == v); - constexpr static bool is_case2 = ((uint32_t(b1) << 8) == v); - constexpr static bool is_case3 = ((uint32_t(b2) << 16) == v); - constexpr static bool is_case4 = ((uint32_t(b3) << 24) == v); - constexpr static bool is_case5 = (b0 == b2) && (b1 == 0) && (b3 == 0); - constexpr static bool is_case6 = (b1 == b3) && (b0 == 0) && (b2 == 0); - constexpr static bool is_case7 = (b3 == 0) && (b2 == 0) && (b0 == 0xff); - constexpr static bool is_case8 = (b3 == 0) && (b1 == 0xff) && (b0 == 0xff); - constexpr static bool is_case9 = (b0 == b1) && (b0 == b2) && (b0 == b3); - constexpr static bool is_case10 = - ((b0 == 0xff) || (b0 == 0x00)) && ((b1 == 0xff) || (b1 == 0x00)) && - ((b2 == 0xff) || (b2 == 0x00)) && ((b3 == 0xff) || (b3 == 0x00)); - -public: - constexpr static uint16_t operation = is_case1 ? 0b10000 - : is_case2 ? 0b10001 - : is_case3 ? 0b10010 - : is_case4 ? 0b10011 - : is_case5 ? 0b10100 - : is_case6 ? 0b10101 - : is_case7 ? 0b10110 - : is_case8 ? 0b10111 - : is_case9 ? 0b11000 - : is_case10 ? 0b11001 - : 0xffff; - - constexpr static uint16_t byte = - is_case1 ? b0 - : is_case2 ? b1 - : is_case3 ? b2 - : is_case4 ? b3 - : is_case5 ? b0 - : is_case6 ? b1 - : is_case7 ? b1 - : is_case8 ? b2 - : is_case9 ? b0 - : is_case10 ? ((b0 ? 0x11 : 0x00) | (b1 ? 0x22 : 0x00) | - (b2 ? 0x44 : 0x00) | (b3 ? 0x88 : 0x00)) - : 0xffff; - - constexpr static int value = int((operation << 8) | byte) - 8192; - constexpr static bool valid = operation != 0xffff; -}; - -template class const_u64 { - constexpr static const uint8_t b0 = ((v >> 0 * 8) & 0xff); - constexpr static const uint8_t b1 = ((v >> 1 * 8) & 0xff); - constexpr static const uint8_t b2 = ((v >> 2 * 8) & 0xff); - constexpr static const uint8_t b3 = ((v >> 3 * 8) & 0xff); - constexpr static const uint8_t b4 = ((v >> 4 * 8) & 0xff); - constexpr static const uint8_t b5 = ((v >> 5 * 8) & 0xff); - constexpr static const uint8_t b6 = ((v >> 6 * 8) & 0xff); - constexpr static const uint8_t b7 = ((v >> 7 * 8) & 0xff); - - constexpr static bool is_case10 = - ((b0 == 0xff) || (b0 == 0x00)) && ((b1 == 0xff) || (b1 == 0x00)) && - ((b2 == 0xff) || (b2 == 0x00)) && ((b3 == 0xff) || (b3 == 0x00)) && - ((b4 == 0xff) || (b4 == 0x00)) && ((b5 == 0xff) || (b5 == 0x00)) && - ((b6 == 0xff) || (b6 == 0x00)) && ((b7 == 0xff) || (b7 == 0x00)); - -public: - constexpr static bool is_32bit = - ((v & 0xffffffff) == (v >> 32)) && const_u32<(v >> 32)>::value; - constexpr static uint8_t op_32bit = const_u32<(v >> 32)>::operation; - constexpr static uint8_t byte_32bit = const_u32<(v >> 32)>::byte; - - constexpr static uint16_t operation = is_32bit ? op_32bit - : is_case10 ? 0x11001 - : 0xffff; - - constexpr static uint16_t byte = - is_32bit ? byte_32bit - : is_case10 - ? ((b0 ? 0x01 : 0x00) | (b1 ? 0x02 : 0x00) | (b2 ? 0x04 : 0x00) | - (b3 ? 0x08 : 0x00) | (b4 ? 0x10 : 0x00) | (b5 ? 0x20 : 0x00) | - (b6 ? 0x40 : 0x00) | (b7 ? 0x80 : 0x00)) - : 0xffff; - - constexpr static int value = int((operation << 8) | byte) - 8192; - constexpr static bool valid = operation != 0xffff; -}; - -} // namespace lasx_vldi - -// Uncomment when running under QEMU affected -// by bug https://gitlab.com/qemu-project/qemu/-/issues/2865 -// Versions <= 9.2.2 are affected, likely anything newer is correct. -#ifndef QEMU_VLDI_BUG -// #define QEMU_VLDI_BUG 1 -#endif - -#ifdef QEMU_VLDI_BUG - #define lasx_splat_u16(v) __lasx_xvreplgr2vr_h(v) - #define lasx_splat_u32(v) __lasx_xvreplgr2vr_w(v) -#else -template constexpr __m256i lasx_splat_u16_aux() { - constexpr bool is_imm10 = (int16_t(x) < 512) && (int16_t(x) > -512); - constexpr uint16_t imm10 = is_imm10 ? x : 0; - constexpr bool is_vldi = lasx_vldi::const_u16::valid; - constexpr int vldi_imm = is_vldi ? lasx_vldi::const_u16::value : 0; - - return is_imm10 ? __lasx_xvrepli_h(int16_t(imm10)) - : is_vldi ? __lasx_xvldi(vldi_imm) - : __lasx_xvreplgr2vr_h(x); -} - -template constexpr __m256i lasx_splat_u32_aux() { - constexpr bool is_imm10 = (int32_t(x) < 512) && (int32_t(x) > -512); - constexpr uint32_t imm10 = is_imm10 ? x : 0; - constexpr bool is_vldi = lasx_vldi::const_u32::valid; - constexpr int vldi_imm = is_vldi ? lasx_vldi::const_u32::value : 0; - - return is_imm10 ? __lasx_xvrepli_w(int32_t(imm10)) - : is_vldi ? __lasx_xvldi(vldi_imm) - : __lasx_xvreplgr2vr_w(x); -} - - #define lasx_splat_u16(v) lasx_splat_u16_aux<(v)>() - #define lasx_splat_u32(v) lasx_splat_u32_aux<(v)>() -#endif // QEMU_VLDI_BUG - -#endif // SIMDUTF_LASX_INTRINSICS_H -/* end file src/simdutf/lasx/intrinsics.h */ -/* begin file src/simdutf/lasx/bitmanipulation.h */ -#ifndef SIMDUTF_LASX_BITMANIPULATION_H -#define SIMDUTF_LASX_BITMANIPULATION_H - -#include - -namespace simdutf { -namespace lasx { -namespace { - -simdutf_really_inline int count_ones(uint64_t input_num) { - return __lsx_vpickve2gr_w(__lsx_vpcnt_d(__lsx_vreplgr2vr_d(input_num)), 0); -} - -#if SIMDUTF_NEED_TRAILING_ZEROES -// simdutf_really_inline int trailing_zeroes(uint64_t input_num) { -// return __builtin_ctzll(input_num); -// } -#endif - -} // unnamed namespace -} // namespace lasx -} // namespace simdutf - -#endif // SIMDUTF_LASX_BITMANIPULATION_H -/* end file src/simdutf/lasx/bitmanipulation.h */ -/* begin file src/simdutf/lasx/simd.h */ -#ifndef SIMDUTF_LASX_SIMD_H -#define SIMDUTF_LASX_SIMD_H - - -namespace simdutf { -namespace lasx { -namespace { -namespace simd { - -__attribute__((aligned(32))) static const uint8_t prev_shuf_table[32][32] = { - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - {0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}, - {0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, - 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13}, - {0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, - 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, - {0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, - 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, - {0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, - 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, - {0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, - 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, - {0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, - 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, - 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, - 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, - 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, - 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, - 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, - 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, - 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0}, - {15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, - 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, - 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, - 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, - 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, - 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, - 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, - 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0}, - {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, - 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0}, - {6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, - 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0}, - {5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, - 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0}, - {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, - 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0}, - {3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, - 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0}, - {2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, - 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0}, - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, -}; - -__attribute__((aligned(32))) static const uint8_t bitsel_mask_table[32][32] = { - {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x0}, - {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0}}; - -// Forward-declared so they can be used by splat and friends. -template struct base { - __m256i value; - - // Zero constructor - simdutf_really_inline base() : value{__m256i()} {} - - // Conversion from SIMD register - simdutf_really_inline base(const __m256i _value) : value(_value) {} - // Conversion to SIMD register - simdutf_really_inline operator const __m256i &() const { return this->value; } - simdutf_really_inline operator __m256i &() { return this->value; } - template - simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { - if (big_endian) { - __m256i zero = __lasx_xvldi(0); - __m256i in8 = __lasx_xvpermi_d(this->value, 0b11011000); - __m256i inlow = __lasx_xvilvl_b(in8, zero); - __m256i inhigh = __lasx_xvilvh_b(in8, zero); - __lasx_xvst(inlow, reinterpret_cast(ptr), 0); - __lasx_xvst(inhigh, reinterpret_cast(ptr), 32); - } else { - __m256i inlow = __lasx_vext2xv_hu_bu(this->value); - __m256i inhigh = __lasx_vext2xv_hu_bu( - __lasx_xvpermi_q(this->value, this->value, 0b00000001)); - __lasx_xvst(inlow, reinterpret_cast<__m256i *>(ptr), 0); - __lasx_xvst(inhigh, reinterpret_cast<__m256i *>(ptr), 32); - } - } - simdutf_really_inline void store_ascii_as_utf32(char32_t *ptr) const { - __m256i in32_0 = __lasx_vext2xv_wu_bu(this->value); - __lasx_xvst(in32_0, reinterpret_cast(ptr), 0); - - __m256i in8_1 = __lasx_xvpermi_d(this->value, 0b00000001); - __m256i in32_1 = __lasx_vext2xv_wu_bu(in8_1); - __lasx_xvst(in32_1, reinterpret_cast(ptr), 32); - - __m256i in8_2 = __lasx_xvpermi_d(this->value, 0b00000010); - __m256i in32_2 = __lasx_vext2xv_wu_bu(in8_2); - __lasx_xvst(in32_2, reinterpret_cast(ptr), 64); - - __m256i in8_3 = __lasx_xvpermi_d(this->value, 0b00000011); - __m256i in32_3 = __lasx_vext2xv_wu_bu(in8_3); - __lasx_xvst(in32_3, reinterpret_cast(ptr), 96); - } - // Bit operations - simdutf_really_inline Child operator|(const Child other) const { - return __lasx_xvor_v(this->value, other); - } - simdutf_really_inline Child operator&(const Child other) const { - return __lasx_xvand_v(this->value, other); - } - simdutf_really_inline Child operator^(const Child other) const { - return __lasx_xvxor_v(this->value, other); - } - simdutf_really_inline Child &operator|=(const Child other) { - auto this_cast = static_cast(this); - *this_cast = *this_cast | other; - return *this_cast; - } -}; - -template struct simd8; - -template > -struct base8 : base> { - simdutf_really_inline base8() : base>() {} - simdutf_really_inline base8(const __m256i _value) : base>(_value) {} - friend simdutf_really_inline Mask operator==(const simd8 lhs, - const simd8 rhs) { - return __lasx_xvseq_b(lhs, rhs); - } - - static const int SIZE = sizeof(base::value); - - template - simdutf_really_inline simd8 prev(const simd8 prev_chunk) const { - static_assert(N <= 16, "unsupported shift value"); - - if (!N) - return this->value; - - __m256i zero = __lasx_xvldi(0); - __m256i result, shuf; - if (N < 16) { - shuf = __lasx_xvld(prev_shuf_table[N], 0); - - result = __lasx_xvshuf_b( - __lasx_xvpermi_q(this->value, this->value, 0b00000001), this->value, - shuf); - __m256i srl_prev = __lasx_xvbsrl_v( - __lasx_xvpermi_q(zero, prev_chunk.value, 0b00110001), (16 - N)); - __m256i mask = __lasx_xvld(bitsel_mask_table[N], 0); - result = __lasx_xvbitsel_v(result, srl_prev, mask); - - return result; - } else if (N == 16) { - return __lasx_xvpermi_q(this->value, prev_chunk.value, 0b00100001); - } - } -}; - -// SIMD byte mask type (returned by things like eq and gt) -template <> struct simd8 : base8 { - static simdutf_really_inline simd8 splat(bool _value) { - return __lasx_xvreplgr2vr_b(uint8_t(-(!!_value))); - } - - simdutf_really_inline simd8() : base8() {} - simdutf_really_inline simd8(const __m256i _value) : base8(_value) {} - // Splat constructor - simdutf_really_inline simd8(bool _value) : base8(splat(_value)) {} - - simdutf_really_inline uint32_t to_bitmask() const { - __m256i mask = __lasx_xvmsknz_b(this->value); - uint32_t mask0 = __lasx_xvpickve2gr_wu(mask, 0); - uint32_t mask1 = __lasx_xvpickve2gr_wu(mask, 4); - return (mask0 | (mask1 << 16)); - } - simdutf_really_inline bool any() const { - if (__lasx_xbz_b(this->value)) - return false; - return true; - } - simdutf_really_inline simd8 operator~() const { return *this ^ true; } -}; - -template struct base8_numeric : base8 { - static simdutf_really_inline simd8 splat(T _value) { - return __lasx_xvreplgr2vr_b(_value); - } - static simdutf_really_inline simd8 zero() { return __lasx_xvldi(0); } - static simdutf_really_inline simd8 load(const T values[32]) { - return __lasx_xvld(reinterpret_cast(values), 0); - } - // Repeat 16 values as many times as necessary (usually for lookup tables) - static simdutf_really_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, - T v5, T v6, T v7, T v8, T v9, - T v10, T v11, T v12, T v13, - T v14, T v15) { - return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, - v14, v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, - v12, v13, v14, v15); - } - - simdutf_really_inline base8_numeric() : base8() {} - simdutf_really_inline base8_numeric(const __m256i _value) - : base8(_value) {} - - // Store to array - simdutf_really_inline void store(T dst[32]) const { - return __lasx_xvst(this->value, reinterpret_cast<__m256i *>(dst), 0); - } - - // Override to distinguish from bool version - simdutf_really_inline simd8 operator~() const { return *this ^ 0xFFu; } - - // Perform a lookup assuming the value is between 0 and 16 (undefined behavior - // for out of range values) - template - simdutf_really_inline simd8 lookup_16(simd8 lookup_table) const { - __m256i origin = __lasx_xvand_v(this->value, __lasx_xvldi(0x1f)); - return __lasx_xvshuf_b(__lasx_xvldi(0), lookup_table, origin); - } - - template - simdutf_really_inline simd8 - lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, - L replace5, L replace6, L replace7, L replace8, L replace9, - L replace10, L replace11, L replace12, L replace13, L replace14, - L replace15) const { - return lookup_16(simd8::repeat_16( - replace0, replace1, replace2, replace3, replace4, replace5, replace6, - replace7, replace8, replace9, replace10, replace11, replace12, - replace13, replace14, replace15)); - } -}; - -// Signed bytes -template <> struct simd8 : base8_numeric { - simdutf_really_inline simd8() : base8_numeric() {} - simdutf_really_inline simd8(const __m256i _value) - : base8_numeric(_value) {} - - // Splat constructor - simdutf_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdutf_really_inline simd8(const int8_t values[32]) : simd8(load(values)) {} - simdutf_really_inline operator simd8() const; - simdutf_really_inline bool is_ascii() const { - __m256i ascii_mask = __lasx_xvslti_b(this->value, 0); - if (__lasx_xbnz_v(ascii_mask)) - return false; - return true; - } - // Order-sensitive comparisons - simdutf_really_inline simd8 operator>(const simd8 other) const { - return __lasx_xvslt_b(other, this->value); - } - simdutf_really_inline simd8 operator<(const simd8 other) const { - return __lasx_xvslt_b(this->value, other); - } -}; - -// Unsigned bytes -template <> struct simd8 : base8_numeric { - simdutf_really_inline simd8() : base8_numeric() {} - simdutf_really_inline simd8(const __m256i _value) - : base8_numeric(_value) {} - // Splat constructor - simdutf_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdutf_really_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} - // Member-by-member initialization - simdutf_really_inline - simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, - uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, - uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, - uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, - uint8_t v21, uint8_t v22, uint8_t v23, uint8_t v24, uint8_t v25, - uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, - uint8_t v31) - : simd8((__m256i)v32u8{v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10, v11, v12, v13, v14, v15, - v16, v17, v18, v19, v20, v21, v22, v23, - v24, v25, v26, v27, v28, v29, v30, v31}) {} - - // Saturated math - simdutf_really_inline simd8 - saturating_sub(const simd8 other) const { - return __lasx_xvssub_bu(this->value, other); - } - - // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdutf_really_inline simd8 - gt_bits(const simd8 other) const { - return this->saturating_sub(other); - } - simdutf_really_inline simd8 - operator>=(const simd8 other) const { - return __lasx_xvsle_bu(other, *this); - } - simdutf_really_inline simd8 &operator-=(const simd8 other) { - value = __lasx_xvsub_b(value, other.value); - return *this; - } - - // Bit-specific operations - simdutf_really_inline bool is_ascii() const { - __m256i ascii_mask = __lasx_xvslti_b(this->value, 0); - if (__lasx_xbnz_v(ascii_mask)) - return false; - return true; - } - simdutf_really_inline bool any_bits_set_anywhere() const { - if (__lasx_xbnz_v(this->value)) - return true; - return false; - } - template simdutf_really_inline simd8 shr() const { - return __lasx_xvsrli_b(this->value, N); - } - template simdutf_really_inline simd8 shl() const { - return __lasx_xvslli_b(this->value, N); - } - - simdutf_really_inline uint64_t sum_bytes() const { - const auto sum_u16 = __lasx_xvhaddw_hu_bu(value, value); - const auto sum_u32 = __lasx_xvhaddw_wu_hu(sum_u16, sum_u16); - const auto sum_u64 = __lasx_xvhaddw_du_wu(sum_u32, sum_u32); - - return uint64_t(__lasx_xvpickve2gr_du(sum_u64, 0)) + - uint64_t(__lasx_xvpickve2gr_du(sum_u64, 1)) + - uint64_t(__lasx_xvpickve2gr_du(sum_u64, 2)) + - uint64_t(__lasx_xvpickve2gr_du(sum_u64, 3)); - } -}; -simdutf_really_inline simd8::operator simd8() const { - return this->value; -} - -template struct simd8x64 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); - static_assert(NUM_CHUNKS == 2, - "LASX kernel should use two registers per 64-byte block."); - simd8 chunks[NUM_CHUNKS]; - - simd8x64(const simd8x64 &o) = delete; // no copy allowed - simd8x64 & - operator=(const simd8 other) = delete; // no assignment allowed - simd8x64() = delete; // no default constructor allowed - - simdutf_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1) - : chunks{chunk0, chunk1} {} - simdutf_really_inline simd8x64(const T *ptr) - : chunks{simd8::load(ptr), - simd8::load(ptr + sizeof(simd8) / sizeof(T))} {} - - simdutf_really_inline void store(T *ptr) const { - this->chunks[0].store(ptr + sizeof(simd8) * 0 / sizeof(T)); - this->chunks[1].store(ptr + sizeof(simd8) * 1 / sizeof(T)); - } - - simdutf_really_inline uint64_t to_bitmask() const { - uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); - uint64_t r_hi = this->chunks[1].to_bitmask(); - return r_lo | (r_hi << 32); - } - - simdutf_really_inline simd8x64 &operator|=(const simd8x64 &other) { - this->chunks[0] |= other.chunks[0]; - this->chunks[1] |= other.chunks[1]; - return *this; - } - - simdutf_really_inline simd8 reduce_or() const { - return this->chunks[0] | this->chunks[1]; - } - - simdutf_really_inline bool is_ascii() const { - return this->reduce_or().is_ascii(); - } - - template - simdutf_really_inline void store_ascii_as_utf16(char16_t *ptr) const { - this->chunks[0].template store_ascii_as_utf16(ptr + - sizeof(simd8) * 0); - this->chunks[1].template store_ascii_as_utf16(ptr + - sizeof(simd8) * 1); - } - - simdutf_really_inline void store_ascii_as_utf32(char32_t *ptr) const { - this->chunks[0].store_ascii_as_utf32(ptr + sizeof(simd8) * 0); - this->chunks[1].store_ascii_as_utf32(ptr + sizeof(simd8) * 1); - } - - simdutf_really_inline uint64_t lt(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] < mask, this->chunks[1] < mask) - .to_bitmask(); - } - - simdutf_really_inline uint64_t gt(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] > mask, this->chunks[1] > mask) - .to_bitmask(); - } - simdutf_really_inline uint64_t gteq_unsigned(const uint8_t m) const { - const simd8 mask = simd8::splat(m); - return simd8x64((simd8(__m256i(this->chunks[0])) >= mask), - (simd8(__m256i(this->chunks[1])) >= mask)) - .to_bitmask(); - } -}; // struct simd8x64 - -/* begin file src/simdutf/lasx/simd16-inl.h */ -template struct simd16; - -template > -struct base16 : base> { - using bitmask_type = uint32_t; - - simdutf_really_inline base16() : base>() {} - simdutf_really_inline base16(const __m256i _value) - : base>(_value) {} - template - simdutf_really_inline base16(const Pointer *ptr) - : base16(__lasx_xvld(reinterpret_cast(ptr), 0)) {} - - /// the size of vector in bytes - static const int SIZE = sizeof(base>::value); - - /// the number of elements of type T a vector can hold - static const int ELEMENTS = SIZE / sizeof(T); -}; - -// SIMD byte mask type (returned by things like eq and gt) -template <> struct simd16 : base16 { - static simdutf_really_inline simd16 splat(bool _value) { - return __lasx_xvreplgr2vr_h(uint8_t(-(!!_value))); - } - - simdutf_really_inline simd16() : base16() {} - simdutf_really_inline simd16(const __m256i _value) : base16(_value) {} - // Splat constructor - simdutf_really_inline simd16(bool _value) : base16(splat(_value)) {} - - simdutf_really_inline bitmask_type to_bitmask() const { - __m256i mask = __lasx_xvmsknz_b(this->value); - bitmask_type mask0 = __lasx_xvpickve2gr_wu(mask, 0); - bitmask_type mask1 = __lasx_xvpickve2gr_wu(mask, 4); - return (mask0 | (mask1 << 16)); - } - simdutf_really_inline simd16 operator~() const { return *this ^ true; } -}; - -template struct base16_numeric : base16 { - static simdutf_really_inline simd16 splat(T _value) { - return __lasx_xvreplgr2vr_h((uint16_t)_value); - } - static simdutf_really_inline simd16 zero() { return __lasx_xvldi(0); } - template - static simdutf_really_inline simd16 load(const Pointer values) { - return __lasx_xvld(values, 0); - } - - simdutf_really_inline base16_numeric() : base16() {} - simdutf_really_inline base16_numeric(const __m256i _value) - : base16(_value) {} - - // Store to array - simdutf_really_inline void store(T dst[8]) const { - return __lasx_xvst(this->value, reinterpret_cast<__m256i *>(dst), 0); - } - - // Override to distinguish from bool version - simdutf_really_inline simd16 operator~() const { return *this ^ 0xFFFFu; } -}; - -// Unsigned code units -template <> struct simd16 : base16_numeric { - simdutf_really_inline simd16() : base16_numeric() {} - simdutf_really_inline simd16(const __m256i _value) - : base16_numeric(_value) {} - - // Splat constructor - simdutf_really_inline simd16(uint16_t _value) : simd16(splat(_value)) {} - - // Array constructor - simdutf_really_inline simd16(const uint16_t *values) : simd16(load(values)) {} - simdutf_really_inline simd16(const char16_t *values) - : simd16(load(reinterpret_cast(values))) {} - - // Order-specific operations - simdutf_really_inline simd16 &operator+=(const simd16 other) { - value = __lasx_xvadd_h(value, other.value); - return *this; - } - - // Change the endianness - simdutf_really_inline simd16 swap_bytes() const { - return __lasx_xvshuf4i_b(this->value, 0b10110001); - } - - template - static simdutf_really_inline simd8 - pack_shifted_right(const simd16 &v0, const simd16 &v1) { - return __lasx_xvpermi_d(__lasx_xvssrlni_bu_h(v1.value, v0.value, N), - 0b11011000); - } - - // Pack with the unsigned saturation of two uint16_t code units into single - // uint8_t vector - static simdutf_really_inline simd8 pack(const simd16 &v0, - const simd16 &v1) { - - return pack_shifted_right<0>(v0, v1); - } - - simdutf_really_inline uint64_t sum() const { - const auto sum_u32 = __lasx_xvhaddw_wu_hu(value, value); - const auto sum_u64 = __lasx_xvhaddw_du_wu(sum_u32, sum_u32); - - return uint64_t(__lasx_xvpickve2gr_du(sum_u64, 0)) + - uint64_t(__lasx_xvpickve2gr_du(sum_u64, 1)) + - uint64_t(__lasx_xvpickve2gr_du(sum_u64, 2)) + - uint64_t(__lasx_xvpickve2gr_du(sum_u64, 3)); - } -}; - -template struct simd16x32 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd16); - static_assert(NUM_CHUNKS == 2, - "LASX kernel should use two registers per 64-byte block."); - simd16 chunks[NUM_CHUNKS]; - - simd16x32(const simd16x32 &o) = delete; // no copy allowed - simd16x32 & - operator=(const simd16 other) = delete; // no assignment allowed - simd16x32() = delete; // no default constructor allowed - - simdutf_really_inline simd16x32(const simd16 chunk0, - const simd16 chunk1) - : chunks{chunk0, chunk1} {} - simdutf_really_inline simd16x32(const T *ptr) - : chunks{simd16::load(ptr), - simd16::load(ptr + sizeof(simd16) / sizeof(T))} {} - - simdutf_really_inline void store(T *ptr) const { - this->chunks[0].store(ptr + sizeof(simd16) * 0 / sizeof(T)); - this->chunks[1].store(ptr + sizeof(simd16) * 1 / sizeof(T)); - } - - simdutf_really_inline void swap_bytes() { - this->chunks[0] = this->chunks[0].swap_bytes(); - this->chunks[1] = this->chunks[1].swap_bytes(); - } -}; // struct simd16x32 - -simdutf_really_inline simd16 min(const simd16 a, - const simd16 b) { - return __lasx_xvmin_hu(a.value, b.value); -} - -simdutf_really_inline simd16 operator==(const simd16 a, - uint16_t b) { - const auto bv = __lasx_xvreplgr2vr_h(b); - return __lasx_xvseq_h(a.value, bv); -} -/* end file src/simdutf/lasx/simd16-inl.h */ -/* begin file src/simdutf/lasx/simd32-inl.h */ -template struct simd32; - -template <> struct simd32 { - __m256i value; - static const int SIZE = sizeof(value); - static const int ELEMENTS = SIZE / sizeof(uint32_t); - - // constructors - simdutf_really_inline simd32(__m256i v) : value(v) {} - - template - simdutf_really_inline simd32(Ptr *ptr) : value(__lasx_xvld(ptr, 0)) {} - - // in-place operators - simdutf_really_inline simd32 &operator-=(const simd32 other) { - value = __lasx_xvsub_w(value, other.value); - return *this; - } - - // members - simdutf_really_inline uint64_t sum() const { - const auto odd = __lasx_xvsrli_d(value, 32); - const auto even = __lasx_xvand_v(value, __lasx_xvreplgr2vr_d(0xffffffff)); - - const auto sum64 = __lasx_xvadd_d(odd, even); - - return uint64_t(__lasx_xvpickve2gr_du(sum64, 0)) + - uint64_t(__lasx_xvpickve2gr_du(sum64, 1)) + - uint64_t(__lasx_xvpickve2gr_du(sum64, 2)) + - uint64_t(__lasx_xvpickve2gr_du(sum64, 3)); - } - - // static members - static simdutf_really_inline simd32 splat(uint32_t x) { - return __lasx_xvreplgr2vr_w(x); - } - - static simdutf_really_inline simd32 zero() { - return __lasx_xvrepli_w(0); - } -}; - -// ------------------------------------------------------------ - -template <> struct simd32 { - __m256i value; - static const int SIZE = sizeof(value); - - // constructors - simdutf_really_inline simd32(__m256i v) : value(v) {} -}; - -// ------------------------------------------------------------ - -simdutf_really_inline simd32 operator&(const simd32 a, - const simd32 b) { - return __lasx_xvor_v(a.value, b.value); -} - -simdutf_really_inline simd32 operator<(const simd32 a, - const simd32 b) { - return __lasx_xvslt_wu(a.value, b.value); -} - -simdutf_really_inline simd32 operator>(const simd32 a, - const simd32 b) { - return __lasx_xvslt_wu(b.value, a.value); -} - -// ------------------------------------------------------------ - -simdutf_really_inline simd32 as_vector_u32(const simd32 v) { - return v.value; -} -/* end file src/simdutf/lasx/simd32-inl.h */ -/* begin file src/simdutf/lasx/simd64-inl.h */ -template struct simd64; - -template <> struct simd64 { - __m256i value; - static const int SIZE = sizeof(value); - static const int ELEMENTS = SIZE / sizeof(uint64_t); - - // constructors - simdutf_really_inline simd64(__m256i v) : value(v) {} - - template - simdutf_really_inline simd64(Ptr *ptr) : value(__lasx_xvld(ptr, 0)) {} - - // in-place operators - simdutf_really_inline simd64 &operator+=(const simd64 other) { - value = __lasx_xvadd_d(value, other.value); - return *this; - } - - // members - simdutf_really_inline uint64_t sum() const { - return uint64_t(__lasx_xvpickve2gr_du(value, 0)) + - uint64_t(__lasx_xvpickve2gr_du(value, 1)) + - uint64_t(__lasx_xvpickve2gr_du(value, 2)) + - uint64_t(__lasx_xvpickve2gr_du(value, 3)); - } - - // static members - static simdutf_really_inline simd64 zero() { - return __lasx_xvrepli_d(0); - } -}; - -// ------------------------------------------------------------ - -template <> struct simd64 { - __m256i value; - static const int SIZE = sizeof(value); - - // constructors - simdutf_really_inline simd64(__m256i v) : value(v) {} -}; - -// ------------------------------------------------------------ - -simd64 sum_8bytes(const simd8 v) { - const auto sum_u16 = __lasx_xvhaddw_hu_bu(v, v); - const auto sum_u32 = __lasx_xvhaddw_wu_hu(sum_u16, sum_u16); - const auto sum_u64 = __lasx_xvhaddw_du_wu(sum_u32, sum_u32); - - return simd64(sum_u64); -} -/* end file src/simdutf/lasx/simd64-inl.h */ - -} // namespace simd -} // unnamed namespace -} // namespace lasx -} // namespace simdutf - -#endif // SIMDUTF_LASX_SIMD_H -/* end file src/simdutf/lasx/simd.h */ - -/* begin file src/simdutf/lasx/end.h */ -#undef SIMDUTF_SIMD_HAS_UNSIGNED_CMP -/* end file src/simdutf/lasx/end.h */ - -#endif // SIMDUTF_IMPLEMENTATION_LASX - -#endif // SIMDUTF_LASX_H -/* end file src/simdutf/lasx.h */ -/* begin file src/simdutf/fallback.h */ -#ifndef SIMDUTF_FALLBACK_H -#define SIMDUTF_FALLBACK_H - - -// Note that fallback.h is always imported last. - -// Default Fallback to on unless a builtin implementation has already been -// selected. -#ifndef SIMDUTF_IMPLEMENTATION_FALLBACK - #if SIMDUTF_CAN_ALWAYS_RUN_ARM64 || SIMDUTF_CAN_ALWAYS_RUN_ICELAKE || \ - SIMDUTF_CAN_ALWAYS_RUN_HASWELL || SIMDUTF_CAN_ALWAYS_RUN_WESTMERE || \ - SIMDUTF_CAN_ALWAYS_RUN_PPC64 || SIMDUTF_CAN_ALWAYS_RUN_RVV || \ - SIMDUTF_CAN_ALWAYS_RUN_LSX || SIMDUTF_CAN_ALWAYS_RUN_LASX - #define SIMDUTF_IMPLEMENTATION_FALLBACK 0 - #else - #define SIMDUTF_IMPLEMENTATION_FALLBACK 1 - #endif -#endif - -#define SIMDUTF_CAN_ALWAYS_RUN_FALLBACK (SIMDUTF_IMPLEMENTATION_FALLBACK) - -#if SIMDUTF_IMPLEMENTATION_FALLBACK - -namespace simdutf { -/** - * Fallback implementation (runs on any machine). - */ -namespace fallback {} // namespace fallback -} // namespace simdutf - -/* begin file src/simdutf/fallback/implementation.h */ -#ifndef SIMDUTF_FALLBACK_IMPLEMENTATION_H -#define SIMDUTF_FALLBACK_IMPLEMENTATION_H - - -namespace simdutf { -namespace fallback { - -namespace { -using namespace simdutf; -} - -class implementation final : public simdutf::implementation { -public: - simdutf_really_inline implementation() - : simdutf::implementation("fallback", "Generic fallback implementation", - 0) {} - -#if SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused int detect_encodings(const char *input, - size_t length) const noexcept final; -#endif // SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused bool validate_utf8(const char *buf, - size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 - simdutf_warn_unused result - validate_utf8_with_errors(const char *buf, size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_ASCII - simdutf_warn_unused bool validate_ascii(const char *buf, - size_t len) const noexcept final; - simdutf_warn_unused result - validate_ascii_with_errors(const char *buf, size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_ASCII - -#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused bool validate_utf16le(const char16_t *buf, - size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused bool validate_utf16be(const char16_t *buf, - size_t len) const noexcept final; - simdutf_warn_unused result validate_utf16le_with_errors( - const char16_t *buf, size_t len) const noexcept final; - simdutf_warn_unused result validate_utf16be_with_errors( - const char16_t *buf, size_t len) const noexcept final; - void to_well_formed_utf16be(const char16_t *input, size_t len, - char16_t *output) const noexcept final; - void to_well_formed_utf16le(const char16_t *input, size_t len, - char16_t *output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused bool validate_utf32(const char32_t *buf, - size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING -#if SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused result validate_utf32_with_errors( - const char32_t *buf, size_t len) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_latin1_to_utf8( - const char *buf, size_t len, char *utf8_output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_latin1_to_utf16le( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_latin1_to_utf16be( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_latin1_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_latin1_with_errors( - const char *buf, size_t len, char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t convert_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused size_t convert_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t convert_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused result convert_utf8_to_utf32_with_errors( - const char *buf, size_t len, char32_t *utf32_output) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - convert_utf16le_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16be_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( - const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( - const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16le_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16be_to_latin1(const char16_t *buf, size_t len, - char *latin1_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t convert_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t convert_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf8_with_errors( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; - simdutf_warn_unused size_t convert_valid_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - convert_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; - simdutf_warn_unused result - convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - convert_utf32_to_utf16le(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf32_to_utf16be(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_utf16le(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf32_to_utf16be(const char32_t *buf, size_t len, - char16_t *utf16_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16le_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_utf16be_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16le_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; - simdutf_warn_unused size_t - convert_valid_utf16be_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_buffer) const noexcept final; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 - void change_endianness_utf16(const char16_t *buf, size_t length, - char16_t *output) const noexcept final; - simdutf_warn_unused size_t count_utf16le(const char16_t *buf, - size_t length) const noexcept; - simdutf_warn_unused size_t count_utf16be(const char16_t *buf, - size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 - simdutf_warn_unused size_t count_utf8(const char *buf, - size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t - utf8_length_from_utf16le(const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t - utf8_length_from_utf16be(const char16_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t utf32_length_from_utf16le( - const char16_t *input, size_t length) const noexcept; - simdutf_warn_unused size_t utf32_length_from_utf16be( - const char16_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t - utf16_length_from_utf8(const char *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf8_length_from_utf32(const char32_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf16_length_from_utf32(const char32_t *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf32_length_from_utf8(const char *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - latin1_length_from_utf8(const char *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - utf8_length_from_latin1(const char *input, size_t length) const noexcept; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_BASE64 - simdutf_warn_unused result base64_to_binary( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused full_result base64_to_binary_details( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused result - base64_to_binary(const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - simdutf_warn_unused full_result base64_to_binary_details( - const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept; - size_t binary_to_base64(const char *input, size_t length, char *output, - base64_options options) const noexcept; -#endif // SIMDUTF_FEATURE_BASE64 -}; -} // namespace fallback -} // namespace simdutf - -#endif // SIMDUTF_FALLBACK_IMPLEMENTATION_H -/* end file src/simdutf/fallback/implementation.h */ - -/* begin file src/simdutf/fallback/begin.h */ -// redefining SIMDUTF_IMPLEMENTATION to "fallback" -// #define SIMDUTF_IMPLEMENTATION fallback -/* end file src/simdutf/fallback/begin.h */ - - // Declarations -/* begin file src/simdutf/fallback/bitmanipulation.h */ -#ifndef SIMDUTF_FALLBACK_BITMANIPULATION_H -#define SIMDUTF_FALLBACK_BITMANIPULATION_H - -#include - -namespace simdutf { -namespace fallback { -namespace {} // unnamed namespace -} // namespace fallback -} // namespace simdutf - -#endif // SIMDUTF_FALLBACK_BITMANIPULATION_H -/* end file src/simdutf/fallback/bitmanipulation.h */ - -/* begin file src/simdutf/fallback/end.h */ -/* end file src/simdutf/fallback/end.h */ - -#endif // SIMDUTF_IMPLEMENTATION_FALLBACK -#endif // SIMDUTF_FALLBACK_H -/* end file src/simdutf/fallback.h */ - -// The scalar routines should be included once. -/* begin file src/scalar/swap_bytes.h */ -#ifndef SIMDUTF_SWAP_BYTES_H -#define SIMDUTF_SWAP_BYTES_H - -namespace simdutf { -namespace scalar { - -inline simdutf_warn_unused uint16_t u16_swap_bytes(const uint16_t word) { - return uint16_t((word >> 8) | (word << 8)); -} - -inline simdutf_warn_unused uint32_t u32_swap_bytes(const uint32_t word) { - return ((word >> 24) & 0xff) | // move byte 3 to byte 0 - ((word << 8) & 0xff0000) | // move byte 1 to byte 2 - ((word >> 8) & 0xff00) | // move byte 2 to byte 1 - ((word << 24) & 0xff000000); // byte 0 to byte 3 -} - -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/swap_bytes.h */ -#if SIMDUTF_FEATURE_ASCII -/* begin file src/scalar/ascii.h */ -#ifndef SIMDUTF_ASCII_H -#define SIMDUTF_ASCII_H - -namespace simdutf { -namespace scalar { -namespace { -namespace ascii { -#if SIMDUTF_IMPLEMENTATION_FALLBACK -// Only used by the fallback kernel. -inline simdutf_warn_unused bool validate(const char *buf, size_t len) noexcept { - const uint8_t *data = reinterpret_cast(buf); - uint64_t pos = 0; - // process in blocks of 16 bytes when possible - for (; pos + 16 <= len; pos += 16) { - uint64_t v1; - std::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | v2}; - if ((v & 0x8080808080808080) != 0) { - return false; - } - } - // process the tail byte-by-byte - for (; pos < len; pos++) { - if (data[pos] >= 0b10000000) { - return false; - } - } - return true; -} -#endif - -inline simdutf_warn_unused result validate_with_errors(const char *buf, - size_t len) noexcept { - const uint8_t *data = reinterpret_cast(buf); - size_t pos = 0; - // process in blocks of 16 bytes when possible - for (; pos + 16 <= len; pos += 16) { - uint64_t v1; - std::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | v2}; - if ((v & 0x8080808080808080) != 0) { - for (; pos < len; pos++) { - if (data[pos] >= 0b10000000) { - return result(error_code::TOO_LARGE, pos); - } - } - } - } - // process the tail byte-by-byte - for (; pos < len; pos++) { - if (data[pos] >= 0b10000000) { - return result(error_code::TOO_LARGE, pos); - } - } - return result(error_code::SUCCESS, pos); -} - -} // namespace ascii -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/ascii.h */ -#endif // SIMDUTF_FEATURE_ASCII -#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING -/* begin file src/scalar/utf8.h */ -#ifndef SIMDUTF_UTF8_H -#define SIMDUTF_UTF8_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf8 { -#if SIMDUTF_IMPLEMENTATION_FALLBACK || SIMDUTF_IMPLEMENTATION_RVV -// only used by the fallback kernel. -// credit: based on code from Google Fuchsia (Apache Licensed) -inline simdutf_warn_unused bool validate(const char *buf, size_t len) noexcept { - const uint8_t *data = reinterpret_cast(buf); - uint64_t pos = 0; - uint32_t code_point = 0; - while (pos < len) { - // check of the next 16 bytes are ascii. - uint64_t next_pos = pos + 16; - if (next_pos <= - len) { // if it is safe to read 16 more bytes, check that they are ascii - uint64_t v1; - std::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | v2}; - if ((v & 0x8080808080808080) == 0) { - pos = next_pos; - continue; - } - } - unsigned char byte = data[pos]; - - while (byte < 0b10000000) { - if (++pos == len) { - return true; - } - byte = data[pos]; - } - - if ((byte & 0b11100000) == 0b11000000) { - next_pos = pos + 2; - if (next_pos > len) { - return false; - } - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return false; - } - // range check - code_point = (byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); - if ((code_point < 0x80) || (0x7ff < code_point)) { - return false; - } - } else if ((byte & 0b11110000) == 0b11100000) { - next_pos = pos + 3; - if (next_pos > len) { - return false; - } - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return false; - } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { - return false; - } - // range check - code_point = (byte & 0b00001111) << 12 | - (data[pos + 1] & 0b00111111) << 6 | - (data[pos + 2] & 0b00111111); - if ((code_point < 0x800) || (0xffff < code_point) || - (0xd7ff < code_point && code_point < 0xe000)) { - return false; - } - } else if ((byte & 0b11111000) == 0b11110000) { // 0b11110000 - next_pos = pos + 4; - if (next_pos > len) { - return false; - } - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return false; - } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { - return false; - } - if ((data[pos + 3] & 0b11000000) != 0b10000000) { - return false; - } - // range check - code_point = - (byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 | - (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111); - if (code_point <= 0xffff || 0x10ffff < code_point) { - return false; - } - } else { - // we may have a continuation - return false; - } - pos = next_pos; - } - return true; -} -#endif - -inline simdutf_warn_unused result validate_with_errors(const char *buf, - size_t len) noexcept { - const uint8_t *data = reinterpret_cast(buf); - size_t pos = 0; - uint32_t code_point = 0; - while (pos < len) { - // check of the next 16 bytes are ascii. - size_t next_pos = pos + 16; - if (next_pos <= - len) { // if it is safe to read 16 more bytes, check that they are ascii - uint64_t v1; - std::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | v2}; - if ((v & 0x8080808080808080) == 0) { - pos = next_pos; - continue; - } - } - unsigned char byte = data[pos]; - - while (byte < 0b10000000) { - if (++pos == len) { - return result(error_code::SUCCESS, len); - } - byte = data[pos]; - } - - if ((byte & 0b11100000) == 0b11000000) { - next_pos = pos + 2; - if (next_pos > len) { - return result(error_code::TOO_SHORT, pos); - } - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - // range check - code_point = (byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); - if ((code_point < 0x80) || (0x7ff < code_point)) { - return result(error_code::OVERLONG, pos); - } - } else if ((byte & 0b11110000) == 0b11100000) { - next_pos = pos + 3; - if (next_pos > len) { - return result(error_code::TOO_SHORT, pos); - } - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - // range check - code_point = (byte & 0b00001111) << 12 | - (data[pos + 1] & 0b00111111) << 6 | - (data[pos + 2] & 0b00111111); - if ((code_point < 0x800) || (0xffff < code_point)) { - return result(error_code::OVERLONG, pos); - } - if (0xd7ff < code_point && code_point < 0xe000) { - return result(error_code::SURROGATE, pos); - } - } else if ((byte & 0b11111000) == 0b11110000) { // 0b11110000 - next_pos = pos + 4; - if (next_pos > len) { - return result(error_code::TOO_SHORT, pos); - } - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - if ((data[pos + 3] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - // range check - code_point = - (byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 | - (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111); - if (code_point <= 0xffff) { - return result(error_code::OVERLONG, pos); - } - if (0x10ffff < code_point) { - return result(error_code::TOO_LARGE, pos); - } - } else { - // we either have too many continuation bytes or an invalid leading byte - if ((byte & 0b11000000) == 0b10000000) { - return result(error_code::TOO_LONG, pos); - } else { - return result(error_code::HEADER_BITS, pos); - } - } - pos = next_pos; - } - return result(error_code::SUCCESS, len); -} - -// Finds the previous leading byte starting backward from buf and validates with -// errors from there Used to pinpoint the location of an error when an invalid -// chunk is detected We assume that the stream starts with a leading byte, and -// to check that it is the case, we ask that you pass a pointer to the start of -// the stream (start). -inline simdutf_warn_unused result rewind_and_validate_with_errors( - const char *start, const char *buf, size_t len) noexcept { - // First check that we start with a leading byte - if ((*start & 0b11000000) == 0b10000000) { - return result(error_code::TOO_LONG, 0); - } - size_t extra_len{0}; - // A leading byte cannot be further than 4 bytes away - for (int i = 0; i < 5; i++) { - unsigned char byte = *buf; - if ((byte & 0b11000000) != 0b10000000) { - break; - } else { - buf--; - extra_len++; - } - } - - result res = validate_with_errors(buf, len + extra_len); - res.count -= extra_len; - return res; -} - -inline size_t count_code_points(const char *buf, size_t len) { - const int8_t *p = reinterpret_cast(buf); - size_t counter{0}; - for (size_t i = 0; i < len; i++) { - // -65 is 0b10111111, anything larger in two-complement's should start a new - // code point. - if (p[i] > -65) { - counter++; - } - } - return counter; -} - -inline size_t utf16_length_from_utf8(const char *buf, size_t len) { - const int8_t *p = reinterpret_cast(buf); - size_t counter{0}; - for (size_t i = 0; i < len; i++) { - if (p[i] > -65) { - counter++; - } - if (uint8_t(p[i]) >= 240) { - counter++; - } - } - return counter; -} - -simdutf_warn_unused inline size_t trim_partial_utf8(const char *input, - size_t length) { - if (length < 3) { - switch (length) { - case 2: - if (uint8_t(input[length - 1]) >= 0xc0) { - return length - 1; - } // 2-, 3- and 4-byte characters with only 1 byte left - if (uint8_t(input[length - 2]) >= 0xe0) { - return length - 2; - } // 3- and 4-byte characters with only 2 bytes left - return length; - case 1: - if (uint8_t(input[length - 1]) >= 0xc0) { - return length - 1; - } // 2-, 3- and 4-byte characters with only 1 byte left - return length; - case 0: - return length; - } - } - if (uint8_t(input[length - 1]) >= 0xc0) { - return length - 1; - } // 2-, 3- and 4-byte characters with only 1 byte left - if (uint8_t(input[length - 2]) >= 0xe0) { - return length - 2; - } // 3- and 4-byte characters with only 1 byte left - if (uint8_t(input[length - 3]) >= 0xf0) { - return length - 3; - } // 4-byte characters with only 3 bytes left - return length; -} - -} // namespace utf8 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf8.h */ -#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING -#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING || \ - (SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1) -/* begin file src/scalar/utf16.h */ -#ifndef SIMDUTF_UTF16_H -#define SIMDUTF_UTF16_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf16 { - -template -inline simdutf_warn_unused bool validate(const char16_t *data, - size_t len) noexcept { - uint64_t pos = 0; - while (pos < len) { - char16_t word = - !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; - if ((word & 0xF800) == 0xD800) { - if (pos + 1 >= len) { - return false; - } - char16_t diff = char16_t(word - 0xD800); - if (diff > 0x3FF) { - return false; - } - char16_t next_word = !match_system(big_endian) - ? u16_swap_bytes(data[pos + 1]) - : data[pos + 1]; - char16_t diff2 = char16_t(next_word - 0xDC00); - if (diff2 > 0x3FF) { - return false; - } - pos += 2; - } else { - pos++; - } - } - return true; -} - -template -inline simdutf_warn_unused result validate_with_errors(const char16_t *data, - size_t len) noexcept { - size_t pos = 0; - while (pos < len) { - char16_t word = - !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; - if ((word & 0xF800) == 0xD800) { - if (pos + 1 >= len) { - return result(error_code::SURROGATE, pos); - } - char16_t diff = char16_t(word - 0xD800); - if (diff > 0x3FF) { - return result(error_code::SURROGATE, pos); - } - char16_t next_word = !match_system(big_endian) - ? u16_swap_bytes(data[pos + 1]) - : data[pos + 1]; - char16_t diff2 = uint16_t(next_word - 0xDC00); - if (diff2 > 0x3FF) { - return result(error_code::SURROGATE, pos); - } - pos += 2; - } else { - pos++; - } - } - return result(error_code::SUCCESS, pos); -} - -template -inline size_t count_code_points(const char16_t *p, size_t len) { - // We are not BOM aware. - size_t counter{0}; - for (size_t i = 0; i < len; i++) { - char16_t word = !match_system(big_endian) ? u16_swap_bytes(p[i]) : p[i]; - counter += ((word & 0xFC00) != 0xDC00); - } - return counter; -} - -template -inline size_t utf8_length_from_utf16(const char16_t *p, size_t len) { - // We are not BOM aware. - size_t counter{0}; - for (size_t i = 0; i < len; i++) { - char16_t word = !match_system(big_endian) ? u16_swap_bytes(p[i]) : p[i]; - counter++; // ASCII - counter += static_cast( - word > - 0x7F); // non-ASCII is at least 2 bytes, surrogates are 2*2 == 4 bytes - counter += static_cast((word > 0x7FF && word <= 0xD7FF) || - (word >= 0xE000)); // three-byte - } - return counter; -} - -template -inline size_t utf32_length_from_utf16(const char16_t *p, size_t len) { - // We are not BOM aware. - size_t counter{0}; - for (size_t i = 0; i < len; i++) { - char16_t word = !match_system(big_endian) ? u16_swap_bytes(p[i]) : p[i]; - counter += ((word & 0xFC00) != 0xDC00); - } - return counter; -} - -simdutf_really_inline void -change_endianness_utf16(const char16_t *input, size_t size, char16_t *output) { - for (size_t i = 0; i < size; i++) { - *output++ = char16_t(input[i] >> 8 | input[i] << 8); - } -} - -template -simdutf_warn_unused inline size_t trim_partial_utf16(const char16_t *input, - size_t length) { - if (length <= 1) { - return length; - } - uint16_t last_word = uint16_t(input[length - 1]); - last_word = !match_system(big_endian) ? u16_swap_bytes(last_word) : last_word; - length -= ((last_word & 0xFC00) == 0xD800); - return length; -} - -template bool is_high_surrogate(char16_t c) { - c = !match_system(big_endian) ? u16_swap_bytes(c) : c; - return (0xd800 <= c && c <= 0xdbff); -} - -template bool is_low_surrogate(char16_t c) { - c = !match_system(big_endian) ? u16_swap_bytes(c) : c; - return (0xdc00 <= c && c <= 0xdfff); -} - -// variable templates are a C++14 extension -template char16_t replacement() { - return !match_system(big_endian) ? scalar::u16_swap_bytes(0xfffd) : 0xfffd; -} - -template -void to_well_formed_utf16(const char16_t *input, size_t len, char16_t *output) { - const char16_t replacement = utf16::replacement(); - bool high_surrogate_prev = false, high_surrogate, low_surrogate; - size_t i = 0; - for (; i < len; i++) { - char16_t c = input[i]; - high_surrogate = is_high_surrogate(c); - low_surrogate = is_low_surrogate(c); - if (high_surrogate_prev && !low_surrogate) { - output[i - 1] = replacement; - } - - if (!high_surrogate_prev && low_surrogate) { - output[i] = replacement; - } else { - output[i] = input[i]; - } - high_surrogate_prev = high_surrogate; - } - - /* string may not end with high surrogate */ - if (high_surrogate_prev) { - output[i - 1] = replacement; - } -} - -} // namespace utf16 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf16.h */ -#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING || - // (SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1) -#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING -/* begin file src/scalar/utf32.h */ -#ifndef SIMDUTF_UTF32_H -#define SIMDUTF_UTF32_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf32 { - -inline simdutf_warn_unused bool validate(const char32_t *buf, - size_t len) noexcept { - const uint32_t *data = reinterpret_cast(buf); - uint64_t pos = 0; - for (; pos < len; pos++) { - uint32_t word = data[pos]; - if (word > 0x10FFFF || (word >= 0xD800 && word <= 0xDFFF)) { - return false; - } - } - return true; -} - -inline simdutf_warn_unused result validate_with_errors(const char32_t *buf, - size_t len) noexcept { - const uint32_t *data = reinterpret_cast(buf); - size_t pos = 0; - for (; pos < len; pos++) { - uint32_t word = data[pos]; - if (word > 0x10FFFF) { - return result(error_code::TOO_LARGE, pos); - } - if (word >= 0xD800 && word <= 0xDFFF) { - return result(error_code::SURROGATE, pos); - } - } - return result(error_code::SUCCESS, pos); -} - -inline size_t utf8_length_from_utf32(const char32_t *buf, size_t len) { - // We are not BOM aware. - const uint32_t *p = reinterpret_cast(buf); - size_t counter{0}; - for (size_t i = 0; i < len; i++) { - // credit: @ttsugriy for the vectorizable approach - counter++; // ASCII - counter += static_cast(p[i] > 0x7F); // two-byte - counter += static_cast(p[i] > 0x7FF); // three-byte - counter += static_cast(p[i] > 0xFFFF); // four-bytes - } - return counter; -} - -inline size_t utf16_length_from_utf32(const char32_t *buf, size_t len) { - // We are not BOM aware. - const uint32_t *p = reinterpret_cast(buf); - size_t counter{0}; - for (size_t i = 0; i < len; i++) { - counter++; // non-surrogate word - counter += static_cast(p[i] > 0xFFFF); // surrogate pair - } - return counter; -} - -} // namespace utf32 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf32.h */ -#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING -#if SIMDUTF_FEATURE_LATIN1 -/* begin file src/scalar/latin1.h */ -#ifndef SIMDUTF_LATIN1_H -#define SIMDUTF_LATIN1_H - -namespace simdutf { -namespace scalar { -namespace { -namespace latin1 { - -simdutf_really_inline size_t utf8_length_from_latin1(const char *buf, - size_t len) { - const uint8_t *c = reinterpret_cast(buf); - size_t answer = 0; - for (size_t i = 0; i < len; i++) { - if ((c[i] >> 7)) { - answer++; - } - } - return answer + len; -} - -} // namespace latin1 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/latin1.h */ -#endif // SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_BASE64 -/* begin file src/scalar/base64.h */ -#ifndef SIMDUTF_BASE64_H -#define SIMDUTF_BASE64_H - -#include -#include -#include -#include - -namespace simdutf { -namespace scalar { -namespace { -namespace base64 { - -// This function is not expected to be fast. Do not use in long loops. -template bool is_ascii_white_space(char_type c) { - return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f'; -} - -template bool is_ascii_white_space_or_padding(char_type c) { - return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || - c == '='; -} - -template bool is_eight_byte(char_type c) { - if (sizeof(char_type) == 1) { - return true; - } - return uint8_t(c) == c; -} - -// Returns true upon success. The destination buffer must be large enough. -// This functions assumes that the padding (=) has been removed. -template -full_result -base64_tail_decode(char *dst, const char_type *src, size_t length, - size_t padded_characters, // number of padding characters - // '=', typically 0, 1, 2. - base64_options options, - last_chunk_handling_options last_chunk_options) { - // This looks like 5 branches, but we expect the compiler to resolve this to a - // single branch: - const uint8_t *to_base64 = (options & base64_url) - ? tables::base64::to_base64_url_value - : tables::base64::to_base64_value; - const uint32_t *d0 = (options & base64_url) - ? tables::base64::base64_url::d0 - : tables::base64::base64_default::d0; - const uint32_t *d1 = (options & base64_url) - ? tables::base64::base64_url::d1 - : tables::base64::base64_default::d1; - const uint32_t *d2 = (options & base64_url) - ? tables::base64::base64_url::d2 - : tables::base64::base64_default::d2; - const uint32_t *d3 = (options & base64_url) - ? tables::base64::base64_url::d3 - : tables::base64::base64_default::d3; - - const char_type *srcend = src + length; - const char_type *srcinit = src; - const char *dstinit = dst; - const bool ignore_garbage = - (options == base64_options::base64_url_accept_garbage) || - (options == base64_options::base64_default_accept_garbage); - - uint32_t x; - size_t idx; - uint8_t buffer[4]; - while (true) { - while (src + 4 <= srcend && is_eight_byte(src[0]) && - is_eight_byte(src[1]) && is_eight_byte(src[2]) && - is_eight_byte(src[3]) && - (x = d0[uint8_t(src[0])] | d1[uint8_t(src[1])] | - d2[uint8_t(src[2])] | d3[uint8_t(src[3])]) < 0x01FFFFFF) { - if (match_system(endianness::BIG)) { - x = scalar::u32_swap_bytes(x); - } - std::memcpy(dst, &x, 3); // optimization opportunity: copy 4 bytes - dst += 3; - src += 4; - } - idx = 0; - // we need at least four characters. -#ifdef __clang__ - // If possible, we read four characters at a time. (It is an optimization.) - if (ignore_garbage && src + 4 <= srcend) { - char_type c0 = src[0]; - char_type c1 = src[1]; - char_type c2 = src[2]; - char_type c3 = src[3]; - uint8_t code0 = to_base64[uint8_t(c0)]; - uint8_t code1 = to_base64[uint8_t(c1)]; - uint8_t code2 = to_base64[uint8_t(c2)]; - uint8_t code3 = to_base64[uint8_t(c3)]; - buffer[idx] = code0; - idx += (is_eight_byte(c0) && code0 <= 63); - buffer[idx] = code1; - idx += (is_eight_byte(c1) && code1 <= 63); - buffer[idx] = code2; - idx += (is_eight_byte(c2) && code2 <= 63); - buffer[idx] = code3; - idx += (is_eight_byte(c3) && code3 <= 63); - src += 4; - } -#endif - while ((idx < 4) && (src < srcend)) { - char_type c = *src; - uint8_t code = to_base64[uint8_t(c)]; - buffer[idx] = uint8_t(code); - if (is_eight_byte(c) && code <= 63) { - idx++; - } else if (!ignore_garbage && - (code > 64 || !scalar::base64::is_eight_byte(c))) { - return {INVALID_BASE64_CHARACTER, size_t(src - srcinit), - size_t(dst - dstinit)}; - } else { - // We have a space or a newline or garbage. We ignore it. - } - src++; - } - if (idx != 4) { - if (!ignore_garbage && - last_chunk_options == last_chunk_handling_options::strict && - (idx != 1) && ((idx + padded_characters) & 3) != 0) { - // The partial chunk was at src - idx - return {BASE64_INPUT_REMAINDER, size_t(src - srcinit), - size_t(dst - dstinit)}; - } else if (!ignore_garbage && - last_chunk_options == - last_chunk_handling_options::stop_before_partial && - (idx != 1) && ((idx + padded_characters) & 3) != 0) { - // Rewind src to before partial chunk - src -= idx; - return {SUCCESS, size_t(src - srcinit), size_t(dst - dstinit)}; - } else { - if (idx == 2) { - uint32_t triple = - (uint32_t(buffer[0]) << 3 * 6) + (uint32_t(buffer[1]) << 2 * 6); - if (!ignore_garbage && - (last_chunk_options == last_chunk_handling_options::strict) && - (triple & 0xffff)) { - return {BASE64_EXTRA_BITS, size_t(src - srcinit), - size_t(dst - dstinit)}; - } - if (match_system(endianness::BIG)) { - triple <<= 8; - std::memcpy(dst, &triple, 1); - } else { - triple = scalar::u32_swap_bytes(triple); - triple >>= 8; - std::memcpy(dst, &triple, 1); - } - dst += 1; - } else if (idx == 3) { - uint32_t triple = (uint32_t(buffer[0]) << 3 * 6) + - (uint32_t(buffer[1]) << 2 * 6) + - (uint32_t(buffer[2]) << 1 * 6); - if (!ignore_garbage && - (last_chunk_options == last_chunk_handling_options::strict) && - (triple & 0xff)) { - return {BASE64_EXTRA_BITS, size_t(src - srcinit), - size_t(dst - dstinit)}; - } - if (match_system(endianness::BIG)) { - triple <<= 8; - std::memcpy(dst, &triple, 2); - } else { - triple = scalar::u32_swap_bytes(triple); - triple >>= 8; - std::memcpy(dst, &triple, 2); - } - dst += 2; - } else if (!ignore_garbage && idx == 1) { - return {BASE64_INPUT_REMAINDER, size_t(src - srcinit), - size_t(dst - dstinit)}; - } - return {SUCCESS, size_t(src - srcinit), size_t(dst - dstinit)}; - } - } - - uint32_t triple = - (uint32_t(buffer[0]) << 3 * 6) + (uint32_t(buffer[1]) << 2 * 6) + - (uint32_t(buffer[2]) << 1 * 6) + (uint32_t(buffer[3]) << 0 * 6); - if (match_system(endianness::BIG)) { - triple <<= 8; - std::memcpy(dst, &triple, 3); - } else { - triple = scalar::u32_swap_bytes(triple); - triple >>= 8; - std::memcpy(dst, &triple, 3); - } - dst += 3; - } -} - -// like base64_tail_decode, but it will not write past the end of the output -// buffer. The outlen paramter is modified to reflect the number of bytes -// written. This functions assumes that the padding (=) has been removed. -template -result base64_tail_decode_safe( - char *dst, size_t &outlen, const char_type *&srcr, size_t length, - size_t padded_characters, // number of padding characters '=', typically 0, - // 1, 2. - base64_options options, last_chunk_handling_options last_chunk_options) { - const char_type *src = srcr; - if (length == 0) { - outlen = 0; - return {SUCCESS, 0}; - } - // This looks like 5 branches, but we expect the compiler to resolve this to a - // single branch: - const uint8_t *to_base64 = (options & base64_url) - ? tables::base64::to_base64_url_value - : tables::base64::to_base64_value; - const uint32_t *d0 = (options & base64_url) - ? tables::base64::base64_url::d0 - : tables::base64::base64_default::d0; - const uint32_t *d1 = (options & base64_url) - ? tables::base64::base64_url::d1 - : tables::base64::base64_default::d1; - const uint32_t *d2 = (options & base64_url) - ? tables::base64::base64_url::d2 - : tables::base64::base64_default::d2; - const uint32_t *d3 = (options & base64_url) - ? tables::base64::base64_url::d3 - : tables::base64::base64_default::d3; - const bool ignore_garbage = - (options == base64_options::base64_url_accept_garbage) || - (options == base64_options::base64_default_accept_garbage); - - const char_type *srcend = src + length; - const char_type *srcinit = src; - const char *dstinit = dst; - const char *dstend = dst + outlen; - - uint32_t x; - size_t idx; - uint8_t buffer[4]; - while (true) { - while (src + 4 <= srcend && is_eight_byte(src[0]) && - is_eight_byte(src[1]) && is_eight_byte(src[2]) && - is_eight_byte(src[3]) && - (x = d0[uint8_t(src[0])] | d1[uint8_t(src[1])] | - d2[uint8_t(src[2])] | d3[uint8_t(src[3])]) < 0x01FFFFFF) { - if (dstend - dst < 3) { - outlen = size_t(dst - dstinit); - srcr = src; - return {OUTPUT_BUFFER_TOO_SMALL, size_t(src - srcinit)}; - } - if (match_system(endianness::BIG)) { - x = scalar::u32_swap_bytes(x); - } - std::memcpy(dst, &x, 3); // optimization opportunity: copy 4 bytes - dst += 3; - src += 4; - } - idx = 0; - const char_type *srccur = src; - // We need at least four characters. -#ifdef __clang__ - // If possible, we read four characters at a time. (It is an optimization.) - if (ignore_garbage && src + 4 <= srcend) { - char_type c0 = src[0]; - char_type c1 = src[1]; - char_type c2 = src[2]; - char_type c3 = src[3]; - uint8_t code0 = to_base64[uint8_t(c0)]; - uint8_t code1 = to_base64[uint8_t(c1)]; - uint8_t code2 = to_base64[uint8_t(c2)]; - uint8_t code3 = to_base64[uint8_t(c3)]; - buffer[idx] = code0; - idx += (is_eight_byte(c0) && code0 <= 63); - buffer[idx] = code1; - idx += (is_eight_byte(c1) && code1 <= 63); - buffer[idx] = code2; - idx += (is_eight_byte(c2) && code2 <= 63); - buffer[idx] = code3; - idx += (is_eight_byte(c3) && code3 <= 63); - src += 4; - } -#endif - while (idx < 4 && src < srcend) { - char_type c = *src; - uint8_t code = to_base64[uint8_t(c)]; - - buffer[idx] = uint8_t(code); - if (is_eight_byte(c) && code <= 63) { - idx++; - } else if (!ignore_garbage && - (code > 64 || !scalar::base64::is_eight_byte(c))) { - outlen = size_t(dst - dstinit); - srcr = src; - return {INVALID_BASE64_CHARACTER, size_t(src - srcinit)}; - } else { - // We have a space or a newline or garbage. We ignore it. - } - src++; - } - if (idx != 4) { - if (!ignore_garbage && - last_chunk_options == last_chunk_handling_options::strict && - ((idx + padded_characters) & 3) != 0) { - outlen = size_t(dst - dstinit); - srcr = src; - return {BASE64_INPUT_REMAINDER, size_t(src - srcinit)}; - } else if (!ignore_garbage && - last_chunk_options == - last_chunk_handling_options::stop_before_partial && - ((idx + padded_characters) & 3) != 0) { - // Rewind src to before partial chunk - srcr = srccur; - outlen = size_t(dst - dstinit); - return {SUCCESS, size_t(dst - dstinit)}; - } else { // loose mode - if (idx == 0) { - // No data left; return success - outlen = size_t(dst - dstinit); - srcr = src; - return {SUCCESS, size_t(dst - dstinit)}; - } else if (!ignore_garbage && idx == 1) { - // Error: Incomplete chunk of length 1 is invalid in loose mode - outlen = size_t(dst - dstinit); - srcr = src; - return {BASE64_INPUT_REMAINDER, size_t(src - srcinit)}; - } else if (idx == 2 || idx == 3) { - // Check if there's enough space in the destination buffer - size_t required_space = (idx == 2) ? 1 : 2; - if (size_t(dstend - dst) < required_space) { - outlen = size_t(dst - dstinit); - srcr = src; - return {OUTPUT_BUFFER_TOO_SMALL, size_t(srccur - srcinit)}; - } - uint32_t triple = 0; - if (idx == 2) { - triple = (uint32_t(buffer[0]) << 18) + (uint32_t(buffer[1]) << 12); - if (!ignore_garbage && - (last_chunk_options == last_chunk_handling_options::strict) && - (triple & 0xffff)) { - srcr = src; - return {BASE64_EXTRA_BITS, size_t(src - srcinit)}; - } - // Extract the first byte - triple >>= 16; - dst[0] = static_cast(triple & 0xFF); - dst += 1; - } else if (idx == 3) { - triple = (uint32_t(buffer[0]) << 18) + (uint32_t(buffer[1]) << 12) + - (uint32_t(buffer[2]) << 6); - if (!ignore_garbage && - (last_chunk_options == last_chunk_handling_options::strict) && - (triple & 0xff)) { - srcr = src; - return {BASE64_EXTRA_BITS, size_t(src - srcinit)}; - } - // Extract the first two bytes - triple >>= 8; - dst[0] = static_cast((triple >> 8) & 0xFF); - dst[1] = static_cast(triple & 0xFF); - dst += 2; - } - outlen = size_t(dst - dstinit); - srcr = src; - return {SUCCESS, size_t(dst - dstinit)}; - } - } - } - - if (dstend - dst < 3) { - outlen = size_t(dst - dstinit); - srcr = src; - return {OUTPUT_BUFFER_TOO_SMALL, size_t(srccur - srcinit)}; - } - uint32_t triple = (uint32_t(buffer[0]) << 18) + - (uint32_t(buffer[1]) << 12) + (uint32_t(buffer[2]) << 6) + - (uint32_t(buffer[3])); - if (match_system(endianness::BIG)) { - triple <<= 8; - std::memcpy(dst, &triple, 3); - } else { - triple = scalar::u32_swap_bytes(triple); - triple >>= 8; - std::memcpy(dst, &triple, 3); - } - dst += 3; - } -} - -// Returns the number of bytes written. The destination buffer must be large -// enough. It will add padding (=) if needed. -size_t tail_encode_base64(char *dst, const char *src, size_t srclen, - base64_options options) { - // By default, we use padding if we are not using the URL variant. - // This is check with ((options & base64_url) == 0) which returns true if we - // are not using the URL variant. However, we also allow 'inversion' of the - // convention with the base64_reverse_padding option. If the - // base64_reverse_padding option is set, we use padding if we are using the - // URL variant, and we omit it if we are not using the URL variant. This is - // checked with - // ((options & base64_reverse_padding) == base64_reverse_padding). - bool use_padding = - ((options & base64_url) == 0) ^ - ((options & base64_reverse_padding) == base64_reverse_padding); - // This looks like 3 branches, but we expect the compiler to resolve this to - // a single branch: - const char *e0 = (options & base64_url) ? tables::base64::base64_url::e0 - : tables::base64::base64_default::e0; - const char *e1 = (options & base64_url) ? tables::base64::base64_url::e1 - : tables::base64::base64_default::e1; - const char *e2 = (options & base64_url) ? tables::base64::base64_url::e2 - : tables::base64::base64_default::e2; - char *out = dst; - size_t i = 0; - uint8_t t1, t2, t3; - for (; i + 2 < srclen; i += 3) { - t1 = uint8_t(src[i]); - t2 = uint8_t(src[i + 1]); - t3 = uint8_t(src[i + 2]); - *out++ = e0[t1]; - *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; - *out++ = e1[((t2 & 0x0F) << 2) | ((t3 >> 6) & 0x03)]; - *out++ = e2[t3]; - } - switch (srclen - i) { - case 0: - break; - case 1: - t1 = uint8_t(src[i]); - *out++ = e0[t1]; - *out++ = e1[(t1 & 0x03) << 4]; - if (use_padding) { - *out++ = '='; - *out++ = '='; - } - break; - default: /* case 2 */ - t1 = uint8_t(src[i]); - t2 = uint8_t(src[i + 1]); - *out++ = e0[t1]; - *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)]; - *out++ = e2[(t2 & 0x0F) << 2]; - if (use_padding) { - *out++ = '='; - } - } - return (size_t)(out - dst); -} - -template -simdutf_warn_unused size_t maximal_binary_length_from_base64( - const char_type *input, size_t length) noexcept { - // We follow https://infra.spec.whatwg.org/#forgiving-base64-decode - size_t padding = 0; - if (length > 0) { - if (input[length - 1] == '=') { - padding++; - if (length > 1 && input[length - 2] == '=') { - padding++; - } - } - } - size_t actual_length = length - padding; - if (actual_length % 4 <= 1) { - return actual_length / 4 * 3; - } - // if we have a valid input, then the remainder must be 2 or 3 adding one or - // two extra bytes. - return actual_length / 4 * 3 + (actual_length % 4) - 1; -} - -simdutf_warn_unused size_t -base64_length_from_binary(size_t length, base64_options options) noexcept { - // By default, we use padding if we are not using the URL variant. - // This is check with ((options & base64_url) == 0) which returns true if we - // are not using the URL variant. However, we also allow 'inversion' of the - // convention with the base64_reverse_padding option. If the - // base64_reverse_padding option is set, we use padding if we are using the - // URL variant, and we omit it if we are not using the URL variant. This is - // checked with - // ((options & base64_reverse_padding) == base64_reverse_padding). - bool use_padding = - ((options & base64_url) == 0) ^ - ((options & base64_reverse_padding) == base64_reverse_padding); - if (!use_padding) { - return length / 3 * 4 + ((length % 3) ? (length % 3) + 1 : 0); - } - return (length + 2) / 3 * - 4; // We use padding to make the length a multiple of 4. -} - -} // namespace base64 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/base64.h */ -#endif // SIMDUTF_FEATURE_BASE64 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -/* begin file src/scalar/utf32_to_utf8/valid_utf32_to_utf8.h */ -#ifndef SIMDUTF_VALID_UTF32_TO_UTF8_H -#define SIMDUTF_VALID_UTF32_TO_UTF8_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf32_to_utf8 { - -#if SIMDUTF_IMPLEMENTATION_FALLBACK || SIMDUTF_IMPLEMENTATION_PPC64 -// only used by the fallback and POWER kernel -inline size_t convert_valid(const char32_t *buf, size_t len, - char *utf8_output) { - const uint32_t *data = reinterpret_cast(buf); - size_t pos = 0; - char *start{utf8_output}; - while (pos < len) { - // try to convert the next block of 2 ASCII characters - if (pos + 2 <= - len) { // if it is safe to read 8 more bytes, check that they are ascii - uint64_t v; - ::memcpy(&v, data + pos, sizeof(uint64_t)); - if ((v & 0xFFFFFF80FFFFFF80) == 0) { - *utf8_output++ = char(buf[pos]); - *utf8_output++ = char(buf[pos + 1]); - pos += 2; - continue; - } - } - uint32_t word = data[pos]; - if ((word & 0xFFFFFF80) == 0) { - // will generate one UTF-8 bytes - *utf8_output++ = char(word); - pos++; - } else if ((word & 0xFFFFF800) == 0) { - // will generate two UTF-8 bytes - // we have 0b110XXXXX 0b10XXXXXX - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; - } else if ((word & 0xFFFF0000) == 0) { - // will generate three UTF-8 bytes - // we have 0b1110XXXX 0b10XXXXXX 0b10XXXXXX - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; - } else { - // will generate four UTF-8 bytes - // we have 0b11110XXX 0b10XXXXXX 0b10XXXXXX 0b10XXXXXX - *utf8_output++ = char((word >> 18) | 0b11110000); - *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; - } - } - return utf8_output - start; -} -#endif // SIMDUTF_IMPLEMENTATION_FALLBACK || SIMDUTF_IMPLEMENTATION_PPC64 - -} // namespace utf32_to_utf8 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf32_to_utf8/valid_utf32_to_utf8.h */ -/* begin file src/scalar/utf32_to_utf8/utf32_to_utf8.h */ -#ifndef SIMDUTF_UTF32_TO_UTF8_H -#define SIMDUTF_UTF32_TO_UTF8_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf32_to_utf8 { - -inline size_t convert(const char32_t *buf, size_t len, char *utf8_output) { - const uint32_t *data = reinterpret_cast(buf); - size_t pos = 0; - char *start{utf8_output}; - while (pos < len) { - // try to convert the next block of 2 ASCII characters - if (pos + 2 <= - len) { // if it is safe to read 8 more bytes, check that they are ascii - uint64_t v; - ::memcpy(&v, data + pos, sizeof(uint64_t)); - if ((v & 0xFFFFFF80FFFFFF80) == 0) { - *utf8_output++ = char(buf[pos]); - *utf8_output++ = char(buf[pos + 1]); - pos += 2; - continue; - } - } - uint32_t word = data[pos]; - if ((word & 0xFFFFFF80) == 0) { - // will generate one UTF-8 bytes - *utf8_output++ = char(word); - pos++; - } else if ((word & 0xFFFFF800) == 0) { - // will generate two UTF-8 bytes - // we have 0b110XXXXX 0b10XXXXXX - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; - } else if ((word & 0xFFFF0000) == 0) { - // will generate three UTF-8 bytes - // we have 0b1110XXXX 0b10XXXXXX 0b10XXXXXX - if (word >= 0xD800 && word <= 0xDFFF) { - return 0; - } - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; - } else { - // will generate four UTF-8 bytes - // we have 0b11110XXX 0b10XXXXXX 0b10XXXXXX 0b10XXXXXX - if (word > 0x10FFFF) { - return 0; - } - *utf8_output++ = char((word >> 18) | 0b11110000); - *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; - } - } - return utf8_output - start; -} - -inline result convert_with_errors(const char32_t *buf, size_t len, - char *utf8_output) { - const uint32_t *data = reinterpret_cast(buf); - size_t pos = 0; - char *start{utf8_output}; - while (pos < len) { - // try to convert the next block of 2 ASCII characters - if (pos + 2 <= - len) { // if it is safe to read 8 more bytes, check that they are ascii - uint64_t v; - ::memcpy(&v, data + pos, sizeof(uint64_t)); - if ((v & 0xFFFFFF80FFFFFF80) == 0) { - *utf8_output++ = char(buf[pos]); - *utf8_output++ = char(buf[pos + 1]); - pos += 2; - continue; - } - } - uint32_t word = data[pos]; - if ((word & 0xFFFFFF80) == 0) { - // will generate one UTF-8 bytes - *utf8_output++ = char(word); - pos++; - } else if ((word & 0xFFFFF800) == 0) { - // will generate two UTF-8 bytes - // we have 0b110XXXXX 0b10XXXXXX - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; - } else if ((word & 0xFFFF0000) == 0) { - // will generate three UTF-8 bytes - // we have 0b1110XXXX 0b10XXXXXX 0b10XXXXXX - if (word >= 0xD800 && word <= 0xDFFF) { - return result(error_code::SURROGATE, pos); - } - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; - } else { - // will generate four UTF-8 bytes - // we have 0b11110XXX 0b10XXXXXX 0b10XXXXXX 0b10XXXXXX - if (word > 0x10FFFF) { - return result(error_code::TOO_LARGE, pos); - } - *utf8_output++ = char((word >> 18) | 0b11110000); - *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; - } - } - return result(error_code::SUCCESS, utf8_output - start); -} - -} // namespace utf32_to_utf8 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf32_to_utf8/utf32_to_utf8.h */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -/* begin file src/scalar/utf32_to_utf16/valid_utf32_to_utf16.h */ -#ifndef SIMDUTF_VALID_UTF32_TO_UTF16_H -#define SIMDUTF_VALID_UTF32_TO_UTF16_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf32_to_utf16 { - -template -inline size_t convert_valid(const char32_t *buf, size_t len, - char16_t *utf16_output) { - const uint32_t *data = reinterpret_cast(buf); - size_t pos = 0; - char16_t *start{utf16_output}; - while (pos < len) { - uint32_t word = data[pos]; - if ((word & 0xFFFF0000) == 0) { - // will not generate a surrogate pair - *utf16_output++ = !match_system(big_endian) - ? char16_t(u16_swap_bytes(uint16_t(word))) - : char16_t(word); - pos++; - } else { - // will generate a surrogate pair - word -= 0x10000; - uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); - uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); - if (!match_system(big_endian)) { - high_surrogate = u16_swap_bytes(high_surrogate); - low_surrogate = u16_swap_bytes(low_surrogate); - } - *utf16_output++ = char16_t(high_surrogate); - *utf16_output++ = char16_t(low_surrogate); - pos++; - } - } - return utf16_output - start; -} - -} // namespace utf32_to_utf16 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf32_to_utf16/valid_utf32_to_utf16.h */ -/* begin file src/scalar/utf32_to_utf16/utf32_to_utf16.h */ -#ifndef SIMDUTF_UTF32_TO_UTF16_H -#define SIMDUTF_UTF32_TO_UTF16_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf32_to_utf16 { - -template -inline size_t convert(const char32_t *buf, size_t len, char16_t *utf16_output) { - const uint32_t *data = reinterpret_cast(buf); - size_t pos = 0; - char16_t *start{utf16_output}; - while (pos < len) { - uint32_t word = data[pos]; - if ((word & 0xFFFF0000) == 0) { - if (word >= 0xD800 && word <= 0xDFFF) { - return 0; - } - // will not generate a surrogate pair - *utf16_output++ = !match_system(big_endian) - ? char16_t(u16_swap_bytes(uint16_t(word))) - : char16_t(word); - } else { - // will generate a surrogate pair - if (word > 0x10FFFF) { - return 0; - } - word -= 0x10000; - uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); - uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); - if (!match_system(big_endian)) { - high_surrogate = u16_swap_bytes(high_surrogate); - low_surrogate = u16_swap_bytes(low_surrogate); - } - *utf16_output++ = char16_t(high_surrogate); - *utf16_output++ = char16_t(low_surrogate); - } - pos++; - } - return utf16_output - start; -} - -template -inline result convert_with_errors(const char32_t *buf, size_t len, - char16_t *utf16_output) { - const uint32_t *data = reinterpret_cast(buf); - size_t pos = 0; - char16_t *start{utf16_output}; - while (pos < len) { - uint32_t word = data[pos]; - if ((word & 0xFFFF0000) == 0) { - if (word >= 0xD800 && word <= 0xDFFF) { - return result(error_code::SURROGATE, pos); - } - // will not generate a surrogate pair - *utf16_output++ = !match_system(big_endian) - ? char16_t(u16_swap_bytes(uint16_t(word))) - : char16_t(word); - } else { - // will generate a surrogate pair - if (word > 0x10FFFF) { - return result(error_code::TOO_LARGE, pos); - } - word -= 0x10000; - uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); - uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); - if (!match_system(big_endian)) { - high_surrogate = u16_swap_bytes(high_surrogate); - low_surrogate = u16_swap_bytes(low_surrogate); - } - *utf16_output++ = char16_t(high_surrogate); - *utf16_output++ = char16_t(low_surrogate); - } - pos++; - } - return result(error_code::SUCCESS, utf16_output - start); -} - -} // namespace utf32_to_utf16 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf32_to_utf16/utf32_to_utf16.h */ -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -/* begin file src/scalar/utf16_to_utf8/valid_utf16_to_utf8.h */ -#ifndef SIMDUTF_VALID_UTF16_TO_UTF8_H -#define SIMDUTF_VALID_UTF16_TO_UTF8_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf16_to_utf8 { - -template -inline size_t convert_valid(const char16_t *buf, size_t len, - char *utf8_output) { - const uint16_t *data = reinterpret_cast(buf); - size_t pos = 0; - char *start{utf8_output}; - while (pos < len) { - // try to convert the next block of 4 ASCII characters - if (pos + 4 <= - len) { // if it is safe to read 8 more bytes, check that they are ascii - uint64_t v; - ::memcpy(&v, data + pos, sizeof(uint64_t)); - if (!match_system(big_endian)) { - v = (v >> 8) | (v << (64 - 8)); - } - if ((v & 0xFF80FF80FF80FF80) == 0) { - size_t final_pos = pos + 4; - while (pos < final_pos) { - *utf8_output++ = !match_system(big_endian) - ? char(u16_swap_bytes(buf[pos])) - : char(buf[pos]); - pos++; - } - continue; - } - } - - uint16_t word = - !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; - if ((word & 0xFF80) == 0) { - // will generate one UTF-8 bytes - *utf8_output++ = char(word); - pos++; - } else if ((word & 0xF800) == 0) { - // will generate two UTF-8 bytes - // we have 0b110XXXXX 0b10XXXXXX - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; - } else if ((word & 0xF800) != 0xD800) { - // will generate three UTF-8 bytes - // we have 0b1110XXXX 0b10XXXXXX 0b10XXXXXX - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; - } else { - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - if (pos + 1 >= len) { - return 0; - } // minimal bound checking - uint16_t next_word = !match_system(big_endian) - ? u16_swap_bytes(data[pos + 1]) - : data[pos + 1]; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - uint32_t value = (diff << 10) + diff2 + 0x10000; - // will generate four UTF-8 bytes - // we have 0b11110XXX 0b10XXXXXX 0b10XXXXXX 0b10XXXXXX - *utf8_output++ = char((value >> 18) | 0b11110000); - *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((value & 0b111111) | 0b10000000); - pos += 2; - } - } - return utf8_output - start; -} - -} // namespace utf16_to_utf8 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf16_to_utf8/valid_utf16_to_utf8.h */ -/* begin file src/scalar/utf16_to_utf8/utf16_to_utf8.h */ -#ifndef SIMDUTF_UTF16_TO_UTF8_H -#define SIMDUTF_UTF16_TO_UTF8_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf16_to_utf8 { - -template -inline size_t convert(const char16_t *buf, size_t len, char *utf8_output) { - const uint16_t *data = reinterpret_cast(buf); - size_t pos = 0; - char *start{utf8_output}; - while (pos < len) { - // try to convert the next block of 8 bytes - if (pos + 4 <= - len) { // if it is safe to read 8 more bytes, check that they are ascii - uint64_t v; - ::memcpy(&v, data + pos, sizeof(uint64_t)); - if (!match_system(big_endian)) { - v = (v >> 8) | (v << (64 - 8)); - } - if ((v & 0xFF80FF80FF80FF80) == 0) { - size_t final_pos = pos + 4; - while (pos < final_pos) { - *utf8_output++ = !match_system(big_endian) - ? char(u16_swap_bytes(buf[pos])) - : char(buf[pos]); - pos++; - } - continue; - } - } - uint16_t word = - !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; - if ((word & 0xFF80) == 0) { - // will generate one UTF-8 bytes - *utf8_output++ = char(word); - pos++; - } else if ((word & 0xF800) == 0) { - // will generate two UTF-8 bytes - // we have 0b110XXXXX 0b10XXXXXX - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; - } else if ((word & 0xF800) != 0xD800) { - // will generate three UTF-8 bytes - // we have 0b1110XXXX 0b10XXXXXX 0b10XXXXXX - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; - } else { - // must be a surrogate pair - if (pos + 1 >= len) { - return 0; - } - uint16_t diff = uint16_t(word - 0xD800); - if (diff > 0x3FF) { - return 0; - } - uint16_t next_word = !match_system(big_endian) - ? u16_swap_bytes(data[pos + 1]) - : data[pos + 1]; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if (diff2 > 0x3FF) { - return 0; - } - uint32_t value = (diff << 10) + diff2 + 0x10000; - // will generate four UTF-8 bytes - // we have 0b11110XXX 0b10XXXXXX 0b10XXXXXX 0b10XXXXXX - *utf8_output++ = char((value >> 18) | 0b11110000); - *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((value & 0b111111) | 0b10000000); - pos += 2; - } - } - return utf8_output - start; -} - -template -inline result convert_with_errors(const char16_t *buf, size_t len, - char *utf8_output) { - const uint16_t *data = reinterpret_cast(buf); - size_t pos = 0; - char *start{utf8_output}; - while (pos < len) { - // try to convert the next block of 8 bytes - if (pos + 4 <= - len) { // if it is safe to read 8 more bytes, check that they are ascii - uint64_t v; - ::memcpy(&v, data + pos, sizeof(uint64_t)); - if (!match_system(big_endian)) - v = (v >> 8) | (v << (64 - 8)); - if ((v & 0xFF80FF80FF80FF80) == 0) { - size_t final_pos = pos + 4; - while (pos < final_pos) { - *utf8_output++ = !match_system(big_endian) - ? char(u16_swap_bytes(buf[pos])) - : char(buf[pos]); - pos++; - } - continue; - } - } - uint16_t word = - !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; - if ((word & 0xFF80) == 0) { - // will generate one UTF-8 bytes - *utf8_output++ = char(word); - pos++; - } else if ((word & 0xF800) == 0) { - // will generate two UTF-8 bytes - // we have 0b110XXXXX 0b10XXXXXX - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; - } else if ((word & 0xF800) != 0xD800) { - // will generate three UTF-8 bytes - // we have 0b1110XXXX 0b10XXXXXX 0b10XXXXXX - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - pos++; - } else { - // must be a surrogate pair - if (pos + 1 >= len) { - return result(error_code::SURROGATE, pos); - } - uint16_t diff = uint16_t(word - 0xD800); - if (diff > 0x3FF) { - return result(error_code::SURROGATE, pos); - } - uint16_t next_word = !match_system(big_endian) - ? u16_swap_bytes(data[pos + 1]) - : data[pos + 1]; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if (diff2 > 0x3FF) { - return result(error_code::SURROGATE, pos); - } - uint32_t value = (diff << 10) + diff2 + 0x10000; - // will generate four UTF-8 bytes - // we have 0b11110XXX 0b10XXXXXX 0b10XXXXXX 0b10XXXXXX - *utf8_output++ = char((value >> 18) | 0b11110000); - *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((value & 0b111111) | 0b10000000); - pos += 2; - } - } - return result(error_code::SUCCESS, utf8_output - start); -} - -} // namespace utf16_to_utf8 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf16_to_utf8/utf16_to_utf8.h */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -/* begin file src/scalar/utf16_to_utf32/valid_utf16_to_utf32.h */ -#ifndef SIMDUTF_VALID_UTF16_TO_UTF32_H -#define SIMDUTF_VALID_UTF16_TO_UTF32_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf16_to_utf32 { - -template -inline size_t convert_valid(const char16_t *buf, size_t len, - char32_t *utf32_output) { - const uint16_t *data = reinterpret_cast(buf); - size_t pos = 0; - char32_t *start{utf32_output}; - while (pos < len) { - uint16_t word = - !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; - if ((word & 0xF800) != 0xD800) { - // No surrogate pair, extend 16-bit word to 32-bit word - *utf32_output++ = char32_t(word); - pos++; - } else { - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - if (pos + 1 >= len) { - return 0; - } // minimal bound checking - uint16_t next_word = !match_system(big_endian) - ? u16_swap_bytes(data[pos + 1]) - : data[pos + 1]; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - uint32_t value = (diff << 10) + diff2 + 0x10000; - *utf32_output++ = char32_t(value); - pos += 2; - } - } - return utf32_output - start; -} - -} // namespace utf16_to_utf32 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf16_to_utf32/valid_utf16_to_utf32.h */ -/* begin file src/scalar/utf16_to_utf32/utf16_to_utf32.h */ -#ifndef SIMDUTF_UTF16_TO_UTF32_H -#define SIMDUTF_UTF16_TO_UTF32_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf16_to_utf32 { - -template -inline size_t convert(const char16_t *buf, size_t len, char32_t *utf32_output) { - const uint16_t *data = reinterpret_cast(buf); - size_t pos = 0; - char32_t *start{utf32_output}; - while (pos < len) { - uint16_t word = - !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; - if ((word & 0xF800) != 0xD800) { - // No surrogate pair, extend 16-bit word to 32-bit word - *utf32_output++ = char32_t(word); - pos++; - } else { - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - if (diff > 0x3FF) { - return 0; - } - if (pos + 1 >= len) { - return 0; - } // minimal bound checking - uint16_t next_word = !match_system(big_endian) - ? u16_swap_bytes(data[pos + 1]) - : data[pos + 1]; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if (diff2 > 0x3FF) { - return 0; - } - uint32_t value = (diff << 10) + diff2 + 0x10000; - *utf32_output++ = char32_t(value); - pos += 2; - } - } - return utf32_output - start; -} - -template -inline result convert_with_errors(const char16_t *buf, size_t len, - char32_t *utf32_output) { - const uint16_t *data = reinterpret_cast(buf); - size_t pos = 0; - char32_t *start{utf32_output}; - while (pos < len) { - uint16_t word = - !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; - if ((word & 0xF800) != 0xD800) { - // No surrogate pair, extend 16-bit word to 32-bit word - *utf32_output++ = char32_t(word); - pos++; - } else { - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - if (diff > 0x3FF) { - return result(error_code::SURROGATE, pos); - } - if (pos + 1 >= len) { - return result(error_code::SURROGATE, pos); - } // minimal bound checking - uint16_t next_word = !match_system(big_endian) - ? u16_swap_bytes(data[pos + 1]) - : data[pos + 1]; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if (diff2 > 0x3FF) { - return result(error_code::SURROGATE, pos); - } - uint32_t value = (diff << 10) + diff2 + 0x10000; - *utf32_output++ = char32_t(value); - pos += 2; - } - } - return result(error_code::SUCCESS, utf32_output - start); -} - -} // namespace utf16_to_utf32 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf16_to_utf32/utf16_to_utf32.h */ -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && \ - (SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_LATIN1) -/* begin file src/scalar/utf8_to_utf16/valid_utf8_to_utf16.h */ -#ifndef SIMDUTF_VALID_UTF8_TO_UTF16_H -#define SIMDUTF_VALID_UTF8_TO_UTF16_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf8_to_utf16 { - -template -inline size_t convert_valid(const char *buf, size_t len, - char16_t *utf16_output) { - const uint8_t *data = reinterpret_cast(buf); - size_t pos = 0; - char16_t *start{utf16_output}; - while (pos < len) { - // try to convert the next block of 8 ASCII bytes - if (pos + 8 <= - len) { // if it is safe to read 8 more bytes, check that they are ascii - uint64_t v; - ::memcpy(&v, data + pos, sizeof(uint64_t)); - if ((v & 0x8080808080808080) == 0) { - size_t final_pos = pos + 8; - while (pos < final_pos) { - *utf16_output++ = !match_system(big_endian) - ? char16_t(u16_swap_bytes(buf[pos])) - : char16_t(buf[pos]); - pos++; - } - continue; - } - } - uint8_t leading_byte = data[pos]; // leading byte - if (leading_byte < 0b10000000) { - // converting one ASCII byte !!! - *utf16_output++ = !match_system(big_endian) - ? char16_t(u16_swap_bytes(leading_byte)) - : char16_t(leading_byte); - pos++; - } else if ((leading_byte & 0b11100000) == 0b11000000) { - // We have a two-byte UTF-8, it should become - // a single UTF-16 word. - if (pos + 1 >= len) { - break; - } // minimal bound checking - uint16_t code_point = uint16_t(((leading_byte & 0b00011111) << 6) | - (data[pos + 1] & 0b00111111)); - if (!match_system(big_endian)) { - code_point = u16_swap_bytes(uint16_t(code_point)); - } - *utf16_output++ = char16_t(code_point); - pos += 2; - } else if ((leading_byte & 0b11110000) == 0b11100000) { - // We have a three-byte UTF-8, it should become - // a single UTF-16 word. - if (pos + 2 >= len) { - break; - } // minimal bound checking - uint16_t code_point = uint16_t(((leading_byte & 0b00001111) << 12) | - ((data[pos + 1] & 0b00111111) << 6) | - (data[pos + 2] & 0b00111111)); - if (!match_system(big_endian)) { - code_point = u16_swap_bytes(uint16_t(code_point)); - } - *utf16_output++ = char16_t(code_point); - pos += 3; - } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 - // we have a 4-byte UTF-8 word. - if (pos + 3 >= len) { - break; - } // minimal bound checking - uint32_t code_point = ((leading_byte & 0b00000111) << 18) | - ((data[pos + 1] & 0b00111111) << 12) | - ((data[pos + 2] & 0b00111111) << 6) | - (data[pos + 3] & 0b00111111); - code_point -= 0x10000; - uint16_t high_surrogate = uint16_t(0xD800 + (code_point >> 10)); - uint16_t low_surrogate = uint16_t(0xDC00 + (code_point & 0x3FF)); - if (!match_system(big_endian)) { - high_surrogate = u16_swap_bytes(high_surrogate); - low_surrogate = u16_swap_bytes(low_surrogate); - } - *utf16_output++ = char16_t(high_surrogate); - *utf16_output++ = char16_t(low_surrogate); - pos += 4; - } else { - // we may have a continuation but we do not do error checking - return 0; - } - } - return utf16_output - start; -} - -} // namespace utf8_to_utf16 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf8_to_utf16/valid_utf8_to_utf16.h */ -/* begin file src/scalar/utf8_to_utf16/utf8_to_utf16.h */ -#ifndef SIMDUTF_UTF8_TO_UTF16_H -#define SIMDUTF_UTF8_TO_UTF16_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf8_to_utf16 { - -template -inline size_t convert(const char *buf, size_t len, char16_t *utf16_output) { - const uint8_t *data = reinterpret_cast(buf); - size_t pos = 0; - char16_t *start{utf16_output}; - while (pos < len) { - // try to convert the next block of 16 ASCII bytes - if (pos + 16 <= - len) { // if it is safe to read 16 more bytes, check that they are ascii - uint64_t v1; - ::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | v2}; - if ((v & 0x8080808080808080) == 0) { - size_t final_pos = pos + 16; - while (pos < final_pos) { - *utf16_output++ = !match_system(big_endian) - ? char16_t(u16_swap_bytes(buf[pos])) - : char16_t(buf[pos]); - pos++; - } - continue; - } - } - - uint8_t leading_byte = data[pos]; // leading byte - if (leading_byte < 0b10000000) { - // converting one ASCII byte !!! - *utf16_output++ = !match_system(big_endian) - ? char16_t(u16_swap_bytes(leading_byte)) - : char16_t(leading_byte); - pos++; - } else if ((leading_byte & 0b11100000) == 0b11000000) { - // We have a two-byte UTF-8, it should become - // a single UTF-16 word. - if (pos + 1 >= len) { - return 0; - } // minimal bound checking - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return 0; - } - // range check - uint32_t code_point = - (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); - if (code_point < 0x80 || 0x7ff < code_point) { - return 0; - } - if (!match_system(big_endian)) { - code_point = uint32_t(u16_swap_bytes(uint16_t(code_point))); - } - *utf16_output++ = char16_t(code_point); - pos += 2; - } else if ((leading_byte & 0b11110000) == 0b11100000) { - // We have a three-byte UTF-8, it should become - // a single UTF-16 word. - if (pos + 2 >= len) { - return 0; - } // minimal bound checking - - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return 0; - } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { - return 0; - } - // range check - uint32_t code_point = (leading_byte & 0b00001111) << 12 | - (data[pos + 1] & 0b00111111) << 6 | - (data[pos + 2] & 0b00111111); - if (code_point < 0x800 || 0xffff < code_point || - (0xd7ff < code_point && code_point < 0xe000)) { - return 0; - } - if (!match_system(big_endian)) { - code_point = uint32_t(u16_swap_bytes(uint16_t(code_point))); - } - *utf16_output++ = char16_t(code_point); - pos += 3; - } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 - // we have a 4-byte UTF-8 word. - if (pos + 3 >= len) { - return 0; - } // minimal bound checking - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return 0; - } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { - return 0; - } - if ((data[pos + 3] & 0b11000000) != 0b10000000) { - return 0; - } - - // range check - uint32_t code_point = (leading_byte & 0b00000111) << 18 | - (data[pos + 1] & 0b00111111) << 12 | - (data[pos + 2] & 0b00111111) << 6 | - (data[pos + 3] & 0b00111111); - if (code_point <= 0xffff || 0x10ffff < code_point) { - return 0; - } - code_point -= 0x10000; - uint16_t high_surrogate = uint16_t(0xD800 + (code_point >> 10)); - uint16_t low_surrogate = uint16_t(0xDC00 + (code_point & 0x3FF)); - if (!match_system(big_endian)) { - high_surrogate = u16_swap_bytes(high_surrogate); - low_surrogate = u16_swap_bytes(low_surrogate); - } - *utf16_output++ = char16_t(high_surrogate); - *utf16_output++ = char16_t(low_surrogate); - pos += 4; - } else { - return 0; - } - } - return utf16_output - start; -} - -template -inline result convert_with_errors(const char *buf, size_t len, - char16_t *utf16_output) { - const uint8_t *data = reinterpret_cast(buf); - size_t pos = 0; - char16_t *start{utf16_output}; - while (pos < len) { - // try to convert the next block of 16 ASCII bytes - if (pos + 16 <= - len) { // if it is safe to read 16 more bytes, check that they are ascii - uint64_t v1; - ::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | v2}; - if ((v & 0x8080808080808080) == 0) { - size_t final_pos = pos + 16; - while (pos < final_pos) { - *utf16_output++ = !match_system(big_endian) - ? char16_t(u16_swap_bytes(buf[pos])) - : char16_t(buf[pos]); - pos++; - } - continue; - } - } - uint8_t leading_byte = data[pos]; // leading byte - if (leading_byte < 0b10000000) { - // converting one ASCII byte !!! - *utf16_output++ = !match_system(big_endian) - ? char16_t(u16_swap_bytes(leading_byte)) - : char16_t(leading_byte); - pos++; - } else if ((leading_byte & 0b11100000) == 0b11000000) { - // We have a two-byte UTF-8, it should become - // a single UTF-16 word. - if (pos + 1 >= len) { - return result(error_code::TOO_SHORT, pos); - } // minimal bound checking - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - // range check - uint32_t code_point = - (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); - if (code_point < 0x80 || 0x7ff < code_point) { - return result(error_code::OVERLONG, pos); - } - if (!match_system(big_endian)) { - code_point = uint32_t(u16_swap_bytes(uint16_t(code_point))); - } - *utf16_output++ = char16_t(code_point); - pos += 2; - } else if ((leading_byte & 0b11110000) == 0b11100000) { - // We have a three-byte UTF-8, it should become - // a single UTF-16 word. - if (pos + 2 >= len) { - return result(error_code::TOO_SHORT, pos); - } // minimal bound checking - - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - // range check - uint32_t code_point = (leading_byte & 0b00001111) << 12 | - (data[pos + 1] & 0b00111111) << 6 | - (data[pos + 2] & 0b00111111); - if ((code_point < 0x800) || (0xffff < code_point)) { - return result(error_code::OVERLONG, pos); - } - if (0xd7ff < code_point && code_point < 0xe000) { - return result(error_code::SURROGATE, pos); - } - if (!match_system(big_endian)) { - code_point = uint32_t(u16_swap_bytes(uint16_t(code_point))); - } - *utf16_output++ = char16_t(code_point); - pos += 3; - } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 - // we have a 4-byte UTF-8 word. - if (pos + 3 >= len) { - return result(error_code::TOO_SHORT, pos); - } // minimal bound checking - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - if ((data[pos + 3] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - - // range check - uint32_t code_point = (leading_byte & 0b00000111) << 18 | - (data[pos + 1] & 0b00111111) << 12 | - (data[pos + 2] & 0b00111111) << 6 | - (data[pos + 3] & 0b00111111); - if (code_point <= 0xffff) { - return result(error_code::OVERLONG, pos); - } - if (0x10ffff < code_point) { - return result(error_code::TOO_LARGE, pos); - } - code_point -= 0x10000; - uint16_t high_surrogate = uint16_t(0xD800 + (code_point >> 10)); - uint16_t low_surrogate = uint16_t(0xDC00 + (code_point & 0x3FF)); - if (!match_system(big_endian)) { - high_surrogate = u16_swap_bytes(high_surrogate); - low_surrogate = u16_swap_bytes(low_surrogate); - } - *utf16_output++ = char16_t(high_surrogate); - *utf16_output++ = char16_t(low_surrogate); - pos += 4; - } else { - // we either have too many continuation bytes or an invalid leading byte - if ((leading_byte & 0b11000000) == 0b10000000) { - return result(error_code::TOO_LONG, pos); - } else { - return result(error_code::HEADER_BITS, pos); - } - } - } - return result(error_code::SUCCESS, utf16_output - start); -} - -/** - * When rewind_and_convert_with_errors is called, we are pointing at 'buf' and - * we have up to len input bytes left, and we encountered some error. It is - * possible that the error is at 'buf' exactly, but it could also be in the - * previous bytes (up to 3 bytes back). - * - * prior_bytes indicates how many bytes, prior to 'buf' may belong to the - * current memory section and can be safely accessed. We prior_bytes to access - * safely up to three bytes before 'buf'. - * - * The caller is responsible to ensure that len > 0. - * - * If the error is believed to have occurred prior to 'buf', the count value - * contain in the result will be SIZE_T - 1, SIZE_T - 2, or SIZE_T - 3. - */ -template -inline result rewind_and_convert_with_errors(size_t prior_bytes, - const char *buf, size_t len, - char16_t *utf16_output) { - size_t extra_len{0}; - // We potentially need to go back in time and find a leading byte. - // In theory '3' would be sufficient, but sometimes the error can go back - // quite far. - size_t how_far_back = prior_bytes; - // size_t how_far_back = 3; // 3 bytes in the past + current position - // if(how_far_back >= prior_bytes) { how_far_back = prior_bytes; } - bool found_leading_bytes{false}; - // important: it is i <= how_far_back and not 'i < how_far_back'. - for (size_t i = 0; i <= how_far_back; i++) { - unsigned char byte = buf[-static_cast(i)]; - found_leading_bytes = ((byte & 0b11000000) != 0b10000000); - if (found_leading_bytes) { - if (i > 0 && byte < 128) { - // If we had to go back and the leading byte is ascii - // then we can stop right away. - return result(error_code::TOO_LONG, 0 - i + 1); - } - buf -= i; - extra_len = i; - break; - } - } - // - // It is possible for this function to return a negative count in its result. - // C++ Standard Section 18.1 defines size_t is in which is described - // in C Standard as . C Standard Section 4.1.5 defines size_t as an - // unsigned integral type of the result of the sizeof operator - // - // An unsigned type will simply wrap round arithmetically (well defined). - // - if (!found_leading_bytes) { - // If how_far_back == 3, we may have four consecutive continuation bytes!!! - // [....] [continuation] [continuation] [continuation] | [buf is - // continuation] Or we possibly have a stream that does not start with a - // leading byte. - return result(error_code::TOO_LONG, 0 - how_far_back); - } - result res = convert_with_errors(buf, len + extra_len, utf16_output); - if (res.error) { - res.count -= extra_len; - } - return res; -} - -} // namespace utf8_to_utf16 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf8_to_utf16/utf8_to_utf16.h */ -#endif // SIMDUTF_FEATURE_UTF8 && (SIMDUTF_FEATURE_UTF16 || - // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_LATIN1) - -#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_UTF32 -/* begin file src/scalar/utf8_to_utf32/valid_utf8_to_utf32.h */ -#ifndef SIMDUTF_VALID_UTF8_TO_UTF32_H -#define SIMDUTF_VALID_UTF8_TO_UTF32_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf8_to_utf32 { - -inline size_t convert_valid(const char *buf, size_t len, - char32_t *utf32_output) { - const uint8_t *data = reinterpret_cast(buf); - size_t pos = 0; - char32_t *start{utf32_output}; - while (pos < len) { - // try to convert the next block of 8 ASCII bytes - if (pos + 8 <= - len) { // if it is safe to read 8 more bytes, check that they are ascii - uint64_t v; - ::memcpy(&v, data + pos, sizeof(uint64_t)); - if ((v & 0x8080808080808080) == 0) { - size_t final_pos = pos + 8; - while (pos < final_pos) { - *utf32_output++ = char32_t(buf[pos]); - pos++; - } - continue; - } - } - uint8_t leading_byte = data[pos]; // leading byte - if (leading_byte < 0b10000000) { - // converting one ASCII byte !!! - *utf32_output++ = char32_t(leading_byte); - pos++; - } else if ((leading_byte & 0b11100000) == 0b11000000) { - // We have a two-byte UTF-8 - if (pos + 1 >= len) { - break; - } // minimal bound checking - *utf32_output++ = char32_t(((leading_byte & 0b00011111) << 6) | - (data[pos + 1] & 0b00111111)); - pos += 2; - } else if ((leading_byte & 0b11110000) == 0b11100000) { - // We have a three-byte UTF-8 - if (pos + 2 >= len) { - break; - } // minimal bound checking - *utf32_output++ = char32_t(((leading_byte & 0b00001111) << 12) | - ((data[pos + 1] & 0b00111111) << 6) | - (data[pos + 2] & 0b00111111)); - pos += 3; - } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 - // we have a 4-byte UTF-8 word. - if (pos + 3 >= len) { - break; - } // minimal bound checking - uint32_t code_word = ((leading_byte & 0b00000111) << 18) | - ((data[pos + 1] & 0b00111111) << 12) | - ((data[pos + 2] & 0b00111111) << 6) | - (data[pos + 3] & 0b00111111); - *utf32_output++ = char32_t(code_word); - pos += 4; - } else { - // we may have a continuation but we do not do error checking - return 0; - } - } - return utf32_output - start; -} - -} // namespace utf8_to_utf32 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf8_to_utf32/valid_utf8_to_utf32.h */ -/* begin file src/scalar/utf8_to_utf32/utf8_to_utf32.h */ -#ifndef SIMDUTF_UTF8_TO_UTF32_H -#define SIMDUTF_UTF8_TO_UTF32_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf8_to_utf32 { - -inline size_t convert(const char *buf, size_t len, char32_t *utf32_output) { - const uint8_t *data = reinterpret_cast(buf); - size_t pos = 0; - char32_t *start{utf32_output}; - while (pos < len) { - // try to convert the next block of 16 ASCII bytes - if (pos + 16 <= - len) { // if it is safe to read 16 more bytes, check that they are ascii - uint64_t v1; - ::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | v2}; - if ((v & 0x8080808080808080) == 0) { - size_t final_pos = pos + 16; - while (pos < final_pos) { - *utf32_output++ = char32_t(buf[pos]); - pos++; - } - continue; - } - } - uint8_t leading_byte = data[pos]; // leading byte - if (leading_byte < 0b10000000) { - // converting one ASCII byte !!! - *utf32_output++ = char32_t(leading_byte); - pos++; - } else if ((leading_byte & 0b11100000) == 0b11000000) { - // We have a two-byte UTF-8 - if (pos + 1 >= len) { - return 0; - } // minimal bound checking - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return 0; - } - // range check - uint32_t code_point = - (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); - if (code_point < 0x80 || 0x7ff < code_point) { - return 0; - } - *utf32_output++ = char32_t(code_point); - pos += 2; - } else if ((leading_byte & 0b11110000) == 0b11100000) { - // We have a three-byte UTF-8 - if (pos + 2 >= len) { - return 0; - } // minimal bound checking - - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return 0; - } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { - return 0; - } - // range check - uint32_t code_point = (leading_byte & 0b00001111) << 12 | - (data[pos + 1] & 0b00111111) << 6 | - (data[pos + 2] & 0b00111111); - if (code_point < 0x800 || 0xffff < code_point || - (0xd7ff < code_point && code_point < 0xe000)) { - return 0; - } - *utf32_output++ = char32_t(code_point); - pos += 3; - } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 - // we have a 4-byte UTF-8 word. - if (pos + 3 >= len) { - return 0; - } // minimal bound checking - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return 0; - } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { - return 0; - } - if ((data[pos + 3] & 0b11000000) != 0b10000000) { - return 0; - } - - // range check - uint32_t code_point = (leading_byte & 0b00000111) << 18 | - (data[pos + 1] & 0b00111111) << 12 | - (data[pos + 2] & 0b00111111) << 6 | - (data[pos + 3] & 0b00111111); - if (code_point <= 0xffff || 0x10ffff < code_point) { - return 0; - } - *utf32_output++ = char32_t(code_point); - pos += 4; - } else { - return 0; - } - } - return utf32_output - start; -} - -inline result convert_with_errors(const char *buf, size_t len, - char32_t *utf32_output) { - const uint8_t *data = reinterpret_cast(buf); - size_t pos = 0; - char32_t *start{utf32_output}; - while (pos < len) { - // try to convert the next block of 16 ASCII bytes - if (pos + 16 <= - len) { // if it is safe to read 16 more bytes, check that they are ascii - uint64_t v1; - ::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | v2}; - if ((v & 0x8080808080808080) == 0) { - size_t final_pos = pos + 16; - while (pos < final_pos) { - *utf32_output++ = char32_t(buf[pos]); - pos++; - } - continue; - } - } - uint8_t leading_byte = data[pos]; // leading byte - if (leading_byte < 0b10000000) { - // converting one ASCII byte !!! - *utf32_output++ = char32_t(leading_byte); - pos++; - } else if ((leading_byte & 0b11100000) == 0b11000000) { - // We have a two-byte UTF-8 - if (pos + 1 >= len) { - return result(error_code::TOO_SHORT, pos); - } // minimal bound checking - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - // range check - uint32_t code_point = - (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); - if (code_point < 0x80 || 0x7ff < code_point) { - return result(error_code::OVERLONG, pos); - } - *utf32_output++ = char32_t(code_point); - pos += 2; - } else if ((leading_byte & 0b11110000) == 0b11100000) { - // We have a three-byte UTF-8 - if (pos + 2 >= len) { - return result(error_code::TOO_SHORT, pos); - } // minimal bound checking - - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - // range check - uint32_t code_point = (leading_byte & 0b00001111) << 12 | - (data[pos + 1] & 0b00111111) << 6 | - (data[pos + 2] & 0b00111111); - if (code_point < 0x800 || 0xffff < code_point) { - return result(error_code::OVERLONG, pos); - } - if (0xd7ff < code_point && code_point < 0xe000) { - return result(error_code::SURROGATE, pos); - } - *utf32_output++ = char32_t(code_point); - pos += 3; - } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 - // we have a 4-byte UTF-8 word. - if (pos + 3 >= len) { - return result(error_code::TOO_SHORT, pos); - } // minimal bound checking - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - if ((data[pos + 3] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } - - // range check - uint32_t code_point = (leading_byte & 0b00000111) << 18 | - (data[pos + 1] & 0b00111111) << 12 | - (data[pos + 2] & 0b00111111) << 6 | - (data[pos + 3] & 0b00111111); - if (code_point <= 0xffff) { - return result(error_code::OVERLONG, pos); - } - if (0x10ffff < code_point) { - return result(error_code::TOO_LARGE, pos); - } - *utf32_output++ = char32_t(code_point); - pos += 4; - } else { - // we either have too many continuation bytes or an invalid leading byte - if ((leading_byte & 0b11000000) == 0b10000000) { - return result(error_code::TOO_LONG, pos); - } else { - return result(error_code::HEADER_BITS, pos); - } - } - } - return result(error_code::SUCCESS, utf32_output - start); -} - -/** - * When rewind_and_convert_with_errors is called, we are pointing at 'buf' and - * we have up to len input bytes left, and we encountered some error. It is - * possible that the error is at 'buf' exactly, but it could also be in the - * previous bytes location (up to 3 bytes back). - * - * prior_bytes indicates how many bytes, prior to 'buf' may belong to the - * current memory section and can be safely accessed. We prior_bytes to access - * safely up to three bytes before 'buf'. - * - * The caller is responsible to ensure that len > 0. - * - * If the error is believed to have occurred prior to 'buf', the count value - * contain in the result will be SIZE_T - 1, SIZE_T - 2, or SIZE_T - 3. - */ -inline result rewind_and_convert_with_errors(size_t prior_bytes, - const char *buf, size_t len, - char32_t *utf32_output) { - size_t extra_len{0}; - // We potentially need to go back in time and find a leading byte. - size_t how_far_back = 3; // 3 bytes in the past + current position - if (how_far_back > prior_bytes) { - how_far_back = prior_bytes; - } - bool found_leading_bytes{false}; - // important: it is i <= how_far_back and not 'i < how_far_back'. - for (size_t i = 0; i <= how_far_back; i++) { - unsigned char byte = buf[-static_cast(i)]; - found_leading_bytes = ((byte & 0b11000000) != 0b10000000); - if (found_leading_bytes) { - if (i > 0 && byte < 128) { - // If we had to go back and the leading byte is ascii - // then we can stop right away. - return result(error_code::TOO_LONG, 0 - i + 1); - } - buf -= i; - extra_len = i; - break; - } - } - // - // It is possible for this function to return a negative count in its result. - // C++ Standard Section 18.1 defines size_t is in which is described - // in C Standard as . C Standard Section 4.1.5 defines size_t as an - // unsigned integral type of the result of the sizeof operator - // - // An unsigned type will simply wrap round arithmetically (well defined). - // - if (!found_leading_bytes) { - // If how_far_back == 3, we may have four consecutive continuation bytes!!! - // [....] [continuation] [continuation] [continuation] | [buf is - // continuation] Or we possibly have a stream that does not start with a - // leading byte. - return result(error_code::TOO_LONG, 0 - how_far_back); - } - - result res = convert_with_errors(buf, len + extra_len, utf32_output); - if (res.error) { - res.count -= extra_len; - } - return res; -} - -} // namespace utf8_to_utf32 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf8_to_utf32/utf8_to_utf32.h */ -#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/scalar/latin1_to_utf8/latin1_to_utf8.h */ -#ifndef SIMDUTF_LATIN1_TO_UTF8_H -#define SIMDUTF_LATIN1_TO_UTF8_H - -namespace simdutf { -namespace scalar { -namespace { -namespace latin1_to_utf8 { - -inline size_t convert(const char *buf, size_t len, char *utf8_output) { - const unsigned char *data = reinterpret_cast(buf); - size_t pos = 0; - size_t utf8_pos = 0; - while (pos < len) { - // try to convert the next block of 16 ASCII bytes - if (pos + 16 <= - len) { // if it is safe to read 16 more bytes, check that they are ascii - uint64_t v1; - ::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | - v2}; // We are only interested in these bits: 1000 1000 1000 - // 1000, so it makes sense to concatenate everything - if ((v & 0x8080808080808080) == - 0) { // if NONE of these are set, e.g. all of them are zero, then - // everything is ASCII - size_t final_pos = pos + 16; - while (pos < final_pos) { - utf8_output[utf8_pos++] = char(buf[pos]); - pos++; - } - continue; - } - } - - unsigned char byte = data[pos]; - if ((byte & 0x80) == 0) { // if ASCII - // will generate one UTF-8 bytes - utf8_output[utf8_pos++] = char(byte); - pos++; - } else { - // will generate two UTF-8 bytes - utf8_output[utf8_pos++] = char((byte >> 6) | 0b11000000); - utf8_output[utf8_pos++] = char((byte & 0b111111) | 0b10000000); - pos++; - } - } - return utf8_pos; -} - -inline size_t convert_safe(const char *buf, size_t len, char *utf8_output, - size_t utf8_len) { - const unsigned char *data = reinterpret_cast(buf); - size_t pos = 0; - size_t skip_pos = 0; - size_t utf8_pos = 0; - while (pos < len && utf8_pos < utf8_len) { - // try to convert the next block of 16 ASCII bytes - if (pos >= skip_pos && pos + 16 <= len && - utf8_pos + 16 <= utf8_len) { // if it is safe to read 16 more bytes, - // check that they are ascii - uint64_t v1; - ::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | - v2}; // We are only interested in these bits: 1000 1000 1000 - // 1000, so it makes sense to concatenate everything - if ((v & 0x8080808080808080) == - 0) { // if NONE of these are set, e.g. all of them are zero, then - // everything is ASCII - ::memcpy(utf8_output + utf8_pos, buf + pos, 16); - utf8_pos += 16; - pos += 16; - } else { - // At least one of the next 16 bytes are not ASCII, we will process them - // one by one - skip_pos = pos + 16; - } - } else { - const auto byte = data[pos]; - if ((byte & 0x80) == 0) { // if ASCII - // will generate one UTF-8 bytes - utf8_output[utf8_pos++] = char(byte); - pos++; - } else if (utf8_pos + 2 <= utf8_len) { - // will generate two UTF-8 bytes - utf8_output[utf8_pos++] = char((byte >> 6) | 0b11000000); - utf8_output[utf8_pos++] = char((byte & 0b111111) | 0b10000000); - pos++; - } else { - break; - } - } - } - return utf8_pos; -} - -} // namespace latin1_to_utf8 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/latin1_to_utf8/latin1_to_utf8.h */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/scalar/latin1_to_utf16/latin1_to_utf16.h */ -#ifndef SIMDUTF_LATIN1_TO_UTF16_H -#define SIMDUTF_LATIN1_TO_UTF16_H - -namespace simdutf { -namespace scalar { -namespace { -namespace latin1_to_utf16 { - -template -inline size_t convert(const char *buf, size_t len, char16_t *utf16_output) { - const uint8_t *data = reinterpret_cast(buf); - size_t pos = 0; - char16_t *start{utf16_output}; - - while (pos < len) { - uint16_t word = - uint16_t(data[pos]); // extend Latin-1 char to 16-bit Unicode code point - *utf16_output++ = - char16_t(match_system(big_endian) ? word : u16_swap_bytes(word)); - pos++; - } - - return utf16_output - start; -} - -template -inline result convert_with_errors(const char *buf, size_t len, - char16_t *utf16_output) { - const uint8_t *data = reinterpret_cast(buf); - size_t pos = 0; - char16_t *start{utf16_output}; - - while (pos < len) { - uint16_t word = - uint16_t(data[pos]); // extend Latin-1 char to 16-bit Unicode code point - *utf16_output++ = - char16_t(match_system(big_endian) ? word : u16_swap_bytes(word)); - pos++; - } - - return result(error_code::SUCCESS, utf16_output - start); -} - -} // namespace latin1_to_utf16 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/latin1_to_utf16/latin1_to_utf16.h */ -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/scalar/latin1_to_utf32/latin1_to_utf32.h */ -#ifndef SIMDUTF_LATIN1_TO_UTF32_H -#define SIMDUTF_LATIN1_TO_UTF32_H - -namespace simdutf { -namespace scalar { -namespace { -namespace latin1_to_utf32 { - -inline size_t convert(const char *buf, size_t len, char32_t *utf32_output) { - const unsigned char *data = reinterpret_cast(buf); - char32_t *start{utf32_output}; - for (size_t i = 0; i < len; i++) { - *utf32_output++ = (char32_t)data[i]; - } - return utf32_output - start; -} - -} // namespace latin1_to_utf32 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/latin1_to_utf32/latin1_to_utf32.h */ -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/scalar/utf8_to_latin1/utf8_to_latin1.h */ -#ifndef SIMDUTF_UTF8_TO_LATIN1_H -#define SIMDUTF_UTF8_TO_LATIN1_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf8_to_latin1 { - -inline size_t convert(const char *buf, size_t len, char *latin_output) { - const uint8_t *data = reinterpret_cast(buf); - size_t pos = 0; - char *start{latin_output}; - - while (pos < len) { - // try to convert the next block of 16 ASCII bytes - if (pos + 16 <= - len) { // if it is safe to read 16 more bytes, check that they are ascii - uint64_t v1; - ::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | v2}; // We are only interested in these bits: 1000 1000 - // 1000 1000 .... etc - if ((v & 0x8080808080808080) == - 0) { // if NONE of these are set, e.g. all of them are zero, then - // everything is ASCII - size_t final_pos = pos + 16; - while (pos < final_pos) { - *latin_output++ = char(buf[pos]); - pos++; - } - continue; - } - } - - // suppose it is not an all ASCII byte sequence - uint8_t leading_byte = data[pos]; // leading byte - if (leading_byte < 0b10000000) { - // converting one ASCII byte !!! - *latin_output++ = char(leading_byte); - pos++; - } else if ((leading_byte & 0b11100000) == - 0b11000000) { // the first three bits indicate: - // We have a two-byte UTF-8 - if (pos + 1 >= len) { - return 0; - } // minimal bound checking - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return 0; - } // checks if the next byte is a valid continuation byte in UTF-8. A - // valid continuation byte starts with 10. - // range check - - uint32_t code_point = - (leading_byte & 0b00011111) << 6 | - (data[pos + 1] & - 0b00111111); // assembles the Unicode code point from the two bytes. - // It does this by discarding the leading 110 and 10 - // bits from the two bytes, shifting the remaining bits - // of the first byte, and then combining the results - // with a bitwise OR operation. - if (code_point < 0x80 || 0xFF < code_point) { - return 0; // We only care about the range 129-255 which is Non-ASCII - // latin1 characters. A code_point beneath 0x80 is invalid as - // it is already covered by bytes whose leading bit is zero. - } - *latin_output++ = char(code_point); - pos += 2; - } else { - return 0; - } - } - return latin_output - start; -} - -inline result convert_with_errors(const char *buf, size_t len, - char *latin_output) { - const uint8_t *data = reinterpret_cast(buf); - size_t pos = 0; - char *start{latin_output}; - - while (pos < len) { - // try to convert the next block of 16 ASCII bytes - if (pos + 16 <= - len) { // if it is safe to read 16 more bytes, check that they are ascii - uint64_t v1; - ::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | v2}; // We are only interested in these bits: 1000 1000 - // 1000 1000...etc - if ((v & 0x8080808080808080) == - 0) { // if NONE of these are set, e.g. all of them are zero, then - // everything is ASCII - size_t final_pos = pos + 16; - while (pos < final_pos) { - *latin_output++ = char(buf[pos]); - pos++; - } - continue; - } - } - // suppose it is not an all ASCII byte sequence - uint8_t leading_byte = data[pos]; // leading byte - if (leading_byte < 0b10000000) { - // converting one ASCII byte !!! - *latin_output++ = char(leading_byte); - pos++; - } else if ((leading_byte & 0b11100000) == - 0b11000000) { // the first three bits indicate: - // We have a two-byte UTF-8 - if (pos + 1 >= len) { - return result(error_code::TOO_SHORT, pos); - } // minimal bound checking - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return result(error_code::TOO_SHORT, pos); - } // checks if the next byte is a valid continuation byte in UTF-8. A - // valid continuation byte starts with 10. - // range check - - uint32_t code_point = - (leading_byte & 0b00011111) << 6 | - (data[pos + 1] & - 0b00111111); // assembles the Unicode code point from the two bytes. - // It does this by discarding the leading 110 and 10 - // bits from the two bytes, shifting the remaining bits - // of the first byte, and then combining the results - // with a bitwise OR operation. - if (code_point < 0x80) { - return result(error_code::OVERLONG, pos); - } - if (0xFF < code_point) { - return result(error_code::TOO_LARGE, pos); - } // We only care about the range 129-255 which is Non-ASCII latin1 - // characters - *latin_output++ = char(code_point); - pos += 2; - } else if ((leading_byte & 0b11110000) == 0b11100000) { - // We have a three-byte UTF-8 - return result(error_code::TOO_LARGE, pos); - } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000 - // we have a 4-byte UTF-8 word. - return result(error_code::TOO_LARGE, pos); - } else { - // we either have too many continuation bytes or an invalid leading byte - if ((leading_byte & 0b11000000) == 0b10000000) { - return result(error_code::TOO_LONG, pos); - } - - return result(error_code::HEADER_BITS, pos); - } - } - return result(error_code::SUCCESS, latin_output - start); -} - -inline result rewind_and_convert_with_errors(size_t prior_bytes, - const char *buf, size_t len, - char *latin1_output) { - size_t extra_len{0}; - // We potentially need to go back in time and find a leading byte. - // In theory '3' would be sufficient, but sometimes the error can go back - // quite far. - size_t how_far_back = prior_bytes; - // size_t how_far_back = 3; // 3 bytes in the past + current position - // if(how_far_back >= prior_bytes) { how_far_back = prior_bytes; } - bool found_leading_bytes{false}; - // important: it is i <= how_far_back and not 'i < how_far_back'. - for (size_t i = 0; i <= how_far_back; i++) { - unsigned char byte = buf[-static_cast(i)]; - found_leading_bytes = ((byte & 0b11000000) != 0b10000000); - if (found_leading_bytes) { - if (i > 0 && byte < 128) { - // If we had to go back and the leading byte is ascii - // then we can stop right away. - return result(error_code::TOO_LONG, 0 - i + 1); - } - buf -= i; - extra_len = i; - break; - } - } - // - // It is possible for this function to return a negative count in its result. - // C++ Standard Section 18.1 defines size_t is in which is described - // in C Standard as . C Standard Section 4.1.5 defines size_t as an - // unsigned integral type of the result of the sizeof operator - // - // An unsigned type will simply wrap round arithmetically (well defined). - // - if (!found_leading_bytes) { - // If how_far_back == 3, we may have four consecutive continuation bytes!!! - // [....] [continuation] [continuation] [continuation] | [buf is - // continuation] Or we possibly have a stream that does not start with a - // leading byte. - return result(error_code::TOO_LONG, 0 - how_far_back); - } - result res = convert_with_errors(buf, len + extra_len, latin1_output); - if (res.error) { - res.count -= extra_len; - } - return res; -} - -} // namespace utf8_to_latin1 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf8_to_latin1/utf8_to_latin1.h */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/scalar/utf16_to_latin1/utf16_to_latin1.h */ -#ifndef SIMDUTF_UTF16_TO_LATIN1_H -#define SIMDUTF_UTF16_TO_LATIN1_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf16_to_latin1 { - -#include // for std::memcpy - -template -inline size_t convert(const char16_t *buf, size_t len, char *latin_output) { - if (len == 0) { - return 0; - } - const uint16_t *data = reinterpret_cast(buf); - size_t pos = 0; - char *current_write = latin_output; - uint16_t word = 0; - uint16_t too_large = 0; - - while (pos < len) { - word = !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; - too_large |= word; - *current_write++ = char(word & 0xFF); - pos++; - } - if ((too_large & 0xFF00) != 0) { - return 0; - } - - return current_write - latin_output; -} - -template -inline result convert_with_errors(const char16_t *buf, size_t len, - char *latin_output) { - if (len == 0) { - return result(error_code::SUCCESS, 0); - } - const uint16_t *data = reinterpret_cast(buf); - size_t pos = 0; - char *start{latin_output}; - uint16_t word; - - while (pos < len) { - if (pos + 16 <= len) { // if it is safe to read 32 more bytes, check that - // they are Latin1 - uint64_t v1, v2, v3, v4; - ::memcpy(&v1, data + pos, sizeof(uint64_t)); - ::memcpy(&v2, data + pos + 4, sizeof(uint64_t)); - ::memcpy(&v3, data + pos + 8, sizeof(uint64_t)); - ::memcpy(&v4, data + pos + 12, sizeof(uint64_t)); - - if (!match_system(big_endian)) { - v1 = (v1 >> 8) | (v1 << (64 - 8)); - } - if (!match_system(big_endian)) { - v2 = (v2 >> 8) | (v2 << (64 - 8)); - } - if (!match_system(big_endian)) { - v3 = (v3 >> 8) | (v3 << (64 - 8)); - } - if (!match_system(big_endian)) { - v4 = (v4 >> 8) | (v4 << (64 - 8)); - } - - if (((v1 | v2 | v3 | v4) & 0xFF00FF00FF00FF00) == 0) { - size_t final_pos = pos + 16; - while (pos < final_pos) { - *latin_output++ = !match_system(big_endian) - ? char(u16_swap_bytes(data[pos])) - : char(data[pos]); - pos++; - } - continue; - } - } - word = !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; - if ((word & 0xFF00) == 0) { - *latin_output++ = char(word & 0xFF); - pos++; - } else { - return result(error_code::TOO_LARGE, pos); - } - } - return result(error_code::SUCCESS, latin_output - start); -} - -} // namespace utf16_to_latin1 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf16_to_latin1/utf16_to_latin1.h */ -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/scalar/utf32_to_latin1/utf32_to_latin1.h */ -#ifndef SIMDUTF_UTF32_TO_LATIN1_H -#define SIMDUTF_UTF32_TO_LATIN1_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf32_to_latin1 { - -inline size_t convert(const char32_t *buf, size_t len, char *latin1_output) { - const uint32_t *data = reinterpret_cast(buf); - char *start = latin1_output; - uint32_t utf32_char; - size_t pos = 0; - uint32_t too_large = 0; - - while (pos < len) { - utf32_char = (uint32_t)data[pos]; - too_large |= utf32_char; - *latin1_output++ = (char)(utf32_char & 0xFF); - pos++; - } - if ((too_large & 0xFFFFFF00) != 0) { - return 0; - } - return latin1_output - start; -} - -inline result convert_with_errors(const char32_t *buf, size_t len, - char *latin1_output) { - const uint32_t *data = reinterpret_cast(buf); - char *start{latin1_output}; - size_t pos = 0; - while (pos < len) { - if (pos + 2 <= - len) { // if it is safe to read 8 more bytes, check that they are Latin1 - uint64_t v; - ::memcpy(&v, data + pos, sizeof(uint64_t)); - if ((v & 0xFFFFFF00FFFFFF00) == 0) { - *latin1_output++ = char(buf[pos]); - *latin1_output++ = char(buf[pos + 1]); - pos += 2; - continue; - } - } - uint32_t utf32_char = data[pos]; - if ((utf32_char & 0xFFFFFF00) == - 0) { // Check if the character can be represented in Latin-1 - *latin1_output++ = (char)(utf32_char & 0xFF); - pos++; - } else { - return result(error_code::TOO_LARGE, pos); - }; - } - return result(error_code::SUCCESS, latin1_output - start); -} - -} // namespace utf32_to_latin1 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf32_to_latin1/utf32_to_latin1.h */ -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/scalar/utf8_to_latin1/valid_utf8_to_latin1.h */ -#ifndef SIMDUTF_VALID_UTF8_TO_LATIN1_H -#define SIMDUTF_VALID_UTF8_TO_LATIN1_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf8_to_latin1 { - -inline size_t convert_valid(const char *buf, size_t len, char *latin_output) { - const uint8_t *data = reinterpret_cast(buf); - - size_t pos = 0; - char *start{latin_output}; - - while (pos < len) { - // try to convert the next block of 16 ASCII bytes - if (pos + 16 <= - len) { // if it is safe to read 16 more bytes, check that they are ascii - uint64_t v1; - ::memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | - v2}; // We are only interested in these bits: 1000 1000 1000 - // 1000, so it makes sense to concatenate everything - if ((v & 0x8080808080808080) == - 0) { // if NONE of these are set, e.g. all of them are zero, then - // everything is ASCII - size_t final_pos = pos + 16; - while (pos < final_pos) { - *latin_output++ = char(buf[pos]); - pos++; - } - continue; - } - } - - // suppose it is not an all ASCII byte sequence - uint8_t leading_byte = data[pos]; // leading byte - if (leading_byte < 0b10000000) { - // converting one ASCII byte !!! - *latin_output++ = char(leading_byte); - pos++; - } else if ((leading_byte & 0b11100000) == - 0b11000000) { // the first three bits indicate: - // We have a two-byte UTF-8 - if (pos + 1 >= len) { - break; - } // minimal bound checking - if ((data[pos + 1] & 0b11000000) != 0b10000000) { - return 0; - } // checks if the next byte is a valid continuation byte in UTF-8. A - // valid continuation byte starts with 10. - // range check - - uint32_t code_point = - (leading_byte & 0b00011111) << 6 | - (data[pos + 1] & - 0b00111111); // assembles the Unicode code point from the two bytes. - // It does this by discarding the leading 110 and 10 - // bits from the two bytes, shifting the remaining bits - // of the first byte, and then combining the results - // with a bitwise OR operation. - *latin_output++ = char(code_point); - pos += 2; - } else { - // we may have a continuation but we do not do error checking - return 0; - } - } - return latin_output - start; -} - -} // namespace utf8_to_latin1 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf8_to_latin1/valid_utf8_to_latin1.h */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/scalar/utf16_to_latin1/valid_utf16_to_latin1.h */ -#ifndef SIMDUTF_VALID_UTF16_TO_LATIN1_H -#define SIMDUTF_VALID_UTF16_TO_LATIN1_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf16_to_latin1 { - -template -inline size_t convert_valid(const char16_t *buf, size_t len, - char *latin_output) { - const uint16_t *data = reinterpret_cast(buf); - size_t pos = 0; - char *start{latin_output}; - uint16_t word = 0; - - while (pos < len) { - word = !match_system(big_endian) ? u16_swap_bytes(data[pos]) : data[pos]; - *latin_output++ = char(word); - pos++; - } - - return latin_output - start; -} - -} // namespace utf16_to_latin1 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf16_to_latin1/valid_utf16_to_latin1.h */ -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/scalar/utf32_to_latin1/valid_utf32_to_latin1.h */ -#ifndef SIMDUTF_VALID_UTF32_TO_LATIN1_H -#define SIMDUTF_VALID_UTF32_TO_LATIN1_H - -namespace simdutf { -namespace scalar { -namespace { -namespace utf32_to_latin1 { - -inline size_t convert_valid(const char32_t *buf, size_t len, - char *latin1_output) { - const uint32_t *data = reinterpret_cast(buf); - char *start = latin1_output; - uint32_t utf32_char; - size_t pos = 0; - - while (pos < len) { - utf32_char = (uint32_t)data[pos]; - - if (pos + 2 <= - len) { // if it is safe to read 8 more bytes, check that they are Latin1 - uint64_t v; - ::memcpy(&v, data + pos, sizeof(uint64_t)); - if ((v & 0xFFFFFF00FFFFFF00) == 0) { - *latin1_output++ = char(buf[pos]); - *latin1_output++ = char(buf[pos + 1]); - pos += 2; - continue; - } else { - // output can not be represented in latin1 - return 0; - } - } - if ((utf32_char & 0xFFFFFF00) == 0) { - *latin1_output++ = char(utf32_char); - } else { - // output can not be represented in latin1 - return 0; - } - pos++; - } - return latin1_output - start; -} - -} // namespace utf32_to_latin1 -} // unnamed namespace -} // namespace scalar -} // namespace simdutf - -#endif -/* end file src/scalar/utf32_to_latin1/valid_utf32_to_latin1.h */ -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -/* begin file src/implementation.cpp */ -#include -#include -#include - -static_assert(sizeof(uint8_t) == sizeof(char), - "simdutf requires that uint8_t be a char"); -static_assert(sizeof(uint16_t) == sizeof(char16_t), - "simdutf requires that char16_t be 16 bits"); -static_assert(sizeof(uint32_t) == sizeof(char32_t), - "simdutf requires that char32_t be 32 bits"); -// next line is redundant, but it is kept to catch defective systems. -static_assert(CHAR_BIT == 8, "simdutf requires 8-bit bytes"); - -// Useful for debugging purposes -namespace simdutf { -namespace { - -template std::string toBinaryString(T b) { - std::string binary = ""; - T mask = T(1) << (sizeof(T) * CHAR_BIT - 1); - while (mask > 0) { - binary += ((b & mask) == 0) ? '0' : '1'; - mask >>= 1; - } - return binary; -} -} // namespace -} // namespace simdutf - -namespace simdutf { -bool implementation::supported_by_runtime_system() const { - uint32_t required_instruction_sets = this->required_instruction_sets(); - uint32_t supported_instruction_sets = - internal::detect_supported_architectures(); - return ((supported_instruction_sets & required_instruction_sets) == - required_instruction_sets); -} - -#if SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_warn_unused encoding_type implementation::autodetect_encoding( - const char *input, size_t length) const noexcept { - // If there is a BOM, then we trust it. - auto bom_encoding = simdutf::BOM::check_bom(input, length); - if (bom_encoding != encoding_type::unspecified) { - return bom_encoding; - } - // UTF8 is common, it includes ASCII, and is commonly represented - // without a BOM, so if it fits, go with that. Note that it is still - // possible to get it wrong, we are only 'guessing'. If some has UTF-16 - // data without a BOM, it could pass as UTF-8. - // - // An interesting twist might be to check for UTF-16 ASCII first (every - // other byte is zero). - if (validate_utf8(input, length)) { - return encoding_type::UTF8; - } - // The next most common encoding that might appear without BOM is probably - // UTF-16LE, so try that next. - if ((length % 2) == 0) { - // important: we need to divide by two - if (validate_utf16le(reinterpret_cast(input), - length / 2)) { - return encoding_type::UTF16_LE; - } - } - if ((length % 4) == 0) { - if (validate_utf32(reinterpret_cast(input), length / 4)) { - return encoding_type::UTF32_LE; - } - } - return encoding_type::unspecified; -} - - #ifdef SIMDUTF_INTERNAL_TESTS -std::vector -implementation::internal_tests() const { - return {}; -} - #endif -#endif // SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_BASE64 -simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( - const char *input, size_t length) const noexcept { - return scalar::base64::maximal_binary_length_from_base64(input, length); -} - -simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( - const char16_t *input, size_t length) const noexcept { - return scalar::base64::maximal_binary_length_from_base64(input, length); -} - -simdutf_warn_unused size_t implementation::base64_length_from_binary( - size_t length, base64_options options) const noexcept { - return scalar::base64::base64_length_from_binary(length, options); -} -#endif // SIMDUTF_FEATURE_BASE64 - -namespace internal { -// When there is a single implementation, we should not pay a price -// for dispatching to the best implementation. We should just use the -// one we have. This is a compile-time check. -#define SIMDUTF_SINGLE_IMPLEMENTATION \ - (SIMDUTF_IMPLEMENTATION_ICELAKE + SIMDUTF_IMPLEMENTATION_HASWELL + \ - SIMDUTF_IMPLEMENTATION_WESTMERE + SIMDUTF_IMPLEMENTATION_ARM64 + \ - SIMDUTF_IMPLEMENTATION_PPC64 + SIMDUTF_IMPLEMENTATION_LSX + \ - SIMDUTF_IMPLEMENTATION_LASX + SIMDUTF_IMPLEMENTATION_FALLBACK == \ - 1) - -// Static array of known implementations. We are hoping these get baked into the -// executable without requiring a static initializer. - -#if SIMDUTF_IMPLEMENTATION_ICELAKE -static const icelake::implementation *get_icelake_singleton() { - static const icelake::implementation icelake_singleton{}; - return &icelake_singleton; -} -#endif -#if SIMDUTF_IMPLEMENTATION_HASWELL -static const haswell::implementation *get_haswell_singleton() { - static const haswell::implementation haswell_singleton{}; - return &haswell_singleton; -} -#endif -#if SIMDUTF_IMPLEMENTATION_WESTMERE -static const westmere::implementation *get_westmere_singleton() { - static const westmere::implementation westmere_singleton{}; - return &westmere_singleton; -} -#endif -#if SIMDUTF_IMPLEMENTATION_ARM64 -static const arm64::implementation *get_arm64_singleton() { - static const arm64::implementation arm64_singleton{}; - return &arm64_singleton; -} -#endif -#if SIMDUTF_IMPLEMENTATION_PPC64 -static const ppc64::implementation *get_ppc64_singleton() { - static const ppc64::implementation ppc64_singleton{}; - return &ppc64_singleton; -} -#endif -#if SIMDUTF_IMPLEMENTATION_RVV -static const rvv::implementation *get_rvv_singleton() { - static const rvv::implementation rvv_singleton{}; - return &rvv_singleton; -} -#endif -#if SIMDUTF_IMPLEMENTATION_LSX -static const lsx::implementation *get_lsx_singleton() { - static const lsx::implementation lsx_singleton{}; - return &lsx_singleton; -} -#endif -#if SIMDUTF_IMPLEMENTATION_LASX -static const lasx::implementation *get_lasx_singleton() { - static const lasx::implementation lasx_singleton{}; - return &lasx_singleton; -} -#endif -#if SIMDUTF_IMPLEMENTATION_FALLBACK -static const fallback::implementation *get_fallback_singleton() { - static const fallback::implementation fallback_singleton{}; - return &fallback_singleton; -} -#endif - -#if SIMDUTF_SINGLE_IMPLEMENTATION -static const implementation *get_single_implementation() { - return - #if SIMDUTF_IMPLEMENTATION_ICELAKE - get_icelake_singleton(); - #endif - #if SIMDUTF_IMPLEMENTATION_HASWELL - get_haswell_singleton(); - #endif - #if SIMDUTF_IMPLEMENTATION_WESTMERE - get_westmere_singleton(); - #endif - #if SIMDUTF_IMPLEMENTATION_ARM64 - get_arm64_singleton(); - #endif - #if SIMDUTF_IMPLEMENTATION_PPC64 - get_ppc64_singleton(); - #endif - #if SIMDUTF_IMPLEMENTATION_LSX - get_lsx_singleton(); - #endif - #if SIMDUTF_IMPLEMENTATION_LASX - get_lasx_singleton(); - #endif - #if SIMDUTF_IMPLEMENTATION_FALLBACK - get_fallback_singleton(); - #endif -} -#endif - -/** - * @private Detects best supported implementation on first use, and sets it - */ -class detect_best_supported_implementation_on_first_use final - : public implementation { -public: - std::string name() const noexcept final { return set_best()->name(); } - std::string description() const noexcept final { - return set_best()->description(); - } - uint32_t required_instruction_sets() const noexcept final { - return set_best()->required_instruction_sets(); - } - -#if SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused int - detect_encodings(const char *input, size_t length) const noexcept override { - return set_best()->detect_encodings(input, length); - } -#endif // SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused bool - validate_utf8(const char *buf, size_t len) const noexcept final override { - return set_best()->validate_utf8(buf, len); - } -#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 - simdutf_warn_unused result validate_utf8_with_errors( - const char *buf, size_t len) const noexcept final override { - return set_best()->validate_utf8_with_errors(buf, len); - } -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_ASCII - simdutf_warn_unused bool - validate_ascii(const char *buf, size_t len) const noexcept final override { - return set_best()->validate_ascii(buf, len); - } - - simdutf_warn_unused result validate_ascii_with_errors( - const char *buf, size_t len) const noexcept final override { - return set_best()->validate_ascii_with_errors(buf, len); - } -#endif // SIMDUTF_FEATURE_ASCII - -#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused bool - validate_utf16le(const char16_t *buf, - size_t len) const noexcept final override { - return set_best()->validate_utf16le(buf, len); - } -#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused bool - validate_utf16be(const char16_t *buf, - size_t len) const noexcept final override { - return set_best()->validate_utf16be(buf, len); - } - - simdutf_warn_unused result validate_utf16le_with_errors( - const char16_t *buf, size_t len) const noexcept final override { - return set_best()->validate_utf16le_with_errors(buf, len); - } - - simdutf_warn_unused result validate_utf16be_with_errors( - const char16_t *buf, size_t len) const noexcept final override { - return set_best()->validate_utf16be_with_errors(buf, len); - } - void to_well_formed_utf16be(const char16_t *input, size_t len, - char16_t *output) const noexcept final override { - return set_best()->to_well_formed_utf16be(input, len, output); - } - void to_well_formed_utf16le(const char16_t *input, size_t len, - char16_t *output) const noexcept final override { - return set_best()->to_well_formed_utf16le(input, len, output); - } -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused bool - validate_utf32(const char32_t *buf, - size_t len) const noexcept final override { - return set_best()->validate_utf32(buf, len); - } -#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused result validate_utf32_with_errors( - const char32_t *buf, size_t len) const noexcept final override { - return set_best()->validate_utf32_with_errors(buf, len); - } -#endif // SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - convert_latin1_to_utf8(const char *buf, size_t len, - char *utf8_output) const noexcept final override { - return set_best()->convert_latin1_to_utf8(buf, len, utf8_output); - } -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_latin1_to_utf16le( - const char *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return set_best()->convert_latin1_to_utf16le(buf, len, utf16_output); - } - - simdutf_warn_unused size_t convert_latin1_to_utf16be( - const char *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return set_best()->convert_latin1_to_utf16be(buf, len, utf16_output); - } -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_latin1_to_utf32( - const char *buf, size_t len, - char32_t *latin1_output) const noexcept final override { - return set_best()->convert_latin1_to_utf32(buf, len, latin1_output); - } -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - convert_utf8_to_latin1(const char *buf, size_t len, - char *latin1_output) const noexcept final override { - return set_best()->convert_utf8_to_latin1(buf, len, latin1_output); - } - - simdutf_warn_unused result convert_utf8_to_latin1_with_errors( - const char *buf, size_t len, - char *latin1_output) const noexcept final override { - return set_best()->convert_utf8_to_latin1_with_errors(buf, len, - latin1_output); - } - - simdutf_warn_unused size_t convert_valid_utf8_to_latin1( - const char *buf, size_t len, - char *latin1_output) const noexcept final override { - return set_best()->convert_valid_utf8_to_latin1(buf, len, latin1_output); - } -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t convert_utf8_to_utf16le( - const char *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return set_best()->convert_utf8_to_utf16le(buf, len, utf16_output); - } - - simdutf_warn_unused size_t convert_utf8_to_utf16be( - const char *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return set_best()->convert_utf8_to_utf16be(buf, len, utf16_output); - } - - simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( - const char *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return set_best()->convert_utf8_to_utf16le_with_errors(buf, len, - utf16_output); - } - - simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( - const char *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return set_best()->convert_utf8_to_utf16be_with_errors(buf, len, - utf16_output); - } - - simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( - const char *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return set_best()->convert_valid_utf8_to_utf16le(buf, len, utf16_output); - } - - simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( - const char *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return set_best()->convert_valid_utf8_to_utf16be(buf, len, utf16_output); - } -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - convert_utf8_to_utf32(const char *buf, size_t len, - char32_t *utf32_output) const noexcept final override { - return set_best()->convert_utf8_to_utf32(buf, len, utf32_output); - } - - simdutf_warn_unused result convert_utf8_to_utf32_with_errors( - const char *buf, size_t len, - char32_t *utf32_output) const noexcept final override { - return set_best()->convert_utf8_to_utf32_with_errors(buf, len, - utf32_output); - } - - simdutf_warn_unused size_t convert_valid_utf8_to_utf32( - const char *buf, size_t len, - char32_t *utf32_output) const noexcept final override { - return set_best()->convert_valid_utf8_to_utf32(buf, len, utf32_output); - } -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - convert_utf16le_to_latin1(const char16_t *buf, size_t len, - char *latin1_output) const noexcept final override { - return set_best()->convert_utf16le_to_latin1(buf, len, latin1_output); - } - - simdutf_warn_unused size_t - convert_utf16be_to_latin1(const char16_t *buf, size_t len, - char *latin1_output) const noexcept final override { - return set_best()->convert_utf16be_to_latin1(buf, len, latin1_output); - } - - simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( - const char16_t *buf, size_t len, - char *latin1_output) const noexcept final override { - return set_best()->convert_utf16le_to_latin1_with_errors(buf, len, - latin1_output); - } - - simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( - const char16_t *buf, size_t len, - char *latin1_output) const noexcept final override { - return set_best()->convert_utf16be_to_latin1_with_errors(buf, len, - latin1_output); - } - - simdutf_warn_unused size_t convert_valid_utf16le_to_latin1( - const char16_t *buf, size_t len, - char *latin1_output) const noexcept final override { - return set_best()->convert_valid_utf16le_to_latin1(buf, len, latin1_output); - } - - simdutf_warn_unused size_t convert_valid_utf16be_to_latin1( - const char16_t *buf, size_t len, - char *latin1_output) const noexcept final override { - return set_best()->convert_valid_utf16be_to_latin1(buf, len, latin1_output); - } -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t - convert_utf16le_to_utf8(const char16_t *buf, size_t len, - char *utf8_output) const noexcept final override { - return set_best()->convert_utf16le_to_utf8(buf, len, utf8_output); - } - - simdutf_warn_unused size_t - convert_utf16be_to_utf8(const char16_t *buf, size_t len, - char *utf8_output) const noexcept final override { - return set_best()->convert_utf16be_to_utf8(buf, len, utf8_output); - } - - simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( - const char16_t *buf, size_t len, - char *utf8_output) const noexcept final override { - return set_best()->convert_utf16le_to_utf8_with_errors(buf, len, - utf8_output); - } - - simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( - const char16_t *buf, size_t len, - char *utf8_output) const noexcept final override { - return set_best()->convert_utf16be_to_utf8_with_errors(buf, len, - utf8_output); - } - - simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( - const char16_t *buf, size_t len, - char *utf8_output) const noexcept final override { - return set_best()->convert_valid_utf16le_to_utf8(buf, len, utf8_output); - } - - simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( - const char16_t *buf, size_t len, - char *utf8_output) const noexcept final override { - return set_best()->convert_valid_utf16be_to_utf8(buf, len, utf8_output); - } -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - convert_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) const noexcept final override { - return set_best()->convert_utf32_to_latin1(buf, len, latin1_output); - } - - simdutf_warn_unused result convert_utf32_to_latin1_with_errors( - const char32_t *buf, size_t len, - char *latin1_output) const noexcept final override { - return set_best()->convert_utf32_to_latin1_with_errors(buf, len, - latin1_output); - } - - simdutf_warn_unused size_t convert_valid_utf32_to_latin1( - const char32_t *buf, size_t len, - char *latin1_output) const noexcept final override { - return set_best()->convert_utf32_to_latin1(buf, len, latin1_output); - } -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - convert_utf32_to_utf8(const char32_t *buf, size_t len, - char *utf8_output) const noexcept final override { - return set_best()->convert_utf32_to_utf8(buf, len, utf8_output); - } - - simdutf_warn_unused result convert_utf32_to_utf8_with_errors( - const char32_t *buf, size_t len, - char *utf8_output) const noexcept final override { - return set_best()->convert_utf32_to_utf8_with_errors(buf, len, utf8_output); - } - - simdutf_warn_unused size_t - convert_valid_utf32_to_utf8(const char32_t *buf, size_t len, - char *utf8_output) const noexcept final override { - return set_best()->convert_valid_utf32_to_utf8(buf, len, utf8_output); - } -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t convert_utf32_to_utf16le( - const char32_t *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return set_best()->convert_utf32_to_utf16le(buf, len, utf16_output); - } - - simdutf_warn_unused size_t convert_utf32_to_utf16be( - const char32_t *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return set_best()->convert_utf32_to_utf16be(buf, len, utf16_output); - } - - simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return set_best()->convert_utf32_to_utf16le_with_errors(buf, len, - utf16_output); - } - - simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( - const char32_t *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return set_best()->convert_utf32_to_utf16be_with_errors(buf, len, - utf16_output); - } - - simdutf_warn_unused size_t convert_valid_utf32_to_utf16le( - const char32_t *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return set_best()->convert_valid_utf32_to_utf16le(buf, len, utf16_output); - } - - simdutf_warn_unused size_t convert_valid_utf32_to_utf16be( - const char32_t *buf, size_t len, - char16_t *utf16_output) const noexcept final override { - return set_best()->convert_valid_utf32_to_utf16be(buf, len, utf16_output); - } - - simdutf_warn_unused size_t convert_utf16le_to_utf32( - const char16_t *buf, size_t len, - char32_t *utf32_output) const noexcept final override { - return set_best()->convert_utf16le_to_utf32(buf, len, utf32_output); - } - - simdutf_warn_unused size_t convert_utf16be_to_utf32( - const char16_t *buf, size_t len, - char32_t *utf32_output) const noexcept final override { - return set_best()->convert_utf16be_to_utf32(buf, len, utf32_output); - } - - simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_output) const noexcept final override { - return set_best()->convert_utf16le_to_utf32_with_errors(buf, len, - utf32_output); - } - - simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( - const char16_t *buf, size_t len, - char32_t *utf32_output) const noexcept final override { - return set_best()->convert_utf16be_to_utf32_with_errors(buf, len, - utf32_output); - } - - simdutf_warn_unused size_t convert_valid_utf16le_to_utf32( - const char16_t *buf, size_t len, - char32_t *utf32_output) const noexcept final override { - return set_best()->convert_valid_utf16le_to_utf32(buf, len, utf32_output); - } - - simdutf_warn_unused size_t convert_valid_utf16be_to_utf32( - const char16_t *buf, size_t len, - char32_t *utf32_output) const noexcept final override { - return set_best()->convert_valid_utf16be_to_utf32(buf, len, utf32_output); - } -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 - void change_endianness_utf16(const char16_t *buf, size_t len, - char16_t *output) const noexcept final override { - set_best()->change_endianness_utf16(buf, len, output); - } - - simdutf_warn_unused size_t - count_utf16le(const char16_t *buf, size_t len) const noexcept final override { - return set_best()->count_utf16le(buf, len); - } - - simdutf_warn_unused size_t - count_utf16be(const char16_t *buf, size_t len) const noexcept final override { - return set_best()->count_utf16be(buf, len); - } -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 - simdutf_warn_unused size_t - count_utf8(const char *buf, size_t len) const noexcept final override { - return set_best()->count_utf8(buf, len); - } -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - latin1_length_from_utf8(const char *buf, size_t len) const noexcept override { - return set_best()->latin1_length_from_utf8(buf, len); - } -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - utf8_length_from_latin1(const char *buf, size_t len) const noexcept override { - return set_best()->utf8_length_from_latin1(buf, len); - } -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t utf8_length_from_utf16le( - const char16_t *buf, size_t len) const noexcept override { - return set_best()->utf8_length_from_utf16le(buf, len); - } - - simdutf_warn_unused size_t utf8_length_from_utf16be( - const char16_t *buf, size_t len) const noexcept override { - return set_best()->utf8_length_from_utf16be(buf, len); - } -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t utf32_length_from_utf16le( - const char16_t *buf, size_t len) const noexcept override { - return set_best()->utf32_length_from_utf16le(buf, len); - } - - simdutf_warn_unused size_t utf32_length_from_utf16be( - const char16_t *buf, size_t len) const noexcept override { - return set_best()->utf32_length_from_utf16be(buf, len); - } -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t - utf16_length_from_utf8(const char *buf, size_t len) const noexcept override { - return set_best()->utf16_length_from_utf8(buf, len); - } -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t utf8_length_from_utf32( - const char32_t *buf, size_t len) const noexcept override { - return set_best()->utf8_length_from_utf32(buf, len); - } -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t utf16_length_from_utf32( - const char32_t *buf, size_t len) const noexcept override { - return set_best()->utf16_length_from_utf32(buf, len); - } -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf32_length_from_utf8(const char *buf, size_t len) const noexcept override { - return set_best()->utf32_length_from_utf8(buf, len); - } -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_BASE64 - simdutf_warn_unused result base64_to_binary( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_handling_options = - last_chunk_handling_options::loose) const noexcept override { - return set_best()->base64_to_binary(input, length, output, options, - last_chunk_handling_options); - } - - simdutf_warn_unused full_result base64_to_binary_details( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_handling_options = - last_chunk_handling_options::loose) const noexcept override { - return set_best()->base64_to_binary_details(input, length, output, options, - last_chunk_handling_options); - } - - simdutf_warn_unused result base64_to_binary( - const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_handling_options = - last_chunk_handling_options::loose) const noexcept override { - return set_best()->base64_to_binary(input, length, output, options, - last_chunk_handling_options); - } - - simdutf_warn_unused full_result base64_to_binary_details( - const char16_t *input, size_t length, char *output, - base64_options options, - last_chunk_handling_options last_chunk_handling_options = - last_chunk_handling_options::loose) const noexcept override { - return set_best()->base64_to_binary_details(input, length, output, options, - last_chunk_handling_options); - } - - size_t binary_to_base64(const char *input, size_t length, char *output, - base64_options options) const noexcept override { - return set_best()->binary_to_base64(input, length, output, options); - } -#endif // SIMDUTF_FEATURE_BASE64 - - simdutf_really_inline - detect_best_supported_implementation_on_first_use() noexcept - : implementation("best_supported_detector", - "Detects the best supported implementation and sets it", - 0) {} - -private: - const implementation *set_best() const noexcept; -}; - -static_assert(std::is_trivially_destructible< - detect_best_supported_implementation_on_first_use>::value, - "detect_best_supported_implementation_on_first_use should be " - "trivially destructible"); - -static const std::initializer_list & -get_available_implementation_pointers() { - static const std::initializer_list - available_implementation_pointers{ -#if SIMDUTF_IMPLEMENTATION_ICELAKE - get_icelake_singleton(), -#endif -#if SIMDUTF_IMPLEMENTATION_HASWELL - get_haswell_singleton(), -#endif -#if SIMDUTF_IMPLEMENTATION_WESTMERE - get_westmere_singleton(), -#endif -#if SIMDUTF_IMPLEMENTATION_ARM64 - get_arm64_singleton(), -#endif -#if SIMDUTF_IMPLEMENTATION_PPC64 - get_ppc64_singleton(), -#endif -#if SIMDUTF_IMPLEMENTATION_RVV - get_rvv_singleton(), -#endif -#if SIMDUTF_IMPLEMENTATION_LSX - get_lsx_singleton(), -#endif -#if SIMDUTF_IMPLEMENTATION_LASX - get_lasx_singleton(), -#endif -#if SIMDUTF_IMPLEMENTATION_FALLBACK - get_fallback_singleton(), -#endif - }; // available_implementation_pointers - return available_implementation_pointers; -} - -// So we can return UNSUPPORTED_ARCHITECTURE from the parser when there is no -// support -class unsupported_implementation final : public implementation { -public: -#if SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused int detect_encodings(const char *, - size_t) const noexcept override { - return encoding_type::unspecified; - } -#endif // SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused bool validate_utf8(const char *, - size_t) const noexcept final override { - return false; // Just refuse to validate. Given that we have a fallback - // implementation - // it seems unlikely that unsupported_implementation will ever be used. If - // it is used, then it will flag all strings as invalid. The alternative is - // to return an error_code from which the user has to figure out whether the - // string is valid UTF-8... which seems like a lot of work just to handle - // the very unlikely case that we have an unsupported implementation. And, - // when it does happen (that we have an unsupported implementation), what - // are the chances that the programmer has a fallback? Given that *we* - // provide the fallback, it implies that the programmer would need a - // fallback for our fallback. - } -#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 - simdutf_warn_unused result validate_utf8_with_errors( - const char *, size_t) const noexcept final override { - return result(error_code::OTHER, 0); - } -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_ASCII - simdutf_warn_unused bool - validate_ascii(const char *, size_t) const noexcept final override { - return false; - } - - simdutf_warn_unused result validate_ascii_with_errors( - const char *, size_t) const noexcept final override { - return result(error_code::OTHER, 0); - } -#endif // SIMDUTF_FEATURE_ASCII - -#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused bool - validate_utf16le(const char16_t *, size_t) const noexcept final override { - return false; - } -#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused bool - validate_utf16be(const char16_t *, size_t) const noexcept final override { - return false; - } - - simdutf_warn_unused result validate_utf16le_with_errors( - const char16_t *, size_t) const noexcept final override { - return result(error_code::OTHER, 0); - } - - simdutf_warn_unused result validate_utf16be_with_errors( - const char16_t *, size_t) const noexcept final override { - return result(error_code::OTHER, 0); - } - void to_well_formed_utf16be(const char16_t *, size_t, - char16_t *) const noexcept final override {} - void to_well_formed_utf16le(const char16_t *, size_t, - char16_t *) const noexcept final override {} -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING - simdutf_warn_unused bool - validate_utf32(const char32_t *, size_t) const noexcept final override { - return false; - } -#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused result validate_utf32_with_errors( - const char32_t *, size_t) const noexcept final override { - return result(error_code::OTHER, 0); - } -#endif // SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_latin1_to_utf8( - const char *, size_t, char *) const noexcept final override { - return 0; - } -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_latin1_to_utf16le( - const char *, size_t, char16_t *) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t convert_latin1_to_utf16be( - const char *, size_t, char16_t *) const noexcept final override { - return 0; - } -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_latin1_to_utf32( - const char *, size_t, char32_t *) const noexcept final override { - return 0; - } -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_utf8_to_latin1( - const char *, size_t, char *) const noexcept final override { - return 0; - } - - simdutf_warn_unused result convert_utf8_to_latin1_with_errors( - const char *, size_t, char *) const noexcept final override { - return result(error_code::OTHER, 0); - } - - simdutf_warn_unused size_t convert_valid_utf8_to_latin1( - const char *, size_t, char *) const noexcept final override { - return 0; - } -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t convert_utf8_to_utf16le( - const char *, size_t, char16_t *) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t convert_utf8_to_utf16be( - const char *, size_t, char16_t *) const noexcept final override { - return 0; - } - - simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( - const char *, size_t, char16_t *) const noexcept final override { - return result(error_code::OTHER, 0); - } - - simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( - const char *, size_t, char16_t *) const noexcept final override { - return result(error_code::OTHER, 0); - } - - simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( - const char *, size_t, char16_t *) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( - const char *, size_t, char16_t *) const noexcept final override { - return 0; - } -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t convert_utf8_to_utf32( - const char *, size_t, char32_t *) const noexcept final override { - return 0; - } - - simdutf_warn_unused result convert_utf8_to_utf32_with_errors( - const char *, size_t, char32_t *) const noexcept final override { - return result(error_code::OTHER, 0); - } - - simdutf_warn_unused size_t convert_valid_utf8_to_utf32( - const char *, size_t, char32_t *) const noexcept final override { - return 0; - } -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_utf16le_to_latin1( - const char16_t *, size_t, char *) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t convert_utf16be_to_latin1( - const char16_t *, size_t, char *) const noexcept final override { - return 0; - } - - simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( - const char16_t *, size_t, char *) const noexcept final override { - return result(error_code::OTHER, 0); - } - - simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( - const char16_t *, size_t, char *) const noexcept final override { - return result(error_code::OTHER, 0); - } - - simdutf_warn_unused size_t convert_valid_utf16le_to_latin1( - const char16_t *, size_t, char *) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t convert_valid_utf16be_to_latin1( - const char16_t *, size_t, char *) const noexcept final override { - return 0; - } -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t convert_utf16le_to_utf8( - const char16_t *, size_t, char *) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t convert_utf16be_to_utf8( - const char16_t *, size_t, char *) const noexcept final override { - return 0; - } - - simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( - const char16_t *, size_t, char *) const noexcept final override { - return result(error_code::OTHER, 0); - } - - simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( - const char16_t *, size_t, char *) const noexcept final override { - return result(error_code::OTHER, 0); - } - - simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( - const char16_t *, size_t, char *) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( - const char16_t *, size_t, char *) const noexcept final override { - return 0; - } -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t convert_utf32_to_latin1( - const char32_t *, size_t, char *) const noexcept final override { - return 0; - } - - simdutf_warn_unused result convert_utf32_to_latin1_with_errors( - const char32_t *, size_t, char *) const noexcept final override { - return result(error_code::OTHER, 0); - } - - simdutf_warn_unused size_t convert_valid_utf32_to_latin1( - const char32_t *, size_t, char *) const noexcept final override { - return 0; - } -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t convert_utf32_to_utf8( - const char32_t *, size_t, char *) const noexcept final override { - return 0; - } - - simdutf_warn_unused result convert_utf32_to_utf8_with_errors( - const char32_t *, size_t, char *) const noexcept final override { - return result(error_code::OTHER, 0); - } - - simdutf_warn_unused size_t convert_valid_utf32_to_utf8( - const char32_t *, size_t, char *) const noexcept final override { - return 0; - } -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t convert_utf32_to_utf16le( - const char32_t *, size_t, char16_t *) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t convert_utf32_to_utf16be( - const char32_t *, size_t, char16_t *) const noexcept final override { - return 0; - } - - simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( - const char32_t *, size_t, char16_t *) const noexcept final override { - return result(error_code::OTHER, 0); - } - - simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( - const char32_t *, size_t, char16_t *) const noexcept final override { - return result(error_code::OTHER, 0); - } - - simdutf_warn_unused size_t convert_valid_utf32_to_utf16le( - const char32_t *, size_t, char16_t *) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t convert_valid_utf32_to_utf16be( - const char32_t *, size_t, char16_t *) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t convert_utf16le_to_utf32( - const char16_t *, size_t, char32_t *) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t convert_utf16be_to_utf32( - const char16_t *, size_t, char32_t *) const noexcept final override { - return 0; - } - - simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( - const char16_t *, size_t, char32_t *) const noexcept final override { - return result(error_code::OTHER, 0); - } - - simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( - const char16_t *, size_t, char32_t *) const noexcept final override { - return result(error_code::OTHER, 0); - } - - simdutf_warn_unused size_t convert_valid_utf16le_to_utf32( - const char16_t *, size_t, char32_t *) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t convert_valid_utf16be_to_utf32( - const char16_t *, size_t, char32_t *) const noexcept final override { - return 0; - } -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 - void change_endianness_utf16(const char16_t *, size_t, - char16_t *) const noexcept final override {} - - simdutf_warn_unused size_t - count_utf16le(const char16_t *, size_t) const noexcept final override { - return 0; - } - - simdutf_warn_unused size_t - count_utf16be(const char16_t *, size_t) const noexcept final override { - return 0; - } -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 - simdutf_warn_unused size_t count_utf8(const char *, - size_t) const noexcept final override { - return 0; - } -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - latin1_length_from_utf8(const char *, size_t) const noexcept override { - return 0; - } -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - simdutf_warn_unused size_t - utf8_length_from_latin1(const char *, size_t) const noexcept override { - return 0; - } -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t - utf8_length_from_utf16le(const char16_t *, size_t) const noexcept override { - return 0; - } - - simdutf_warn_unused size_t - utf8_length_from_utf16be(const char16_t *, size_t) const noexcept override { - return 0; - } -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf32_length_from_utf16le(const char16_t *, size_t) const noexcept override { - return 0; - } - - simdutf_warn_unused size_t - utf32_length_from_utf16be(const char16_t *, size_t) const noexcept override { - return 0; - } -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - simdutf_warn_unused size_t - utf16_length_from_utf8(const char *, size_t) const noexcept override { - return 0; - } -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf8_length_from_utf32(const char32_t *, size_t) const noexcept override { - return 0; - } -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf16_length_from_utf32(const char32_t *, size_t) const noexcept override { - return 0; - } -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - simdutf_warn_unused size_t - utf32_length_from_utf8(const char *, size_t) const noexcept override { - return 0; - } -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_BASE64 - simdutf_warn_unused result - base64_to_binary(const char *, size_t, char *, base64_options, - last_chunk_handling_options) const noexcept override { - return result(error_code::OTHER, 0); - } - - simdutf_warn_unused full_result base64_to_binary_details( - const char *, size_t, char *, base64_options, - last_chunk_handling_options) const noexcept override { - return full_result(error_code::OTHER, 0, 0); - } - - simdutf_warn_unused result - base64_to_binary(const char16_t *, size_t, char *, base64_options, - last_chunk_handling_options) const noexcept override { - return result(error_code::OTHER, 0); - } - - simdutf_warn_unused full_result base64_to_binary_details( - const char16_t *, size_t, char *, base64_options, - last_chunk_handling_options) const noexcept override { - return full_result(error_code::OTHER, 0, 0); - } - - size_t binary_to_base64(const char *, size_t, char *, - base64_options) const noexcept override { - return 0; - } -#endif // SIMDUTF_FEATURE_BASE64 - - unsupported_implementation() - : implementation("unsupported", - "Unsupported CPU (no detected SIMD instructions)", 0) {} -}; - -const unsupported_implementation *get_unsupported_singleton() { - static const unsupported_implementation unsupported_singleton{}; - return &unsupported_singleton; -} -static_assert(std::is_trivially_destructible::value, - "unsupported_singleton should be trivially destructible"); - -size_t available_implementation_list::size() const noexcept { - return internal::get_available_implementation_pointers().size(); -} -const implementation *const * -available_implementation_list::begin() const noexcept { - return internal::get_available_implementation_pointers().begin(); -} -const implementation *const * -available_implementation_list::end() const noexcept { - return internal::get_available_implementation_pointers().end(); -} -const implementation * -available_implementation_list::detect_best_supported() const noexcept { - // They are prelisted in priority order, so we just go down the list - uint32_t supported_instruction_sets = - internal::detect_supported_architectures(); - for (const implementation *impl : - internal::get_available_implementation_pointers()) { - uint32_t required_instruction_sets = impl->required_instruction_sets(); - if ((supported_instruction_sets & required_instruction_sets) == - required_instruction_sets) { - return impl; - } - } - return get_unsupported_singleton(); // this should never happen? -} - -const implementation * -detect_best_supported_implementation_on_first_use::set_best() const noexcept { - SIMDUTF_PUSH_DISABLE_WARNINGS - SIMDUTF_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: - // manually verified this is safe - char *force_implementation_name = getenv("SIMDUTF_FORCE_IMPLEMENTATION"); - SIMDUTF_POP_DISABLE_WARNINGS - - if (force_implementation_name) { - auto force_implementation = - get_available_implementations()[force_implementation_name]; - if (force_implementation) { - return get_active_implementation() = force_implementation; - } else { - // Note: abort() and stderr usage within the library is forbidden. - return get_active_implementation() = get_unsupported_singleton(); - } - } - return get_active_implementation() = - get_available_implementations().detect_best_supported(); -} - -} // namespace internal - -/** - * The list of available implementations compiled into simdutf. - */ -SIMDUTF_DLLIMPORTEXPORT const internal::available_implementation_list & -get_available_implementations() { - static const internal::available_implementation_list - available_implementations{}; - return available_implementations; -} - -/** - * The active implementation. - */ -SIMDUTF_DLLIMPORTEXPORT internal::atomic_ptr & -get_active_implementation() { -#if SIMDUTF_SINGLE_IMPLEMENTATION - // skip runtime detection - static internal::atomic_ptr active_implementation{ - internal::get_single_implementation()}; - return active_implementation; -#else - static const internal::detect_best_supported_implementation_on_first_use - detect_best_supported_implementation_on_first_use_singleton; - static internal::atomic_ptr active_implementation{ - &detect_best_supported_implementation_on_first_use_singleton}; - return active_implementation; -#endif -} - -#if SIMDUTF_SINGLE_IMPLEMENTATION -const implementation *get_default_implementation() { - return internal::get_single_implementation(); -} -#else -internal::atomic_ptr &get_default_implementation() { - return get_active_implementation(); -} -#endif -#define SIMDUTF_GET_CURRENT_IMPLEMENTION - -#if SIMDUTF_FEATURE_UTF8 -simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept { - return get_default_implementation()->validate_utf8(buf, len); -} -simdutf_warn_unused result validate_utf8_with_errors(const char *buf, - size_t len) noexcept { - return get_default_implementation()->validate_utf8_with_errors(buf, len); -} -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_ASCII -simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) noexcept { - return get_default_implementation()->validate_ascii(buf, len); -} -simdutf_warn_unused result validate_ascii_with_errors(const char *buf, - size_t len) noexcept { - return get_default_implementation()->validate_ascii_with_errors(buf, len); -} -#endif // SIMDUTF_FEATURE_ASCII - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t convert_utf8_to_utf16( - const char *input, size_t length, char16_t *utf16_output) noexcept { - #if SIMDUTF_IS_BIG_ENDIAN - return convert_utf8_to_utf16be(input, length, utf16_output); - #else - return convert_utf8_to_utf16le(input, length, utf16_output); - #endif -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t convert_latin1_to_utf8(const char *buf, size_t len, - char *utf8_output) noexcept { - return get_default_implementation()->convert_latin1_to_utf8(buf, len, - utf8_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t convert_latin1_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) noexcept { - return get_default_implementation()->convert_latin1_to_utf16le(buf, len, - utf16_output); -} -simdutf_warn_unused size_t convert_latin1_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) noexcept { - return get_default_implementation()->convert_latin1_to_utf16be(buf, len, - utf16_output); -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t convert_latin1_to_utf32( - const char *buf, size_t len, char32_t *latin1_output) noexcept { - return get_default_implementation()->convert_latin1_to_utf32(buf, len, - latin1_output); -} -simdutf_warn_unused size_t latin1_length_from_utf32(size_t length) noexcept { - return length; -} -simdutf_warn_unused size_t utf32_length_from_latin1(size_t length) noexcept { - return length; -} -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t convert_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) noexcept { - return get_default_implementation()->convert_utf8_to_latin1(buf, len, - latin1_output); -} -simdutf_warn_unused result convert_utf8_to_latin1_with_errors( - const char *buf, size_t len, char *latin1_output) noexcept { - return get_default_implementation()->convert_utf8_to_latin1_with_errors( - buf, len, latin1_output); -} -simdutf_warn_unused size_t convert_valid_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) noexcept { - return get_default_implementation()->convert_valid_utf8_to_latin1( - buf, len, latin1_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t convert_utf8_to_utf16le( - const char *input, size_t length, char16_t *utf16_output) noexcept { - return get_default_implementation()->convert_utf8_to_utf16le(input, length, - utf16_output); -} -simdutf_warn_unused size_t convert_utf8_to_utf16be( - const char *input, size_t length, char16_t *utf16_output) noexcept { - return get_default_implementation()->convert_utf8_to_utf16be(input, length, - utf16_output); -} -simdutf_warn_unused result convert_utf8_to_utf16_with_errors( - const char *input, size_t length, char16_t *utf16_output) noexcept { - #if SIMDUTF_IS_BIG_ENDIAN - return convert_utf8_to_utf16be_with_errors(input, length, utf16_output); - #else - return convert_utf8_to_utf16le_with_errors(input, length, utf16_output); - #endif -} -simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( - const char *input, size_t length, char16_t *utf16_output) noexcept { - return get_default_implementation()->convert_utf8_to_utf16le_with_errors( - input, length, utf16_output); -} -simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( - const char *input, size_t length, char16_t *utf16_output) noexcept { - return get_default_implementation()->convert_utf8_to_utf16be_with_errors( - input, length, utf16_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t convert_utf8_to_utf32( - const char *input, size_t length, char32_t *utf32_output) noexcept { - return get_default_implementation()->convert_utf8_to_utf32(input, length, - utf32_output); -} -simdutf_warn_unused result convert_utf8_to_utf32_with_errors( - const char *input, size_t length, char32_t *utf32_output) noexcept { - return get_default_implementation()->convert_utf8_to_utf32_with_errors( - input, length, utf32_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused bool validate_utf16(const char16_t *buf, - size_t len) noexcept { - #if SIMDUTF_IS_BIG_ENDIAN - return validate_utf16be(buf, len); - #else - return validate_utf16le(buf, len); - #endif -} -void to_well_formed_utf16be(const char16_t *input, size_t len, - char16_t *output) noexcept { - return get_default_implementation()->to_well_formed_utf16be(input, len, - output); -} -void to_well_formed_utf16le(const char16_t *input, size_t len, - char16_t *output) noexcept { - return get_default_implementation()->to_well_formed_utf16le(input, len, - output); -} -void to_well_formed_utf16(const char16_t *input, size_t len, - char16_t *output) noexcept { - #if SIMDUTF_IS_BIG_ENDIAN - to_well_formed_utf16be(input, len, output); - #else - to_well_formed_utf16le(input, len, output); - #endif -} -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_warn_unused bool validate_utf16le(const char16_t *buf, - size_t len) noexcept { - return get_default_implementation()->validate_utf16le(buf, len); -} -#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused bool validate_utf16be(const char16_t *buf, - size_t len) noexcept { - return get_default_implementation()->validate_utf16be(buf, len); -} -simdutf_warn_unused result validate_utf16_with_errors(const char16_t *buf, - size_t len) noexcept { - #if SIMDUTF_IS_BIG_ENDIAN - return validate_utf16be_with_errors(buf, len); - #else - return validate_utf16le_with_errors(buf, len); - #endif -} -simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, - size_t len) noexcept { - return get_default_implementation()->validate_utf16le_with_errors(buf, len); -} -simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, - size_t len) noexcept { - return get_default_implementation()->validate_utf16be_with_errors(buf, len); -} -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused bool validate_utf32(const char32_t *buf, - size_t len) noexcept { - return get_default_implementation()->validate_utf32(buf, len); -} -simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, - size_t len) noexcept { - return get_default_implementation()->validate_utf32_with_errors(buf, len); -} -#endif // SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t convert_valid_utf8_to_utf16( - const char *input, size_t length, char16_t *utf16_buffer) noexcept { - #if SIMDUTF_IS_BIG_ENDIAN - return convert_valid_utf8_to_utf16be(input, length, utf16_buffer); - #else - return convert_valid_utf8_to_utf16le(input, length, utf16_buffer); - #endif -} -simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( - const char *input, size_t length, char16_t *utf16_buffer) noexcept { - return get_default_implementation()->convert_valid_utf8_to_utf16le( - input, length, utf16_buffer); -} -simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( - const char *input, size_t length, char16_t *utf16_buffer) noexcept { - return get_default_implementation()->convert_valid_utf8_to_utf16be( - input, length, utf16_buffer); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t convert_valid_utf8_to_utf32( - const char *input, size_t length, char32_t *utf32_buffer) noexcept { - return get_default_implementation()->convert_valid_utf8_to_utf32( - input, length, utf32_buffer); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t convert_utf16_to_utf8(const char16_t *buf, - size_t len, - char *utf8_buffer) noexcept { - #if SIMDUTF_IS_BIG_ENDIAN - return convert_utf16be_to_utf8(buf, len, utf8_buffer); - #else - return convert_utf16le_to_utf8(buf, len, utf8_buffer); - #endif -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t convert_utf16_to_latin1( - const char16_t *buf, size_t len, char *latin1_buffer) noexcept { - #if SIMDUTF_IS_BIG_ENDIAN - return convert_utf16be_to_latin1(buf, len, latin1_buffer); - #else - return convert_utf16le_to_latin1(buf, len, latin1_buffer); - #endif -} -simdutf_warn_unused size_t convert_latin1_to_utf16( - const char *buf, size_t len, char16_t *utf16_output) noexcept { - #if SIMDUTF_IS_BIG_ENDIAN - return convert_latin1_to_utf16be(buf, len, utf16_output); - #else - return convert_latin1_to_utf16le(buf, len, utf16_output); - #endif -} -simdutf_warn_unused size_t convert_utf16be_to_latin1( - const char16_t *buf, size_t len, char *latin1_buffer) noexcept { - return get_default_implementation()->convert_utf16be_to_latin1(buf, len, - latin1_buffer); -} -simdutf_warn_unused size_t convert_utf16le_to_latin1( - const char16_t *buf, size_t len, char *latin1_buffer) noexcept { - return get_default_implementation()->convert_utf16le_to_latin1(buf, len, - latin1_buffer); -} -simdutf_warn_unused size_t convert_valid_utf16be_to_latin1( - const char16_t *buf, size_t len, char *latin1_buffer) noexcept { - return get_default_implementation()->convert_valid_utf16be_to_latin1( - buf, len, latin1_buffer); -} -simdutf_warn_unused size_t convert_valid_utf16le_to_latin1( - const char16_t *buf, size_t len, char *latin1_buffer) noexcept { - return get_default_implementation()->convert_valid_utf16le_to_latin1( - buf, len, latin1_buffer); -} -simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( - const char16_t *buf, size_t len, char *latin1_buffer) noexcept { - return get_default_implementation()->convert_utf16le_to_latin1_with_errors( - buf, len, latin1_buffer); -} -simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( - const char16_t *buf, size_t len, char *latin1_buffer) noexcept { - return get_default_implementation()->convert_utf16be_to_latin1_with_errors( - buf, len, latin1_buffer); -} -simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) noexcept { - return length; -} -simdutf_warn_unused size_t utf16_length_from_latin1(size_t length) noexcept { - return length; -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t *buf, - size_t len, - char *utf8_buffer) noexcept { - return get_default_implementation()->convert_utf16le_to_utf8(buf, len, - utf8_buffer); -} -simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t *buf, - size_t len, - char *utf8_buffer) noexcept { - return get_default_implementation()->convert_utf16be_to_utf8(buf, len, - utf8_buffer); -} -simdutf_warn_unused result convert_utf16_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) noexcept { - #if SIMDUTF_IS_BIG_ENDIAN - return convert_utf16be_to_utf8_with_errors(buf, len, utf8_buffer); - #else - return convert_utf16le_to_utf8_with_errors(buf, len, utf8_buffer); - #endif -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused result convert_utf16_to_latin1_with_errors( - const char16_t *buf, size_t len, char *latin1_buffer) noexcept { - #if SIMDUTF_IS_BIG_ENDIAN - return convert_utf16be_to_latin1_with_errors(buf, len, latin1_buffer); - #else - return convert_utf16le_to_latin1_with_errors(buf, len, latin1_buffer); - #endif -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) noexcept { - return get_default_implementation()->convert_utf16le_to_utf8_with_errors( - buf, len, utf8_buffer); -} -simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_buffer) noexcept { - return get_default_implementation()->convert_utf16be_to_utf8_with_errors( - buf, len, utf8_buffer); -} -simdutf_warn_unused size_t convert_valid_utf16_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) noexcept { - #if SIMDUTF_IS_BIG_ENDIAN - return convert_valid_utf16be_to_utf8(buf, len, utf8_buffer); - #else - return convert_valid_utf16le_to_utf8(buf, len, utf8_buffer); - #endif -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t convert_valid_utf16_to_latin1( - const char16_t *buf, size_t len, char *latin1_buffer) noexcept { - #if SIMDUTF_IS_BIG_ENDIAN - return convert_valid_utf16be_to_latin1(buf, len, latin1_buffer); - #else - return convert_valid_utf16le_to_latin1(buf, len, latin1_buffer); - #endif -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) noexcept { - return get_default_implementation()->convert_valid_utf16le_to_utf8( - buf, len, utf8_buffer); -} -simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_buffer) noexcept { - return get_default_implementation()->convert_valid_utf16be_to_utf8( - buf, len, utf8_buffer); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t *buf, - size_t len, - char *utf8_buffer) noexcept { - return get_default_implementation()->convert_utf32_to_utf8(buf, len, - utf8_buffer); -} -simdutf_warn_unused result convert_utf32_to_utf8_with_errors( - const char32_t *buf, size_t len, char *utf8_buffer) noexcept { - return get_default_implementation()->convert_utf32_to_utf8_with_errors( - buf, len, utf8_buffer); -} -simdutf_warn_unused size_t convert_valid_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_buffer) noexcept { - return get_default_implementation()->convert_valid_utf32_to_utf8(buf, len, - utf8_buffer); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t convert_utf32_to_utf16( - const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { - #if SIMDUTF_IS_BIG_ENDIAN - return convert_utf32_to_utf16be(buf, len, utf16_buffer); - #else - return convert_utf32_to_utf16le(buf, len, utf16_buffer); - #endif -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t convert_utf32_to_latin1( - const char32_t *input, size_t length, char *latin1_output) noexcept { - return get_default_implementation()->convert_utf32_to_latin1(input, length, - latin1_output); -} -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t convert_utf32_to_utf16le( - const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { - return get_default_implementation()->convert_utf32_to_utf16le(buf, len, - utf16_buffer); -} -simdutf_warn_unused size_t convert_utf32_to_utf16be( - const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { - return get_default_implementation()->convert_utf32_to_utf16be(buf, len, - utf16_buffer); -} -simdutf_warn_unused result convert_utf32_to_utf16_with_errors( - const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { - #if SIMDUTF_IS_BIG_ENDIAN - return convert_utf32_to_utf16be_with_errors(buf, len, utf16_buffer); - #else - return convert_utf32_to_utf16le_with_errors(buf, len, utf16_buffer); - #endif -} -simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( - const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { - return get_default_implementation()->convert_utf32_to_utf16le_with_errors( - buf, len, utf16_buffer); -} -simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( - const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { - return get_default_implementation()->convert_utf32_to_utf16be_with_errors( - buf, len, utf16_buffer); -} -simdutf_warn_unused size_t convert_valid_utf32_to_utf16( - const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { - #if SIMDUTF_IS_BIG_ENDIAN - return convert_valid_utf32_to_utf16be(buf, len, utf16_buffer); - #else - return convert_valid_utf32_to_utf16le(buf, len, utf16_buffer); - #endif -} -simdutf_warn_unused size_t convert_valid_utf32_to_utf16le( - const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { - return get_default_implementation()->convert_valid_utf32_to_utf16le( - buf, len, utf16_buffer); -} -simdutf_warn_unused size_t convert_valid_utf32_to_utf16be( - const char32_t *buf, size_t len, char16_t *utf16_buffer) noexcept { - return get_default_implementation()->convert_valid_utf32_to_utf16be( - buf, len, utf16_buffer); -} -simdutf_warn_unused size_t convert_utf16_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { - #if SIMDUTF_IS_BIG_ENDIAN - return convert_utf16be_to_utf32(buf, len, utf32_buffer); - #else - return convert_utf16le_to_utf32(buf, len, utf32_buffer); - #endif -} -simdutf_warn_unused size_t convert_utf16le_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { - return get_default_implementation()->convert_utf16le_to_utf32(buf, len, - utf32_buffer); -} -simdutf_warn_unused size_t convert_utf16be_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { - return get_default_implementation()->convert_utf16be_to_utf32(buf, len, - utf32_buffer); -} -simdutf_warn_unused result convert_utf16_to_utf32_with_errors( - const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { - #if SIMDUTF_IS_BIG_ENDIAN - return convert_utf16be_to_utf32_with_errors(buf, len, utf32_buffer); - #else - return convert_utf16le_to_utf32_with_errors(buf, len, utf32_buffer); - #endif -} -simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( - const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { - return get_default_implementation()->convert_utf16le_to_utf32_with_errors( - buf, len, utf32_buffer); -} -simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( - const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { - return get_default_implementation()->convert_utf16be_to_utf32_with_errors( - buf, len, utf32_buffer); -} -simdutf_warn_unused size_t convert_valid_utf16_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { - #if SIMDUTF_IS_BIG_ENDIAN - return convert_valid_utf16be_to_utf32(buf, len, utf32_buffer); - #else - return convert_valid_utf16le_to_utf32(buf, len, utf32_buffer); - #endif -} -simdutf_warn_unused size_t convert_valid_utf16le_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { - return get_default_implementation()->convert_valid_utf16le_to_utf32( - buf, len, utf32_buffer); -} -simdutf_warn_unused size_t convert_valid_utf16be_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_buffer) noexcept { - return get_default_implementation()->convert_valid_utf16be_to_utf32( - buf, len, utf32_buffer); -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 -void change_endianness_utf16(const char16_t *input, size_t length, - char16_t *output) noexcept { - get_default_implementation()->change_endianness_utf16(input, length, output); -} -simdutf_warn_unused size_t count_utf16(const char16_t *input, - size_t length) noexcept { - #if SIMDUTF_IS_BIG_ENDIAN - return count_utf16be(input, length); - #else - return count_utf16le(input, length); - #endif -} -simdutf_warn_unused size_t count_utf16le(const char16_t *input, - size_t length) noexcept { - return get_default_implementation()->count_utf16le(input, length); -} -simdutf_warn_unused size_t count_utf16be(const char16_t *input, - size_t length) noexcept { - return get_default_implementation()->count_utf16be(input, length); -} -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t count_utf8(const char *input, - size_t length) noexcept { - return get_default_implementation()->count_utf8(input, length); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t latin1_length_from_utf8(const char *buf, - size_t len) noexcept { - return get_default_implementation()->latin1_length_from_utf8(buf, len); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t utf8_length_from_latin1(const char *buf, - size_t len) noexcept { - return get_default_implementation()->utf8_length_from_latin1(buf, len); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t utf8_length_from_utf16(const char16_t *input, - size_t length) noexcept { - #if SIMDUTF_IS_BIG_ENDIAN - return utf8_length_from_utf16be(input, length); - #else - return utf8_length_from_utf16le(input, length); - #endif -} -simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t *input, - size_t length) noexcept { - return get_default_implementation()->utf8_length_from_utf16le(input, length); -} -simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t *input, - size_t length) noexcept { - return get_default_implementation()->utf8_length_from_utf16be(input, length); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t utf32_length_from_utf16(const char16_t *input, - size_t length) noexcept { - #if SIMDUTF_IS_BIG_ENDIAN - return utf32_length_from_utf16be(input, length); - #else - return utf32_length_from_utf16le(input, length); - #endif -} -simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t *input, - size_t length) noexcept { - return get_default_implementation()->utf32_length_from_utf16le(input, length); -} -simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t *input, - size_t length) noexcept { - return get_default_implementation()->utf32_length_from_utf16be(input, length); -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t utf16_length_from_utf8(const char *input, - size_t length) noexcept { - return get_default_implementation()->utf16_length_from_utf8(input, length); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t *input, - size_t length) noexcept { - return get_default_implementation()->utf8_length_from_utf32(input, length); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t *input, - size_t length) noexcept { - return get_default_implementation()->utf16_length_from_utf32(input, length); -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t utf32_length_from_utf8(const char *input, - size_t length) noexcept { - return get_default_implementation()->utf32_length_from_utf8(input, length); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_BASE64 -simdutf_warn_unused size_t -maximal_binary_length_from_base64(const char *input, size_t length) noexcept { - return get_default_implementation()->maximal_binary_length_from_base64( - input, length); -} - -simdutf_warn_unused result base64_to_binary( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_handling_options) noexcept { - return get_default_implementation()->base64_to_binary( - input, length, output, options, last_chunk_handling_options); -} - -simdutf_warn_unused size_t maximal_binary_length_from_base64( - const char16_t *input, size_t length) noexcept { - return get_default_implementation()->maximal_binary_length_from_base64( - input, length); -} - -simdutf_warn_unused result base64_to_binary( - const char16_t *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_handling_options) noexcept { - return get_default_implementation()->base64_to_binary( - input, length, output, options, last_chunk_handling_options); -} - -template -simdutf_warn_unused result base64_to_binary_safe_impl( - const chartype *input, size_t length, char *output, size_t &outlen, - base64_options options, - last_chunk_handling_options last_chunk_handling_options) noexcept { - static_assert(std::is_same::value || - std::is_same::value, - "Only char and char16_t are supported."); - // The implementation could be nicer, but we expect that most times, the user - // will provide us with a buffer that is large enough. - size_t max_length = maximal_binary_length_from_base64(input, length); - if (outlen >= max_length) { - // fast path - full_result r = get_default_implementation()->base64_to_binary_details( - input, length, output, options, last_chunk_handling_options); - if (r.error != error_code::INVALID_BASE64_CHARACTER && - r.error != error_code::BASE64_EXTRA_BITS) { - outlen = r.output_count; - if (last_chunk_handling_options == stop_before_partial) { - if ((r.output_count % 3) != 0) { - bool empty_trail = true; - for (size_t i = r.input_count; i < length; i++) { - if (!scalar::base64::is_ascii_white_space_or_padding(input[i])) { - empty_trail = false; - break; - } - } - if (empty_trail) { - r.input_count = length; - } - } - return {r.error, r.input_count}; - } - return {r.error, length}; - } - return r; - } - // The output buffer is maybe too small. We will decode a truncated version of - // the input. - size_t outlen3 = outlen / 3 * 3; // round down to multiple of 3 - size_t safe_input = base64_length_from_binary(outlen3, options); - full_result r = get_default_implementation()->base64_to_binary_details( - input, safe_input, output, options, loose); - if (r.error == error_code::INVALID_BASE64_CHARACTER) { - return r; - } - size_t offset = - (r.error == error_code::BASE64_INPUT_REMAINDER) - ? 1 - : ((r.output_count % 3) == 0 ? 0 : (r.output_count % 3) + 1); - size_t output_index = r.output_count - (r.output_count % 3); - size_t input_index = safe_input; - // offset is a value that is no larger than 3. We backtrack - // by up to offset characters + an undetermined number of - // white space characters. It is expected that the next loop - // runs at most 3 times + the number of white space characters - // in between them, so we are not worried about performance. - while (offset > 0 && input_index > 0) { - chartype c = input[--input_index]; - if (scalar::base64::is_ascii_white_space(c)) { - // skipping - } else { - offset--; - } - } - size_t remaining_out = outlen - output_index; - const chartype *tail_input = input + input_index; - size_t tail_length = length - input_index; - while (tail_length > 0 && - scalar::base64::is_ascii_white_space(tail_input[tail_length - 1])) { - tail_length--; - } - size_t padding_characts = 0; - if (tail_length > 0 && tail_input[tail_length - 1] == '=') { - tail_length--; - padding_characts++; - while (tail_length > 0 && - scalar::base64::is_ascii_white_space(tail_input[tail_length - 1])) { - tail_length--; - } - if (tail_length > 0 && tail_input[tail_length - 1] == '=') { - tail_length--; - padding_characts++; - } - } - // this will advance tail_input and tail_length - result rr = scalar::base64::base64_tail_decode_safe( - output + output_index, remaining_out, tail_input, tail_length, - padding_characts, options, last_chunk_handling_options); - outlen = output_index + remaining_out; - if (last_chunk_handling_options != stop_before_partial && - rr.error == error_code::SUCCESS && padding_characts > 0) { - // additional checks - if ((outlen % 3 == 0) || ((outlen % 3) + 1 + padding_characts != 4)) { - rr.error = error_code::INVALID_BASE64_CHARACTER; - } - } - if (rr.error == error_code::SUCCESS && - last_chunk_handling_options == stop_before_partial) { - if (tail_input > input + input_index) { - rr.count = tail_input - input; - } else if (r.input_count > 0) { - rr.count = r.input_count + rr.count; - } - return rr; - } - rr.count += input_index; - return rr; -} - - #if SIMDUTF_ATOMIC_REF -size_t atomic_binary_to_base64(const char *input, size_t length, char *output, - base64_options options) noexcept { - static_assert(std::atomic_ref::required_alignment == 1); - size_t retval = 0; - // Arbitrary block sizes: 3KB for input, 4KB for output. Total is 7KB. - constexpr size_t input_block_size = 1024 * 3; - constexpr size_t output_block_size = input_block_size * 4 / 3; - std::array inbuf; - std::array outbuf; - - // std::atomic_ref must not have a const T, see - // https://cplusplus.github.io/LWG/issue3508 - // we instead provide a mutable input, which is ok since we are only reading - // from it. - char *mutable_input = const_cast(input); - - for (size_t i = 0; i < length; i += input_block_size) { - const size_t current_block_size = std::min(input_block_size, length - i); - // This copy is inefficient. - // Under x64, we could use 16-byte aligned loads. - // Note that we warn users that the performance might be poor. - for (size_t j = 0; j < current_block_size; ++j) { - inbuf[j] = std::atomic_ref(mutable_input[i + j]) - .load(std::memory_order_relaxed); - } - const size_t written = binary_to_base64(inbuf.data(), current_block_size, - outbuf.data(), options); - // This copy is inefficient. - // Under x64, we could use 16-byte aligned stores. - for (size_t j = 0; j < written; ++j) { - std::atomic_ref(output[retval + j]) - .store(outbuf[j], std::memory_order_relaxed); - } - retval += written; - } - return retval; -} - #endif // SIMDUTF_ATOMIC_REF - -#endif // SIMDUTF_FEATURE_BASE64 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t convert_latin1_to_utf8_safe( - const char *buf, size_t len, char *utf8_output, size_t utf8_len) noexcept { - const auto start{utf8_output}; - - while (true) { - // convert_latin1_to_utf8 will never write more than input length * 2 - auto read_len = std::min(len, utf8_len >> 1); - if (read_len <= 16) { - break; - } - - const auto write_len = - simdutf::convert_latin1_to_utf8(buf, read_len, utf8_output); - - utf8_output += write_len; - utf8_len -= write_len; - buf += read_len; - len -= read_len; - } - - utf8_output += - scalar::latin1_to_utf8::convert_safe(buf, len, utf8_output, utf8_len); - - return utf8_output - start; -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_BASE64 -simdutf_warn_unused result base64_to_binary_safe( - const char *input, size_t length, char *output, size_t &outlen, - base64_options options, - last_chunk_handling_options last_chunk_handling_options) noexcept { - return base64_to_binary_safe_impl(input, length, output, outlen, - options, last_chunk_handling_options); -} -simdutf_warn_unused result base64_to_binary_safe( - const char16_t *input, size_t length, char *output, size_t &outlen, - base64_options options, - last_chunk_handling_options last_chunk_handling_options) noexcept { - return base64_to_binary_safe_impl( - input, length, output, outlen, options, last_chunk_handling_options); -} - -simdutf_warn_unused size_t -base64_length_from_binary(size_t length, base64_options options) noexcept { - return get_default_implementation()->base64_length_from_binary(length, - options); -} - -size_t binary_to_base64(const char *input, size_t length, char *output, - base64_options options) noexcept { - return get_default_implementation()->binary_to_base64(input, length, output, - options); -} -#endif // SIMDUTF_FEATURE_BASE64 - -#if SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_warn_unused simdutf::encoding_type -autodetect_encoding(const char *buf, size_t length) noexcept { - return get_default_implementation()->autodetect_encoding(buf, length); -} - -simdutf_warn_unused int detect_encodings(const char *buf, - size_t length) noexcept { - return get_default_implementation()->detect_encodings(buf, length); -} -#endif // SIMDUTF_FEATURE_DETECT_ENCODING - -const implementation *builtin_implementation() { - static const implementation *builtin_impl = - get_available_implementations()[SIMDUTF_STRINGIFY( - SIMDUTF_BUILTIN_IMPLEMENTATION)]; - return builtin_impl; -} - -#if SIMDUTF_FEATURE_UTF8 -simdutf_warn_unused size_t trim_partial_utf8(const char *input, size_t length) { - return scalar::utf8::trim_partial_utf8(input, length); -} -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t trim_partial_utf16be(const char16_t *input, - size_t length) { - return scalar::utf16::trim_partial_utf16(input, length); -} - -simdutf_warn_unused size_t trim_partial_utf16le(const char16_t *input, - size_t length) { - return scalar::utf16::trim_partial_utf16(input, length); -} - -simdutf_warn_unused size_t trim_partial_utf16(const char16_t *input, - size_t length) { - #if SIMDUTF_IS_BIG_ENDIAN - return trim_partial_utf16be(input, length); - #else - return trim_partial_utf16le(input, length); - #endif -} -#endif // SIMDUTF_FEATURE_UTF16 - -} // namespace simdutf -/* end file src/implementation.cpp */ - -SIMDUTF_PUSH_DISABLE_WARNINGS -SIMDUTF_DISABLE_UNDESIRED_WARNINGS - -#if SIMDUTF_IMPLEMENTATION_ARM64 -/* begin file src/arm64/implementation.cpp */ -/* begin file src/simdutf/arm64/begin.h */ -// redefining SIMDUTF_IMPLEMENTATION to "arm64" -// #define SIMDUTF_IMPLEMENTATION arm64 -#define SIMDUTF_SIMD_HAS_BYTEMASK 1 -/* end file src/simdutf/arm64/begin.h */ -namespace simdutf { -namespace arm64 { -namespace { -#ifndef SIMDUTF_ARM64_H - #error "arm64.h must be included" -#endif -using namespace simd; - -#if SIMDUTF_FEATURE_ASCII || SIMDUTF_FEATURE_DETECT_ENCODING || \ - SIMDUTF_FEATURE_UTF8 -simdutf_really_inline bool is_ascii(const simd8x64 &input) { - simd8 bits = input.reduce_or(); - return bits.max_val() < 0b10000000u; -} -#endif // SIMDUTF_FEATURE_ASCII || SIMDUTF_FEATURE_DETECT_ENCODING || - // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_really_inline simd8 -must_be_2_3_continuation(const simd8 prev2, - const simd8 prev3) { - simd8 is_third_byte = prev2 >= uint8_t(0b11100000u); - simd8 is_fourth_byte = prev3 >= uint8_t(0b11110000u); - return is_third_byte ^ is_fourth_byte; -} -#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 && (SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32) -// common functions for utf8 conversions -simdutf_really_inline uint16x4_t convert_utf8_3_byte_to_utf16(uint8x16_t in) { - // Low half contains 10cccccc|1110aaaa - // High half contains 10bbbbbb|10bbbbbb - #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - const uint8x16_t sh = simdutf_make_uint8x16_t(0, 2, 3, 5, 6, 8, 9, 11, 1, 1, - 4, 4, 7, 7, 10, 10); - #else - const uint8x16_t sh = {0, 2, 3, 5, 6, 8, 9, 11, 1, 1, 4, 4, 7, 7, 10, 10}; - #endif - uint8x16_t perm = vqtbl1q_u8(in, sh); - // Split into half vectors. - // 10cccccc|1110aaaa - uint8x8_t perm_low = vget_low_u8(perm); // no-op - // 10bbbbbb|10bbbbbb - uint8x8_t perm_high = vget_high_u8(perm); - // xxxxxxxx 10bbbbbb - uint16x4_t mid = vreinterpret_u16_u8(perm_high); // no-op - // xxxxxxxx 1110aaaa - uint16x4_t high = vreinterpret_u16_u8(perm_low); // no-op - // Assemble with shift left insert. - // xxxxxxaa aabbbbbb - uint16x4_t mid_high = vsli_n_u16(mid, high, 6); - // (perm_low << 8) | (perm_low >> 8) - // xxxxxxxx 10cccccc - uint16x4_t low = vreinterpret_u16_u8(vrev16_u8(perm_low)); - // Shift left insert into the low bits - // aaaabbbb bbcccccc - uint16x4_t composed = vsli_n_u16(low, mid_high, 6); - return composed; -} - -simdutf_really_inline uint16x8_t convert_utf8_2_byte_to_utf16(uint8x16_t in) { - // Converts 6 2 byte UTF-8 characters to 6 UTF-16 characters. - // Technically this calculates 8, but 6 does better and happens more often - // (The languages which use these codepoints use ASCII spaces so 8 would need - // to be in the middle of a very long word). - - // 10bbbbbb 110aaaaa - uint16x8_t upper = vreinterpretq_u16_u8(in); - // (in << 8) | (in >> 8) - // 110aaaaa 10bbbbbb - uint16x8_t lower = vreinterpretq_u16_u8(vrev16q_u8(in)); - // 00000000 000aaaaa - uint16x8_t upper_masked = vandq_u16(upper, vmovq_n_u16(0x1F)); - // Assemble with shift left insert. - // 00000aaa aabbbbbb - uint16x8_t composed = vsliq_n_u16(lower, upper_masked, 6); - return composed; -} - -simdutf_really_inline uint16x8_t -convert_utf8_1_to_2_byte_to_utf16(uint8x16_t in, size_t shufutf8_idx) { - // Converts 6 1-2 byte UTF-8 characters to 6 UTF-16 characters. - // This is a relatively easy scenario - // we process SIX (6) input code-code units. The max length in bytes of six - // code code units spanning between 1 and 2 bytes each is 12 bytes. - uint8x16_t sh = vld1q_u8(reinterpret_cast( - simdutf::tables::utf8_to_utf16::shufutf8[shufutf8_idx])); - // Shuffle - // 1 byte: 00000000 0bbbbbbb - // 2 byte: 110aaaaa 10bbbbbb - uint16x8_t perm = vreinterpretq_u16_u8(vqtbl1q_u8(in, sh)); - // Mask - // 1 byte: 00000000 0bbbbbbb - // 2 byte: 00000000 00bbbbbb - uint16x8_t ascii = vandq_u16(perm, vmovq_n_u16(0x7f)); // 6 or 7 bits - // 1 byte: 00000000 00000000 - // 2 byte: 000aaaaa 00000000 - uint16x8_t highbyte = vandq_u16(perm, vmovq_n_u16(0x1f00)); // 5 bits - // Combine with a shift right accumulate - // 1 byte: 00000000 0bbbbbbb - // 2 byte: 00000aaa aabbbbbb - uint16x8_t composed = vsraq_n_u16(ascii, highbyte, 2); - return composed; -} -#endif // SIMDUTF_FEATURE_UTF8 && (SIMDUTF_FEATURE_UTF16 || - // SIMDUTF_FEATURE_UTF32) - -#if SIMDUTF_FEATURE_UTF16 -/* begin file src/arm64/arm_utf16fix.cpp */ - -/* - * Returns if a vector of type uint8x16_t is all zero. - */ -simdutf_really_inline int veq_non_zero(uint8x16_t v) { - // might compile to two instructions: - // umaxv s0, v0.4s - // fmov w0, s0 - // On Apple hardware, they both have a latency of 3 cycles, with a throughput - // of four instructions per cycle. So that's 6 cycles of latency (!!!) for the - // two instructions. A narrowing shift has the same latency and throughput. - return vmaxvq_u32(vreinterpretq_u32_u8(v)); -} - -/* - * Process one block of 16 characters. If in_place is false, - * copy the block from in to out. If there is a sequencing - * error in the block, overwrite the illsequenced characters - * with the replacement character. This function reads one - * character before the beginning of the buffer as a lookback. - * If that character is illsequenced, it too is overwritten. - */ -template -void utf16fix_block(char16_t *out, const char16_t *in) { - const char16_t replacement = scalar::utf16::replacement(); - uint8x16x2_t lb, block; - uint8x16_t lb_masked, block_masked, lb_is_high, block_is_low; - uint8x16_t illseq; - - const int idx = !match_system(big_endian) ? 0 : 1; - - /* TODO: compute lookback using shifts */ - lb = vld2q_u8((const uint8_t *)(in - 1)); - block = vld2q_u8((const uint8_t *)in); - lb_masked = vandq_u8(lb.val[idx], vdupq_n_u8(0xfc)); - block_masked = vandq_u8(block.val[idx], vdupq_n_u8(0xfc)); - lb_is_high = vceqq_u8(lb_masked, vdupq_n_u8(0xd8)); - block_is_low = vceqq_u8(block_masked, vdupq_n_u8(0xdc)); - - illseq = veorq_u8(lb_is_high, block_is_low); - if (veq_non_zero(illseq)) { - uint8x16_t lb_illseq, block_illseq; - char16_t lbc; - int ill; - - /* compute the cause of the illegal sequencing */ - lb_illseq = vbicq_u8(lb_is_high, block_is_low); - block_illseq = vorrq_u8(vbicq_u8(block_is_low, lb_is_high), - vextq_u8(lb_illseq, vdupq_n_u8(0), 1)); - - /* fix illegal sequencing in the lookback */ - ill = vgetq_lane_u8(lb_illseq, 0); - lbc = out[-1]; - out[-1] = ill ? replacement : lbc; - - /* fix illegal sequencing in the main block */ - if (!match_system(big_endian)) { - block.val[1] = vbslq_u8(block_illseq, vdupq_n_u8(0xfd), block.val[1]); - block.val[0] = vorrq_u8(block_illseq, block.val[0]); - } else { - block.val[0] = vbslq_u8(block_illseq, vdupq_n_u8(0xfd), block.val[0]); - block.val[1] = vorrq_u8(block_illseq, block.val[1]); - } - - vst2q_u8((uint8_t *)out, block); - } else if (!inplace) { - vst2q_u8((uint8_t *)out, block); - } -} - -template -uint8x16_t get_mismatch_copy(const char16_t *in, char16_t *out) { - const int idx = !match_system(big_endian) ? 0 : 1; - uint8x16x2_t lb = vld2q_u8((const uint8_t *)(in - 1)); - uint8x16x2_t block = vld2q_u8((const uint8_t *)in); - uint8x16_t lb_masked = vandq_u8(lb.val[idx], vdupq_n_u8(0xfc)); - uint8x16_t block_masked = vandq_u8(block.val[idx], vdupq_n_u8(0xfc)); - uint8x16_t lb_is_high = vceqq_u8(lb_masked, vdupq_n_u8(0xd8)); - uint8x16_t block_is_low = vceqq_u8(block_masked, vdupq_n_u8(0xdc)); - uint8x16_t illseq = veorq_u8(lb_is_high, block_is_low); - if (!inplace) { - vst2q_u8((uint8_t *)out, block); - } - return illseq; -} - -simdutf_really_inline uint64_t get_mask(uint8x16_t illse0, uint8x16_t illse1, - uint8x16_t illse2, uint8x16_t illse3) { -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - uint8x16_t bit_mask = - simdutf_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); -#else - uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; -#endif - uint8x16_t sum0 = - vpaddq_u8(vandq_u8(illse0, bit_mask), vandq_u8(illse1, bit_mask)); - uint8x16_t sum1 = - vpaddq_u8(vandq_u8(illse2, bit_mask), vandq_u8(illse3, bit_mask)); - sum0 = vpaddq_u8(sum0, sum1); - sum0 = vpaddq_u8(sum0, sum0); - return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); -} - -// The idea is to process 64 characters at a time, and if there is a mismatch -// we can fix it with a bit of scalar code. When the input is correct, this -// function might be faster than alternative implementations working on small -// blocks of input. -template -bool utf16fix_block64(char16_t *out, const char16_t *in) { - const char16_t replacement = scalar::utf16::replacement(); - - uint8x16_t illse0 = inplace ? get_mismatch_copy(in, out) - : get_mismatch_copy(in, out); - uint8x16_t illse1 = - inplace ? get_mismatch_copy(in + 16, out + 16) - : get_mismatch_copy(in + 16, out + 16); - uint8x16_t illse2 = - inplace ? get_mismatch_copy(in + 32, out + 32) - : get_mismatch_copy(in + 32, out + 32); - uint8x16_t illse3 = - inplace ? get_mismatch_copy(in + 48, out + 48) - : get_mismatch_copy(in + 48, out + 48); - // this branch could be marked as unlikely: - if (veq_non_zero( - vorrq_u8(vorrq_u8(illse0, illse1), vorrq_u8(illse2, illse3)))) { - uint64_t matches = get_mask(illse0, illse1, illse2, illse3); - // Given that ARM has a fast bitreverse instruction, we can - // reverse once and then use clz to find the first bit set. - // It is how it is done in simdjson and *might* be beneficial. - // - // We might also proceed in reverse to reduce the RAW hazard, - // but it might require more instructions. - - while (matches != 0) { - int r = trailing_zeroes(matches); // generates rbit + clz - // Either we have a high surrogate followed by a non-low surrogate - // or we have a low surrogate not preceded by a high surrogate. - bool is_high = scalar::utf16::is_high_surrogate(in[r - 1]); - out[r - is_high] = replacement; - matches = clear_least_significant_bit(matches); - } - return false; - } - return true; -} - -template -void utf16fix_neon_64bits(const char16_t *in, size_t n, char16_t *out) { - size_t i; - const char16_t replacement = scalar::utf16::replacement(); - if (n < 17) { - return scalar::utf16::to_well_formed_utf16(in, n, out); - } - out[0] = - scalar::utf16::is_low_surrogate(in[0]) ? replacement : in[0]; - i = 1; - - /* duplicate code to have the compiler specialise utf16fix_block() */ - if (in == out) { - for (i = 1; i + 64 < n; i += 64) { - utf16fix_block64(out + i, in + i); - } - - for (; i + 16 < n; i += 16) { - utf16fix_block(out + i, in + i); - } - - /* tbd: find carry */ - utf16fix_block(out + n - 16, in + n - 16); - } else { - for (i = 1; i + 64 < n; i += 64) { - utf16fix_block64(out + i, in + i); - } - for (; i + 16 < n; i += 16) { - utf16fix_block(out + i, in + i); - } - - utf16fix_block(out + n - 16, in + n - 16); - } - out[n - 1] = scalar::utf16::is_high_surrogate(out[n - 1]) - ? replacement - : out[n - 1]; -} -/* end file src/arm64/arm_utf16fix.cpp */ -#endif // SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING -/* begin file src/arm64/arm_validate_utf16.cpp */ -template -const char16_t *arm_validate_utf16(const char16_t *input, size_t size) { - const char16_t *end = input + size; - const auto v_d8 = simd8::splat(0xd8); - const auto v_f8 = simd8::splat(0xf8); - const auto v_fc = simd8::splat(0xfc); - const auto v_dc = simd8::splat(0xdc); - while (end - input >= 16) { - // 0. Load data: since the validation takes into account only higher - // byte of each word, we compress the two vectors into one which - // consists only the higher bytes. - auto in0 = simd16(input); - auto in1 = - simd16(input + simd16::SIZE / sizeof(char16_t)); - if (!match_system(big_endian)) { - in0 = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in0))); - in1 = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in1))); - } - const auto t0 = in0.shr<8>(); - const auto t1 = in1.shr<8>(); - const simd8 in = simd16::pack(t0, t1); - // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy). - const uint64_t surrogates_wordmask = ((in & v_f8) == v_d8).to_bitmask64(); - if (surrogates_wordmask == 0) { - input += 16; - } else { - // 2. We have some surrogates that have to be distinguished: - // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF) - // - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF) - // - // Fact: high surrogate has 11th bit set (3rd bit in the higher word) - - // V - non-surrogate code units - // V = not surrogates_wordmask - const uint64_t V = ~surrogates_wordmask; - - // H - word-mask for high surrogates: the six highest bits are 0b1101'11 - const auto vH = ((in & v_fc) == v_dc); - const uint64_t H = vH.to_bitmask64(); - - // L - word mask for low surrogates - // L = not H and surrogates_wordmask - const uint64_t L = ~H & surrogates_wordmask; - - const uint64_t a = - L & (H >> 4); // A low surrogate must be followed by high one. - // (A low surrogate placed in the 7th register's word - // is an exception we handle.) - const uint64_t b = - a << 4; // Just mark that the opposite fact is hold, - // thanks to that we have only two masks for valid case. - const uint64_t c = V | a | b; // Combine all the masks into the final one. - if (c == ~0ull) { - // The whole input register contains valid UTF-16, i.e., - // either single code units or proper surrogate pairs. - input += 16; - } else if (c == 0xfffffffffffffffull) { - // The 15 lower code units of the input register contains valid UTF-16. - // The 15th word may be either a low or high surrogate. It the next - // iteration we 1) check if the low surrogate is followed by a high - // one, 2) reject sole high surrogate. - input += 15; - } else { - return nullptr; - } - } - } - return input; -} - -template -const result arm_validate_utf16_with_errors(const char16_t *input, - size_t size) { - const char16_t *start = input; - const char16_t *end = input + size; - - const auto v_d8 = simd8::splat(0xd8); - const auto v_f8 = simd8::splat(0xf8); - const auto v_fc = simd8::splat(0xfc); - const auto v_dc = simd8::splat(0xdc); - while (input + 16 < end) { - // 0. Load data: since the validation takes into account only higher - // byte of each word, we compress the two vectors into one which - // consists only the higher bytes. - auto in0 = simd16(input); - auto in1 = - simd16(input + simd16::SIZE / sizeof(char16_t)); - - if (!match_system(big_endian)) { - in0 = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in0))); - in1 = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in1))); - } - const auto t0 = in0.shr<8>(); - const auto t1 = in1.shr<8>(); - const simd8 in = simd16::pack(t0, t1); - // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy). - const uint64_t surrogates_wordmask = ((in & v_f8) == v_d8).to_bitmask64(); - if (surrogates_wordmask == 0) { - input += 16; - } else { - // 2. We have some surrogates that have to be distinguished: - // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF) - // - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF) - // - // Fact: high surrogate has 11th bit set (3rd bit in the higher word) - - // V - non-surrogate code units - // V = not surrogates_wordmask - const uint64_t V = ~surrogates_wordmask; - - // H - word-mask for high surrogates: the six highest bits are 0b1101'11 - const auto vH = ((in & v_fc) == v_dc); - const uint64_t H = vH.to_bitmask64(); - - // L - word mask for low surrogates - // L = not H and surrogates_wordmask - const uint64_t L = ~H & surrogates_wordmask; - - const uint64_t a = - L & (H >> 4); // A low surrogate must be followed by high one. - // (A low surrogate placed in the 7th register's word - // is an exception we handle.) - const uint64_t b = - a << 4; // Just mark that the opposite fact is hold, - // thanks to that we have only two masks for valid case. - const uint64_t c = V | a | b; // Combine all the masks into the final one. - if (c == ~0ull) { - // The whole input register contains valid UTF-16, i.e., - // either single code units or proper surrogate pairs. - input += 16; - } else if (c == 0xfffffffffffffffull) { - // The 15 lower code units of the input register contains valid UTF-16. - // The 15th word may be either a low or high surrogate. It the next - // iteration we 1) check if the low surrogate is followed by a high - // one, 2) reject sole high surrogate. - input += 15; - } else { - return result(error_code::SURROGATE, input - start); - } - } - } - return result(error_code::SUCCESS, input - start); -} -/* end file src/arm64/arm_validate_utf16.cpp */ -#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING -#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING -/* begin file src/arm64/arm_validate_utf32le.cpp */ - -const char32_t *arm_validate_utf32le(const char32_t *input, size_t size) { - const char32_t *end = input + size; - - const uint32x4_t standardmax = vmovq_n_u32(0x10ffff); - const uint32x4_t offset = vmovq_n_u32(0xffff2000); - const uint32x4_t standardoffsetmax = vmovq_n_u32(0xfffff7ff); - uint32x4_t currentmax = vmovq_n_u32(0x0); - uint32x4_t currentoffsetmax = vmovq_n_u32(0x0); - - while (end - input >= 4) { - const uint32x4_t in = vld1q_u32(reinterpret_cast(input)); - currentmax = vmaxq_u32(in, currentmax); - currentoffsetmax = vmaxq_u32(vaddq_u32(in, offset), currentoffsetmax); - input += 4; - } - - uint32x4_t is_zero = - veorq_u32(vmaxq_u32(currentmax, standardmax), standardmax); - if (vmaxvq_u32(is_zero) != 0) { - return nullptr; - } - - is_zero = veorq_u32(vmaxq_u32(currentoffsetmax, standardoffsetmax), - standardoffsetmax); - if (vmaxvq_u32(is_zero) != 0) { - return nullptr; - } - - return input; -} - -const result arm_validate_utf32le_with_errors(const char32_t *input, - size_t size) { - const char32_t *start = input; - const char32_t *end = input + size; - - const uint32x4_t standardmax = vmovq_n_u32(0x10ffff); - const uint32x4_t offset = vmovq_n_u32(0xffff2000); - const uint32x4_t standardoffsetmax = vmovq_n_u32(0xfffff7ff); - uint32x4_t currentmax = vmovq_n_u32(0x0); - uint32x4_t currentoffsetmax = vmovq_n_u32(0x0); - - while (end - input >= 4) { - const uint32x4_t in = vld1q_u32(reinterpret_cast(input)); - currentmax = vmaxq_u32(in, currentmax); - currentoffsetmax = vmaxq_u32(vaddq_u32(in, offset), currentoffsetmax); - - uint32x4_t is_zero = - veorq_u32(vmaxq_u32(currentmax, standardmax), standardmax); - if (vmaxvq_u32(is_zero) != 0) { - return result(error_code::TOO_LARGE, input - start); - } - - is_zero = veorq_u32(vmaxq_u32(currentoffsetmax, standardoffsetmax), - standardoffsetmax); - if (vmaxvq_u32(is_zero) != 0) { - return result(error_code::SURROGATE, input - start); - } - - input += 4; - } - - return result(error_code::SUCCESS, input - start); -} -/* end file src/arm64/arm_validate_utf32le.cpp */ -#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/arm64/arm_convert_latin1_to_utf16.cpp */ -template -std::pair -arm_convert_latin1_to_utf16(const char *buf, size_t len, - char16_t *utf16_output) { - const char *end = buf + len; - - while (end - buf >= 16) { - uint8x16_t in8 = vld1q_u8(reinterpret_cast(buf)); - uint16x8_t inlow = vmovl_u8(vget_low_u8(in8)); - if (!match_system(big_endian)) { - inlow = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(inlow))); - } - vst1q_u16(reinterpret_cast(utf16_output), inlow); - uint16x8_t inhigh = vmovl_u8(vget_high_u8(in8)); - if (!match_system(big_endian)) { - inhigh = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(inhigh))); - } - vst1q_u16(reinterpret_cast(utf16_output + 8), inhigh); - utf16_output += 16; - buf += 16; - } - - return std::make_pair(buf, utf16_output); -} -/* end file src/arm64/arm_convert_latin1_to_utf16.cpp */ -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/arm64/arm_convert_latin1_to_utf32.cpp */ -std::pair -arm_convert_latin1_to_utf32(const char *buf, size_t len, - char32_t *utf32_output) { - const char *end = buf + len; - - while (end - buf >= 16) { - uint8x16_t in8 = vld1q_u8(reinterpret_cast(buf)); - uint16x8_t in8low = vmovl_u8(vget_low_u8(in8)); - uint32x4_t in16lowlow = vmovl_u16(vget_low_u16(in8low)); - uint32x4_t in16lowhigh = vmovl_u16(vget_high_u16(in8low)); - uint16x8_t in8high = vmovl_u8(vget_high_u8(in8)); - uint32x4_t in8highlow = vmovl_u16(vget_low_u16(in8high)); - uint32x4_t in8highhigh = vmovl_u16(vget_high_u16(in8high)); - vst1q_u32(reinterpret_cast(utf32_output), in16lowlow); - vst1q_u32(reinterpret_cast(utf32_output + 4), in16lowhigh); - vst1q_u32(reinterpret_cast(utf32_output + 8), in8highlow); - vst1q_u32(reinterpret_cast(utf32_output + 12), in8highhigh); - - utf32_output += 16; - buf += 16; - } - - return std::make_pair(buf, utf32_output); -} -/* end file src/arm64/arm_convert_latin1_to_utf32.cpp */ -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/arm64/arm_convert_latin1_to_utf8.cpp */ -/* - Returns a pair: the first unprocessed byte from buf and utf8_output - A scalar routing should carry on the conversion of the tail. -*/ -std::pair -arm_convert_latin1_to_utf8(const char *latin1_input, size_t len, - char *utf8_out) { - uint8_t *utf8_output = reinterpret_cast(utf8_out); - const char *end = latin1_input + len; - const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080); - // We always write 16 bytes, of which more than the first 8 bytes - // are valid. A safety margin of 8 is more than sufficient. - while (end - latin1_input >= 16 + 8) { - uint8x16_t in8 = vld1q_u8(reinterpret_cast(latin1_input)); - if (vmaxvq_u8(in8) <= 0x7F) { // ASCII fast path!!!! - vst1q_u8(utf8_output, in8); - utf8_output += 16; - latin1_input += 16; - continue; - } - - // We just fallback on UTF-16 code. This could be optimized/simplified - // further. - uint16x8_t in16 = vmovl_u8(vget_low_u8(in8)); - // 1. prepare 2-byte values - // input 8-bit word : [aabb|bbbb] x 8 - // expected output : [1100|00aa|10bb|bbbb] x 8 - const uint16x8_t v_1f00 = vmovq_n_u16((int16_t)0x1f00); - const uint16x8_t v_003f = vmovq_n_u16((int16_t)0x003f); - - // t0 = [0000|00aa|bbbb|bb00] - const uint16x8_t t0 = vshlq_n_u16(in16, 2); - // t1 = [0000|00aa|0000|0000] - const uint16x8_t t1 = vandq_u16(t0, v_1f00); - // t2 = [0000|0000|00bb|bbbb] - const uint16x8_t t2 = vandq_u16(in16, v_003f); - // t3 = [0000|00aa|00bb|bbbb] - const uint16x8_t t3 = vorrq_u16(t1, t2); - // t4 = [1100|00aa|10bb|bbbb] - const uint16x8_t t4 = vorrq_u16(t3, v_c080); - // 2. merge ASCII and 2-byte codewords - const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F); - const uint16x8_t one_byte_bytemask = vcleq_u16(in16, v_007f); - const uint8x16_t utf8_unpacked = - vreinterpretq_u8_u16(vbslq_u16(one_byte_bytemask, in16, t4)); - // 3. prepare bitmask for 8-bit lookup -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - const uint16x8_t mask = simdutf_make_uint16x8_t( - 0x0001, 0x0004, 0x0010, 0x0040, 0x0002, 0x0008, 0x0020, 0x0080); -#else - const uint16x8_t mask = {0x0001, 0x0004, 0x0010, 0x0040, - 0x0002, 0x0008, 0x0020, 0x0080}; -#endif - uint16_t m2 = vaddvq_u16(vandq_u16(one_byte_bytemask, mask)); - // 4. pack the bytes - const uint8_t *row = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0]; - const uint8x16_t shuffle = vld1q_u8(row + 1); - const uint8x16_t utf8_packed = vqtbl1q_u8(utf8_unpacked, shuffle); - - // 5. store bytes - vst1q_u8(utf8_output, utf8_packed); - // 6. adjust pointers - latin1_input += 8; - utf8_output += row[0]; - - } // while - - return std::make_pair(latin1_input, reinterpret_cast(utf8_output)); -} -/* end file src/arm64/arm_convert_latin1_to_utf8.cpp */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/arm64/arm_convert_utf8_to_latin1.cpp */ -// Convert up to 16 bytes from utf8 to utf16 using a mask indicating the -// end of the code points. Only the least significant 12 bits of the mask -// are accessed. -// It returns how many bytes were consumed (up to 16, usually 12). -size_t convert_masked_utf8_to_latin1(const char *input, - uint64_t utf8_end_of_code_point_mask, - char *&latin1_output) { - // we use an approach where we try to process up to 12 input bytes. - // Why 12 input bytes and not 16? Because we are concerned with the size of - // the lookup tables. Also 12 is nicely divisible by two and three. - // - uint8x16_t in = vld1q_u8(reinterpret_cast(input)); - const uint16_t input_utf8_end_of_code_point_mask = - utf8_end_of_code_point_mask & 0xfff; - // - // Optimization note: our main path below is load-latency dependent. Thus it - // is maybe beneficial to have fast paths that depend on branch prediction but - // have less latency. This results in more instructions but, potentially, also - // higher speeds. - - // We first try a few fast paths. - // The obvious first test is ASCII, which actually consumes the full 16. - if (utf8_end_of_code_point_mask == 0xfff) { - // We process in chunks of 12 bytes - vst1q_u8(reinterpret_cast(latin1_output), in); - latin1_output += 12; // We wrote 12 18-bit characters. - return 12; // We consumed 12 bytes. - } - /// We do not have a fast path available, or the fast path is unimportant, so - /// we fallback. - const uint8_t idx = simdutf::tables::utf8_to_utf16::utf8bigindex - [input_utf8_end_of_code_point_mask][0]; - - const uint8_t consumed = simdutf::tables::utf8_to_utf16::utf8bigindex - [input_utf8_end_of_code_point_mask][1]; - // this indicates an invalid input: - if (idx >= 64) { - return consumed; - } - // Here we should have (idx < 64), if not, there is a bug in the validation or - // elsewhere. SIX (6) input code-code units this is a relatively easy scenario - // we process SIX (6) input code-code units. The max length in bytes of six - // code code units spanning between 1 and 2 bytes each is 12 bytes. Converts 6 - // 1-2 byte UTF-8 characters to 6 UTF-16 characters. This is a relatively easy - // scenario we process SIX (6) input code-code units. The max length in bytes - // of six code code units spanning between 1 and 2 bytes each is 12 bytes. - uint8x16_t sh = vld1q_u8(reinterpret_cast( - simdutf::tables::utf8_to_utf16::shufutf8[idx])); - // Shuffle - // 1 byte: 00000000 0bbbbbbb - // 2 byte: 110aaaaa 10bbbbbb - uint16x8_t perm = vreinterpretq_u16_u8(vqtbl1q_u8(in, sh)); - // Mask - // 1 byte: 00000000 0bbbbbbb - // 2 byte: 00000000 00bbbbbb - uint16x8_t ascii = vandq_u16(perm, vmovq_n_u16(0x7f)); // 6 or 7 bits - // 1 byte: 00000000 00000000 - // 2 byte: 000aaaaa 00000000 - uint16x8_t highbyte = vandq_u16(perm, vmovq_n_u16(0x1f00)); // 5 bits - // Combine with a shift right accumulate - // 1 byte: 00000000 0bbbbbbb - // 2 byte: 00000aaa aabbbbbb - uint16x8_t composed = vsraq_n_u16(ascii, highbyte, 2); - // writing 8 bytes even though we only care about the first 6 bytes. - uint8x8_t latin1_packed = vmovn_u16(composed); - vst1_u8(reinterpret_cast(latin1_output), latin1_packed); - latin1_output += 6; // We wrote 6 bytes. - return consumed; -} -/* end file src/arm64/arm_convert_utf8_to_latin1.cpp */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -/* begin file src/arm64/arm_convert_utf8_to_utf16.cpp */ -// Convert up to 16 bytes from utf8 to utf16 using a mask indicating the -// end of the code points. Only the least significant 12 bits of the mask -// are accessed. -// It returns how many bytes were consumed (up to 16, usually 12). -template -size_t convert_masked_utf8_to_utf16(const char *input, - uint64_t utf8_end_of_code_point_mask, - char16_t *&utf16_output) { - // we use an approach where we try to process up to 12 input bytes. - // Why 12 input bytes and not 16? Because we are concerned with the size of - // the lookup tables. Also 12 is nicely divisible by two and three. - // - uint8x16_t in = vld1q_u8(reinterpret_cast(input)); - const uint16_t input_utf8_end_of_code_point_mask = - utf8_end_of_code_point_mask & 0xfff; - // - // Optimization note: our main path below is load-latency dependent. Thus it - // is maybe beneficial to have fast paths that depend on branch prediction but - // have less latency. This results in more instructions but, potentially, also - // higher speeds. - - // We first try a few fast paths. - // The obvious first test is ASCII, which actually consumes the full 16. - if ((utf8_end_of_code_point_mask & 0xFFFF) == 0xffff) { - // We process in chunks of 16 bytes - // The routine in simd.h is reused. - simd8 temp{vreinterpretq_s8_u8(in)}; - temp.store_ascii_as_utf16(utf16_output); - utf16_output += 16; // We wrote 16 16-bit characters. - return 16; // We consumed 16 bytes. - } - - // 3 byte sequences are the next most common, as seen in CJK, which has long - // sequences of these. - if (input_utf8_end_of_code_point_mask == 0x924) { - // We want to take 4 3-byte UTF-8 code units and turn them into 4 2-byte - // UTF-16 code units. - uint16x4_t composed = convert_utf8_3_byte_to_utf16(in); - // Byte swap if necessary - if (!match_system(big_endian)) { - composed = vreinterpret_u16_u8(vrev16_u8(vreinterpret_u8_u16(composed))); - } - vst1_u16(reinterpret_cast(utf16_output), composed); - utf16_output += 4; // We wrote 4 16-bit characters. - return 12; // We consumed 12 bytes. - } - - // 2 byte sequences occur in short bursts in languages like Greek and Russian. - if ((utf8_end_of_code_point_mask & 0xFFF) == 0xaaa) { - // We want to take 6 2-byte UTF-8 code units and turn them into 6 2-byte - // UTF-16 code units. - uint16x8_t composed = convert_utf8_2_byte_to_utf16(in); - // Byte swap if necessary - if (!match_system(big_endian)) { - composed = - vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(composed))); - } - vst1q_u16(reinterpret_cast(utf16_output), composed); - - utf16_output += 6; // We wrote 6 16-bit characters. - return 12; // We consumed 12 bytes. - } - - /// We do not have a fast path available, or the fast path is unimportant, so - /// we fallback. - const uint8_t idx = simdutf::tables::utf8_to_utf16::utf8bigindex - [input_utf8_end_of_code_point_mask][0]; - - const uint8_t consumed = simdutf::tables::utf8_to_utf16::utf8bigindex - [input_utf8_end_of_code_point_mask][1]; - - if (idx < 64) { - // SIX (6) input code-code units - // Convert to UTF-16 - uint16x8_t composed = convert_utf8_1_to_2_byte_to_utf16(in, idx); - // Byte swap if necessary - if (!match_system(big_endian)) { - composed = - vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(composed))); - } - // Store - vst1q_u16(reinterpret_cast(utf16_output), composed); - utf16_output += 6; // We wrote 6 16-bit characters. - return consumed; - } else if (idx < 145) { - // FOUR (4) input code-code units - // UTF-16 and UTF-32 use similar algorithms, but UTF-32 skips the narrowing. - uint8x16_t sh = vld1q_u8(reinterpret_cast( - simdutf::tables::utf8_to_utf16::shufutf8[idx])); - // XXX: depending on the system scalar instructions might be faster. - // 1 byte: 00000000 00000000 0ccccccc - // 2 byte: 00000000 110bbbbb 10cccccc - // 3 byte: 1110aaaa 10bbbbbb 10cccccc - uint32x4_t perm = vreinterpretq_u32_u8(vqtbl1q_u8(in, sh)); - // 1 byte: 00000000 0ccccccc - // 2 byte: xx0bbbbb x0cccccc - // 3 byte: xxbbbbbb x0cccccc - uint16x4_t lowperm = vmovn_u32(perm); - // Partially mask with bic (doesn't require a temporary register unlike and) - // The shift left insert below will clear the top bits. - // 1 byte: 00000000 00000000 - // 2 byte: xx0bbbbb 00000000 - // 3 byte: xxbbbbbb 00000000 - uint16x4_t middlebyte = vbic_u16(lowperm, vmov_n_u16(uint16_t(~0xFF00))); - // ASCII - // 1 byte: 00000000 0ccccccc - // 2+byte: 00000000 00cccccc - uint16x4_t ascii = vand_u16(lowperm, vmov_n_u16(0x7F)); - // Split into narrow vectors. - // 2 byte: 00000000 00000000 - // 3 byte: 00000000 xxxxaaaa - uint16x4_t highperm = vshrn_n_u32(perm, 16); - // Shift right accumulate the middle byte - // 1 byte: 00000000 0ccccccc - // 2 byte: 00xx0bbb bbcccccc - // 3 byte: 00xxbbbb bbcccccc - uint16x4_t middlelow = vsra_n_u16(ascii, middlebyte, 2); - // Shift left and insert the top 4 bits, overwriting the garbage - // 1 byte: 00000000 0ccccccc - // 2 byte: 00000bbb bbcccccc - // 3 byte: aaaabbbb bbcccccc - uint16x4_t composed = vsli_n_u16(middlelow, highperm, 12); - // Byte swap if necessary - if (!match_system(big_endian)) { - composed = vreinterpret_u16_u8(vrev16_u8(vreinterpret_u8_u16(composed))); - } - vst1_u16(reinterpret_cast(utf16_output), composed); - - utf16_output += 4; // We wrote 4 16-bit codepoints - return consumed; - } else if (idx < 209) { - // THREE (3) input code-code units - if (input_utf8_end_of_code_point_mask == 0x888) { - // We want to take 3 4-byte UTF-8 code units and turn them into 3 4-byte - // UTF-16 pairs. Generating surrogate pairs is a little tricky though, but - // it is easier when we can assume they are all pairs. This version does - // not use the LUT, but 4 byte sequences are less common and the overhead - // of the extra memory access is less important than the early branch - // overhead in shorter sequences. - - // Swap byte pairs - // 10dddddd 10cccccc|10bbbbbb 11110aaa - // 10cccccc 10dddddd|11110aaa 10bbbbbb - uint8x16_t swap = vrev16q_u8(in); - // Shift left 2 bits - // cccccc00 dddddd00 xxxxxxxx bbbbbb00 - uint32x4_t shift = vreinterpretq_u32_u8(vshlq_n_u8(swap, 2)); - // Create a magic number containing the low 2 bits of the trail surrogate - // and all the corrections needed to create the pair. UTF-8 4b prefix = - // -0x0000|0xF000 surrogate offset = -0x0000|0x0040 (0x10000 << 6) - // surrogate high = +0x0000|0xD800 - // surrogate low = +0xDC00|0x0000 - // ------------------------------- - // = +0xDC00|0xE7C0 - uint32x4_t magic = vmovq_n_u32(0xDC00E7C0); - // Generate unadjusted trail surrogate minus lowest 2 bits - // xxxxxxxx xxxxxxxx|11110aaa bbbbbb00 - uint32x4_t trail = - vbslq_u32(vmovq_n_u32(0x0000FF00), vreinterpretq_u32_u8(swap), shift); - // Insert low 2 bits of trail surrogate to magic number for later - // 11011100 00000000 11100111 110000cc - uint16x8_t magic_with_low_2 = - vreinterpretq_u16_u32(vsraq_n_u32(magic, shift, 30)); - // Generate lead surrogate - // xxxxcccc ccdddddd|xxxxxxxx xxxxxxxx - uint32x4_t lead = vreinterpretq_u32_u16( - vsliq_n_u16(vreinterpretq_u16_u8(swap), vreinterpretq_u16_u8(in), 6)); - // Mask out lead - // 000000cc ccdddddd|xxxxxxxx xxxxxxxx - lead = vbicq_u32(lead, vmovq_n_u32(uint32_t(~0x03FFFFFF))); - // Blend pairs - // 000000cc ccdddddd|11110aaa bbbbbb00 - uint16x8_t blend = vreinterpretq_u16_u32( - vbslq_u32(vmovq_n_u32(0x0000FFFF), trail, lead)); - // Add magic number to finish the result - // 110111CC CCDDDDDD|110110AA BBBBBBCC - uint16x8_t composed = vaddq_u16(blend, magic_with_low_2); - // Byte swap if necessary - if (!match_system(big_endian)) { - composed = - vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(composed))); - } - uint16_t buffer[8]; - vst1q_u16(reinterpret_cast(buffer), composed); - for (int k = 0; k < 6; k++) { - utf16_output[k] = buffer[k]; - } // the loop might compiler to a couple of instructions. - // We need some validation. See - // https://github.com/simdutf/simdutf/pull/631 -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - uint8x16_t expected_mask = simdutf_make_uint8x16_t( - 0xf8, 0xc0, 0xc0, 0xc0, 0xf8, 0xc0, 0xc0, 0xc0, 0xf8, 0xc0, 0xc0, - 0xc0, 0x0, 0x0, 0x0, 0x0); -#else - uint8x16_t expected_mask = {0xf8, 0xc0, 0xc0, 0xc0, 0xf8, 0xc0, - 0xc0, 0xc0, 0xf8, 0xc0, 0xc0, 0xc0, - 0x0, 0x0, 0x0, 0x0}; -#endif -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - uint8x16_t expected = simdutf_make_uint8x16_t( - 0xf0, 0x80, 0x80, 0x80, 0xf0, 0x80, 0x80, 0x80, 0xf0, 0x80, 0x80, - 0x80, 0x0, 0x0, 0x0, 0x0); -#else - uint8x16_t expected = {0xf0, 0x80, 0x80, 0x80, 0xf0, 0x80, 0x80, 0x80, - 0xf0, 0x80, 0x80, 0x80, 0x0, 0x0, 0x0, 0x0}; -#endif - uint8x16_t check = vceqq_u8(vandq_u8(in, expected_mask), expected); - bool correct = (vminvq_u32(vreinterpretq_u32_u8(check)) == 0xFFFFFFFF); - // The validation is just three instructions and it is not on a critical - // path. - if (correct) { - utf16_output += 6; // We wrote 3 32-bit surrogate pairs. - } - return 12; // We consumed 12 bytes. - } - // 3 1-4 byte sequences - uint8x16_t sh = vld1q_u8(reinterpret_cast( - simdutf::tables::utf8_to_utf16::shufutf8[idx])); - - // 1 byte: 00000000 00000000 00000000 0ddddddd - // 3 byte: 00000000 00000000 110ccccc 10dddddd - // 3 byte: 00000000 1110bbbb 10cccccc 10dddddd - // 4 byte: 11110aaa 10bbbbbb 10cccccc 10dddddd - uint32x4_t perm = vreinterpretq_u32_u8(vqtbl1q_u8(in, sh)); - // added to fix issue https://github.com/simdutf/simdutf/issues/514 - // We only want to write 2 * 16-bit code units when that is actually what we - // have. Unfortunately, we cannot trust the input. So it is possible to get - // 0xff as an input byte and it should not result in a surrogate pair. We - // need to check for that. - uint32_t permbuffer[4]; - vst1q_u32(permbuffer, perm); - // Mask the low and middle bytes - // 00000000 00000000 00000000 0ddddddd - uint32x4_t ascii = vandq_u32(perm, vmovq_n_u32(0x7f)); - // Because the surrogates need more work, the high surrogate is computed - // first. - uint32x4_t middlehigh = vshlq_n_u32(perm, 2); - // 00000000 00000000 00cccccc 00000000 - uint32x4_t middlebyte = vandq_u32(perm, vmovq_n_u32(0x3F00)); - // Start assembling the sequence. Since the 4th byte is in the same position - // as it would be in a surrogate and there is no dependency, shift left - // instead of right. 3 byte: 00000000 10bbbbxx xxxxxxxx xxxxxxxx 4 byte: - // 11110aaa bbbbbbxx xxxxxxxx xxxxxxxx - uint32x4_t ab = vbslq_u32(vmovq_n_u32(0xFF000000), perm, middlehigh); - // Top 16 bits contains the high ten bits of the surrogate pair before - // correction 3 byte: 00000000 10bbbbcc|cccc0000 00000000 4 byte: 11110aaa - // bbbbbbcc|cccc0000 00000000 - high 10 bits correct w/o correction - uint32x4_t abc = - vbslq_u32(vmovq_n_u32(0xFFFC0000), ab, vshlq_n_u32(middlebyte, 4)); - // Combine the low 6 or 7 bits by a shift right accumulate - // 3 byte: 00000000 00000010|bbbbcccc ccdddddd - low 16 bits correct - // 4 byte: 00000011 110aaabb|bbbbcccc ccdddddd - low 10 bits correct w/o - // correction - uint32x4_t composed = vsraq_n_u32(ascii, abc, 6); - // After this is for surrogates - // Blend the low and high surrogates - // 4 byte: 11110aaa bbbbbbcc|bbbbcccc ccdddddd - uint32x4_t mixed = vbslq_u32(vmovq_n_u32(0xFFFF0000), abc, composed); - // Clear the upper 6 bits of the low surrogate. Don't clear the upper bits - // yet as 0x10000 was not subtracted from the codepoint yet. 4 byte: - // 11110aaa bbbbbbcc|000000cc ccdddddd - uint16x8_t masked_pair = vreinterpretq_u16_u32( - vbicq_u32(mixed, vmovq_n_u32(uint32_t(~0xFFFF03FF)))); - // Correct the remaining UTF-8 prefix, surrogate offset, and add the - // surrogate prefixes in one magic 16-bit addition. similar magic number but - // without the continue byte adjust and halfword swapped UTF-8 4b prefix = - // -0xF000|0x0000 surrogate offset = -0x0040|0x0000 (0x10000 << 6) - // surrogate high = +0xD800|0x0000 - // surrogate low = +0x0000|0xDC00 - // ----------------------------------- - // = +0xE7C0|0xDC00 - uint16x8_t magic = vreinterpretq_u16_u32(vmovq_n_u32(0xE7C0DC00)); - // 4 byte: 110110AA BBBBBBCC|110111CC CCDDDDDD - surrogate pair complete - uint32x4_t surrogates = - vreinterpretq_u32_u16(vaddq_u16(masked_pair, magic)); - // If the high bit is 1 (s32 less than zero), this needs a surrogate pair - uint32x4_t is_pair = vcltzq_s32(vreinterpretq_s32_u32(perm)); - - // Select either the 4 byte surrogate pair or the 2 byte solo codepoint - // 3 byte: 0xxxxxxx xxxxxxxx|bbbbcccc ccdddddd - // 4 byte: 110110AA BBBBBBCC|110111CC CCDDDDDD - uint32x4_t selected = vbslq_u32(is_pair, surrogates, composed); - // Byte swap if necessary - if (!match_system(big_endian)) { - selected = - vreinterpretq_u32_u8(vrev16q_u8(vreinterpretq_u8_u32(selected))); - } - // Attempting to shuffle and store would be complex, just scalarize. - uint32_t buffer[4]; - vst1q_u32(buffer, selected); - // Test for the top bit of the surrogate mask. Remove due to issue 514 - // const uint32_t SURROGATE_MASK = match_system(big_endian) ? 0x80000000 : - // 0x00800000; - for (size_t i = 0; i < 3; i++) { - // Surrogate - // Used to be if (buffer[i] & SURROGATE_MASK) { - // See discussion above. - // patch for issue https://github.com/simdutf/simdutf/issues/514 - if ((permbuffer[i] & 0xf8000000) == 0xf0000000) { - utf16_output[0] = uint16_t(buffer[i] >> 16); - utf16_output[1] = uint16_t(buffer[i] & 0xFFFF); - utf16_output += 2; - } else { - utf16_output[0] = uint16_t(buffer[i] & 0xFFFF); - utf16_output++; - } - } - return consumed; - } else { - // here we know that there is an error but we do not handle errors - return 12; - } -} -/* end file src/arm64/arm_convert_utf8_to_utf16.cpp */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -/* begin file src/arm64/arm_convert_utf8_to_utf32.cpp */ -// Convert up to 12 bytes from utf8 to utf32 using a mask indicating the -// end of the code points. Only the least significant 12 bits of the mask -// are accessed. -// It returns how many bytes were consumed (up to 12). -size_t convert_masked_utf8_to_utf32(const char *input, - uint64_t utf8_end_of_code_point_mask, - char32_t *&utf32_out) { - // we use an approach where we try to process up to 12 input bytes. - // Why 12 input bytes and not 16? Because we are concerned with the size of - // the lookup tables. Also 12 is nicely divisible by two and three. - // - uint32_t *&utf32_output = reinterpret_cast(utf32_out); - uint8x16_t in = vld1q_u8(reinterpret_cast(input)); - const uint16_t input_utf8_end_of_code_point_mask = - utf8_end_of_code_point_mask & 0xFFF; - // - // Optimization note: our main path below is load-latency dependent. Thus it - // is maybe beneficial to have fast paths that depend on branch prediction but - // have less latency. This results in more instructions but, potentially, also - // higher speeds. - // - // We first try a few fast paths. - if (utf8_end_of_code_point_mask == 0xfff) { - // We process in chunks of 12 bytes. - // use fast implementation in src/simdutf/arm64/simd.h - // Ideally the compiler can keep the tables in registers. - simd8 temp{vreinterpretq_s8_u8(in)}; - temp.store_ascii_as_utf32_tbl(utf32_out); - utf32_output += 12; // We wrote 12 32-bit characters. - return 12; // We consumed 12 bytes. - } - if (input_utf8_end_of_code_point_mask == 0x924) { - // We want to take 4 3-byte UTF-8 code units and turn them into 4 4-byte - // UTF-32 code units. Convert to UTF-16 - uint16x4_t composed_utf16 = convert_utf8_3_byte_to_utf16(in); - // Zero extend and store via ST2 with a zero. - uint16x4x2_t interleaver = {{composed_utf16, vmov_n_u16(0)}}; - vst2_u16(reinterpret_cast(utf32_output), interleaver); - utf32_output += 4; // We wrote 4 32-bit characters. - return 12; // We consumed 12 bytes. - } - - // 2 byte sequences occur in short bursts in languages like Greek and Russian. - if (input_utf8_end_of_code_point_mask == 0xaaa) { - // We want to take 6 2-byte UTF-8 code units and turn them into 6 4-byte - // UTF-32 code units. Convert to UTF-16 - uint16x8_t composed_utf16 = convert_utf8_2_byte_to_utf16(in); - // Zero extend and store via ST2 with a zero. - uint16x8x2_t interleaver = {{composed_utf16, vmovq_n_u16(0)}}; - vst2q_u16(reinterpret_cast(utf32_output), interleaver); - utf32_output += 6; // We wrote 6 32-bit characters. - return 12; // We consumed 12 bytes. - } - /// Either no fast path or an unimportant fast path. - - const uint8_t idx = simdutf::tables::utf8_to_utf16::utf8bigindex - [input_utf8_end_of_code_point_mask][0]; - const uint8_t consumed = simdutf::tables::utf8_to_utf16::utf8bigindex - [input_utf8_end_of_code_point_mask][1]; - - if (idx < 64) { - // SIX (6) input code-code units - // Convert to UTF-16 - uint16x8_t composed_utf16 = convert_utf8_1_to_2_byte_to_utf16(in, idx); - // Zero extend and store with ST2 and zero - uint16x8x2_t interleaver = {{composed_utf16, vmovq_n_u16(0)}}; - vst2q_u16(reinterpret_cast(utf32_output), interleaver); - utf32_output += 6; // We wrote 6 32-bit characters. - return consumed; - } else if (idx < 145) { - // FOUR (4) input code-code units - // UTF-16 and UTF-32 use similar algorithms, but UTF-32 skips the narrowing. - uint8x16_t sh = vld1q_u8(reinterpret_cast( - simdutf::tables::utf8_to_utf16::shufutf8[idx])); - // Shuffle - // 1 byte: 00000000 00000000 0ccccccc - // 2 byte: 00000000 110bbbbb 10cccccc - // 3 byte: 1110aaaa 10bbbbbb 10cccccc - uint32x4_t perm = vreinterpretq_u32_u8(vqtbl1q_u8(in, sh)); - // Split - // 00000000 00000000 0ccccccc - uint32x4_t ascii = vandq_u32(perm, vmovq_n_u32(0x7F)); // 6 or 7 bits - // Note: unmasked - // xxxxxxxx aaaaxxxx xxxxxxxx - uint32x4_t high = vshrq_n_u32(perm, 4); // 4 bits - // Use 16 bit bic instead of and. - // The top bits will be corrected later in the bsl - // 00000000 10bbbbbb 00000000 - uint32x4_t middle = vreinterpretq_u32_u16( - vbicq_u16(vreinterpretq_u16_u32(perm), - vmovq_n_u16(uint16_t(~0xff00)))); // 5 or 6 bits - // Combine low and middle with shift right accumulate - // 00000000 00xxbbbb bbcccccc - uint32x4_t lowmid = vsraq_n_u32(ascii, middle, 2); - // Insert top 4 bits from high byte with bitwise select - // 00000000 aaaabbbb bbcccccc - uint32x4_t composed = vbslq_u32(vmovq_n_u32(0x0000F000), high, lowmid); - vst1q_u32(utf32_output, composed); - utf32_output += 4; // We wrote 4 32-bit characters. - return consumed; - } else if (idx < 209) { - // THREE (3) input code-code units - if (input_utf8_end_of_code_point_mask == 0x888) { - // We want to take 3 4-byte UTF-8 code units and turn them into 3 4-byte - // UTF-32 code units. This uses the same method as the fixed 3 byte - // version, reversing and shift left insert. However, there is no need for - // a shuffle mask now, just rev16 and rev32. - // - // This version does not use the LUT, but 4 byte sequences are less common - // and the overhead of the extra memory access is less important than the - // early branch overhead in shorter sequences, so it comes last. - - // Swap pairs of bytes - // 10dddddd|10cccccc|10bbbbbb|11110aaa - // 10cccccc 10dddddd|11110aaa 10bbbbbb - uint16x8_t swap1 = vreinterpretq_u16_u8(vrev16q_u8(in)); - // Shift left and insert - // xxxxcccc ccdddddd|xxxxxxxa aabbbbbb - uint16x8_t merge1 = vsliq_n_u16(swap1, vreinterpretq_u16_u8(in), 6); - // Swap 16-bit lanes - // xxxxcccc ccdddddd xxxxxxxa aabbbbbb - // xxxxxxxa aabbbbbb xxxxcccc ccdddddd - uint32x4_t swap2 = vreinterpretq_u32_u16(vrev32q_u16(merge1)); - // Shift insert again - // xxxxxxxx xxxaaabb bbbbcccc ccdddddd - uint32x4_t merge2 = vsliq_n_u32(swap2, vreinterpretq_u32_u16(merge1), 12); - // Clear the garbage - // 00000000 000aaabb bbbbcccc ccdddddd - uint32x4_t composed = vandq_u32(merge2, vmovq_n_u32(0x1FFFFF)); - // Store - vst1q_u32(utf32_output, composed); - - utf32_output += 3; // We wrote 3 32-bit characters. - return 12; // We consumed 12 bytes. - } - // Unlike UTF-16, doing a fast codepath doesn't have nearly as much benefit - // due to surrogates no longer being involved. - uint8x16_t sh = vld1q_u8(reinterpret_cast( - simdutf::tables::utf8_to_utf16::shufutf8[idx])); - // 1 byte: 00000000 00000000 00000000 0ddddddd - // 2 byte: 00000000 00000000 110ccccc 10dddddd - // 3 byte: 00000000 1110bbbb 10cccccc 10dddddd - // 4 byte: 11110aaa 10bbbbbb 10cccccc 10dddddd - uint32x4_t perm = vreinterpretq_u32_u8(vqtbl1q_u8(in, sh)); - // Ascii - uint32x4_t ascii = vandq_u32(perm, vmovq_n_u32(0x7F)); - uint32x4_t middle = vandq_u32(perm, vmovq_n_u32(0x3f00)); - // When converting the way we do, the 3 byte prefix will be interpreted as - // the 18th bit being set, since the code would interpret the lead byte - // (0b1110bbbb) as a continuation byte (0b10bbbbbb). To fix this, we can - // either xor or do an 8 bit add of the 6th bit shifted right by 1. Since - // NEON has shift right accumulate, we use that. - // 4 byte 3 byte - // 10bbbbbb 1110bbbb - // 00000000 01000000 6th bit - // 00000000 00100000 shift right - // 10bbbbbb 0000bbbb add - // 00bbbbbb 0000bbbb mask - uint8x16_t correction = - vreinterpretq_u8_u32(vandq_u32(perm, vmovq_n_u32(0x00400000))); - uint32x4_t corrected = vreinterpretq_u32_u8( - vsraq_n_u8(vreinterpretq_u8_u32(perm), correction, 1)); - // 00000000 00000000 0000cccc ccdddddd - uint32x4_t cd = vsraq_n_u32(ascii, middle, 2); - // Insert twice - // xxxxxxxx xxxaaabb bbbbxxxx xxxxxxxx - uint32x4_t ab = vbslq_u32(vmovq_n_u32(0x01C0000), vshrq_n_u32(corrected, 6), - vshrq_n_u32(corrected, 4)); - // 00000000 000aaabb bbbbcccc ccdddddd - uint32x4_t composed = vbslq_u32(vmovq_n_u32(0xFFE00FFF), cd, ab); - // Store - vst1q_u32(utf32_output, composed); - utf32_output += 3; // We wrote 3 32-bit characters. - return consumed; - } else { - // here we know that there is an error but we do not handle errors - return 12; - } -} -/* end file src/arm64/arm_convert_utf8_to_utf32.cpp */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/arm64/arm_convert_utf16_to_latin1.cpp */ - -template -std::pair -arm_convert_utf16_to_latin1(const char16_t *buf, size_t len, - char *latin1_output) { - const char16_t *end = buf + len; - while (end - buf >= 8) { - uint16x8_t in = vld1q_u16(reinterpret_cast(buf)); - if (!match_system(big_endian)) { - in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); - } - if (vmaxvq_u16(in) <= 0xff) { - // 1. pack the bytes - uint8x8_t latin1_packed = vmovn_u16(in); - // 2. store (8 bytes) - vst1_u8(reinterpret_cast(latin1_output), latin1_packed); - // 3. adjust pointers - buf += 8; - latin1_output += 8; - } else { - return std::make_pair(nullptr, reinterpret_cast(latin1_output)); - } - } // while - return std::make_pair(buf, latin1_output); -} - -template -std::pair -arm_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, - char *latin1_output) { - const char16_t *start = buf; - const char16_t *end = buf + len; - while (end - buf >= 8) { - uint16x8_t in = vld1q_u16(reinterpret_cast(buf)); - if (!match_system(big_endian)) { - in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); - } - if (vmaxvq_u16(in) <= 0xff) { - // 1. pack the bytes - uint8x8_t latin1_packed = vmovn_u16(in); - // 2. store (8 bytes) - vst1_u8(reinterpret_cast(latin1_output), latin1_packed); - // 3. adjust pointers - buf += 8; - latin1_output += 8; - } else { - // Let us do a scalar fallback. - for (int k = 0; k < 8; k++) { - uint16_t word = - !match_system(big_endian) ? scalar::u16_swap_bytes(buf[k]) : buf[k]; - if (word <= 0xff) { - *latin1_output++ = char(word); - } else { - return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), - latin1_output); - } - } - } - } // while - return std::make_pair(result(error_code::SUCCESS, buf - start), - latin1_output); -} -/* end file src/arm64/arm_convert_utf16_to_latin1.cpp */ -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -/* begin file src/arm64/arm_convert_utf16_to_utf32.cpp */ -/* - The vectorized algorithm works on single SSE register i.e., it - loads eight 16-bit code units. - - We consider three cases: - 1. an input register contains no surrogates and each value - is in range 0x0000 .. 0x07ff. - 2. an input register contains no surrogates and values are - is in range 0x0000 .. 0xffff. - 3. an input register contains surrogates --- i.e. codepoints - can have 16 or 32 bits. - - Ad 1. - - When values are less than 0x0800, it means that a 16-bit code unit - can be converted into: 1) single UTF8 byte (when it is an ASCII - char) or 2) two UTF8 bytes. - - For this case we do only some shuffle to obtain these 2-byte - codes and finally compress the whole SSE register with a single - shuffle. - - We need 256-entry lookup table to get a compression pattern - and the number of output bytes in the compressed vector register. - Each entry occupies 17 bytes. - - Ad 2. - - When values fit in 16-bit code units, but are above 0x07ff, then - a single word may produce one, two or three UTF8 bytes. - - We prepare data for all these three cases in two registers. - The first register contains lower two UTF8 bytes (used in all - cases), while the second one contains just the third byte for - the three-UTF8-bytes case. - - Finally these two registers are interleaved forming eight-element - array of 32-bit values. The array spans two SSE registers. - The bytes from the registers are compressed using two shuffles. - - We need 256-entry lookup table to get a compression pattern - and the number of output bytes in the compressed vector register. - Each entry occupies 17 bytes. - - - To summarize: - - We need two 256-entry tables that have 8704 bytes in total. -*/ -/* - Returns a pair: the first unprocessed byte from buf and utf8_output - A scalar routing should carry on the conversion of the tail. -*/ -template -std::pair -arm_convert_utf16_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_out) { - uint32_t *utf32_output = reinterpret_cast(utf32_out); - const char16_t *end = buf + len; - - const uint16x8_t v_f800 = vmovq_n_u16((uint16_t)0xf800); - const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800); - - while (end - buf >= 8) { - uint16x8_t in = vld1q_u16(reinterpret_cast(buf)); - if (!match_system(big_endian)) { - in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); - } - - const uint16x8_t surrogates_bytemask = - vceqq_u16(vandq_u16(in, v_f800), v_d800); - // It might seem like checking for surrogates_bitmask == 0xc000 could help. - // However, it is likely an uncommon occurrence. - if (vmaxvq_u16(surrogates_bytemask) == 0) { - // case: no surrogate pairs, extend all 16-bit code units to 32-bit code - // units - vst1q_u32(utf32_output, vmovl_u16(vget_low_u16(in))); - vst1q_u32(utf32_output + 4, vmovl_high_u16(in)); - utf32_output += 8; - buf += 8; - // surrogate pair(s) in a register - } else { - // Let us do a scalar fallback. - // It may seem wasteful to use scalar code, but being efficient with SIMD - // in the presence of surrogate pairs may require non-trivial tables. - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint16_t word = - !match_system(big_endian) ? scalar::u16_swap_bytes(buf[k]) : buf[k]; - if ((word & 0xF800) != 0xD800) { - *utf32_output++ = char32_t(word); - } else { - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - uint16_t next_word = !match_system(big_endian) - ? scalar::u16_swap_bytes(buf[k + 1]) - : buf[k + 1]; - k++; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if ((diff | diff2) > 0x3FF) { - return std::make_pair(nullptr, - reinterpret_cast(utf32_output)); - } - uint32_t value = (diff << 10) + diff2 + 0x10000; - *utf32_output++ = char32_t(value); - } - } - buf += k; - } - } // while - return std::make_pair(buf, reinterpret_cast(utf32_output)); -} - -/* - Returns a pair: a result struct and utf8_output. - If there is an error, the count field of the result is the position of the - error. Otherwise, it is the position of the first unprocessed byte in buf - (even if finished). A scalar routing should carry on the conversion of the - tail if needed. -*/ -template -std::pair -arm_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, - char32_t *utf32_out) { - uint32_t *utf32_output = reinterpret_cast(utf32_out); - const char16_t *start = buf; - const char16_t *end = buf + len; - - const uint16x8_t v_f800 = vmovq_n_u16((uint16_t)0xf800); - const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800); - - while ((end - buf) >= 8) { - uint16x8_t in = vld1q_u16(reinterpret_cast(buf)); - if (!match_system(big_endian)) { - in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); - } - - const uint16x8_t surrogates_bytemask = - vceqq_u16(vandq_u16(in, v_f800), v_d800); - // It might seem like checking for surrogates_bitmask == 0xc000 could help. - // However, it is likely an uncommon occurrence. - if (vmaxvq_u16(surrogates_bytemask) == 0) { - // case: no surrogate pairs, extend all 16-bit code units to 32-bit code - // units - vst1q_u32(utf32_output, vmovl_u16(vget_low_u16(in))); - vst1q_u32(utf32_output + 4, vmovl_high_u16(in)); - utf32_output += 8; - buf += 8; - // surrogate pair(s) in a register - } else { - // Let us do a scalar fallback. - // It may seem wasteful to use scalar code, but being efficient with SIMD - // in the presence of surrogate pairs may require non-trivial tables. - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint16_t word = - !match_system(big_endian) ? scalar::u16_swap_bytes(buf[k]) : buf[k]; - if ((word & 0xF800) != 0xD800) { - *utf32_output++ = char32_t(word); - } else { - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - uint16_t next_word = !match_system(big_endian) - ? scalar::u16_swap_bytes(buf[k + 1]) - : buf[k + 1]; - k++; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if ((diff | diff2) > 0x3FF) { - return std::make_pair( - result(error_code::SURROGATE, buf - start + k - 1), - reinterpret_cast(utf32_output)); - } - uint32_t value = (diff << 10) + diff2 + 0x10000; - *utf32_output++ = char32_t(value); - } - } - buf += k; - } - } // while - return std::make_pair(result(error_code::SUCCESS, buf - start), - reinterpret_cast(utf32_output)); -} -/* end file src/arm64/arm_convert_utf16_to_utf32.cpp */ -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF8 -/* begin file src/arm64/arm_convert_utf16_to_utf8.cpp */ -/* - The vectorized algorithm works on single SSE register i.e., it - loads eight 16-bit code units. - - We consider three cases: - 1. an input register contains no surrogates and each value - is in range 0x0000 .. 0x07ff. - 2. an input register contains no surrogates and values are - is in range 0x0000 .. 0xffff. - 3. an input register contains surrogates --- i.e. codepoints - can have 16 or 32 bits. - - Ad 1. - - When values are less than 0x0800, it means that a 16-bit code unit - can be converted into: 1) single UTF8 byte (when it is an ASCII - char) or 2) two UTF8 bytes. - - For this case we do only some shuffle to obtain these 2-byte - codes and finally compress the whole SSE register with a single - shuffle. - - We need 256-entry lookup table to get a compression pattern - and the number of output bytes in the compressed vector register. - Each entry occupies 17 bytes. - - Ad 2. - - When values fit in 16-bit code units, but are above 0x07ff, then - a single word may produce one, two or three UTF8 bytes. - - We prepare data for all these three cases in two registers. - The first register contains lower two UTF8 bytes (used in all - cases), while the second one contains just the third byte for - the three-UTF8-bytes case. - - Finally these two registers are interleaved forming eight-element - array of 32-bit values. The array spans two SSE registers. - The bytes from the registers are compressed using two shuffles. - - We need 256-entry lookup table to get a compression pattern - and the number of output bytes in the compressed vector register. - Each entry occupies 17 bytes. - - - To summarize: - - We need two 256-entry tables that have 8704 bytes in total. -*/ -/* - Returns a pair: the first unprocessed byte from buf and utf8_output - A scalar routing should carry on the conversion of the tail. -*/ -template -std::pair -arm_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_out) { - uint8_t *utf8_output = reinterpret_cast(utf8_out); - const char16_t *end = buf + len; - - const uint16x8_t v_f800 = vmovq_n_u16((uint16_t)0xf800); - const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800); - const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080); - const size_t safety_margin = - 12; // to avoid overruns, see issue - // https://github.com/simdutf/simdutf/issues/92 - while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { - uint16x8_t in = vld1q_u16(reinterpret_cast(buf)); - if (!match_system(big_endian)) { - in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); - } - if (vmaxvq_u16(in) <= 0x7F) { // ASCII fast path!!!! - // It is common enough that we have sequences of 16 consecutive ASCII - // characters. - uint16x8_t nextin = - vld1q_u16(reinterpret_cast(buf) + 8); - if (!match_system(big_endian)) { - nextin = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(nextin))); - } - if (vmaxvq_u16(nextin) > 0x7F) { - // 1. pack the bytes - // obviously suboptimal. - uint8x8_t utf8_packed = vmovn_u16(in); - // 2. store (8 bytes) - vst1_u8(utf8_output, utf8_packed); - // 3. adjust pointers - buf += 8; - utf8_output += 8; - in = nextin; - } else { - // 1. pack the bytes - // obviously suboptimal. - uint8x16_t utf8_packed = vmovn_high_u16(vmovn_u16(in), nextin); - // 2. store (16 bytes) - vst1q_u8(utf8_output, utf8_packed); - // 3. adjust pointers - buf += 16; - utf8_output += 16; - continue; // we are done for this round! - } - } - - if (vmaxvq_u16(in) <= 0x7FF) { - // 1. prepare 2-byte values - // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 - // expected output : [110a|aaaa|10bb|bbbb] x 8 - const uint16x8_t v_1f00 = vmovq_n_u16((int16_t)0x1f00); - const uint16x8_t v_003f = vmovq_n_u16((int16_t)0x003f); - - // t0 = [000a|aaaa|bbbb|bb00] - const uint16x8_t t0 = vshlq_n_u16(in, 2); - // t1 = [000a|aaaa|0000|0000] - const uint16x8_t t1 = vandq_u16(t0, v_1f00); - // t2 = [0000|0000|00bb|bbbb] - const uint16x8_t t2 = vandq_u16(in, v_003f); - // t3 = [000a|aaaa|00bb|bbbb] - const uint16x8_t t3 = vorrq_u16(t1, t2); - // t4 = [110a|aaaa|10bb|bbbb] - const uint16x8_t t4 = vorrq_u16(t3, v_c080); - // 2. merge ASCII and 2-byte codewords - const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F); - const uint16x8_t one_byte_bytemask = vcleq_u16(in, v_007f); - const uint8x16_t utf8_unpacked = - vreinterpretq_u8_u16(vbslq_u16(one_byte_bytemask, in, t4)); - // 3. prepare bitmask for 8-bit lookup -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - const uint16x8_t mask = simdutf_make_uint16x8_t( - 0x0001, 0x0004, 0x0010, 0x0040, 0x0002, 0x0008, 0x0020, 0x0080); -#else - const uint16x8_t mask = {0x0001, 0x0004, 0x0010, 0x0040, - 0x0002, 0x0008, 0x0020, 0x0080}; -#endif - uint16_t m2 = vaddvq_u16(vandq_u16(one_byte_bytemask, mask)); - // 4. pack the bytes - const uint8_t *row = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0]; - const uint8x16_t shuffle = vld1q_u8(row + 1); - const uint8x16_t utf8_packed = vqtbl1q_u8(utf8_unpacked, shuffle); - - // 5. store bytes - vst1q_u8(utf8_output, utf8_packed); - - // 6. adjust pointers - buf += 8; - utf8_output += row[0]; - continue; - } - const uint16x8_t surrogates_bytemask = - vceqq_u16(vandq_u16(in, v_f800), v_d800); - // It might seem like checking for surrogates_bitmask == 0xc000 could help. - // However, it is likely an uncommon occurrence. - if (vmaxvq_u16(surrogates_bytemask) == 0) { - // case: code units from register produce either 1, 2 or 3 UTF-8 bytes -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - const uint16x8_t dup_even = simdutf_make_uint16x8_t( - 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); -#else - const uint16x8_t dup_even = {0x0000, 0x0202, 0x0404, 0x0606, - 0x0808, 0x0a0a, 0x0c0c, 0x0e0e}; -#endif - /* In this branch we handle three cases: - 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - - single UFT-8 byte - 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two - UTF-8 bytes - 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - - three UTF-8 bytes - - We expand the input word (16-bit) into two code units (32-bit), thus - we have room for four bytes. However, we need five distinct bit - layouts. Note that the last byte in cases #2 and #3 is the same. - - We precompute byte 1 for case #1 and the common byte for cases #2 & #3 - in register t2. - - We precompute byte 1 for case #3 and -- **conditionally** -- precompute - either byte 1 for case #2 or byte 2 for case #3. Note that they - differ by exactly one bit. - - Finally from these two code units we build proper UTF-8 sequence, taking - into account the case (i.e, the number of bytes to write). - */ - /** - * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: - * t2 => [0ccc|cccc] [10cc|cccc] - * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) - */ -#define simdutf_vec(x) vmovq_n_u16(static_cast(x)) - // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - const uint16x8_t t0 = vreinterpretq_u16_u8( - vqtbl1q_u8(vreinterpretq_u8_u16(in), vreinterpretq_u8_u16(dup_even))); - // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] - const uint16x8_t t1 = vandq_u16(t0, simdutf_vec(0b0011111101111111)); - // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - const uint16x8_t t2 = vorrq_u16(t1, simdutf_vec(0b1000000000000000)); - - // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] - const uint16x8_t s0 = vshrq_n_u16(in, 12); - // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] - const uint16x8_t s1 = vandq_u16(in, simdutf_vec(0b0000111111000000)); - // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000] - const uint16x8_t s1s = vshlq_n_u16(s1, 2); - // [00bb|bbbb|0000|aaaa] - const uint16x8_t s2 = vorrq_u16(s0, s1s); - // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - const uint16x8_t s3 = vorrq_u16(s2, simdutf_vec(0b1100000011100000)); - const uint16x8_t v_07ff = vmovq_n_u16((uint16_t)0x07FF); - const uint16x8_t one_or_two_bytes_bytemask = vcleq_u16(in, v_07ff); - const uint16x8_t m0 = - vbicq_u16(simdutf_vec(0b0100000000000000), one_or_two_bytes_bytemask); - const uint16x8_t s4 = veorq_u16(s3, m0); -#undef simdutf_vec - - // 4. expand code units 16-bit => 32-bit - const uint8x16_t out0 = vreinterpretq_u8_u16(vzip1q_u16(t2, s4)); - const uint8x16_t out1 = vreinterpretq_u8_u16(vzip2q_u16(t2, s4)); - - // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F); - const uint16x8_t one_byte_bytemask = vcleq_u16(in, v_007f); -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - const uint16x8_t onemask = simdutf_make_uint16x8_t( - 0x0001, 0x0004, 0x0010, 0x0040, 0x0100, 0x0400, 0x1000, 0x4000); - const uint16x8_t twomask = simdutf_make_uint16x8_t( - 0x0002, 0x0008, 0x0020, 0x0080, 0x0200, 0x0800, 0x2000, 0x8000); -#else - const uint16x8_t onemask = {0x0001, 0x0004, 0x0010, 0x0040, - 0x0100, 0x0400, 0x1000, 0x4000}; - const uint16x8_t twomask = {0x0002, 0x0008, 0x0020, 0x0080, - 0x0200, 0x0800, 0x2000, 0x8000}; -#endif - const uint16x8_t combined = - vorrq_u16(vandq_u16(one_byte_bytemask, onemask), - vandq_u16(one_or_two_bytes_bytemask, twomask)); - const uint16_t mask = vaddvq_u16(combined); - // The following fast path may or may not be beneficial. - /*if(mask == 0) { - // We only have three-byte code units. Use fast path. - const uint8x16_t shuffle = {2,3,1,6,7,5,10,11,9,14,15,13,0,0,0,0}; - const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle); - const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle); - vst1q_u8(utf8_output, utf8_0); - utf8_output += 12; - vst1q_u8(utf8_output, utf8_1); - utf8_output += 12; - buf += 8; - continue; - }*/ - const uint8_t mask0 = uint8_t(mask); - - const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; - const uint8x16_t shuffle0 = vld1q_u8(row0 + 1); - const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle0); - - const uint8_t mask1 = static_cast(mask >> 8); - const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; - const uint8x16_t shuffle1 = vld1q_u8(row1 + 1); - const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle1); - - vst1q_u8(utf8_output, utf8_0); - utf8_output += row0[0]; - vst1q_u8(utf8_output, utf8_1); - utf8_output += row1[0]; - - buf += 8; - // surrogate pair(s) in a register - } else { - // Let us do a scalar fallback. - // It may seem wasteful to use scalar code, but being efficient with SIMD - // in the presence of surrogate pairs may require non-trivial tables. - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint16_t word = - !match_system(big_endian) ? scalar::u16_swap_bytes(buf[k]) : buf[k]; - if ((word & 0xFF80) == 0) { - *utf8_output++ = char(word); - } else if ((word & 0xF800) == 0) { - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else if ((word & 0xF800) != 0xD800) { - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else { - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - uint16_t next_word = !match_system(big_endian) - ? scalar::u16_swap_bytes(buf[k + 1]) - : buf[k + 1]; - k++; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if ((diff | diff2) > 0x3FF) { - return std::make_pair(nullptr, - reinterpret_cast(utf8_output)); - } - uint32_t value = (diff << 10) + diff2 + 0x10000; - *utf8_output++ = char((value >> 18) | 0b11110000); - *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((value & 0b111111) | 0b10000000); - } - } - buf += k; - } - } // while - - return std::make_pair(buf, reinterpret_cast(utf8_output)); -} - -/* - Returns a pair: a result struct and utf8_output. - If there is an error, the count field of the result is the position of the - error. Otherwise, it is the position of the first unprocessed byte in buf - (even if finished). A scalar routing should carry on the conversion of the - tail if needed. -*/ -template -std::pair -arm_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, - char *utf8_out) { - uint8_t *utf8_output = reinterpret_cast(utf8_out); - const char16_t *start = buf; - const char16_t *end = buf + len; - - const uint16x8_t v_f800 = vmovq_n_u16((uint16_t)0xf800); - const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800); - const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080); - const size_t safety_margin = - 12; // to avoid overruns, see issue - // https://github.com/simdutf/simdutf/issues/92 - - while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { - uint16x8_t in = vld1q_u16(reinterpret_cast(buf)); - if (!match_system(big_endian)) { - in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); - } - if (vmaxvq_u16(in) <= 0x7F) { // ASCII fast path!!!! - // It is common enough that we have sequences of 16 consecutive ASCII - // characters. - uint16x8_t nextin = - vld1q_u16(reinterpret_cast(buf) + 8); - if (!match_system(big_endian)) { - nextin = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(nextin))); - } - if (vmaxvq_u16(nextin) > 0x7F) { - // 1. pack the bytes - // obviously suboptimal. - uint8x8_t utf8_packed = vmovn_u16(in); - // 2. store (8 bytes) - vst1_u8(utf8_output, utf8_packed); - // 3. adjust pointers - buf += 8; - utf8_output += 8; - in = nextin; - } else { - // 1. pack the bytes - // obviously suboptimal. - uint8x16_t utf8_packed = vmovn_high_u16(vmovn_u16(in), nextin); - // 2. store (16 bytes) - vst1q_u8(utf8_output, utf8_packed); - // 3. adjust pointers - buf += 16; - utf8_output += 16; - continue; // we are done for this round! - } - } - - if (vmaxvq_u16(in) <= 0x7FF) { - // 1. prepare 2-byte values - // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 - // expected output : [110a|aaaa|10bb|bbbb] x 8 - const uint16x8_t v_1f00 = vmovq_n_u16((int16_t)0x1f00); - const uint16x8_t v_003f = vmovq_n_u16((int16_t)0x003f); - - // t0 = [000a|aaaa|bbbb|bb00] - const uint16x8_t t0 = vshlq_n_u16(in, 2); - // t1 = [000a|aaaa|0000|0000] - const uint16x8_t t1 = vandq_u16(t0, v_1f00); - // t2 = [0000|0000|00bb|bbbb] - const uint16x8_t t2 = vandq_u16(in, v_003f); - // t3 = [000a|aaaa|00bb|bbbb] - const uint16x8_t t3 = vorrq_u16(t1, t2); - // t4 = [110a|aaaa|10bb|bbbb] - const uint16x8_t t4 = vorrq_u16(t3, v_c080); - // 2. merge ASCII and 2-byte codewords - const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F); - const uint16x8_t one_byte_bytemask = vcleq_u16(in, v_007f); - const uint8x16_t utf8_unpacked = - vreinterpretq_u8_u16(vbslq_u16(one_byte_bytemask, in, t4)); - // 3. prepare bitmask for 8-bit lookup -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - const uint16x8_t mask = simdutf_make_uint16x8_t( - 0x0001, 0x0004, 0x0010, 0x0040, 0x0002, 0x0008, 0x0020, 0x0080); -#else - const uint16x8_t mask = {0x0001, 0x0004, 0x0010, 0x0040, - 0x0002, 0x0008, 0x0020, 0x0080}; -#endif - uint16_t m2 = vaddvq_u16(vandq_u16(one_byte_bytemask, mask)); - // 4. pack the bytes - const uint8_t *row = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0]; - const uint8x16_t shuffle = vld1q_u8(row + 1); - const uint8x16_t utf8_packed = vqtbl1q_u8(utf8_unpacked, shuffle); - - // 5. store bytes - vst1q_u8(utf8_output, utf8_packed); - - // 6. adjust pointers - buf += 8; - utf8_output += row[0]; - continue; - } - const uint16x8_t surrogates_bytemask = - vceqq_u16(vandq_u16(in, v_f800), v_d800); - // It might seem like checking for surrogates_bitmask == 0xc000 could help. - // However, it is likely an uncommon occurrence. - if (vmaxvq_u16(surrogates_bytemask) == 0) { - // case: code units from register produce either 1, 2 or 3 UTF-8 bytes -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - const uint16x8_t dup_even = simdutf_make_uint16x8_t( - 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); -#else - const uint16x8_t dup_even = {0x0000, 0x0202, 0x0404, 0x0606, - 0x0808, 0x0a0a, 0x0c0c, 0x0e0e}; -#endif - /* In this branch we handle three cases: - 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - - single UFT-8 byte - 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two - UTF-8 bytes - 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - - three UTF-8 bytes - - We expand the input word (16-bit) into two code units (32-bit), thus - we have room for four bytes. However, we need five distinct bit - layouts. Note that the last byte in cases #2 and #3 is the same. - - We precompute byte 1 for case #1 and the common byte for cases #2 & #3 - in register t2. - - We precompute byte 1 for case #3 and -- **conditionally** -- precompute - either byte 1 for case #2 or byte 2 for case #3. Note that they - differ by exactly one bit. - - Finally from these two code units we build proper UTF-8 sequence, taking - into account the case (i.e, the number of bytes to write). - */ - /** - * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: - * t2 => [0ccc|cccc] [10cc|cccc] - * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) - */ -#define simdutf_vec(x) vmovq_n_u16(static_cast(x)) - // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - const uint16x8_t t0 = vreinterpretq_u16_u8( - vqtbl1q_u8(vreinterpretq_u8_u16(in), vreinterpretq_u8_u16(dup_even))); - // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] - const uint16x8_t t1 = vandq_u16(t0, simdutf_vec(0b0011111101111111)); - // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - const uint16x8_t t2 = vorrq_u16(t1, simdutf_vec(0b1000000000000000)); - - // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] - const uint16x8_t s0 = vshrq_n_u16(in, 12); - // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] - const uint16x8_t s1 = vandq_u16(in, simdutf_vec(0b0000111111000000)); - // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000] - const uint16x8_t s1s = vshlq_n_u16(s1, 2); - // [00bb|bbbb|0000|aaaa] - const uint16x8_t s2 = vorrq_u16(s0, s1s); - // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - const uint16x8_t s3 = vorrq_u16(s2, simdutf_vec(0b1100000011100000)); - const uint16x8_t v_07ff = vmovq_n_u16((uint16_t)0x07FF); - const uint16x8_t one_or_two_bytes_bytemask = vcleq_u16(in, v_07ff); - const uint16x8_t m0 = - vbicq_u16(simdutf_vec(0b0100000000000000), one_or_two_bytes_bytemask); - const uint16x8_t s4 = veorq_u16(s3, m0); -#undef simdutf_vec - - // 4. expand code units 16-bit => 32-bit - const uint8x16_t out0 = vreinterpretq_u8_u16(vzip1q_u16(t2, s4)); - const uint8x16_t out1 = vreinterpretq_u8_u16(vzip2q_u16(t2, s4)); - - // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F); - const uint16x8_t one_byte_bytemask = vcleq_u16(in, v_007f); -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - const uint16x8_t onemask = simdutf_make_uint16x8_t( - 0x0001, 0x0004, 0x0010, 0x0040, 0x0100, 0x0400, 0x1000, 0x4000); - const uint16x8_t twomask = simdutf_make_uint16x8_t( - 0x0002, 0x0008, 0x0020, 0x0080, 0x0200, 0x0800, 0x2000, 0x8000); -#else - const uint16x8_t onemask = {0x0001, 0x0004, 0x0010, 0x0040, - 0x0100, 0x0400, 0x1000, 0x4000}; - const uint16x8_t twomask = {0x0002, 0x0008, 0x0020, 0x0080, - 0x0200, 0x0800, 0x2000, 0x8000}; -#endif - const uint16x8_t combined = - vorrq_u16(vandq_u16(one_byte_bytemask, onemask), - vandq_u16(one_or_two_bytes_bytemask, twomask)); - const uint16_t mask = vaddvq_u16(combined); - // The following fast path may or may not be beneficial. - /*if(mask == 0) { - // We only have three-byte code units. Use fast path. - const uint8x16_t shuffle = {2,3,1,6,7,5,10,11,9,14,15,13,0,0,0,0}; - const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle); - const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle); - vst1q_u8(utf8_output, utf8_0); - utf8_output += 12; - vst1q_u8(utf8_output, utf8_1); - utf8_output += 12; - buf += 8; - continue; - }*/ - const uint8_t mask0 = uint8_t(mask); - - const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; - const uint8x16_t shuffle0 = vld1q_u8(row0 + 1); - const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle0); - - const uint8_t mask1 = static_cast(mask >> 8); - const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; - const uint8x16_t shuffle1 = vld1q_u8(row1 + 1); - const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle1); - - vst1q_u8(utf8_output, utf8_0); - utf8_output += row0[0]; - vst1q_u8(utf8_output, utf8_1); - utf8_output += row1[0]; - - buf += 8; - // surrogate pair(s) in a register - } else { - // Let us do a scalar fallback. - // It may seem wasteful to use scalar code, but being efficient with SIMD - // in the presence of surrogate pairs may require non-trivial tables. - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint16_t word = - !match_system(big_endian) ? scalar::u16_swap_bytes(buf[k]) : buf[k]; - if ((word & 0xFF80) == 0) { - *utf8_output++ = char(word); - } else if ((word & 0xF800) == 0) { - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else if ((word & 0xF800) != 0xD800) { - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else { - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - uint16_t next_word = !match_system(big_endian) - ? scalar::u16_swap_bytes(buf[k + 1]) - : buf[k + 1]; - k++; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if ((diff | diff2) > 0x3FF) { - return std::make_pair( - result(error_code::SURROGATE, buf - start + k - 1), - reinterpret_cast(utf8_output)); - } - uint32_t value = (diff << 10) + diff2 + 0x10000; - *utf8_output++ = char((value >> 18) | 0b11110000); - *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((value & 0b111111) | 0b10000000); - } - } - buf += k; - } - } // while - - return std::make_pair(result(error_code::SUCCESS, buf - start), - reinterpret_cast(utf8_output)); -} - -template -simdutf_really_inline size_t -arm64_utf8_length_from_utf16_bytemask(const char16_t *in, size_t size) { - size_t pos = 0; - - constexpr size_t N = 8; - const auto one = vmovq_n_u16(1); - // each char16 yields at least one byte - size_t count = size / N * N; - - for (; pos < size / N * N; pos += N) { - auto input = vld1q_u16(reinterpret_cast(in + pos)); - if (!match_system(big_endian)) { - input = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(input))); - } - // 0xd800 .. 0xdbff - low surrogate - // 0xdc00 .. 0xdfff - high surrogate - const auto is_surrogate = - vceqq_u16(vandq_u16(input, vmovq_n_u16(0xf800)), vmovq_n_u16(0xd800)); - - // c0 - chars that yield 2- or 3-byte UTF-8 codes - const auto c0 = vminq_u16(vandq_u16(input, vmovq_n_u16(0xff80)), one); - - // c1 - chars that yield 3-byte UTF-8 codes (including surrogates) - const auto c1 = vminq_u16(vandq_u16(input, vmovq_n_u16(0xf800)), one); - - /* - Explanation how the counting works. - - In the case of a non-surrogate character we count: - * always 1 -- see how `count` is initialized above; - * c0 = 1 if the current char yields 2 or 3 bytes; - * c1 = 1 if the current char yields 3 bytes. - - Thus, we always have correct count for the current char: - from 1, 2 or 3 bytes. - - A trickier part is how we count surrogate pairs. Whether - we encounter a surrogate (low or high), we count it as - 3 chars and then minus 1 (`is_surrogate` is -1 or 0). - Each surrogate char yields 2. A surrogate pair, that - is a low surrogate followed by a high one, yields - the expected 4 bytes. - - It also correctly handles cases when low surrogate is - processed by the this loop, but high surrogate is counted - by the scalar procedure. The scalar procedure uses exactly - the described approach, thanks to that for valid UTF-16 - strings it always count correctly. - */ - auto v_count = vaddq_u16(c1, c0); - v_count = vaddq_u16(v_count, is_surrogate); - count += vaddlvq_u16(v_count); - } - return count + scalar::utf16::utf8_length_from_utf16(in + pos, - size - pos); -} -/* end file src/arm64/arm_convert_utf16_to_utf8.cpp */ -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_BASE64 -/* begin file src/arm64/arm_base64.cpp */ -/** - * References and further reading: - * - * Wojciech Muła, Daniel Lemire, Base64 encoding and decoding at almost the - * speed of a memory copy, Software: Practice and Experience 50 (2), 2020. - * https://arxiv.org/abs/1910.05109 - * - * Wojciech Muła, Daniel Lemire, Faster Base64 Encoding and Decoding using AVX2 - * Instructions, ACM Transactions on the Web 12 (3), 2018. - * https://arxiv.org/abs/1704.00605 - * - * Simon Josefsson. 2006. The Base16, Base32, and Base64 Data Encodings. - * https://tools.ietf.org/html/rfc4648. (2006). Internet Engineering Task Force, - * Request for Comments: 4648. - * - * Alfred Klomp. 2014a. Fast Base64 encoding/decoding with SSE vectorization. - * http://www.alfredklomp.com/programming/sse-base64/. (2014). - * - * Alfred Klomp. 2014b. Fast Base64 stream encoder/decoder in C99, with SIMD - * acceleration. https://github.com/aklomp/base64. (2014). - * - * Hanson Char. 2014. A Fast and Correct Base 64 Codec. (2014). - * https://aws.amazon.com/blogs/developer/a-fast-and-correct-base-64-codec/ - * - * Nick Kopp. 2013. Base64 Encoding on a GPU. - * https://www.codeproject.com/Articles/276993/Base-Encoding-on-a-GPU. (2013). - */ - -size_t encode_base64(char *dst, const char *src, size_t srclen, - base64_options options) { - // credit: Wojciech Muła - uint8_t *out = (uint8_t *)dst; - constexpr static uint8_t source_table[64] = { - 'A', 'Q', 'g', 'w', 'B', 'R', 'h', 'x', 'C', 'S', 'i', 'y', 'D', - 'T', 'j', 'z', 'E', 'U', 'k', '0', 'F', 'V', 'l', '1', 'G', 'W', - 'm', '2', 'H', 'X', 'n', '3', 'I', 'Y', 'o', '4', 'J', 'Z', 'p', - '5', 'K', 'a', 'q', '6', 'L', 'b', 'r', '7', 'M', 'c', 's', '8', - 'N', 'd', 't', '9', 'O', 'e', 'u', '+', 'P', 'f', 'v', '/', - }; - constexpr static uint8_t source_table_url[64] = { - 'A', 'Q', 'g', 'w', 'B', 'R', 'h', 'x', 'C', 'S', 'i', 'y', 'D', - 'T', 'j', 'z', 'E', 'U', 'k', '0', 'F', 'V', 'l', '1', 'G', 'W', - 'm', '2', 'H', 'X', 'n', '3', 'I', 'Y', 'o', '4', 'J', 'Z', 'p', - '5', 'K', 'a', 'q', '6', 'L', 'b', 'r', '7', 'M', 'c', 's', '8', - 'N', 'd', 't', '9', 'O', 'e', 'u', '-', 'P', 'f', 'v', '_', - }; - const uint8x16_t v3f = vdupq_n_u8(0x3f); -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - // When trying to load a uint8_t array, Visual Studio might - // error with: error C2664: '__n128x4 neon_ld4m_q8(const char *)': - // cannot convert argument 1 from 'const uint8_t [64]' to 'const char * - const uint8x16x4_t table = vld4q_u8( - (reinterpret_cast(options & base64_url) ? source_table_url - : source_table)); -#else - const uint8x16x4_t table = - vld4q_u8((options & base64_url) ? source_table_url : source_table); -#endif - size_t i = 0; - for (; i + 16 * 3 <= srclen; i += 16 * 3) { - const uint8x16x3_t in = vld3q_u8((const uint8_t *)src + i); - uint8x16x4_t result; - result.val[0] = vshrq_n_u8(in.val[0], 2); - result.val[1] = - vandq_u8(vsliq_n_u8(vshrq_n_u8(in.val[1], 4), in.val[0], 4), v3f); - result.val[2] = - vandq_u8(vsliq_n_u8(vshrq_n_u8(in.val[2], 6), in.val[1], 2), v3f); - result.val[3] = vandq_u8(in.val[2], v3f); - result.val[0] = vqtbl4q_u8(table, result.val[0]); - result.val[1] = vqtbl4q_u8(table, result.val[1]); - result.val[2] = vqtbl4q_u8(table, result.val[2]); - result.val[3] = vqtbl4q_u8(table, result.val[3]); - vst4q_u8(out, result); - out += 64; - } - - if (i + 24 <= srclen) { - const uint8x8_t v3f_d = vdup_n_u8(0x3f); - const uint8x8x3_t in = vld3_u8((const uint8_t *)src + i); - uint8x8x4_t result; - result.val[0] = vshr_n_u8(in.val[0], 2); - result.val[1] = - vand_u8(vsli_n_u8(vshr_n_u8(in.val[1], 4), in.val[0], 4), v3f_d); - result.val[2] = - vand_u8(vsli_n_u8(vshr_n_u8(in.val[2], 6), in.val[1], 2), v3f_d); - result.val[3] = vand_u8(in.val[2], v3f_d); - result.val[0] = vqtbl4_u8(table, result.val[0]); - result.val[1] = vqtbl4_u8(table, result.val[1]); - result.val[2] = vqtbl4_u8(table, result.val[2]); - result.val[3] = vqtbl4_u8(table, result.val[3]); - vst4_u8(out, result); - out += 32; - i += 24; - } - - out += scalar::base64::tail_encode_base64((char *)out, src + i, srclen - i, - options); - - return size_t((char *)out - dst); -} - -static inline void compress(uint8x16_t data, uint16_t mask, char *output) { - if (mask == 0) { - vst1q_u8((uint8_t *)output, data); - return; - } - uint8_t mask1 = uint8_t(mask); // least significant 8 bits - uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits - uint64x2_t compactmasku64 = {tables::base64::thintable_epi8[mask1], - tables::base64::thintable_epi8[mask2]}; - uint8x16_t compactmask = vreinterpretq_u8_u64(compactmasku64); -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - const uint8x16_t off = - simdutf_make_uint8x16_t(0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8); -#else - const uint8x16_t off = {0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8}; -#endif - - compactmask = vaddq_u8(compactmask, off); - uint8x16_t pruned = vqtbl1q_u8(data, compactmask); - - int pop1 = tables::base64::BitsSetTable256mul2[mask1]; - // then load the corresponding mask, what it does is to write - // only the first pop1 bytes from the first 8 bytes, and then - // it fills in with the bytes from the second 8 bytes + some filling - // at the end. - compactmask = vld1q_u8(tables::base64::pshufb_combine_table + pop1 * 8); - uint8x16_t answer = vqtbl1q_u8(pruned, compactmask); - vst1q_u8((uint8_t *)output, answer); -} - -struct block64 { - uint8x16_t chunks[4]; -}; - -static_assert(sizeof(block64) == 64, "block64 is not 64 bytes"); -template uint64_t to_base64_mask(block64 *b, bool *error) { - uint8x16_t v0f = vdupq_n_u8(0xf); - - uint8x16_t underscore0, underscore1, underscore2, underscore3; - if (base64_url) { - underscore0 = vceqq_u8(b->chunks[0], vdupq_n_u8(0x5f)); - underscore1 = vceqq_u8(b->chunks[1], vdupq_n_u8(0x5f)); - underscore2 = vceqq_u8(b->chunks[2], vdupq_n_u8(0x5f)); - underscore3 = vceqq_u8(b->chunks[3], vdupq_n_u8(0x5f)); - } else { - (void)underscore0; - (void)underscore1; - (void)underscore2; - (void)underscore3; - } - - uint8x16_t lo_nibbles0 = vandq_u8(b->chunks[0], v0f); - uint8x16_t lo_nibbles1 = vandq_u8(b->chunks[1], v0f); - uint8x16_t lo_nibbles2 = vandq_u8(b->chunks[2], v0f); - uint8x16_t lo_nibbles3 = vandq_u8(b->chunks[3], v0f); - - // Needed by the decoding step. - uint8x16_t hi_nibbles0 = vshrq_n_u8(b->chunks[0], 4); - uint8x16_t hi_nibbles1 = vshrq_n_u8(b->chunks[1], 4); - uint8x16_t hi_nibbles2 = vshrq_n_u8(b->chunks[2], 4); - uint8x16_t hi_nibbles3 = vshrq_n_u8(b->chunks[3], 4); - uint8x16_t lut_lo; -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - if (base64_url) { - lut_lo = - simdutf_make_uint8x16_t(0x3a, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, - 0x70, 0x61, 0xe1, 0xf4, 0xe5, 0xa5, 0xf4, 0xf4); - } else { - lut_lo = - simdutf_make_uint8x16_t(0x3a, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, - 0x70, 0x61, 0xe1, 0xb4, 0xe5, 0xe5, 0xf4, 0xb4); - } -#else - if (base64_url) { - lut_lo = uint8x16_t{0x3a, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, - 0x70, 0x61, 0xe1, 0xf4, 0xe5, 0xa5, 0xf4, 0xf4}; - } else { - lut_lo = uint8x16_t{0x3a, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, - 0x70, 0x61, 0xe1, 0xb4, 0xe5, 0xe5, 0xf4, 0xb4}; - } -#endif - uint8x16_t lo0 = vqtbl1q_u8(lut_lo, lo_nibbles0); - uint8x16_t lo1 = vqtbl1q_u8(lut_lo, lo_nibbles1); - uint8x16_t lo2 = vqtbl1q_u8(lut_lo, lo_nibbles2); - uint8x16_t lo3 = vqtbl1q_u8(lut_lo, lo_nibbles3); - uint8x16_t lut_hi; -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - if (base64_url) { - lut_hi = - simdutf_make_uint8x16_t(0x11, 0x20, 0x42, 0x80, 0x8, 0x4, 0x8, 0x4, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20); - } else { - lut_hi = - simdutf_make_uint8x16_t(0x11, 0x20, 0x42, 0x80, 0x8, 0x4, 0x8, 0x4, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20); - } -#else - if (base64_url) { - lut_hi = uint8x16_t{0x11, 0x20, 0x42, 0x80, 0x8, 0x4, 0x8, 0x4, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20}; - } else { - lut_hi = uint8x16_t{0x11, 0x20, 0x42, 0x80, 0x8, 0x4, 0x8, 0x4, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20}; - } -#endif - uint8x16_t hi0 = vqtbl1q_u8(lut_hi, hi_nibbles0); - uint8x16_t hi1 = vqtbl1q_u8(lut_hi, hi_nibbles1); - uint8x16_t hi2 = vqtbl1q_u8(lut_hi, hi_nibbles2); - uint8x16_t hi3 = vqtbl1q_u8(lut_hi, hi_nibbles3); - - if (base64_url) { - hi0 = vbicq_u8(hi0, underscore0); - hi1 = vbicq_u8(hi1, underscore1); - hi2 = vbicq_u8(hi2, underscore2); - hi3 = vbicq_u8(hi3, underscore3); - } - - uint8_t checks = - vmaxvq_u8(vorrq_u8(vorrq_u8(vandq_u8(lo0, hi0), vandq_u8(lo1, hi1)), - vorrq_u8(vandq_u8(lo2, hi2), vandq_u8(lo3, hi3)))); -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - const uint8x16_t bit_mask = - simdutf_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); -#else - const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; -#endif - uint64_t badcharmask = 0; - *error = checks > 0x3; - if (checks) { - // Add each of the elements next to each other, successively, to stuff each - // 8 byte mask into one. - uint8x16_t test0 = vtstq_u8(lo0, hi0); - uint8x16_t test1 = vtstq_u8(lo1, hi1); - uint8x16_t test2 = vtstq_u8(lo2, hi2); - uint8x16_t test3 = vtstq_u8(lo3, hi3); - uint8x16_t sum0 = - vpaddq_u8(vandq_u8(test0, bit_mask), vandq_u8(test1, bit_mask)); - uint8x16_t sum1 = - vpaddq_u8(vandq_u8(test2, bit_mask), vandq_u8(test3, bit_mask)); - sum0 = vpaddq_u8(sum0, sum1); - sum0 = vpaddq_u8(sum0, sum0); - badcharmask = vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); - } - // This is the transformation step that can be done while we are waiting for - // sum0 - uint8x16_t roll_lut; -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - if (base64_url) { - roll_lut = - simdutf_make_uint8x16_t(0xe0, 0x11, 0x13, 0x4, 0xbf, 0xbf, 0xb9, 0xb9, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0); - } else { - roll_lut = - simdutf_make_uint8x16_t(0x0, 0x10, 0x13, 0x4, 0xbf, 0xbf, 0xb9, 0xb9, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0); - } -#else - if (base64_url) { - roll_lut = uint8x16_t{0xe0, 0x11, 0x13, 0x4, 0xbf, 0xbf, 0xb9, 0xb9, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; - } else { - roll_lut = uint8x16_t{0x0, 0x10, 0x13, 0x4, 0xbf, 0xbf, 0xb9, 0xb9, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; - } -#endif - uint8x16_t vsecond_last = base64_url ? vdupq_n_u8(0x2d) : vdupq_n_u8(0x2f); - if (base64_url) { - hi_nibbles0 = vbicq_u8(hi_nibbles0, underscore0); - hi_nibbles1 = vbicq_u8(hi_nibbles1, underscore1); - hi_nibbles2 = vbicq_u8(hi_nibbles2, underscore2); - hi_nibbles3 = vbicq_u8(hi_nibbles3, underscore3); - } - uint8x16_t roll0 = vqtbl1q_u8( - roll_lut, vaddq_u8(vceqq_u8(b->chunks[0], vsecond_last), hi_nibbles0)); - uint8x16_t roll1 = vqtbl1q_u8( - roll_lut, vaddq_u8(vceqq_u8(b->chunks[1], vsecond_last), hi_nibbles1)); - uint8x16_t roll2 = vqtbl1q_u8( - roll_lut, vaddq_u8(vceqq_u8(b->chunks[2], vsecond_last), hi_nibbles2)); - uint8x16_t roll3 = vqtbl1q_u8( - roll_lut, vaddq_u8(vceqq_u8(b->chunks[3], vsecond_last), hi_nibbles3)); - b->chunks[0] = vaddq_u8(b->chunks[0], roll0); - b->chunks[1] = vaddq_u8(b->chunks[1], roll1); - b->chunks[2] = vaddq_u8(b->chunks[2], roll2); - b->chunks[3] = vaddq_u8(b->chunks[3], roll3); - return badcharmask; -} - -void copy_block(block64 *b, char *output) { - vst1q_u8((uint8_t *)output, b->chunks[0]); - vst1q_u8((uint8_t *)output + 16, b->chunks[1]); - vst1q_u8((uint8_t *)output + 32, b->chunks[2]); - vst1q_u8((uint8_t *)output + 48, b->chunks[3]); -} - -uint64_t compress_block(block64 *b, uint64_t mask, char *output) { - uint64_t popcounts = - vget_lane_u64(vreinterpret_u64_u8(vcnt_u8(vcreate_u8(~mask))), 0); - uint64_t offsets = popcounts * 0x0101010101010101; - compress(b->chunks[0], uint16_t(mask), output); - compress(b->chunks[1], uint16_t(mask >> 16), &output[(offsets >> 8) & 0xFF]); - compress(b->chunks[2], uint16_t(mask >> 32), &output[(offsets >> 24) & 0xFF]); - compress(b->chunks[3], uint16_t(mask >> 48), &output[(offsets >> 40) & 0xFF]); - return offsets >> 56; -} - -// The caller of this function is responsible to ensure that there are 64 bytes -// available from reading at src. The data is read into a block64 structure. -void load_block(block64 *b, const char *src) { - b->chunks[0] = vld1q_u8(reinterpret_cast(src)); - b->chunks[1] = vld1q_u8(reinterpret_cast(src) + 16); - b->chunks[2] = vld1q_u8(reinterpret_cast(src) + 32); - b->chunks[3] = vld1q_u8(reinterpret_cast(src) + 48); -} - -// The caller of this function is responsible to ensure that there are 32 bytes -// available from reading at data. It returns a 16-byte value, narrowing with -// saturation the 16-bit words. -inline uint8x16_t load_satured(const uint16_t *data) { - uint16x8_t in1 = vld1q_u16(data); - uint16x8_t in2 = vld1q_u16(data + 8); - return vqmovn_high_u16(vqmovn_u16(in1), in2); -} - -// The caller of this function is responsible to ensure that there are 128 bytes -// available from reading at src. The data is read into a block64 structure. -void load_block(block64 *b, const char16_t *src) { - b->chunks[0] = load_satured(reinterpret_cast(src)); - b->chunks[1] = load_satured(reinterpret_cast(src) + 16); - b->chunks[2] = load_satured(reinterpret_cast(src) + 32); - b->chunks[3] = load_satured(reinterpret_cast(src) + 48); -} - -// decode 64 bytes and output 48 bytes -void base64_decode_block(char *out, const char *src) { - uint8x16x4_t str = vld4q_u8((uint8_t *)src); - uint8x16x3_t outvec; - outvec.val[0] = vsliq_n_u8(vshrq_n_u8(str.val[1], 4), str.val[0], 2); - outvec.val[1] = vsliq_n_u8(vshrq_n_u8(str.val[2], 2), str.val[1], 4); - outvec.val[2] = vsliq_n_u8(str.val[3], str.val[2], 6); - vst3q_u8((uint8_t *)out, outvec); -} - -static size_t compress_block_single(block64 *b, uint64_t mask, char *output) { - const size_t pos64 = trailing_zeroes(mask); - const int8_t pos = pos64 & 0xf; - - // Predefine the index vector -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - const uint8x16_t v1 = simdutf_make_uint8x16_t(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15); -#else // SIMDUTF_REGULAR_VISUAL_STUDIO - const uint8x16_t v1 = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; -#endif // SIMDUTF_REGULAR_VISUAL_STUDIO - - switch (pos64 >> 4) { - case 0b00: { - const uint8x16_t v0 = vmovq_n_u8((uint8_t)(pos - 1)); - const uint8x16_t v2 = - vcgtq_s8(vreinterpretq_s8_u8(v1), - vreinterpretq_s8_u8(v0)); // Compare greater than - const uint8x16_t sh = vsubq_u8(v1, v2); // Subtract - const uint8x16_t compressed = - vqtbl1q_u8(b->chunks[0], sh); // Table lookup (shuffle) - - vst1q_u8((uint8_t *)(output + 0 * 16), compressed); - vst1q_u8((uint8_t *)(output + 1 * 16 - 1), b->chunks[1]); - vst1q_u8((uint8_t *)(output + 2 * 16 - 1), b->chunks[2]); - vst1q_u8((uint8_t *)(output + 3 * 16 - 1), b->chunks[3]); - } break; - - case 0b01: { - vst1q_u8((uint8_t *)(output + 0 * 16), b->chunks[0]); - - const uint8x16_t v0 = vmovq_n_u8((uint8_t)(pos - 1)); - const uint8x16_t v2 = - vcgtq_s8(vreinterpretq_s8_u8(v1), vreinterpretq_s8_u8(v0)); - const uint8x16_t sh = vsubq_u8(v1, v2); - const uint8x16_t compressed = vqtbl1q_u8(b->chunks[1], sh); - - vst1q_u8((uint8_t *)(output + 1 * 16), compressed); - vst1q_u8((uint8_t *)(output + 2 * 16 - 1), b->chunks[2]); - vst1q_u8((uint8_t *)(output + 3 * 16 - 1), b->chunks[3]); - } break; - - case 0b10: { - vst1q_u8((uint8_t *)(output + 0 * 16), b->chunks[0]); - vst1q_u8((uint8_t *)(output + 1 * 16), b->chunks[1]); - - const uint8x16_t v0 = vmovq_n_u8((uint8_t)(pos - 1)); - const uint8x16_t v2 = - vcgtq_s8(vreinterpretq_s8_u8(v1), vreinterpretq_s8_u8(v0)); - const uint8x16_t sh = vsubq_u8(v1, v2); - const uint8x16_t compressed = vqtbl1q_u8(b->chunks[2], sh); - - vst1q_u8((uint8_t *)(output + 2 * 16), compressed); - vst1q_u8((uint8_t *)(output + 3 * 16 - 1), b->chunks[3]); - } break; - - case 0b11: { - vst1q_u8((uint8_t *)(output + 0 * 16), b->chunks[0]); - vst1q_u8((uint8_t *)(output + 1 * 16), b->chunks[1]); - vst1q_u8((uint8_t *)(output + 2 * 16), b->chunks[2]); - - const uint8x16_t v0 = vmovq_n_u8((uint8_t)(pos - 1)); - const uint8x16_t v2 = - vcgtq_s8(vreinterpretq_s8_u8(v1), vreinterpretq_s8_u8(v0)); - const uint8x16_t sh = vsubq_u8(v1, v2); - const uint8x16_t compressed = vqtbl1q_u8(b->chunks[3], sh); - - vst1q_u8((uint8_t *)(output + 3 * 16), compressed); - } break; - } - return 63; -} - -template bool is_power_of_two(T x) { return (x & (x - 1)) == 0; } - -template -full_result -compress_decode_base64(char *dst, const char_type *src, size_t srclen, - base64_options options, - last_chunk_handling_options last_chunk_options) { - const uint8_t *to_base64 = base64_url ? tables::base64::to_base64_url_value - : tables::base64::to_base64_value; - size_t equallocation = - srclen; // location of the first padding character if any - // skip trailing spaces - while (srclen > 0 && scalar::base64::is_eight_byte(src[srclen - 1]) && - to_base64[uint8_t(src[srclen - 1])] == 64) { - srclen--; - } - size_t equalsigns = 0; - if (srclen > 0 && src[srclen - 1] == '=') { - equallocation = srclen - 1; - srclen--; - equalsigns = 1; - // skip trailing spaces - while (srclen > 0 && scalar::base64::is_eight_byte(src[srclen - 1]) && - to_base64[uint8_t(src[srclen - 1])] == 64) { - srclen--; - } - if (srclen > 0 && src[srclen - 1] == '=') { - equallocation = srclen - 1; - srclen--; - equalsigns = 2; - } - } - if (srclen == 0) { - if (!ignore_garbage && equalsigns > 0) { - if (last_chunk_options == last_chunk_handling_options::strict) { - return {BASE64_INPUT_REMAINDER, 0, 0}; - } else if (last_chunk_options == - last_chunk_handling_options::stop_before_partial) { - return {SUCCESS, 0, 0}; - } - return {INVALID_BASE64_CHARACTER, equallocation, 0}; - } - return {SUCCESS, 0, 0}; - } - const char_type *const srcinit = src; - const char *const dstinit = dst; - const char_type *const srcend = src + srclen; - - constexpr size_t block_size = 10; - char buffer[block_size * 64]; - char *bufferptr = buffer; - if (srclen >= 64) { - const char_type *const srcend64 = src + srclen - 64; - while (src <= srcend64) { - block64 b; - load_block(&b, src); - src += 64; - bool error = false; - uint64_t badcharmask = to_base64_mask(&b, &error); - if (badcharmask) { - if (error && !ignore_garbage) { - src -= 64; - while (src < srcend && scalar::base64::is_eight_byte(*src) && - to_base64[uint8_t(*src)] <= 64) { - src++; - } - if (src < srcend) { - // should never happen - } - return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), - size_t(dst - dstinit)}; - } - } - - if (badcharmask != 0) { - // optimization opportunity: check for simple masks like those made of - // continuous 1s followed by continuous 0s. And masks containing a - // single bad character. - if (is_power_of_two(badcharmask)) { - bufferptr += compress_block_single(&b, badcharmask, bufferptr); - } else { - bufferptr += compress_block(&b, badcharmask, bufferptr); - } - } else { - // optimization opportunity: if bufferptr == buffer and mask == 0, we - // can avoid the call to compress_block and decode directly. - copy_block(&b, bufferptr); - bufferptr += 64; - } - if (bufferptr >= (block_size - 1) * 64 + buffer) { - for (size_t i = 0; i < (block_size - 1); i++) { - base64_decode_block(dst, buffer + i * 64); - dst += 48; - } - std::memcpy(buffer, buffer + (block_size - 1) * 64, - 64); // 64 might be too much - bufferptr -= (block_size - 1) * 64; - } - } - } - char *buffer_start = buffer; - // Optimization note: if this is almost full, then it is worth our - // time, otherwise, we should just decode directly. - int last_block = (int)((bufferptr - buffer_start) % 64); - if (last_block != 0 && srcend - src + last_block >= 64) { - while ((bufferptr - buffer_start) % 64 != 0 && src < srcend) { - uint8_t val = to_base64[uint8_t(*src)]; - *bufferptr = char(val); - if ((!scalar::base64::is_eight_byte(*src) || val > 64) && - !ignore_garbage) { - return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), - size_t(dst - dstinit)}; - } - bufferptr += (val <= 63); - src++; - } - } - - for (; buffer_start + 64 <= bufferptr; buffer_start += 64) { - base64_decode_block(dst, buffer_start); - dst += 48; - } - if ((bufferptr - buffer_start) % 64 != 0) { - while (buffer_start + 4 < bufferptr) { - uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + - (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + - (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + - (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) - << 8; - triple = scalar::u32_swap_bytes(triple); - std::memcpy(dst, &triple, 4); - - dst += 3; - buffer_start += 4; - } - if (buffer_start + 4 <= bufferptr) { - uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + - (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + - (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + - (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) - << 8; - triple = scalar::u32_swap_bytes(triple); - std::memcpy(dst, &triple, 3); - - dst += 3; - buffer_start += 4; - } - // we may have 1, 2 or 3 bytes left and we need to decode them so let us - // backtrack - int leftover = int(bufferptr - buffer_start); - while (leftover > 0) { - if (!ignore_garbage) { - while (to_base64[uint8_t(*(src - 1))] == 64) { - src--; - } - } else { - while (to_base64[uint8_t(*(src - 1))] >= 64) { - src--; - } - } - src--; - leftover--; - } - } - if (src < srcend + equalsigns) { - full_result r = scalar::base64::base64_tail_decode( - dst, src, srcend - src, equalsigns, options, last_chunk_options); - r.input_count += size_t(src - srcinit); - if (r.error == error_code::INVALID_BASE64_CHARACTER || - r.error == error_code::BASE64_EXTRA_BITS) { - return r; - } else { - r.output_count += size_t(dst - dstinit); - } - if (last_chunk_options != stop_before_partial && - r.error == error_code::SUCCESS && equalsigns > 0 && !ignore_garbage) { - // additional checks - if ((r.output_count % 3 == 0) || - ((r.output_count % 3) + 1 + equalsigns != 4)) { - r.error = error_code::INVALID_BASE64_CHARACTER; - r.input_count = equallocation; - } - } - return r; - } - if (equalsigns > 0 && !ignore_garbage) { - if ((size_t(dst - dstinit) % 3 == 0) || - ((size_t(dst - dstinit) % 3) + 1 + equalsigns != 4)) { - return {INVALID_BASE64_CHARACTER, equallocation, size_t(dst - dstinit)}; - } - } - return {SUCCESS, srclen, size_t(dst - dstinit)}; -} -/* end file src/arm64/arm_base64.cpp */ -#endif // SIMDUTF_FEATURE_BASE64 -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/arm64/arm_convert_utf32_to_latin1.cpp */ -std::pair -arm_convert_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) { - const char32_t *end = buf + len; - while (end - buf >= 8) { - uint32x4_t in1 = vld1q_u32(reinterpret_cast(buf)); - uint32x4_t in2 = vld1q_u32(reinterpret_cast(buf + 4)); - - uint16x8_t utf16_packed = vcombine_u16(vqmovn_u32(in1), vqmovn_u32(in2)); - if (vmaxvq_u16(utf16_packed) <= 0xff) { - // 1. pack the bytes - uint8x8_t latin1_packed = vmovn_u16(utf16_packed); - // 2. store (8 bytes) - vst1_u8(reinterpret_cast(latin1_output), latin1_packed); - // 3. adjust pointers - buf += 8; - latin1_output += 8; - } else { - return std::make_pair(nullptr, reinterpret_cast(latin1_output)); - } - } // while - return std::make_pair(buf, latin1_output); -} - -std::pair -arm_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, - char *latin1_output) { - const char32_t *start = buf; - const char32_t *end = buf + len; - - while (end - buf >= 8) { - uint32x4_t in1 = vld1q_u32(reinterpret_cast(buf)); - uint32x4_t in2 = vld1q_u32(reinterpret_cast(buf + 4)); - - uint16x8_t utf16_packed = vcombine_u16(vqmovn_u32(in1), vqmovn_u32(in2)); - - if (vmaxvq_u16(utf16_packed) <= 0xff) { - // 1. pack the bytes - uint8x8_t latin1_packed = vmovn_u16(utf16_packed); - // 2. store (8 bytes) - vst1_u8(reinterpret_cast(latin1_output), latin1_packed); - // 3. adjust pointers - buf += 8; - latin1_output += 8; - } else { - // Let us do a scalar fallback. - for (int k = 0; k < 8; k++) { - uint32_t word = buf[k]; - if (word <= 0xff) { - *latin1_output++ = char(word); - } else { - return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), - latin1_output); - } - } - } - } // while - return std::make_pair(result(error_code::SUCCESS, buf - start), - latin1_output); -} -/* end file src/arm64/arm_convert_utf32_to_latin1.cpp */ -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_UTF16 -/* begin file src/arm64/arm_convert_utf32_to_utf16.cpp */ -struct expansion_result_t { - size_t u16count; - uint8x16_t compressed_v; -}; - -static simdutf_really_inline uint64_t invalid_utf32(const uint32x4x2_t in) { - const auto standardmax = vdupq_n_u32(0x10ffff); - const auto v_d800 = vdupq_n_u32(0xd800); - const auto v_fffff800 = vdupq_n_u32(0xfffff800); - const auto too_large1 = vcgtq_u32(in.val[0], standardmax); - const auto too_large2 = vcgtq_u32(in.val[1], standardmax); - const auto surrogate1 = vceqq_u32(vandq_u32(in.val[0], v_fffff800), v_d800); - const auto surrogate2 = vceqq_u32(vandq_u32(in.val[1], v_fffff800), v_d800); - const auto err1 = vorrq_u32(too_large1, surrogate1); - const auto err2 = vorrq_u32(too_large2, surrogate2); - const auto err = - vuzp2q_u16(vreinterpretq_u16_u32(err1), vreinterpretq_u16_u32(err2)); - - return vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(err, 8)), 0); -} - -template -expansion_result_t neon_expand_surrogate(const uint32x4_t in) { - const uint32x4_t v_ffff0000 = vdupq_n_u32(0xffff0000); - const uint32x4_t non_surrogate_mask = vceqzq_u32(vandq_u32(in, v_ffff0000)); - const uint64_t cmp_bits = - vget_lane_u64(vreinterpret_u64_u32(vshrn_n_u64( - vreinterpretq_u64_u32(non_surrogate_mask), 31)), - 0); - const uint8_t mask = - uint8_t(~((cmp_bits & 0x3) | ((cmp_bits >> 30) & 0xc)) & 0xf); - const uint32x4_t v_10000 = vdupq_n_u32(0x00010000); - const uint32x4_t t0 = vsubq_u32(in, v_10000); - const uint32x4_t t1 = vandq_u32(t0, vdupq_n_u32(0xfffff)); - const uint32x4_t t2 = vshrq_n_u32(t1, 10); - const uint32x4_t t3 = vsliq_n_u32(t2, t1, 16); - const uint32x4_t surrogates = vorrq_u32( - vandq_u32(t3, vdupq_n_u32(0x03ff03ff)), vdupq_n_u32(0xdc00d800)); - const uint8x16_t merged = - vreinterpretq_u8_u32(vbslq_u32(non_surrogate_mask, in, surrogates)); - - const uint8x16_t shuffle_v = vld1q_u8(reinterpret_cast( - (byte_order == endianness::LITTLE) - ? tables::utf32_to_utf16::pack_utf32_to_utf16le[mask] - : tables::utf32_to_utf16::pack_utf32_to_utf16be[mask])); - - const size_t u16count = 4 + vget_lane_u8(vcnt_u8(vcreate_u8(mask)), 0); - const uint8x16_t compressed_v = vqtbl1q_u8(merged, shuffle_v); - - return {u16count, compressed_v}; -} - -template -std::pair -arm_convert_utf32_to_utf16(const char32_t *buf, size_t len, - char16_t *utf16_out) { - uint16_t *utf16_output = reinterpret_cast(utf16_out); - const char32_t *end = buf + len; - - uint16x8_t forbidden_bytemask = vmovq_n_u16(0x0); - // To avoid buffer overflow while writing compressed_v - const size_t safety_margin = 4; - while (end - buf >= std::ptrdiff_t(8 + safety_margin)) { - uint32x4x2_t in = vld1q_u32_x2(reinterpret_cast(buf)); - - // Check if no bits set above 16th - if (vmaxvq_u32(vorrq_u32(in.val[0], in.val[1])) <= 0xFFFF) { - uint16x8_t utf16_packed = vuzp1q_u16(vreinterpretq_u16_u32(in.val[0]), - vreinterpretq_u16_u32(in.val[1])); - - const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800); - const uint16x8_t v_f800 = vmovq_n_u16((uint16_t)0xf800); - forbidden_bytemask = - vorrq_u16(vceqq_u16(vandq_u16(utf16_packed, v_f800), v_d800), - forbidden_bytemask); - - if (!match_system(big_endian)) { - utf16_packed = vreinterpretq_u16_u8( - vrev16q_u8(vreinterpretq_u8_u16(utf16_packed))); - } - vst1q_u16(utf16_output, utf16_packed); - utf16_output += 8; - buf += 8; - } else { - const uint64_t err = invalid_utf32(in); - if (simdutf_unlikely(err)) { - return std::make_pair(nullptr, - reinterpret_cast(utf16_output)); - } - expansion_result_t res = neon_expand_surrogate(in.val[0]); - vst1q_u8(reinterpret_cast(utf16_output), res.compressed_v); - utf16_output += res.u16count; - res = neon_expand_surrogate(in.val[1]); - vst1q_u8(reinterpret_cast(utf16_output), res.compressed_v); - utf16_output += res.u16count; - buf += 8; - } - } - - // check for invalid input - if (vmaxvq_u16(forbidden_bytemask) != 0) { - return std::make_pair(nullptr, reinterpret_cast(utf16_output)); - } - - return std::make_pair(buf, reinterpret_cast(utf16_output)); -} - -template -std::pair -arm_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, - char16_t *utf16_out) { - uint16_t *utf16_output = reinterpret_cast(utf16_out); - const char32_t *start = buf; - const char32_t *end = buf + len; - - // To avoid buffer overflow while writing compressed_v - const size_t safety_margin = 4; - while (end - buf >= std::ptrdiff_t(8 + safety_margin)) { - uint32x4x2_t in = vld1q_u32_x2(reinterpret_cast(buf)); - - // Check if no bits set above 16th - if (vmaxvq_u32(vorrq_u32(in.val[0], in.val[1])) <= 0xFFFF) { - uint16x8_t utf16_packed = vuzp1q_u16(vreinterpretq_u16_u32(in.val[0]), - vreinterpretq_u16_u32(in.val[1])); - - const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800); - const uint16x8_t v_f800 = vmovq_n_u16((uint16_t)0xf800); - const uint16x8_t forbidden_bytemask = - vceqq_u16(vandq_u16(utf16_packed, v_f800), v_d800); - if (vmaxvq_u16(forbidden_bytemask) != 0) { - return std::make_pair(result(error_code::SURROGATE, buf - start), - reinterpret_cast(utf16_output)); - } - - if (!match_system(big_endian)) { - utf16_packed = vreinterpretq_u16_u8( - vrev16q_u8(vreinterpretq_u8_u16(utf16_packed))); - } - vst1q_u16(utf16_output, utf16_packed); - utf16_output += 8; - buf += 8; - } else { - const uint64_t err = invalid_utf32(in); - if (simdutf_unlikely(err)) { - const size_t pos = trailing_zeroes(err) / 8; - for (size_t k = 0; k < pos; k++) { - uint32_t word = buf[k]; - if ((word & 0xFFFF0000) == 0) { - // will not generate a surrogate pair - *utf16_output++ = !match_system(big_endian) - ? char16_t(word >> 8 | word << 8) - : char16_t(word); - } else { - // will generate a surrogate pair - word -= 0x10000; - uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); - uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); - if (!match_system(big_endian)) { - high_surrogate = - uint16_t(high_surrogate >> 8 | high_surrogate << 8); - low_surrogate = uint16_t(low_surrogate << 8 | low_surrogate >> 8); - } - *utf16_output++ = char16_t(high_surrogate); - *utf16_output++ = char16_t(low_surrogate); - } - } - const uint32_t word = buf[pos]; - const size_t error_pos = buf - start + pos; - if (word > 0x10FFFF) { - return {result(error_code::TOO_LARGE, error_pos), - reinterpret_cast(utf16_output)}; - } - if (word >= 0xD800 && word <= 0xDFFF) { - return {result(error_code::SURROGATE, error_pos), - reinterpret_cast(utf16_output)}; - } - return {result(error_code::OTHER, error_pos), - reinterpret_cast(utf16_output)}; - } - expansion_result_t res = neon_expand_surrogate(in.val[0]); - vst1q_u8(reinterpret_cast(utf16_output), res.compressed_v); - utf16_output += res.u16count; - res = neon_expand_surrogate(in.val[1]); - vst1q_u8(reinterpret_cast(utf16_output), res.compressed_v); - utf16_output += res.u16count; - buf += 8; - } - } - - return std::make_pair(result(error_code::SUCCESS, buf - start), - reinterpret_cast(utf16_output)); -} -/* end file src/arm64/arm_convert_utf32_to_utf16.cpp */ -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_UTF8 -/* begin file src/arm64/arm_convert_utf32_to_utf8.cpp */ -std::pair -arm_convert_utf32_to_utf8(const char32_t *buf, size_t len, char *utf8_out) { - uint8_t *utf8_output = reinterpret_cast(utf8_out); - const char32_t *end = buf + len; - - const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080); - - uint16x8_t forbidden_bytemask = vmovq_n_u16(0x0); - const size_t safety_margin = - 12; // to avoid overruns, see issue - // https://github.com/simdutf/simdutf/issues/92 - - while (buf + 16 + safety_margin < end) { - uint32x4_t in = vld1q_u32(reinterpret_cast(buf)); - uint32x4_t nextin = vld1q_u32(reinterpret_cast(buf + 4)); - - // Check if no bits set above 16th - if (vmaxvq_u32(vorrq_u32(in, nextin)) <= 0xFFFF) { - // Pack UTF-32 to UTF-16 safely (without surrogate pairs) - // Apply UTF-16 => UTF-8 routine (arm_convert_utf16_to_utf8.cpp) - uint16x8_t utf16_packed = vcombine_u16(vmovn_u32(in), vmovn_u32(nextin)); - if (vmaxvq_u16(utf16_packed) <= 0x7F) { // ASCII fast path!!!! - // 1. pack the bytes - // obviously suboptimal. - uint8x8_t utf8_packed = vmovn_u16(utf16_packed); - // 2. store (8 bytes) - vst1_u8(utf8_output, utf8_packed); - // 3. adjust pointers - buf += 8; - utf8_output += 8; - continue; // we are done for this round! - } - - if (vmaxvq_u16(utf16_packed) <= 0x7FF) { - // 1. prepare 2-byte values - // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 - // expected output : [110a|aaaa|10bb|bbbb] x 8 - const uint16x8_t v_1f00 = vmovq_n_u16((int16_t)0x1f00); - const uint16x8_t v_003f = vmovq_n_u16((int16_t)0x003f); - - // t0 = [000a|aaaa|bbbb|bb00] - const uint16x8_t t0 = vshlq_n_u16(utf16_packed, 2); - // t1 = [000a|aaaa|0000|0000] - const uint16x8_t t1 = vandq_u16(t0, v_1f00); - // t2 = [0000|0000|00bb|bbbb] - const uint16x8_t t2 = vandq_u16(utf16_packed, v_003f); - // t3 = [000a|aaaa|00bb|bbbb] - const uint16x8_t t3 = vorrq_u16(t1, t2); - // t4 = [110a|aaaa|10bb|bbbb] - const uint16x8_t t4 = vorrq_u16(t3, v_c080); - // 2. merge ASCII and 2-byte codewords - const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F); - const uint16x8_t one_byte_bytemask = vcleq_u16(utf16_packed, v_007f); - const uint8x16_t utf8_unpacked = vreinterpretq_u8_u16( - vbslq_u16(one_byte_bytemask, utf16_packed, t4)); - // 3. prepare bitmask for 8-bit lookup -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - const uint16x8_t mask = simdutf_make_uint16x8_t( - 0x0001, 0x0004, 0x0010, 0x0040, 0x0002, 0x0008, 0x0020, 0x0080); -#else - const uint16x8_t mask = {0x0001, 0x0004, 0x0010, 0x0040, - 0x0002, 0x0008, 0x0020, 0x0080}; -#endif - uint16_t m2 = vaddvq_u16(vandq_u16(one_byte_bytemask, mask)); - // 4. pack the bytes - const uint8_t *row = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0]; - const uint8x16_t shuffle = vld1q_u8(row + 1); - const uint8x16_t utf8_packed = vqtbl1q_u8(utf8_unpacked, shuffle); - - // 5. store bytes - vst1q_u8(utf8_output, utf8_packed); - - // 6. adjust pointers - buf += 8; - utf8_output += row[0]; - continue; - } else { - // case: code units from register produce either 1, 2 or 3 UTF-8 bytes - const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800); - const uint16x8_t v_dfff = vmovq_n_u16((uint16_t)0xdfff); - forbidden_bytemask = - vorrq_u16(vandq_u16(vcleq_u16(utf16_packed, v_dfff), - vcgeq_u16(utf16_packed, v_d800)), - forbidden_bytemask); - -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - const uint16x8_t dup_even = simdutf_make_uint16x8_t( - 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); -#else - const uint16x8_t dup_even = {0x0000, 0x0202, 0x0404, 0x0606, - 0x0808, 0x0a0a, 0x0c0c, 0x0e0e}; -#endif - /* In this branch we handle three cases: - 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - - single UFT-8 byte - 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - - two UTF-8 bytes - 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - - three UTF-8 bytes - - We expand the input word (16-bit) into two code units (32-bit), thus - we have room for four bytes. However, we need five distinct bit - layouts. Note that the last byte in cases #2 and #3 is the same. - - We precompute byte 1 for case #1 and the common byte for cases #2 & #3 - in register t2. - - We precompute byte 1 for case #3 and -- **conditionally** -- - precompute either byte 1 for case #2 or byte 2 for case #3. Note that - they differ by exactly one bit. - - Finally from these two code units we build proper UTF-8 sequence, - taking into account the case (i.e, the number of bytes to write). - */ - /** - * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: - * t2 => [0ccc|cccc] [10cc|cccc] - * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) - */ -#define simdutf_vec(x) vmovq_n_u16(static_cast(x)) - // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - const uint16x8_t t0 = - vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(utf16_packed), - vreinterpretq_u8_u16(dup_even))); - // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] - const uint16x8_t t1 = vandq_u16(t0, simdutf_vec(0b0011111101111111)); - // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - const uint16x8_t t2 = vorrq_u16(t1, simdutf_vec(0b1000000000000000)); - - // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] - const uint16x8_t s0 = vshrq_n_u16(utf16_packed, 12); - // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] - const uint16x8_t s1 = - vandq_u16(utf16_packed, simdutf_vec(0b0000111111000000)); - // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000] - const uint16x8_t s1s = vshlq_n_u16(s1, 2); - // [00bb|bbbb|0000|aaaa] - const uint16x8_t s2 = vorrq_u16(s0, s1s); - // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - const uint16x8_t s3 = vorrq_u16(s2, simdutf_vec(0b1100000011100000)); - const uint16x8_t v_07ff = vmovq_n_u16((uint16_t)0x07FF); - const uint16x8_t one_or_two_bytes_bytemask = - vcleq_u16(utf16_packed, v_07ff); - const uint16x8_t m0 = vbicq_u16(simdutf_vec(0b0100000000000000), - one_or_two_bytes_bytemask); - const uint16x8_t s4 = veorq_u16(s3, m0); -#undef simdutf_vec - - // 4. expand code units 16-bit => 32-bit - const uint8x16_t out0 = vreinterpretq_u8_u16(vzip1q_u16(t2, s4)); - const uint8x16_t out1 = vreinterpretq_u8_u16(vzip2q_u16(t2, s4)); - - // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F); - const uint16x8_t one_byte_bytemask = vcleq_u16(utf16_packed, v_007f); -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - const uint16x8_t onemask = simdutf_make_uint16x8_t( - 0x0001, 0x0004, 0x0010, 0x0040, 0x0100, 0x0400, 0x1000, 0x4000); - const uint16x8_t twomask = simdutf_make_uint16x8_t( - 0x0002, 0x0008, 0x0020, 0x0080, 0x0200, 0x0800, 0x2000, 0x8000); -#else - const uint16x8_t onemask = {0x0001, 0x0004, 0x0010, 0x0040, - 0x0100, 0x0400, 0x1000, 0x4000}; - const uint16x8_t twomask = {0x0002, 0x0008, 0x0020, 0x0080, - 0x0200, 0x0800, 0x2000, 0x8000}; -#endif - const uint16x8_t combined = - vorrq_u16(vandq_u16(one_byte_bytemask, onemask), - vandq_u16(one_or_two_bytes_bytemask, twomask)); - const uint16_t mask = vaddvq_u16(combined); - // The following fast path may or may not be beneficial. - /*if(mask == 0) { - // We only have three-byte code units. Use fast path. - const uint8x16_t shuffle = {2,3,1,6,7,5,10,11,9,14,15,13,0,0,0,0}; - const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle); - const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle); - vst1q_u8(utf8_output, utf8_0); - utf8_output += 12; - vst1q_u8(utf8_output, utf8_1); - utf8_output += 12; - buf += 8; - continue; - }*/ - const uint8_t mask0 = uint8_t(mask); - const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; - const uint8x16_t shuffle0 = vld1q_u8(row0 + 1); - const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle0); - - const uint8_t mask1 = static_cast(mask >> 8); - const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; - const uint8x16_t shuffle1 = vld1q_u8(row1 + 1); - const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle1); - - vst1q_u8(utf8_output, utf8_0); - utf8_output += row0[0]; - vst1q_u8(utf8_output, utf8_1); - utf8_output += row1[0]; - - buf += 8; - } - // At least one 32-bit word will produce a surrogate pair in UTF-16 <=> - // will produce four UTF-8 bytes. - } else { - // Let us do a scalar fallback. - // It may seem wasteful to use scalar code, but being efficient with SIMD - // in the presence of surrogate pairs may require non-trivial tables. - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint32_t word = buf[k]; - if ((word & 0xFFFFFF80) == 0) { - *utf8_output++ = char(word); - } else if ((word & 0xFFFFF800) == 0) { - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else if ((word & 0xFFFF0000) == 0) { - if (word >= 0xD800 && word <= 0xDFFF) { - return std::make_pair(nullptr, - reinterpret_cast(utf8_output)); - } - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else { - if (word > 0x10FFFF) { - return std::make_pair(nullptr, - reinterpret_cast(utf8_output)); - } - *utf8_output++ = char((word >> 18) | 0b11110000); - *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } - } - buf += k; - } - } // while - - // check for invalid input - if (vmaxvq_u16(forbidden_bytemask) != 0) { - return std::make_pair(nullptr, reinterpret_cast(utf8_output)); - } - return std::make_pair(buf, reinterpret_cast(utf8_output)); -} - -std::pair -arm_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, - char *utf8_out) { - uint8_t *utf8_output = reinterpret_cast(utf8_out); - const char32_t *start = buf; - const char32_t *end = buf + len; - - const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080); - const size_t safety_margin = - 12; // to avoid overruns, see issue - // https://github.com/simdutf/simdutf/issues/92 - - while (buf + 16 + safety_margin < end) { - uint32x4_t in = vld1q_u32(reinterpret_cast(buf)); - uint32x4_t nextin = vld1q_u32(reinterpret_cast(buf + 4)); - - // Check if no bits set above 16th - if (vmaxvq_u32(vorrq_u32(in, nextin)) <= 0xFFFF) { - // Pack UTF-32 to UTF-16 safely (without surrogate pairs) - // Apply UTF-16 => UTF-8 routine (arm_convert_utf16_to_utf8.cpp) - uint16x8_t utf16_packed = vcombine_u16(vmovn_u32(in), vmovn_u32(nextin)); - if (vmaxvq_u16(utf16_packed) <= 0x7F) { // ASCII fast path!!!! - // 1. pack the bytes - // obviously suboptimal. - uint8x8_t utf8_packed = vmovn_u16(utf16_packed); - // 2. store (8 bytes) - vst1_u8(utf8_output, utf8_packed); - // 3. adjust pointers - buf += 8; - utf8_output += 8; - continue; // we are done for this round! - } - - if (vmaxvq_u16(utf16_packed) <= 0x7FF) { - // 1. prepare 2-byte values - // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 - // expected output : [110a|aaaa|10bb|bbbb] x 8 - const uint16x8_t v_1f00 = vmovq_n_u16((int16_t)0x1f00); - const uint16x8_t v_003f = vmovq_n_u16((int16_t)0x003f); - - // t0 = [000a|aaaa|bbbb|bb00] - const uint16x8_t t0 = vshlq_n_u16(utf16_packed, 2); - // t1 = [000a|aaaa|0000|0000] - const uint16x8_t t1 = vandq_u16(t0, v_1f00); - // t2 = [0000|0000|00bb|bbbb] - const uint16x8_t t2 = vandq_u16(utf16_packed, v_003f); - // t3 = [000a|aaaa|00bb|bbbb] - const uint16x8_t t3 = vorrq_u16(t1, t2); - // t4 = [110a|aaaa|10bb|bbbb] - const uint16x8_t t4 = vorrq_u16(t3, v_c080); - // 2. merge ASCII and 2-byte codewords - const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F); - const uint16x8_t one_byte_bytemask = vcleq_u16(utf16_packed, v_007f); - const uint8x16_t utf8_unpacked = vreinterpretq_u8_u16( - vbslq_u16(one_byte_bytemask, utf16_packed, t4)); - // 3. prepare bitmask for 8-bit lookup -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - const uint16x8_t mask = simdutf_make_uint16x8_t( - 0x0001, 0x0004, 0x0010, 0x0040, 0x0002, 0x0008, 0x0020, 0x0080); -#else - const uint16x8_t mask = {0x0001, 0x0004, 0x0010, 0x0040, - 0x0002, 0x0008, 0x0020, 0x0080}; -#endif - uint16_t m2 = vaddvq_u16(vandq_u16(one_byte_bytemask, mask)); - // 4. pack the bytes - const uint8_t *row = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0]; - const uint8x16_t shuffle = vld1q_u8(row + 1); - const uint8x16_t utf8_packed = vqtbl1q_u8(utf8_unpacked, shuffle); - - // 5. store bytes - vst1q_u8(utf8_output, utf8_packed); - - // 6. adjust pointers - buf += 8; - utf8_output += row[0]; - continue; - } else { - // case: code units from register produce either 1, 2 or 3 UTF-8 bytes - - // check for invalid input - const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800); - const uint16x8_t v_dfff = vmovq_n_u16((uint16_t)0xdfff); - const uint16x8_t forbidden_bytemask = vandq_u16( - vcleq_u16(utf16_packed, v_dfff), vcgeq_u16(utf16_packed, v_d800)); - if (vmaxvq_u16(forbidden_bytemask) != 0) { - return std::make_pair(result(error_code::SURROGATE, buf - start), - reinterpret_cast(utf8_output)); - } - -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - const uint16x8_t dup_even = simdutf_make_uint16x8_t( - 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); -#else - const uint16x8_t dup_even = {0x0000, 0x0202, 0x0404, 0x0606, - 0x0808, 0x0a0a, 0x0c0c, 0x0e0e}; -#endif - /* In this branch we handle three cases: - 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - - single UFT-8 byte - 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - - two UTF-8 bytes - 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - - three UTF-8 bytes - - We expand the input word (16-bit) into two code units (32-bit), thus - we have room for four bytes. However, we need five distinct bit - layouts. Note that the last byte in cases #2 and #3 is the same. - - We precompute byte 1 for case #1 and the common byte for cases #2 & #3 - in register t2. - - We precompute byte 1 for case #3 and -- **conditionally** -- - precompute either byte 1 for case #2 or byte 2 for case #3. Note that - they differ by exactly one bit. - - Finally from these two code units we build proper UTF-8 sequence, - taking into account the case (i.e, the number of bytes to write). - */ - /** - * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: - * t2 => [0ccc|cccc] [10cc|cccc] - * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) - */ -#define simdutf_vec(x) vmovq_n_u16(static_cast(x)) - // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - const uint16x8_t t0 = - vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(utf16_packed), - vreinterpretq_u8_u16(dup_even))); - // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] - const uint16x8_t t1 = vandq_u16(t0, simdutf_vec(0b0011111101111111)); - // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - const uint16x8_t t2 = vorrq_u16(t1, simdutf_vec(0b1000000000000000)); - - // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] - const uint16x8_t s0 = vshrq_n_u16(utf16_packed, 12); - // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] - const uint16x8_t s1 = - vandq_u16(utf16_packed, simdutf_vec(0b0000111111000000)); - // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000] - const uint16x8_t s1s = vshlq_n_u16(s1, 2); - // [00bb|bbbb|0000|aaaa] - const uint16x8_t s2 = vorrq_u16(s0, s1s); - // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - const uint16x8_t s3 = vorrq_u16(s2, simdutf_vec(0b1100000011100000)); - const uint16x8_t v_07ff = vmovq_n_u16((uint16_t)0x07FF); - const uint16x8_t one_or_two_bytes_bytemask = - vcleq_u16(utf16_packed, v_07ff); - const uint16x8_t m0 = vbicq_u16(simdutf_vec(0b0100000000000000), - one_or_two_bytes_bytemask); - const uint16x8_t s4 = veorq_u16(s3, m0); -#undef simdutf_vec - - // 4. expand code units 16-bit => 32-bit - const uint8x16_t out0 = vreinterpretq_u8_u16(vzip1q_u16(t2, s4)); - const uint8x16_t out1 = vreinterpretq_u8_u16(vzip2q_u16(t2, s4)); - - // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F); - const uint16x8_t one_byte_bytemask = vcleq_u16(utf16_packed, v_007f); -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - const uint16x8_t onemask = simdutf_make_uint16x8_t( - 0x0001, 0x0004, 0x0010, 0x0040, 0x0100, 0x0400, 0x1000, 0x4000); - const uint16x8_t twomask = simdutf_make_uint16x8_t( - 0x0002, 0x0008, 0x0020, 0x0080, 0x0200, 0x0800, 0x2000, 0x8000); -#else - const uint16x8_t onemask = {0x0001, 0x0004, 0x0010, 0x0040, - 0x0100, 0x0400, 0x1000, 0x4000}; - const uint16x8_t twomask = {0x0002, 0x0008, 0x0020, 0x0080, - 0x0200, 0x0800, 0x2000, 0x8000}; -#endif - const uint16x8_t combined = - vorrq_u16(vandq_u16(one_byte_bytemask, onemask), - vandq_u16(one_or_two_bytes_bytemask, twomask)); - const uint16_t mask = vaddvq_u16(combined); - // The following fast path may or may not be beneficial. - /*if(mask == 0) { - // We only have three-byte code units. Use fast path. - const uint8x16_t shuffle = {2,3,1,6,7,5,10,11,9,14,15,13,0,0,0,0}; - const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle); - const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle); - vst1q_u8(utf8_output, utf8_0); - utf8_output += 12; - vst1q_u8(utf8_output, utf8_1); - utf8_output += 12; - buf += 8; - continue; - }*/ - const uint8_t mask0 = uint8_t(mask); - - const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; - const uint8x16_t shuffle0 = vld1q_u8(row0 + 1); - const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle0); - - const uint8_t mask1 = static_cast(mask >> 8); - const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; - const uint8x16_t shuffle1 = vld1q_u8(row1 + 1); - const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle1); - - vst1q_u8(utf8_output, utf8_0); - utf8_output += row0[0]; - vst1q_u8(utf8_output, utf8_1); - utf8_output += row1[0]; - - buf += 8; - } - // At least one 32-bit word will produce a surrogate pair in UTF-16 <=> - // will produce four UTF-8 bytes. - } else { - // Let us do a scalar fallback. - // It may seem wasteful to use scalar code, but being efficient with SIMD - // in the presence of surrogate pairs may require non-trivial tables. - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint32_t word = buf[k]; - if ((word & 0xFFFFFF80) == 0) { - *utf8_output++ = char(word); - } else if ((word & 0xFFFFF800) == 0) { - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else if ((word & 0xFFFF0000) == 0) { - if (word >= 0xD800 && word <= 0xDFFF) { - return std::make_pair( - result(error_code::SURROGATE, buf - start + k), - reinterpret_cast(utf8_output)); - } - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else { - if (word > 0x10FFFF) { - return std::make_pair( - result(error_code::TOO_LARGE, buf - start + k), - reinterpret_cast(utf8_output)); - } - *utf8_output++ = char((word >> 18) | 0b11110000); - *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } - } - buf += k; - } - } // while - - return std::make_pair(result(error_code::SUCCESS, buf - start), - reinterpret_cast(utf8_output)); -} -/* end file src/arm64/arm_convert_utf32_to_utf8.cpp */ -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_UTF8 - -} // unnamed namespace -} // namespace arm64 -} // namespace simdutf - -/* begin file src/generic/buf_block_reader.h */ -namespace simdutf { -namespace arm64 { -namespace { - -// Walks through a buffer in block-sized increments, loading the last part with -// spaces -template struct buf_block_reader { -public: - simdutf_really_inline buf_block_reader(const uint8_t *_buf, size_t _len); - simdutf_really_inline size_t block_index(); - simdutf_really_inline bool has_full_block() const; - simdutf_really_inline const uint8_t *full_block() const; - /** - * Get the last block, padded with spaces. - * - * There will always be a last block, with at least 1 byte, unless len == 0 - * (in which case this function fills the buffer with spaces and returns 0. In - * particular, if len == STEP_SIZE there will be 0 full_blocks and 1 remainder - * block with STEP_SIZE bytes and no spaces for padding. - * - * @return the number of effective characters in the last block. - */ - simdutf_really_inline size_t get_remainder(uint8_t *dst) const; - simdutf_really_inline void advance(); - -private: - const uint8_t *buf; - const size_t len; - const size_t lenminusstep; - size_t idx; -}; - -// Routines to print masks and text for debugging bitmask operations -simdutf_unused static char *format_input_text_64(const uint8_t *text) { - static char *buf = - reinterpret_cast(malloc(sizeof(simd8x64) + 1)); - for (size_t i = 0; i < sizeof(simd8x64); i++) { - buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); - } - buf[sizeof(simd8x64)] = '\0'; - return buf; -} - -// Routines to print masks and text for debugging bitmask operations -simdutf_unused static char *format_input_text(const simd8x64 &in) { - static char *buf = - reinterpret_cast(malloc(sizeof(simd8x64) + 1)); - in.store(reinterpret_cast(buf)); - for (size_t i = 0; i < sizeof(simd8x64); i++) { - if (buf[i] < ' ') { - buf[i] = '_'; - } - } - buf[sizeof(simd8x64)] = '\0'; - return buf; -} - -simdutf_unused static char *format_mask(uint64_t mask) { - static char *buf = reinterpret_cast(malloc(64 + 1)); - for (size_t i = 0; i < 64; i++) { - buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; - } - buf[64] = '\0'; - return buf; -} - -template -simdutf_really_inline -buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) - : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, - idx{0} {} - -template -simdutf_really_inline size_t buf_block_reader::block_index() { - return idx; -} - -template -simdutf_really_inline bool buf_block_reader::has_full_block() const { - return idx < lenminusstep; -} - -template -simdutf_really_inline const uint8_t * -buf_block_reader::full_block() const { - return &buf[idx]; -} - -template -simdutf_really_inline size_t -buf_block_reader::get_remainder(uint8_t *dst) const { - if (len == idx) { - return 0; - } // memcpy(dst, null, 0) will trigger an error with some sanitizers - std::memset(dst, 0x20, - STEP_SIZE); // std::memset STEP_SIZE because it is more efficient - // to write out 8 or 16 bytes at once. - std::memcpy(dst, buf + idx, len - idx); - return len - idx; -} - -template -simdutf_really_inline void buf_block_reader::advance() { - idx += STEP_SIZE; -} - -} // unnamed namespace -} // namespace arm64 -} // namespace simdutf -/* end file src/generic/buf_block_reader.h */ -#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING -/* begin file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ -namespace simdutf { -namespace arm64 { -namespace { -namespace utf8_validation { - -using namespace simd; - -simdutf_really_inline simd8 -check_special_cases(const simd8 input, const simd8 prev1) { - // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) - // Bit 1 = Too Long (ASCII followed by continuation) - // Bit 2 = Overlong 3-byte - // Bit 4 = Surrogate - // Bit 5 = Overlong 2-byte - // Bit 7 = Two Continuations - constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ - // 11______ 11______ - constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ - constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ - constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ - constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ - constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ - constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ - constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; - // 11110101 1000____ - // 1111011_ 1000____ - // 11111___ 1000____ - constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ - - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // 0_______ ________ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - TOO_LONG, - // 10______ ________ - TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, - // 1100____ ________ - TOO_SHORT | OVERLONG_2, - // 1101____ ________ - TOO_SHORT, - // 1110____ ________ - TOO_SHORT | OVERLONG_3 | SURROGATE, - // 1111____ ________ - TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); - constexpr const uint8_t CARRY = - TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . - const simd8 byte_1_low = - (prev1 & 0x0F) - .lookup_16( - // ____0000 ________ - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - // ____0001 ________ - CARRY | OVERLONG_2, - // ____001_ ________ - CARRY, CARRY, - - // ____0100 ________ - CARRY | TOO_LARGE, - // ____0101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____011_ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - - // ____1___ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____1101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000); - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ________ 0_______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - TOO_SHORT, TOO_SHORT, - - // ________ 1000____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | - OVERLONG_4, - // ________ 1001____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - // ________ 101_____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - - // ________ 11______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); - return (byte_1_high & byte_1_low & byte_2_high); -} -simdutf_really_inline simd8 -check_multibyte_lengths(const simd8 input, - const simd8 prev_input, - const simd8 sc) { - simd8 prev2 = input.prev<2>(prev_input); - simd8 prev3 = input.prev<3>(prev_input); - simd8 must23 = - simd8(must_be_2_3_continuation(prev2, prev3)); - simd8 must23_80 = must23 & uint8_t(0x80); - return must23_80 ^ sc; -} - -// -// Return nonzero if there are incomplete multibyte characters at the end of the -// block: e.g. if there is a 4-byte character, but it is 3 bytes from the end. -// -simdutf_really_inline simd8 is_incomplete(const simd8 input) { - // If the previous input's last 3 bytes match this, they're too short (they - // ended at EOF): - // ... 1111____ 111_____ 11______ - static const uint8_t max_array[32] = {255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 0b11110000u - 1, - 0b11100000u - 1, - 0b11000000u - 1}; - const simd8 max_value( - &max_array[sizeof(max_array) - sizeof(simd8)]); - return input.gt_bits(max_value); -} - -struct utf8_checker { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; - // The last input we received - simd8 prev_input_block; - // Whether the last input we received was incomplete (used for ASCII fast - // path) - simd8 prev_incomplete; - - // - // Check whether the current bytes are valid UTF-8. - // - simdutf_really_inline void check_utf8_bytes(const simd8 input, - const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ - // lead bytes (2, 3, 4-byte leads become large positive numbers instead of - // small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - simd8 sc = check_special_cases(input, prev1); - this->error |= check_multibyte_lengths(input, prev_input, sc); - } - - // The only problem that can happen at EOF is that a multibyte character is - // too short or a byte value too large in the last bytes: check_special_cases - // only checks for bytes too large in the first of two bytes. - simdutf_really_inline void check_eof() { - // If the previous block had incomplete UTF-8 characters at the end, an - // ASCII block can't possibly finish them. - this->error |= this->prev_incomplete; - } - - simdutf_really_inline void check_next_input(const simd8x64 &input) { - if (simdutf_likely(is_ascii(input))) { - this->error |= this->prev_incomplete; - } else { - // you might think that a for-loop would work, but under Visual Studio, it - // is not good enough. - static_assert((simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - this->prev_incomplete = - is_incomplete(input.chunks[simd8x64::NUM_CHUNKS - 1]); - this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS - 1]; - } - } - - // do not forget to call check_eof! - simdutf_really_inline bool errors() const { - return this->error.any_bits_set_anywhere(); - } - -}; // struct utf8_checker -} // namespace utf8_validation - -using utf8_validation::utf8_checker; - -} // unnamed namespace -} // namespace arm64 -} // namespace simdutf -/* end file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ -/* begin file src/generic/utf8_validation/utf8_validator.h */ -namespace simdutf { -namespace arm64 { -namespace { -namespace utf8_validation { - -/** - * Validates that the string is actual UTF-8. - */ -template -bool generic_validate_utf8(const uint8_t *input, size_t length) { - checker c{}; - buf_block_reader<64> reader(input, length); - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - c.check_next_input(in); - reader.advance(); - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - c.check_next_input(in); - reader.advance(); - c.check_eof(); - return !c.errors(); -} - -bool generic_validate_utf8(const char *input, size_t length) { - return generic_validate_utf8( - reinterpret_cast(input), length); -} - -/** - * Validates that the string is actual UTF-8 and stops on errors. - */ -template -result generic_validate_utf8_with_errors(const uint8_t *input, size_t length) { - checker c{}; - buf_block_reader<64> reader(input, length); - size_t count{0}; - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - c.check_next_input(in); - if (c.errors()) { - if (count != 0) { - count--; - } // Sometimes the error is only detected in the next chunk - result res = scalar::utf8::rewind_and_validate_with_errors( - reinterpret_cast(input), - reinterpret_cast(input + count), length - count); - res.count += count; - return res; - } - reader.advance(); - count += 64; - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - c.check_next_input(in); - reader.advance(); - c.check_eof(); - if (c.errors()) { - if (count != 0) { - count--; - } // Sometimes the error is only detected in the next chunk - result res = scalar::utf8::rewind_and_validate_with_errors( - reinterpret_cast(input), - reinterpret_cast(input) + count, length - count); - res.count += count; - return res; - } else { - return result(error_code::SUCCESS, length); - } -} - -result generic_validate_utf8_with_errors(const char *input, size_t length) { - return generic_validate_utf8_with_errors( - reinterpret_cast(input), length); -} - -} // namespace utf8_validation -} // unnamed namespace -} // namespace arm64 -} // namespace simdutf -/* end file src/generic/utf8_validation/utf8_validator.h */ -#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_ASCII -/* begin file src/generic/ascii_validation.h */ -namespace simdutf { -namespace arm64 { -namespace { -namespace ascii_validation { - -bool generic_validate_ascii(const char *input, size_t length) { - buf_block_reader<64> reader(reinterpret_cast(input), length); - uint8_t blocks[64]{}; - simd::simd8x64 running_or(blocks); - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - running_or |= in; - reader.advance(); - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - running_or |= in; - return running_or.is_ascii(); -} - -result generic_validate_ascii_with_errors(const char *input, size_t length) { - buf_block_reader<64> reader(reinterpret_cast(input), length); - size_t count{0}; - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - if (!in.is_ascii()) { - result res = scalar::ascii::validate_with_errors( - reinterpret_cast(input + count), length - count); - return result(res.error, count + res.count); - } - reader.advance(); - - count += 64; - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - if (!in.is_ascii()) { - result res = scalar::ascii::validate_with_errors( - reinterpret_cast(input + count), length - count); - return result(res.error, count + res.count); - } else { - return result(error_code::SUCCESS, length); - } -} - -} // namespace ascii_validation -} // unnamed namespace -} // namespace arm64 -} // namespace simdutf -/* end file src/generic/ascii_validation.h */ -#endif // SIMDUTF_FEATURE_ASCII -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - // transcoding from UTF-8 to UTF-16 -/* begin file src/generic/utf8_to_utf16/utf8_to_utf16.h */ -namespace simdutf { -namespace arm64 { -namespace { -namespace utf8_to_utf16 { -using namespace simd; - -simdutf_really_inline simd8 -check_special_cases(const simd8 input, const simd8 prev1) { - // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) - // Bit 1 = Too Long (ASCII followed by continuation) - // Bit 2 = Overlong 3-byte - // Bit 4 = Surrogate - // Bit 5 = Overlong 2-byte - // Bit 7 = Two Continuations - constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ - // 11______ 11______ - constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ - constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ - constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ - constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ - constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ - constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ - constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; - // 11110101 1000____ - // 1111011_ 1000____ - // 11111___ 1000____ - constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ - - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // 0_______ ________ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - TOO_LONG, - // 10______ ________ - TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, - // 1100____ ________ - TOO_SHORT | OVERLONG_2, - // 1101____ ________ - TOO_SHORT, - // 1110____ ________ - TOO_SHORT | OVERLONG_3 | SURROGATE, - // 1111____ ________ - TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); - constexpr const uint8_t CARRY = - TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . - const simd8 byte_1_low = - (prev1 & 0x0F) - .lookup_16( - // ____0000 ________ - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - // ____0001 ________ - CARRY | OVERLONG_2, - // ____001_ ________ - CARRY, CARRY, - - // ____0100 ________ - CARRY | TOO_LARGE, - // ____0101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____011_ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - - // ____1___ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____1101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000); - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ________ 0_______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - TOO_SHORT, TOO_SHORT, - - // ________ 1000____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | - OVERLONG_4, - // ________ 1001____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - // ________ 101_____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - - // ________ 11______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); - return (byte_1_high & byte_1_low & byte_2_high); -} -simdutf_really_inline simd8 -check_multibyte_lengths(const simd8 input, - const simd8 prev_input, - const simd8 sc) { - simd8 prev2 = input.prev<2>(prev_input); - simd8 prev3 = input.prev<3>(prev_input); - simd8 must23 = - simd8(must_be_2_3_continuation(prev2, prev3)); - simd8 must23_80 = must23 & uint8_t(0x80); - return must23_80 ^ sc; -} - -struct validating_transcoder { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; - - validating_transcoder() : error(uint8_t(0)) {} - // - // Check whether the current bytes are valid UTF-8. - // - simdutf_really_inline void check_utf8_bytes(const simd8 input, - const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ - // lead bytes (2, 3, 4-byte leads become large positive numbers instead of - // small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - simd8 sc = check_special_cases(input, prev1); - this->error |= check_multibyte_lengths(input, prev_input, sc); - } - - template - simdutf_really_inline size_t convert(const char *in, size_t size, - char16_t *utf16_output) { - size_t pos = 0; - char16_t *start{utf16_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 8 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > -65); - } - // If the input is long enough, then we have that margin-1 is the eight last - // leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store_ascii_as_utf16(utf16_output); - utf16_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - if (utf8_continuation_mask & 1) { - return 0; // error - } - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_utf16( - in + pos, utf8_end_of_code_point_mask, utf16_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - return 0; - } - if (pos < size) { - size_t howmany = scalar::utf8_to_utf16::convert( - in + pos, size - pos, utf16_output); - if (howmany == 0) { - return 0; - } - utf16_output += howmany; - } - return utf16_output - start; - } - - template - simdutf_really_inline result convert_with_errors(const char *in, size_t size, - char16_t *utf16_output) { - size_t pos = 0; - char16_t *start{utf16_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 8 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > -65); - } - // If the input is long enough, then we have that margin-1 is the eight last - // leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store_ascii_as_utf16(utf16_output); - utf16_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - if (errors() || (utf8_continuation_mask & 1)) { - // rewind_and_convert_with_errors will seek a potential error from - // in+pos onward, with the ability to go back up to pos bytes, and - // read size-pos bytes forward. - result res = - scalar::utf8_to_utf16::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf16_output); - res.count += pos; - return res; - } - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_utf16( - in + pos, utf8_end_of_code_point_mask, utf16_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - // rewind_and_convert_with_errors will seek a potential error from in+pos - // onward, with the ability to go back up to pos bytes, and read size-pos - // bytes forward. - result res = - scalar::utf8_to_utf16::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf16_output); - res.count += pos; - return res; - } - if (pos < size) { - // rewind_and_convert_with_errors will seek a potential error from in+pos - // onward, with the ability to go back up to pos bytes, and read size-pos - // bytes forward. - result res = - scalar::utf8_to_utf16::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf16_output); - if (res.error) { // In case of error, we want the error position - res.count += pos; - return res; - } else { // In case of success, we want the number of word written - utf16_output += res.count; - } - } - return result(error_code::SUCCESS, utf16_output - start); - } - - simdutf_really_inline bool errors() const { - return this->error.any_bits_set_anywhere(); - } - -}; // struct utf8_checker -} // namespace utf8_to_utf16 -} // unnamed namespace -} // namespace arm64 -} // namespace simdutf -/* end file src/generic/utf8_to_utf16/utf8_to_utf16.h */ -/* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ -namespace simdutf { -namespace arm64 { -namespace { -namespace utf8_to_utf16 { - -using namespace simd; - -template -simdutf_warn_unused size_t convert_valid(const char *input, size_t size, - char16_t *utf16_output) noexcept { - // The implementation is not specific to haswell and should be moved to the - // generic directory. - size_t pos = 0; - char16_t *start{utf16_output}; - const size_t safety_margin = 16; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - // this loop could be unrolled further. For example, we could process the - // mask far more than 64 bytes. - simd8x64 in(reinterpret_cast(input + pos)); - if (in.is_ascii()) { - in.store_ascii_as_utf16(utf16_output); - utf16_output += 64; - pos += 64; - } else { - // Slow path. We hope that the compiler will recognize that this is a slow - // path. Anything that is not a continuation mask is a 'leading byte', - // that is, the start of a new code point. - uint64_t utf8_continuation_mask = in.lt(-65 + 1); - // -65 is 0b10111111 in two-complement's, so largest possible continuation - // byte - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - // The *start* of code points is not so useful, rather, we want the *end* - // of code points. - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times when using solely - // the slow/regular path, and at least four times if there are fast paths. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - // - // Thus we may allow convert_masked_utf8_to_utf16 to process - // more bytes at a time under a fast-path mode where 16 bytes - // are consumed at once (e.g., when encountering ASCII). - size_t consumed = convert_masked_utf8_to_utf16( - input + pos, utf8_end_of_code_point_mask, utf16_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - utf16_output += scalar::utf8_to_utf16::convert_valid( - input + pos, size - pos, utf16_output); - return utf16_output - start; -} - -} // namespace utf8_to_utf16 -} // unnamed namespace -} // namespace arm64 -} // namespace simdutf -/* end file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - // transcoding from UTF-8 to UTF-32 -/* begin file src/generic/utf8_to_utf32/utf8_to_utf32.h */ -namespace simdutf { -namespace arm64 { -namespace { -namespace utf8_to_utf32 { -using namespace simd; - -simdutf_really_inline simd8 -check_special_cases(const simd8 input, const simd8 prev1) { - // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) - // Bit 1 = Too Long (ASCII followed by continuation) - // Bit 2 = Overlong 3-byte - // Bit 4 = Surrogate - // Bit 5 = Overlong 2-byte - // Bit 7 = Two Continuations - constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ - // 11______ 11______ - constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ - constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ - constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ - constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ - constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ - constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ - constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; - // 11110101 1000____ - // 1111011_ 1000____ - // 11111___ 1000____ - constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ - - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // 0_______ ________ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - TOO_LONG, - // 10______ ________ - TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, - // 1100____ ________ - TOO_SHORT | OVERLONG_2, - // 1101____ ________ - TOO_SHORT, - // 1110____ ________ - TOO_SHORT | OVERLONG_3 | SURROGATE, - // 1111____ ________ - TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); - constexpr const uint8_t CARRY = - TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . - const simd8 byte_1_low = - (prev1 & 0x0F) - .lookup_16( - // ____0000 ________ - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - // ____0001 ________ - CARRY | OVERLONG_2, - // ____001_ ________ - CARRY, CARRY, - - // ____0100 ________ - CARRY | TOO_LARGE, - // ____0101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____011_ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - - // ____1___ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____1101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000); - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ________ 0_______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - TOO_SHORT, TOO_SHORT, - - // ________ 1000____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | - OVERLONG_4, - // ________ 1001____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - // ________ 101_____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - - // ________ 11______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); - return (byte_1_high & byte_1_low & byte_2_high); -} -simdutf_really_inline simd8 -check_multibyte_lengths(const simd8 input, - const simd8 prev_input, - const simd8 sc) { - simd8 prev2 = input.prev<2>(prev_input); - simd8 prev3 = input.prev<3>(prev_input); - simd8 must23 = - simd8(must_be_2_3_continuation(prev2, prev3)); - simd8 must23_80 = must23 & uint8_t(0x80); - return must23_80 ^ sc; -} - -struct validating_transcoder { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; - - validating_transcoder() : error(uint8_t(0)) {} - // - // Check whether the current bytes are valid UTF-8. - // - simdutf_really_inline void check_utf8_bytes(const simd8 input, - const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ - // lead bytes (2, 3, 4-byte leads become large positive numbers instead of - // small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - simd8 sc = check_special_cases(input, prev1); - this->error |= check_multibyte_lengths(input, prev_input, sc); - } - - simdutf_really_inline size_t convert(const char *in, size_t size, - char32_t *utf32_output) { - size_t pos = 0; - char32_t *start{utf32_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 words when calling convert_masked_utf8_to_utf32. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 16 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > -65); - } - // If the input is long enough, then we have that margin-1 is the fourth - // last leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store_ascii_as_utf32(utf32_output); - utf32_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - if (utf8_continuation_mask & 1) { - return 0; // we have an error - } - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_utf32( - in + pos, utf8_end_of_code_point_mask, utf32_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - return 0; - } - if (pos < size) { - size_t howmany = - scalar::utf8_to_utf32::convert(in + pos, size - pos, utf32_output); - if (howmany == 0) { - return 0; - } - utf32_output += howmany; - } - return utf32_output - start; - } - - simdutf_really_inline result convert_with_errors(const char *in, size_t size, - char32_t *utf32_output) { - size_t pos = 0; - char32_t *start{utf32_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 8 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > -65); - } - // If the input is long enough, then we have that margin-1 is the fourth - // last leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store_ascii_as_utf32(utf32_output); - utf32_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - if (errors() || (utf8_continuation_mask & 1)) { - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf32_output); - res.count += pos; - return res; - } - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_utf32( - in + pos, utf8_end_of_code_point_mask, utf32_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf32_output); - res.count += pos; - return res; - } - if (pos < size) { - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf32_output); - if (res.error) { // In case of error, we want the error position - res.count += pos; - return res; - } else { // In case of success, we want the number of word written - utf32_output += res.count; - } - } - return result(error_code::SUCCESS, utf32_output - start); - } - - simdutf_really_inline bool errors() const { - return this->error.any_bits_set_anywhere(); - } - -}; // struct utf8_checker -} // namespace utf8_to_utf32 -} // unnamed namespace -} // namespace arm64 -} // namespace simdutf -/* end file src/generic/utf8_to_utf32/utf8_to_utf32.h */ -/* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ -namespace simdutf { -namespace arm64 { -namespace { -namespace utf8_to_utf32 { - -using namespace simd; - -simdutf_warn_unused size_t convert_valid(const char *input, size_t size, - char32_t *utf32_output) noexcept { - size_t pos = 0; - char32_t *start{utf32_output}; - const size_t safety_margin = 16; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 in(reinterpret_cast(input + pos)); - if (in.is_ascii()) { - in.store_ascii_as_utf32(utf32_output); - utf32_output += 64; - pos += 64; - } else { - // -65 is 0b10111111 in two-complement's, so largest possible continuation - // byte - uint64_t utf8_continuation_mask = in.lt(-65 + 1); - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - size_t max_starting_point = (pos + 64) - 12; - while (pos < max_starting_point) { - size_t consumed = convert_masked_utf8_to_utf32( - input + pos, utf8_end_of_code_point_mask, utf32_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - } - } - utf32_output += scalar::utf8_to_utf32::convert_valid(input + pos, size - pos, - utf32_output); - return utf32_output - start; -} - -} // namespace utf8_to_utf32 -} // unnamed namespace -} // namespace arm64 -} // namespace simdutf -/* end file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -// other functions -#if SIMDUTF_FEATURE_UTF16 -/* begin file src/generic/utf16.h */ -namespace simdutf { -namespace arm64 { -namespace { -namespace utf16 { - -template -simdutf_really_inline size_t count_code_points(const char16_t *in, - size_t size) { - size_t pos = 0; - size_t count = 0; - for (; pos < size / 32 * 32; pos += 32) { - simd16x32 input(reinterpret_cast(in + pos)); - if (!match_system(big_endian)) { - input.swap_bytes(); - } - uint64_t not_pair = input.not_in_range(0xDC00, 0xDFFF); - count += count_ones(not_pair) / 2; - } - return count + - scalar::utf16::count_code_points(in + pos, size - pos); -} - -template -simdutf_really_inline size_t utf8_length_from_utf16(const char16_t *in, - size_t size) { - size_t pos = 0; - size_t count = 0; - // This algorithm could no doubt be improved! - for (; pos < size / 32 * 32; pos += 32) { - simd16x32 input(reinterpret_cast(in + pos)); - if (!match_system(big_endian)) { - input.swap_bytes(); - } - uint64_t ascii_mask = input.lteq(0x7F); - uint64_t twobyte_mask = input.lteq(0x7FF); - uint64_t not_pair_mask = input.not_in_range(0xD800, 0xDFFF); - - size_t ascii_count = count_ones(ascii_mask) / 2; - size_t twobyte_count = count_ones(twobyte_mask & ~ascii_mask) / 2; - size_t threebyte_count = count_ones(not_pair_mask & ~twobyte_mask) / 2; - size_t fourbyte_count = 32 - count_ones(not_pair_mask) / 2; - count += 2 * fourbyte_count + 3 * threebyte_count + 2 * twobyte_count + - ascii_count; - } - return count + scalar::utf16::utf8_length_from_utf16(in + pos, - size - pos); -} - -template -simdutf_really_inline size_t utf32_length_from_utf16(const char16_t *in, - size_t size) { - return count_code_points(in, size); -} - -simdutf_really_inline void -change_endianness_utf16(const char16_t *in, size_t size, char16_t *output) { - size_t pos = 0; - - while (pos < size / 32 * 32) { - simd16x32 input(reinterpret_cast(in + pos)); - input.swap_bytes(); - input.store(reinterpret_cast(output)); - pos += 32; - output += 32; - } - - scalar::utf16::change_endianness_utf16(in + pos, size - pos, output); -} - -} // namespace utf16 -} // unnamed namespace -} // namespace arm64 -} // namespace simdutf -/* end file src/generic/utf16.h */ -#endif // SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_UTF8 -/* begin file src/generic/utf8.h */ -namespace simdutf { -namespace arm64 { -namespace { -namespace utf8 { - -using namespace simd; - -simdutf_really_inline size_t count_code_points(const char *in, size_t size) { - size_t pos = 0; - size_t count = 0; - for (; pos + 64 <= size; pos += 64) { - simd8x64 input(reinterpret_cast(in + pos)); - uint64_t utf8_continuation_mask = input.gt(-65); - count += count_ones(utf8_continuation_mask); - } - return count + scalar::utf8::count_code_points(in + pos, size - pos); -} - -#ifdef SIMDUTF_SIMD_HAS_BYTEMASK -simdutf_really_inline size_t count_code_points_bytemask(const char *in, - size_t size) { - using vector_i8 = simd8; - using vector_u8 = simd8; - using vector_u64 = simd64; - - constexpr size_t N = vector_i8::SIZE; - constexpr size_t max_iterations = 255 / 4; - - size_t pos = 0; - size_t count = 0; - - auto counters = vector_u64::zero(); - auto local = vector_u8::zero(); - size_t iterations = 0; - for (; pos + 4 * N <= size; pos += 4 * N) { - const auto input0 = - simd8::load(reinterpret_cast(in + pos + 0 * N)); - const auto input1 = - simd8::load(reinterpret_cast(in + pos + 1 * N)); - const auto input2 = - simd8::load(reinterpret_cast(in + pos + 2 * N)); - const auto input3 = - simd8::load(reinterpret_cast(in + pos + 3 * N)); - const auto mask0 = input0 > int8_t(-65); - const auto mask1 = input1 > int8_t(-65); - const auto mask2 = input2 > int8_t(-65); - const auto mask3 = input3 > int8_t(-65); - - local -= vector_u8(mask0); - local -= vector_u8(mask1); - local -= vector_u8(mask2); - local -= vector_u8(mask3); - - iterations += 1; - if (iterations == max_iterations) { - counters += sum_8bytes(local); - local = vector_u8::zero(); - iterations = 0; - } - } - - if (iterations > 0) { - count += local.sum_bytes(); - } - - count += counters.sum(); - - return count + scalar::utf8::count_code_points(in + pos, size - pos); -} -#endif // SIMDUTF_SIMD_HAS_BYTEMASK - -simdutf_really_inline size_t utf16_length_from_utf8(const char *in, - size_t size) { - size_t pos = 0; - size_t count = 0; - // This algorithm could no doubt be improved! - for (; pos + 64 <= size; pos += 64) { - simd8x64 input(reinterpret_cast(in + pos)); - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - // We count one word for anything that is not a continuation (so - // leading bytes). - count += 64 - count_ones(utf8_continuation_mask); - int64_t utf8_4byte = input.gteq_unsigned(240); - count += count_ones(utf8_4byte); - } - return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos); -} - -} // namespace utf8 -} // unnamed namespace -} // namespace arm64 -} // namespace simdutf -/* end file src/generic/utf8.h */ -#endif // SIMDUTF_FEATURE_UTF8 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - // transcoding from UTF-8 to Latin 1 -/* begin file src/generic/utf8_to_latin1/utf8_to_latin1.h */ -namespace simdutf { -namespace arm64 { -namespace { -namespace utf8_to_latin1 { -using namespace simd; - -simdutf_really_inline simd8 -check_special_cases(const simd8 input, const simd8 prev1) { - // For UTF-8 to Latin 1, we can allow any ASCII character, and any - // continuation byte, but the non-ASCII leading bytes must be 0b11000011 or - // 0b11000010 and nothing else. - // - // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) - // Bit 1 = Too Long (ASCII followed by continuation) - // Bit 2 = Overlong 3-byte - // Bit 4 = Surrogate - // Bit 5 = Overlong 2-byte - // Bit 7 = Two Continuations - constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ - // 11______ 11______ - constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ - constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ - constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ - constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ - constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ - constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ - constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; - // 11110101 1000____ - // 1111011_ 1000____ - // 11111___ 1000____ - constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ - constexpr const uint8_t FORBIDDEN = 0xff; - - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // 0_______ ________ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - TOO_LONG, - // 10______ ________ - TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, - // 1100____ ________ - TOO_SHORT | OVERLONG_2, - // 1101____ ________ - FORBIDDEN, - // 1110____ ________ - FORBIDDEN, - // 1111____ ________ - FORBIDDEN); - constexpr const uint8_t CARRY = - TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . - const simd8 byte_1_low = - (prev1 & 0x0F) - .lookup_16( - // ____0000 ________ - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - // ____0001 ________ - CARRY | OVERLONG_2, - // ____001_ ________ - CARRY, CARRY, - - // ____0100 ________ - FORBIDDEN, - // ____0101 ________ - FORBIDDEN, - // ____011_ ________ - FORBIDDEN, FORBIDDEN, - - // ____1___ ________ - FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN, - // ____1101 ________ - FORBIDDEN, FORBIDDEN, FORBIDDEN); - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ________ 0_______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - TOO_SHORT, TOO_SHORT, - - // ________ 1000____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | - OVERLONG_4, - // ________ 1001____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - // ________ 101_____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - - // ________ 11______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); - return (byte_1_high & byte_1_low & byte_2_high); -} - -struct validating_transcoder { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; - - validating_transcoder() : error(uint8_t(0)) {} - // - // Check whether the current bytes are valid UTF-8. - // - simdutf_really_inline void check_utf8_bytes(const simd8 input, - const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ - // lead bytes (2, 3, 4-byte leads become large positive numbers instead of - // small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - this->error |= check_special_cases(input, prev1); - } - - simdutf_really_inline size_t convert(const char *in, size_t size, - char *latin1_output) { - size_t pos = 0; - char *start{latin1_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 16 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 16; margin--) { - leading_byte += (int8_t(in[margin - 1]) > - -65); // twos complement of -65 is 1011 1111 ... - } - // If the input is long enough, then we have that margin-1 is the eight last - // leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store((int8_t *)latin1_output); - latin1_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - uint64_t utf8_continuation_mask = - input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in - // this case, we also have ASCII to account for. - if (utf8_continuation_mask & 1) { - return 0; // error - } - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_latin1( - in + pos, utf8_end_of_code_point_mask, latin1_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - return 0; - } - if (pos < size) { - size_t howmany = - scalar::utf8_to_latin1::convert(in + pos, size - pos, latin1_output); - if (howmany == 0) { - return 0; - } - latin1_output += howmany; - } - return latin1_output - start; - } - - simdutf_really_inline result convert_with_errors(const char *in, size_t size, - char *latin1_output) { - size_t pos = 0; - char *start{latin1_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 8 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > -65); - } - // If the input is long enough, then we have that margin-1 is the eight last - // leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store((int8_t *)latin1_output); - latin1_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - if (errors()) { - // rewind_and_convert_with_errors will seek a potential error from - // in+pos onward, with the ability to go back up to pos bytes, and - // read size-pos bytes forward. - result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( - pos, in + pos, size - pos, latin1_output); - res.count += pos; - return res; - } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_latin1( - in + pos, utf8_end_of_code_point_mask, latin1_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - // rewind_and_convert_with_errors will seek a potential error from in+pos - // onward, with the ability to go back up to pos bytes, and read size-pos - // bytes forward. - result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( - pos, in + pos, size - pos, latin1_output); - res.count += pos; - return res; - } - if (pos < size) { - // rewind_and_convert_with_errors will seek a potential error from in+pos - // onward, with the ability to go back up to pos bytes, and read size-pos - // bytes forward. - result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( - pos, in + pos, size - pos, latin1_output); - if (res.error) { // In case of error, we want the error position - res.count += pos; - return res; - } else { // In case of success, we want the number of word written - latin1_output += res.count; - } - } - return result(error_code::SUCCESS, latin1_output - start); - } - - simdutf_really_inline bool errors() const { - return this->error.any_bits_set_anywhere(); - } - -}; // struct utf8_checker -} // namespace utf8_to_latin1 -} // unnamed namespace -} // namespace arm64 -} // namespace simdutf -/* end file src/generic/utf8_to_latin1/utf8_to_latin1.h */ -/* begin file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ -namespace simdutf { -namespace arm64 { -namespace { -namespace utf8_to_latin1 { -using namespace simd; - -simdutf_really_inline size_t convert_valid(const char *in, size_t size, - char *latin1_output) { - size_t pos = 0; - char *start{latin1_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the last - // 16 bytes, and if the data is valid, then it is entirely safe because 16 - // UTF-8 bytes generate much more than 8 bytes. However, you cannot generally - // assume that you have valid UTF-8 input, so we are going to go back from the - // end counting 8 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > - -65); // twos complement of -65 is 1011 1111 ... - } - // If the input is long enough, then we have that margin-1 is the eight last - // leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store((int8_t *)latin1_output); - latin1_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, it - // is not good enough. - uint64_t utf8_continuation_mask = - input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in - // this case, we also have ASCII to account for. - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_latin1( - in + pos, utf8_end_of_code_point_mask, latin1_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (pos < size) { - size_t howmany = scalar::utf8_to_latin1::convert_valid(in + pos, size - pos, - latin1_output); - latin1_output += howmany; - } - return latin1_output - start; -} - -} // namespace utf8_to_latin1 -} // namespace -} // namespace arm64 -} // namespace simdutf - // namespace simdutf -/* end file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -// -// Implementation-specific overrides -// -namespace simdutf { -namespace arm64 { - -#if SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_warn_unused int -implementation::detect_encodings(const char *input, - size_t length) const noexcept { - // If there is a BOM, then we trust it. - auto bom_encoding = simdutf::BOM::check_bom(input, length); - if (bom_encoding != encoding_type::unspecified) { - return bom_encoding; - } - // todo: reimplement as a one-pass algorithm. - int out = 0; - if (validate_utf8(input, length)) { - out |= encoding_type::UTF8; - } - if ((length % 2) == 0) { - if (validate_utf16le(reinterpret_cast(input), - length / 2)) { - out |= encoding_type::UTF16_LE; - } - } - if ((length % 4) == 0) { - if (validate_utf32(reinterpret_cast(input), length / 4)) { - out |= encoding_type::UTF32_LE; - } - } - return out; -} -#endif // SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_warn_unused bool -implementation::validate_utf8(const char *buf, size_t len) const noexcept { - return arm64::utf8_validation::generic_validate_utf8(buf, len); -} -#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 -simdutf_warn_unused result implementation::validate_utf8_with_errors( - const char *buf, size_t len) const noexcept { - return arm64::utf8_validation::generic_validate_utf8_with_errors(buf, len); -} -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_ASCII -simdutf_warn_unused bool -implementation::validate_ascii(const char *buf, size_t len) const noexcept { - return arm64::ascii_validation::generic_validate_ascii(buf, len); -} - -simdutf_warn_unused result implementation::validate_ascii_with_errors( - const char *buf, size_t len) const noexcept { - return arm64::ascii_validation::generic_validate_ascii_with_errors(buf, len); -} -#endif // SIMDUTF_FEATURE_ASCII - -#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_warn_unused bool -implementation::validate_utf16le(const char16_t *buf, - size_t len) const noexcept { - if (simdutf_unlikely(len == 0)) { - // empty input is valid. protected the implementation from nullptr. - return true; - } - const char16_t *tail = arm_validate_utf16(buf, len); - if (tail) { - return scalar::utf16::validate(tail, - len - (tail - buf)); - } else { - return false; - } -} -#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused bool -implementation::validate_utf16be(const char16_t *buf, - size_t len) const noexcept { - if (simdutf_unlikely(len == 0)) { - // empty input is valid. protected the implementation from nullptr. - return true; - } - const char16_t *tail = arm_validate_utf16(buf, len); - if (tail) { - return scalar::utf16::validate(tail, len - (tail - buf)); - } else { - return false; - } -} - -simdutf_warn_unused result implementation::validate_utf16le_with_errors( - const char16_t *buf, size_t len) const noexcept { - if (simdutf_unlikely(len == 0)) { - return result(error_code::SUCCESS, 0); - } - result res = arm_validate_utf16_with_errors(buf, len); - if (res.count != len) { - result scalar_res = scalar::utf16::validate_with_errors( - buf + res.count, len - res.count); - return result(scalar_res.error, res.count + scalar_res.count); - } else { - return res; - } -} - -simdutf_warn_unused result implementation::validate_utf16be_with_errors( - const char16_t *buf, size_t len) const noexcept { - if (simdutf_unlikely(len == 0)) { - return result(error_code::SUCCESS, 0); - } - result res = arm_validate_utf16_with_errors(buf, len); - if (res.count != len) { - result scalar_res = scalar::utf16::validate_with_errors( - buf + res.count, len - res.count); - return result(scalar_res.error, res.count + scalar_res.count); - } else { - return res; - } -} - -void implementation::to_well_formed_utf16le(const char16_t *input, size_t len, - char16_t *output) const noexcept { - return utf16fix_neon_64bits(input, len, output); -} - -void implementation::to_well_formed_utf16be(const char16_t *input, size_t len, - char16_t *output) const noexcept { - return utf16fix_neon_64bits(input, len, output); -} -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_warn_unused bool -implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept { - if (simdutf_unlikely(len == 0)) { - // empty input is valid. protected the implementation from nullptr. - return true; - } - const char32_t *tail = arm_validate_utf32le(buf, len); - if (tail) { - return scalar::utf32::validate(tail, len - (tail - buf)); - } else { - return false; - } -} -#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused result implementation::validate_utf32_with_errors( - const char32_t *buf, size_t len) const noexcept { - if (simdutf_unlikely(len == 0)) { - return result(error_code::SUCCESS, 0); - } - result res = arm_validate_utf32le_with_errors(buf, len); - if (res.count != len) { - result scalar_res = - scalar::utf32::validate_with_errors(buf + res.count, len - res.count); - return result(scalar_res.error, res.count + scalar_res.count); - } else { - return res; - } -} -#endif // SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( - const char *buf, size_t len, char *utf8_output) const noexcept { - std::pair ret = - arm_convert_latin1_to_utf8(buf, len, utf8_output); - size_t converted_chars = ret.second - utf8_output; - - if (ret.first != buf + len) { - const size_t scalar_converted_chars = scalar::latin1_to_utf8::convert( - ret.first, len - (ret.first - buf), ret.second); - converted_chars += scalar_converted_chars; - } - return converted_chars; -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - std::pair ret = - arm_convert_latin1_to_utf16(buf, len, utf16_output); - size_t converted_chars = ret.second - utf16_output; - if (ret.first != buf + len) { - const size_t scalar_converted_chars = - scalar::latin1_to_utf16::convert( - ret.first, len - (ret.first - buf), ret.second); - converted_chars += scalar_converted_chars; - } - return converted_chars; -} - -simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - std::pair ret = - arm_convert_latin1_to_utf16(buf, len, utf16_output); - size_t converted_chars = ret.second - utf16_output; - if (ret.first != buf + len) { - const size_t scalar_converted_chars = - scalar::latin1_to_utf16::convert( - ret.first, len - (ret.first - buf), ret.second); - converted_chars += scalar_converted_chars; - } - return converted_chars; -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept { - std::pair ret = - arm_convert_latin1_to_utf32(buf, len, utf32_output); - size_t converted_chars = ret.second - utf32_output; - if (ret.first != buf + len) { - const size_t scalar_converted_chars = scalar::latin1_to_utf32::convert( - ret.first, len - (ret.first - buf), ret.second); - converted_chars += scalar_converted_chars; - } - return converted_chars; -} -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept { - utf8_to_latin1::validating_transcoder converter; - return converter.convert(buf, len, latin1_output); -} - -simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors( - const char *buf, size_t len, char *latin1_output) const noexcept { - utf8_to_latin1::validating_transcoder converter; - return converter.convert_with_errors(buf, len, latin1_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept { - return arm64::utf8_to_latin1::convert_valid(buf, len, latin1_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - utf8_to_utf16::validating_transcoder converter; - return converter.convert(buf, len, utf16_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - utf8_to_utf16::validating_transcoder converter; - return converter.convert(buf, len, utf16_output); -} - -simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - utf8_to_utf16::validating_transcoder converter; - return converter.convert_with_errors(buf, len, - utf16_output); -} - -simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - utf8_to_utf16::validating_transcoder converter; - return converter.convert_with_errors(buf, len, utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le( - const char *input, size_t size, char16_t *utf16_output) const noexcept { - return utf8_to_utf16::convert_valid(input, size, - utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be( - const char *input, size_t size, char16_t *utf16_output) const noexcept { - return utf8_to_utf16::convert_valid(input, size, - utf16_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::convert_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept { - utf8_to_utf32::validating_transcoder converter; - return converter.convert(buf, len, utf32_output); -} - -simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors( - const char *buf, size_t len, char32_t *utf32_output) const noexcept { - utf8_to_utf32::validating_transcoder converter; - return converter.convert_with_errors(buf, len, utf32_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( - const char *input, size_t size, char32_t *utf32_output) const noexcept { - return utf8_to_utf32::convert_valid(input, size, utf32_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - std::pair ret = - arm_convert_utf16_to_latin1(buf, len, latin1_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - latin1_output; - - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_latin1::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - std::pair ret = - arm_convert_utf16_to_latin1(buf, len, latin1_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - latin1_output; - - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_latin1::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused result -implementation::convert_utf16le_to_latin1_with_errors( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - std::pair ret = - arm_convert_utf16_to_latin1_with_errors( - buf, len, latin1_output); - if (ret.first.error) { - return ret.first; - } // Can return directly since scalar fallback already found correct - // ret.first.count - if (ret.first.count != len) { // All good so far, but not finished - result scalar_res = - scalar::utf16_to_latin1::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - latin1_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused result -implementation::convert_utf16be_to_latin1_with_errors( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - std::pair ret = - arm_convert_utf16_to_latin1_with_errors(buf, len, - latin1_output); - if (ret.first.error) { - return ret.first; - } // Can return directly since scalar fallback already found correct - // ret.first.count - if (ret.first.count != len) { // All good so far, but not finished - result scalar_res = - scalar::utf16_to_latin1::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - latin1_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - // optimization opportunity: implement a custom function. - return convert_utf16be_to_latin1(buf, len, latin1_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - // optimization opportunity: implement a custom function. - return convert_utf16le_to_latin1(buf, len, latin1_output); -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - std::pair ret = - arm_convert_utf16_to_utf8(buf, len, utf8_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf8_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_utf8::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - std::pair ret = - arm_convert_utf16_to_utf8(buf, len, utf8_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf8_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_utf8::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - arm_convert_utf16_to_utf8_with_errors(buf, len, - utf8_output); - if (ret.first.error) { - return ret.first; - } // Can return directly since scalar fallback already found correct - // ret.first.count - if (ret.first.count != len) { // All good so far, but not finished - result scalar_res = - scalar::utf16_to_utf8::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - utf8_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - arm_convert_utf16_to_utf8_with_errors(buf, len, - utf8_output); - if (ret.first.error) { - return ret.first; - } // Can return directly since scalar fallback already found correct - // ret.first.count - if (ret.first.count != len) { // All good so far, but not finished - result scalar_res = - scalar::utf16_to_utf8::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - utf8_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return convert_utf16le_to_utf8(buf, len, utf8_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return convert_utf16be_to_utf8(buf, len, utf8_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_output) const noexcept { - if (simdutf_unlikely(len == 0)) { - return 0; - } - std::pair ret = - arm_convert_utf32_to_utf8(buf, len, utf8_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf8_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = scalar::utf32_to_utf8::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( - const char32_t *buf, size_t len, char *utf8_output) const noexcept { - if (simdutf_unlikely(len == 0)) { - return result(error_code::SUCCESS, 0); - } - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - arm_convert_utf32_to_utf8_with_errors(buf, len, utf8_output); - if (ret.first.count != len) { - result scalar_res = scalar::utf32_to_utf8::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - utf8_output; // Set count to the number of 8-bit code units written - return ret.first; -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - std::pair ret = - arm_convert_utf16_to_utf32(buf, len, utf32_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf32_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_utf32::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - std::pair ret = - arm_convert_utf16_to_utf32(buf, len, utf32_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf32_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_utf32::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - arm_convert_utf16_to_utf32_with_errors(buf, len, - utf32_output); - if (ret.first.error) { - return ret.first; - } // Can return directly since scalar fallback already found correct - // ret.first.count - if (ret.first.count != len) { // All good so far, but not finished - result scalar_res = - scalar::utf16_to_utf32::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - utf32_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - arm_convert_utf16_to_utf32_with_errors(buf, len, - utf32_output); - if (ret.first.error) { - return ret.first; - } // Can return directly since scalar fallback already found correct - // ret.first.count - if (ret.first.count != len) { // All good so far, but not finished - result scalar_res = - scalar::utf16_to_utf32::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - utf32_output; // Set count to the number of 8-bit code units written - return ret.first; -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( - const char32_t *buf, size_t len, char *latin1_output) const noexcept { - std::pair ret = - arm_convert_utf32_to_latin1(buf, len, latin1_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - latin1_output; - - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = scalar::utf32_to_latin1::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors( - const char32_t *buf, size_t len, char *latin1_output) const noexcept { - std::pair ret = - arm_convert_utf32_to_latin1_with_errors(buf, len, latin1_output); - if (ret.first.error) { - return ret.first; - } // Can return directly since scalar fallback already found correct - // ret.first.count - if (ret.first.count != len) { // All good so far, but not finished - result scalar_res = scalar::utf32_to_latin1::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - latin1_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1( - const char32_t *buf, size_t len, char *latin1_output) const noexcept { - std::pair ret = - arm_convert_utf32_to_latin1(buf, len, latin1_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - latin1_output; - - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = scalar::utf32_to_latin1::convert_valid( - ret.first, len - (ret.first - buf), ret.second); - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_output) const noexcept { - // optimization opportunity: implement a custom function. - return convert_utf32_to_utf8(buf, len, utf8_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - std::pair ret = - arm_convert_utf32_to_utf16(buf, len, utf16_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf16_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf32_to_utf16::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - std::pair ret = - arm_convert_utf32_to_utf16(buf, len, utf16_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf16_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf32_to_utf16::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - arm_convert_utf32_to_utf16_with_errors(buf, len, - utf16_output); - if (ret.first.count != len) { - result scalar_res = - scalar::utf32_to_utf16::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - utf16_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - arm_convert_utf32_to_utf16_with_errors(buf, len, - utf16_output); - if (ret.first.count != len) { - result scalar_res = - scalar::utf32_to_utf16::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - utf16_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - return convert_utf32_to_utf16le(buf, len, utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - return convert_utf32_to_utf16be(buf, len, utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return convert_utf16le_to_utf32(buf, len, utf32_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return convert_utf16be_to_utf32(buf, len, utf32_output); -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 -void implementation::change_endianness_utf16(const char16_t *input, - size_t length, - char16_t *output) const noexcept { - utf16::change_endianness_utf16(input, length, output); -} - -simdutf_warn_unused size_t implementation::count_utf16le( - const char16_t *input, size_t length) const noexcept { - return utf16::count_code_points(input, length); -} - -simdutf_warn_unused size_t implementation::count_utf16be( - const char16_t *input, size_t length) const noexcept { - return utf16::count_code_points(input, length); -} -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 -simdutf_warn_unused size_t -implementation::count_utf8(const char *input, size_t length) const noexcept { - return utf8::count_code_points(input, length); -} -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::latin1_length_from_utf8( - const char *buf, size_t len) const noexcept { - return count_utf8(buf, len); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::utf8_length_from_latin1( - const char *input, size_t length) const noexcept { - // See - // https://lemire.me/blog/2023/05/15/computing-the-utf-8-size-of-a-latin-1-string-quickly-arm-neon-edition/ - // credit to Pete Cawley - const uint8_t *data = reinterpret_cast(input); - uint64_t result = 0; - const int lanes = sizeof(uint8x16_t); - uint8_t rem = length % lanes; - const uint8_t *simd_end = data + (length / lanes) * lanes; - const uint8x16_t threshold = vdupq_n_u8(0x80); - for (; data < simd_end; data += lanes) { - // load 16 bytes - uint8x16_t input_vec = vld1q_u8(data); - // compare to threshold (0x80) - uint8x16_t withhighbit = vcgeq_u8(input_vec, threshold); - // vertical addition - result -= vaddvq_s8(vreinterpretq_s8_u8(withhighbit)); - } - return result + (length / lanes) * lanes + - scalar::latin1::utf8_length_from_latin1((const char *)simd_end, rem); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t implementation::utf8_length_from_utf16le( - const char16_t *input, size_t length) const noexcept { - return arm64_utf8_length_from_utf16_bytemask(input, - length); -} - -simdutf_warn_unused size_t implementation::utf8_length_from_utf16be( - const char16_t *input, size_t length) const noexcept { - return arm64_utf8_length_from_utf16_bytemask(input, length); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::utf32_length_from_utf16le( - const char16_t *input, size_t length) const noexcept { - return utf16::utf32_length_from_utf16(input, length); -} - -simdutf_warn_unused size_t implementation::utf32_length_from_utf16be( - const char16_t *input, size_t length) const noexcept { - return utf16::utf32_length_from_utf16(input, length); -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t implementation::utf16_length_from_utf8( - const char *input, size_t length) const noexcept { - return utf8::utf16_length_from_utf8(input, length); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::utf8_length_from_utf32( - const char32_t *input, size_t length) const noexcept { - const uint32x4_t v_7f = vmovq_n_u32((uint32_t)0x7f); - const uint32x4_t v_7ff = vmovq_n_u32((uint32_t)0x7ff); - const uint32x4_t v_ffff = vmovq_n_u32((uint32_t)0xffff); - const uint32x4_t v_1 = vmovq_n_u32((uint32_t)0x1); - size_t pos = 0; - size_t count = 0; - for (; pos + 4 <= length; pos += 4) { - uint32x4_t in = vld1q_u32(reinterpret_cast(input + pos)); - const uint32x4_t ascii_bytes_bytemask = vcleq_u32(in, v_7f); - const uint32x4_t one_two_bytes_bytemask = vcleq_u32(in, v_7ff); - const uint32x4_t two_bytes_bytemask = - veorq_u32(one_two_bytes_bytemask, ascii_bytes_bytemask); - const uint32x4_t three_bytes_bytemask = - veorq_u32(vcleq_u32(in, v_ffff), one_two_bytes_bytemask); - - const uint16x8_t reduced_ascii_bytes_bytemask = - vreinterpretq_u16_u32(vandq_u32(ascii_bytes_bytemask, v_1)); - const uint16x8_t reduced_two_bytes_bytemask = - vreinterpretq_u16_u32(vandq_u32(two_bytes_bytemask, v_1)); - const uint16x8_t reduced_three_bytes_bytemask = - vreinterpretq_u16_u32(vandq_u32(three_bytes_bytemask, v_1)); - - const uint16x8_t compressed_bytemask0 = - vpaddq_u16(reduced_ascii_bytes_bytemask, reduced_two_bytes_bytemask); - const uint16x8_t compressed_bytemask1 = - vpaddq_u16(reduced_three_bytes_bytemask, reduced_three_bytes_bytemask); - - size_t ascii_count = count_ones( - vgetq_lane_u64(vreinterpretq_u64_u16(compressed_bytemask0), 0)); - size_t two_bytes_count = count_ones( - vgetq_lane_u64(vreinterpretq_u64_u16(compressed_bytemask0), 1)); - size_t three_bytes_count = count_ones( - vgetq_lane_u64(vreinterpretq_u64_u16(compressed_bytemask1), 0)); - - count += 16 - 3 * ascii_count - 2 * two_bytes_count - three_bytes_count; - } - return count + - scalar::utf32::utf8_length_from_utf32(input + pos, length - pos); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::utf16_length_from_utf32( - const char32_t *input, size_t length) const noexcept { - const uint32x4_t v_ffff = vmovq_n_u32((uint32_t)0xffff); - const uint32x4_t v_1 = vmovq_n_u32((uint32_t)0x1); - size_t pos = 0; - size_t count = 0; - for (; pos + 4 <= length; pos += 4) { - uint32x4_t in = vld1q_u32(reinterpret_cast(input + pos)); - const uint32x4_t surrogate_bytemask = vcgtq_u32(in, v_ffff); - const uint16x8_t reduced_bytemask = - vreinterpretq_u16_u32(vandq_u32(surrogate_bytemask, v_1)); - const uint16x8_t compressed_bytemask = - vpaddq_u16(reduced_bytemask, reduced_bytemask); - size_t surrogate_count = count_ones( - vgetq_lane_u64(vreinterpretq_u64_u16(compressed_bytemask), 0)); - count += 4 + surrogate_count; - } - return count + - scalar::utf32::utf16_length_from_utf32(input + pos, length - pos); -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::utf32_length_from_utf8( - const char *input, size_t length) const noexcept { - return utf8::count_code_points(input, length); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_BASE64 -simdutf_warn_unused result implementation::base64_to_binary( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - if (options & base64_url) { - if (options == base64_options::base64_url_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } else { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } - } else { - if (options == base64_options::base64_default_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } else { - return compress_decode_base64(output, input, length, - options, last_chunk_options); - } - } -} - -simdutf_warn_unused full_result implementation::base64_to_binary_details( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - if (options & base64_url) { - if (options == base64_options::base64_url_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } else { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } - } else { - if (options == base64_options::base64_default_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } else { - return compress_decode_base64(output, input, length, - options, last_chunk_options); - } - } -} - -simdutf_warn_unused result implementation::base64_to_binary( - const char16_t *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - if (options & base64_url) { - if (options == base64_options::base64_url_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } else { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } - } else { - if (options == base64_options::base64_default_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } else { - return compress_decode_base64(output, input, length, - options, last_chunk_options); - } - } -} - -simdutf_warn_unused full_result implementation::base64_to_binary_details( - const char16_t *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - if (options & base64_url) { - if (options == base64_options::base64_url_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } else { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } - } else { - if (options == base64_options::base64_default_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } else { - return compress_decode_base64(output, input, length, - options, last_chunk_options); - } - } -} - -size_t implementation::binary_to_base64(const char *input, size_t length, - char *output, - base64_options options) const noexcept { - return encode_base64(output, input, length, options); -} -#endif // SIMDUTF_FEATURE_BASE64 - -} // namespace arm64 -} // namespace simdutf - -/* begin file src/simdutf/arm64/end.h */ -#undef SIMDUTF_SIMD_HAS_BYTEMASK -/* end file src/simdutf/arm64/end.h */ -/* end file src/arm64/implementation.cpp */ -#endif -#if SIMDUTF_IMPLEMENTATION_FALLBACK -/* begin file src/fallback/implementation.cpp */ -/* begin file src/simdutf/fallback/begin.h */ -// redefining SIMDUTF_IMPLEMENTATION to "fallback" -// #define SIMDUTF_IMPLEMENTATION fallback -/* end file src/simdutf/fallback/begin.h */ - -namespace simdutf { -namespace fallback { - -#if SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_warn_unused int -implementation::detect_encodings(const char *input, - size_t length) const noexcept { - // If there is a BOM, then we trust it. - auto bom_encoding = simdutf::BOM::check_bom(input, length); - if (bom_encoding != encoding_type::unspecified) { - return bom_encoding; - } - int out = 0; - // todo: reimplement as a one-pass algorithm. - if (validate_utf8(input, length)) { - out |= encoding_type::UTF8; - } - if ((length % 2) == 0) { - if (validate_utf16le(reinterpret_cast(input), - length / 2)) { - out |= encoding_type::UTF16_LE; - } - } - if ((length % 4) == 0) { - if (validate_utf32(reinterpret_cast(input), length / 4)) { - out |= encoding_type::UTF32_LE; - } - } - return out; -} -#endif // SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_warn_unused bool -implementation::validate_utf8(const char *buf, size_t len) const noexcept { - return scalar::utf8::validate(buf, len); -} -#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 -simdutf_warn_unused result implementation::validate_utf8_with_errors( - const char *buf, size_t len) const noexcept { - return scalar::utf8::validate_with_errors(buf, len); -} -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_ASCII -simdutf_warn_unused bool -implementation::validate_ascii(const char *buf, size_t len) const noexcept { - return scalar::ascii::validate(buf, len); -} - -simdutf_warn_unused result implementation::validate_ascii_with_errors( - const char *buf, size_t len) const noexcept { - return scalar::ascii::validate_with_errors(buf, len); -} -#endif // SIMDUTF_FEATURE_ASCII - -#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_warn_unused bool -implementation::validate_utf16le(const char16_t *buf, - size_t len) const noexcept { - return scalar::utf16::validate(buf, len); -} -#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused bool -implementation::validate_utf16be(const char16_t *buf, - size_t len) const noexcept { - return scalar::utf16::validate(buf, len); -} - -simdutf_warn_unused result implementation::validate_utf16le_with_errors( - const char16_t *buf, size_t len) const noexcept { - return scalar::utf16::validate_with_errors(buf, len); -} - -simdutf_warn_unused result implementation::validate_utf16be_with_errors( - const char16_t *buf, size_t len) const noexcept { - return scalar::utf16::validate_with_errors(buf, len); -} - -void implementation::to_well_formed_utf16le(const char16_t *input, size_t len, - char16_t *output) const noexcept { - return scalar::utf16::to_well_formed_utf16(input, len, - output); -} - -void implementation::to_well_formed_utf16be(const char16_t *input, size_t len, - char16_t *output) const noexcept { - return scalar::utf16::to_well_formed_utf16(input, len, - output); -} -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_warn_unused bool -implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept { - return scalar::utf32::validate(buf, len); -} -#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused result implementation::validate_utf32_with_errors( - const char32_t *buf, size_t len) const noexcept { - return scalar::utf32::validate_with_errors(buf, len); -} -#endif // SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( - const char *buf, size_t len, char *utf8_output) const noexcept { - return scalar::latin1_to_utf8::convert(buf, len, utf8_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - return scalar::latin1_to_utf16::convert(buf, len, - utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - return scalar::latin1_to_utf16::convert(buf, len, - utf16_output); -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept { - return scalar::latin1_to_utf32::convert(buf, len, utf32_output); -} -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept { - return scalar::utf8_to_latin1::convert(buf, len, latin1_output); -} - -simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors( - const char *buf, size_t len, char *latin1_output) const noexcept { - return scalar::utf8_to_latin1::convert_with_errors(buf, len, latin1_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept { - return scalar::utf8_to_latin1::convert_valid(buf, len, latin1_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - return scalar::utf8_to_utf16::convert(buf, len, - utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - return scalar::utf8_to_utf16::convert(buf, len, - utf16_output); -} - -simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - return scalar::utf8_to_utf16::convert_with_errors( - buf, len, utf16_output); -} - -simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - return scalar::utf8_to_utf16::convert_with_errors( - buf, len, utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - return scalar::utf8_to_utf16::convert_valid(buf, len, - utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - return scalar::utf8_to_utf16::convert_valid(buf, len, - utf16_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::convert_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept { - return scalar::utf8_to_utf32::convert(buf, len, utf32_output); -} - -simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors( - const char *buf, size_t len, char32_t *utf32_output) const noexcept { - return scalar::utf8_to_utf32::convert_with_errors(buf, len, utf32_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( - const char *input, size_t size, char32_t *utf32_output) const noexcept { - return scalar::utf8_to_utf32::convert_valid(input, size, utf32_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - return scalar::utf16_to_latin1::convert(buf, len, - latin1_output); -} - -simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - return scalar::utf16_to_latin1::convert(buf, len, - latin1_output); -} - -simdutf_warn_unused result -implementation::convert_utf16le_to_latin1_with_errors( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - return scalar::utf16_to_latin1::convert_with_errors( - buf, len, latin1_output); -} - -simdutf_warn_unused result -implementation::convert_utf16be_to_latin1_with_errors( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - return scalar::utf16_to_latin1::convert_with_errors( - buf, len, latin1_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - return scalar::utf16_to_latin1::convert_valid( - buf, len, latin1_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - return scalar::utf16_to_latin1::convert_valid(buf, len, - latin1_output); -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return scalar::utf16_to_utf8::convert(buf, len, - utf8_output); -} - -simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return scalar::utf16_to_utf8::convert(buf, len, utf8_output); -} - -simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return scalar::utf16_to_utf8::convert_with_errors( - buf, len, utf8_output); -} - -simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return scalar::utf16_to_utf8::convert_with_errors( - buf, len, utf8_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return scalar::utf16_to_utf8::convert_valid(buf, len, - utf8_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return scalar::utf16_to_utf8::convert_valid(buf, len, - utf8_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( - const char32_t *buf, size_t len, char *latin1_output) const noexcept { - return scalar::utf32_to_latin1::convert(buf, len, latin1_output); -} - -simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors( - const char32_t *buf, size_t len, char *latin1_output) const noexcept { - return scalar::utf32_to_latin1::convert_with_errors(buf, len, latin1_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1( - const char32_t *buf, size_t len, char *latin1_output) const noexcept { - return scalar::utf32_to_latin1::convert_valid(buf, len, latin1_output); -} -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_output) const noexcept { - return scalar::utf32_to_utf8::convert(buf, len, utf8_output); -} - -simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( - const char32_t *buf, size_t len, char *utf8_output) const noexcept { - return scalar::utf32_to_utf8::convert_with_errors(buf, len, utf8_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_output) const noexcept { - return scalar::utf32_to_utf8::convert_valid(buf, len, utf8_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - return scalar::utf32_to_utf16::convert(buf, len, - utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - return scalar::utf32_to_utf16::convert(buf, len, - utf16_output); -} - -simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - return scalar::utf32_to_utf16::convert_with_errors( - buf, len, utf16_output); -} - -simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - return scalar::utf32_to_utf16::convert_with_errors( - buf, len, utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - return scalar::utf32_to_utf16::convert_valid( - buf, len, utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - return scalar::utf32_to_utf16::convert_valid(buf, len, - utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return scalar::utf16_to_utf32::convert(buf, len, - utf32_output); -} - -simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return scalar::utf16_to_utf32::convert(buf, len, - utf32_output); -} - -simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return scalar::utf16_to_utf32::convert_with_errors( - buf, len, utf32_output); -} - -simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return scalar::utf16_to_utf32::convert_with_errors( - buf, len, utf32_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return scalar::utf16_to_utf32::convert_valid( - buf, len, utf32_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return scalar::utf16_to_utf32::convert_valid(buf, len, - utf32_output); -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 -void implementation::change_endianness_utf16(const char16_t *input, - size_t length, - char16_t *output) const noexcept { - scalar::utf16::change_endianness_utf16(input, length, output); -} - -simdutf_warn_unused size_t implementation::count_utf16le( - const char16_t *input, size_t length) const noexcept { - return scalar::utf16::count_code_points(input, length); -} - -simdutf_warn_unused size_t implementation::count_utf16be( - const char16_t *input, size_t length) const noexcept { - return scalar::utf16::count_code_points(input, length); -} -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 -simdutf_warn_unused size_t -implementation::count_utf8(const char *input, size_t length) const noexcept { - return scalar::utf8::count_code_points(input, length); -} -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::latin1_length_from_utf8( - const char *buf, size_t len) const noexcept { - return scalar::utf8::count_code_points(buf, len); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::utf8_length_from_latin1( - const char *input, size_t length) const noexcept { - size_t answer = length; - size_t i = 0; - auto pop = [](uint64_t v) { - return (size_t)(((v >> 7) & UINT64_C(0x0101010101010101)) * - UINT64_C(0x0101010101010101) >> - 56); - }; - for (; i + 32 <= length; i += 32) { - uint64_t v; - memcpy(&v, input + i, 8); - answer += pop(v); - memcpy(&v, input + i + 8, sizeof(v)); - answer += pop(v); - memcpy(&v, input + i + 16, sizeof(v)); - answer += pop(v); - memcpy(&v, input + i + 24, sizeof(v)); - answer += pop(v); - } - for (; i + 8 <= length; i += 8) { - uint64_t v; - memcpy(&v, input + i, sizeof(v)); - answer += pop(v); - } - for (; i + 1 <= length; i += 1) { - answer += static_cast(input[i]) >> 7; - } - return answer; -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t implementation::utf8_length_from_utf16le( - const char16_t *input, size_t length) const noexcept { - return scalar::utf16::utf8_length_from_utf16(input, - length); -} - -simdutf_warn_unused size_t implementation::utf8_length_from_utf16be( - const char16_t *input, size_t length) const noexcept { - return scalar::utf16::utf8_length_from_utf16(input, length); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::utf32_length_from_utf16le( - const char16_t *input, size_t length) const noexcept { - return scalar::utf16::utf32_length_from_utf16(input, - length); -} - -simdutf_warn_unused size_t implementation::utf32_length_from_utf16be( - const char16_t *input, size_t length) const noexcept { - return scalar::utf16::utf32_length_from_utf16(input, length); -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t implementation::utf16_length_from_utf8( - const char *input, size_t length) const noexcept { - return scalar::utf8::utf16_length_from_utf8(input, length); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::utf8_length_from_utf32( - const char32_t *input, size_t length) const noexcept { - return scalar::utf32::utf8_length_from_utf32(input, length); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::utf16_length_from_utf32( - const char32_t *input, size_t length) const noexcept { - return scalar::utf32::utf16_length_from_utf32(input, length); -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::utf32_length_from_utf8( - const char *input, size_t length) const noexcept { - return scalar::utf8::count_code_points(input, length); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_BASE64 -simdutf_warn_unused result implementation::base64_to_binary( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - const bool ignore_garbage = - (options == base64_options::base64_url_accept_garbage) || - (options == base64_options::base64_default_accept_garbage); - while (length > 0 && - scalar::base64::is_ascii_white_space(input[length - 1])) { - length--; - } - size_t equallocation = - length; // location of the first padding character if any - size_t equalsigns = 0; - if (length > 0 && input[length - 1] == '=') { - equallocation = length - 1; - length -= 1; - equalsigns++; - while (length > 0 && - scalar::base64::is_ascii_white_space(input[length - 1])) { - length--; - } - if (length > 0 && input[length - 1] == '=') { - equallocation = length - 1; - equalsigns++; - length -= 1; - } - } - if (length == 0) { - if (!ignore_garbage && equalsigns > 0) { - if (last_chunk_options == last_chunk_handling_options::strict) { - return {BASE64_INPUT_REMAINDER, 0}; - } else if (last_chunk_options == - last_chunk_handling_options::stop_before_partial) { - return {SUCCESS, 0}; - } - return {INVALID_BASE64_CHARACTER, equallocation}; - } - return {SUCCESS, 0}; - } - result r = scalar::base64::base64_tail_decode( - output, input, length, equalsigns, options, last_chunk_options); - if (last_chunk_options != stop_before_partial && - r.error == error_code::SUCCESS && equalsigns > 0 && !ignore_garbage) { - // additional checks - if ((r.count % 3 == 0) || ((r.count % 3) + 1 + equalsigns != 4)) { - return {INVALID_BASE64_CHARACTER, equallocation}; - } - } - return r; -} - -simdutf_warn_unused full_result implementation::base64_to_binary_details( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - const bool ignore_garbage = - (options == base64_options::base64_url_accept_garbage) || - (options == base64_options::base64_default_accept_garbage); - while (length > 0 && - scalar::base64::is_ascii_white_space(input[length - 1])) { - length--; - } - size_t equallocation = - length; // location of the first padding character if any - size_t equalsigns = 0; - if (length > 0 && input[length - 1] == '=') { - equallocation = length - 1; - length -= 1; - equalsigns++; - while (length > 0 && - scalar::base64::is_ascii_white_space(input[length - 1])) { - length--; - } - if (length > 0 && input[length - 1] == '=') { - equallocation = length - 1; - equalsigns++; - length -= 1; - } - } - if (length == 0) { - if (!ignore_garbage && equalsigns > 0) { - if (last_chunk_options == last_chunk_handling_options::strict) { - return {BASE64_INPUT_REMAINDER, 0, 0}; - } else if (last_chunk_options == - last_chunk_handling_options::stop_before_partial) { - return {SUCCESS, 0, 0}; - } - return {INVALID_BASE64_CHARACTER, equallocation, 0}; - } - return {SUCCESS, 0, 0}; - } - full_result r = scalar::base64::base64_tail_decode( - output, input, length, equalsigns, options, last_chunk_options); - if (last_chunk_options != stop_before_partial && - r.error == error_code::SUCCESS && equalsigns > 0 && !ignore_garbage) { - // additional checks - if ((r.output_count % 3 == 0) || - ((r.output_count % 3) + 1 + equalsigns != 4)) { - return {INVALID_BASE64_CHARACTER, equallocation, r.output_count}; - } - } - return r; -} - -simdutf_warn_unused result implementation::base64_to_binary( - const char16_t *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - const bool ignore_garbage = - (options == base64_options::base64_url_accept_garbage) || - (options == base64_options::base64_default_accept_garbage); - while (length > 0 && - scalar::base64::is_ascii_white_space(input[length - 1])) { - length--; - } - size_t equallocation = - length; // location of the first padding character if any - size_t equalsigns = 0; - if (length > 0 && input[length - 1] == '=') { - equallocation = length - 1; - length -= 1; - equalsigns++; - while (length > 0 && - scalar::base64::is_ascii_white_space(input[length - 1])) { - length--; - } - if (length > 0 && input[length - 1] == '=') { - equallocation = length - 1; - equalsigns++; - length -= 1; - } - } - if (length == 0) { - if (!ignore_garbage && equalsigns > 0) { - if (last_chunk_options == last_chunk_handling_options::strict) { - return {BASE64_INPUT_REMAINDER, 0}; - } else if (last_chunk_options == - last_chunk_handling_options::stop_before_partial) { - return {SUCCESS, 0}; - } - return {INVALID_BASE64_CHARACTER, equallocation}; - } - return {SUCCESS, 0}; - } - result r = scalar::base64::base64_tail_decode( - output, input, length, equalsigns, options, last_chunk_options); - if (last_chunk_options != stop_before_partial && - r.error == error_code::SUCCESS && equalsigns > 0 && !ignore_garbage) { - // additional checks - if ((r.count % 3 == 0) || ((r.count % 3) + 1 + equalsigns != 4)) { - return {INVALID_BASE64_CHARACTER, equallocation}; - } - } - return r; -} - -simdutf_warn_unused full_result implementation::base64_to_binary_details( - const char16_t *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - const bool ignore_garbage = - (options == base64_options::base64_url_accept_garbage) || - (options == base64_options::base64_default_accept_garbage); - while (length > 0 && - scalar::base64::is_ascii_white_space(input[length - 1])) { - length--; - } - size_t equallocation = - length; // location of the first padding character if any - size_t equalsigns = 0; - if (length > 0 && input[length - 1] == '=') { - equallocation = length - 1; - length -= 1; - equalsigns++; - while (length > 0 && - scalar::base64::is_ascii_white_space(input[length - 1])) { - length--; - } - if (length > 0 && input[length - 1] == '=') { - equallocation = length - 1; - equalsigns++; - length -= 1; - } - } - if (length == 0) { - if (!ignore_garbage && equalsigns > 0) { - if (last_chunk_options == last_chunk_handling_options::strict) { - return {BASE64_INPUT_REMAINDER, 0, 0}; - } else if (last_chunk_options == - last_chunk_handling_options::stop_before_partial) { - return {SUCCESS, 0, 0}; - } - return {INVALID_BASE64_CHARACTER, equallocation, 0}; - } - return {SUCCESS, 0, 0}; - } - full_result r = scalar::base64::base64_tail_decode( - output, input, length, equalsigns, options, last_chunk_options); - if (last_chunk_options != stop_before_partial && - r.error == error_code::SUCCESS && equalsigns > 0 && !ignore_garbage) { - // additional checks - if ((r.output_count % 3 == 0) || - ((r.output_count % 3) + 1 + equalsigns != 4)) { - return {INVALID_BASE64_CHARACTER, equallocation, r.output_count}; - } - } - return r; -} - -size_t implementation::binary_to_base64(const char *input, size_t length, - char *output, - base64_options options) const noexcept { - return scalar::base64::tail_encode_base64(output, input, length, options); -} -#endif // SIMDUTF_FEATURE_BASE64 - -} // namespace fallback -} // namespace simdutf - -/* begin file src/simdutf/fallback/end.h */ -/* end file src/simdutf/fallback/end.h */ -/* end file src/fallback/implementation.cpp */ -#endif -#if SIMDUTF_IMPLEMENTATION_ICELAKE -/* begin file src/icelake/implementation.cpp */ -#include -#include - -/* begin file src/simdutf/icelake/begin.h */ -// redefining SIMDUTF_IMPLEMENTATION to "icelake" -// #define SIMDUTF_IMPLEMENTATION icelake - -#if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE -// nothing needed. -#else -SIMDUTF_TARGET_ICELAKE -#endif - -#if SIMDUTF_GCC11ORMORE // workaround for - // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 -// clang-format off -SIMDUTF_DISABLE_GCC_WARNING(-Wmaybe-uninitialized) -// clang-format on -#endif // end of workaround -/* end file src/simdutf/icelake/begin.h */ -namespace simdutf { -namespace icelake { -namespace { -#ifndef SIMDUTF_ICELAKE_H - #error "icelake.h must be included" -#endif -using namespace simd; - -/* begin file src/icelake/icelake_macros.inl.cpp */ - -/* - This upcoming macro (SIMDUTF_ICELAKE_TRANSCODE16) takes 16 + 4 bytes (of a - UTF-8 string) and loads all possible 4-byte substring into an AVX512 - register. - - For example if we have bytes abcdefgh... we create following 32-bit lanes - - [abcd|bcde|cdef|defg|efgh|...] - ^ ^ - byte 0 of reg byte 63 of reg -*/ -/** pshufb - # lane{0,1,2} have got bytes: [ 0, 1, 2, 3, 4, 5, 6, 8, 9, 10, - 11, 12, 13, 14, 15] # lane3 has got bytes: [ 16, 17, 18, 19, 4, 5, - 6, 8, 9, 10, 11, 12, 13, 14, 15] - - expand_ver2 = [ - # lane 0: - 0, 1, 2, 3, - 1, 2, 3, 4, - 2, 3, 4, 5, - 3, 4, 5, 6, - - # lane 1: - 4, 5, 6, 7, - 5, 6, 7, 8, - 6, 7, 8, 9, - 7, 8, 9, 10, - - # lane 2: - 8, 9, 10, 11, - 9, 10, 11, 12, - 10, 11, 12, 13, - 11, 12, 13, 14, - - # lane 3 order: 13, 14, 15, 16 14, 15, 16, 17, 15, 16, 17, 18, 16, - 17, 18, 19 12, 13, 14, 15, 13, 14, 15, 0, 14, 15, 0, 1, 15, 0, 1, 2, - ] -*/ - -#define SIMDUTF_ICELAKE_TRANSCODE16(LANE0, LANE1, MASKED) \ - { \ - const __m512i merged = _mm512_mask_mov_epi32(LANE0, 0x1000, LANE1); \ - const __m512i expand_ver2 = _mm512_setr_epi64( \ - 0x0403020103020100, 0x0605040305040302, 0x0807060507060504, \ - 0x0a09080709080706, 0x0c0b0a090b0a0908, 0x0e0d0c0b0d0c0b0a, \ - 0x000f0e0d0f0e0d0c, 0x0201000f01000f0e); \ - const __m512i input = _mm512_shuffle_epi8(merged, expand_ver2); \ - \ - __mmask16 leading_bytes; \ - const __m512i v_0000_00c0 = _mm512_set1_epi32(0xc0); \ - const __m512i t0 = _mm512_and_si512(input, v_0000_00c0); \ - const __m512i v_0000_0080 = _mm512_set1_epi32(0x80); \ - leading_bytes = _mm512_cmpneq_epu32_mask(t0, v_0000_0080); \ - \ - __m512i char_class; \ - char_class = _mm512_srli_epi32(input, 4); \ - /* char_class = ((input >> 4) & 0x0f) | 0x80808000 */ \ - const __m512i v_0000_000f = _mm512_set1_epi32(0x0f); \ - const __m512i v_8080_8000 = _mm512_set1_epi32(0x80808000); \ - char_class = \ - _mm512_ternarylogic_epi32(char_class, v_0000_000f, v_8080_8000, 0xea); \ - \ - const int valid_count = static_cast(count_ones(leading_bytes)); \ - const __m512i utf32 = expanded_utf8_to_utf32(char_class, input); \ - \ - const __m512i out = _mm512_mask_compress_epi32(_mm512_setzero_si512(), \ - leading_bytes, utf32); \ - \ - if (UTF32) { \ - if (MASKED) { \ - const __mmask16 valid = uint16_t((1 << valid_count) - 1); \ - _mm512_mask_storeu_epi32((__m512i *)output, valid, out); \ - } else { \ - _mm512_storeu_si512((__m512i *)output, out); \ - } \ - output += valid_count; \ - } else { \ - if (MASKED) { \ - output += utf32_to_utf16_masked( \ - byteflip, out, valid_count, reinterpret_cast(output)); \ - } else { \ - output += utf32_to_utf16( \ - byteflip, out, valid_count, reinterpret_cast(output)); \ - } \ - } \ - } - -#define SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(INPUT, VALID_COUNT, MASKED) \ - { \ - if (UTF32) { \ - if (MASKED) { \ - const __mmask16 valid_mask = uint16_t((1 << VALID_COUNT) - 1); \ - _mm512_mask_storeu_epi32((__m512i *)output, valid_mask, INPUT); \ - } else { \ - _mm512_storeu_si512((__m512i *)output, INPUT); \ - } \ - output += VALID_COUNT; \ - } else { \ - if (MASKED) { \ - output += utf32_to_utf16_masked( \ - byteflip, INPUT, VALID_COUNT, \ - reinterpret_cast(output)); \ - } else { \ - output += \ - utf32_to_utf16(byteflip, INPUT, VALID_COUNT, \ - reinterpret_cast(output)); \ - } \ - } \ - } - -#define SIMDUTF_ICELAKE_STORE_ASCII(UTF32, utf8, output) \ - if (UTF32) { \ - const __m128i t0 = _mm512_castsi512_si128(utf8); \ - const __m128i t1 = _mm512_extracti32x4_epi32(utf8, 1); \ - const __m128i t2 = _mm512_extracti32x4_epi32(utf8, 2); \ - const __m128i t3 = _mm512_extracti32x4_epi32(utf8, 3); \ - _mm512_storeu_si512((__m512i *)(output + 0 * 16), \ - _mm512_cvtepu8_epi32(t0)); \ - _mm512_storeu_si512((__m512i *)(output + 1 * 16), \ - _mm512_cvtepu8_epi32(t1)); \ - _mm512_storeu_si512((__m512i *)(output + 2 * 16), \ - _mm512_cvtepu8_epi32(t2)); \ - _mm512_storeu_si512((__m512i *)(output + 3 * 16), \ - _mm512_cvtepu8_epi32(t3)); \ - } else { \ - const __m256i h0 = _mm512_castsi512_si256(utf8); \ - const __m256i h1 = _mm512_extracti64x4_epi64(utf8, 1); \ - if (big_endian) { \ - _mm512_storeu_si512( \ - (__m512i *)(output + 0 * 16), \ - _mm512_shuffle_epi8(_mm512_cvtepu8_epi16(h0), byteflip)); \ - _mm512_storeu_si512( \ - (__m512i *)(output + 2 * 16), \ - _mm512_shuffle_epi8(_mm512_cvtepu8_epi16(h1), byteflip)); \ - } else { \ - _mm512_storeu_si512((__m512i *)(output + 0 * 16), \ - _mm512_cvtepu8_epi16(h0)); \ - _mm512_storeu_si512((__m512i *)(output + 2 * 16), \ - _mm512_cvtepu8_epi16(h1)); \ - } \ - } -/* end file src/icelake/icelake_macros.inl.cpp */ -/* begin file src/icelake/icelake_common.inl.cpp */ -// file included directly -/** - * Store the last N bytes of previous followed by 512-N bytes from input. - */ -template __m512i prev(__m512i input, __m512i previous) { - static_assert(N <= 32, "N must be no larger than 32"); - const __m512i movemask = - _mm512_setr_epi32(28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); - const __m512i rotated = _mm512_permutex2var_epi32(input, movemask, previous); -#if SIMDUTF_GCC8 || SIMDUTF_GCC9 - constexpr int shift = 16 - N; // workaround for GCC8,9 - return _mm512_alignr_epi8(input, rotated, shift); -#else - return _mm512_alignr_epi8(input, rotated, 16 - N); -#endif // SIMDUTF_GCC8 || SIMDUTF_GCC9 -} - -template -__m512i shuffle_epi128(__m512i v) { - static_assert((idx0 >= 0 && idx0 <= 3), "idx0 must be in range 0..3"); - static_assert((idx1 >= 0 && idx1 <= 3), "idx1 must be in range 0..3"); - static_assert((idx2 >= 0 && idx2 <= 3), "idx2 must be in range 0..3"); - static_assert((idx3 >= 0 && idx3 <= 3), "idx3 must be in range 0..3"); - - constexpr unsigned shuffle = idx0 | (idx1 << 2) | (idx2 << 4) | (idx3 << 6); - return _mm512_shuffle_i32x4(v, v, shuffle); -} - -template constexpr __m512i broadcast_epi128(__m512i v) { - return shuffle_epi128(v); -} - -simdutf_really_inline __m512i broadcast_128bit_lane(__m128i lane) { - const __m512i tmp = _mm512_castsi128_si512(lane); - - return broadcast_epi128<0>(tmp); -} -/* end file src/icelake/icelake_common.inl.cpp */ -#if SIMDUTF_FEATURE_UTF8 -/* begin file src/icelake/icelake_utf8_common.inl.cpp */ -// Common procedures for both validating and non-validating conversions from -// UTF-8. -enum block_processing_mode { SIMDUTF_FULL, SIMDUTF_TAIL }; - -using utf8_to_utf16_result = std::pair; -using utf8_to_utf32_result = std::pair; - -/* - process_block_utf8_to_utf16 converts up to 64 bytes from 'in' from UTF-8 - to UTF-16. When tail = SIMDUTF_FULL, then the full input buffer (64 bytes) - might be used. When tail = SIMDUTF_TAIL, we take into account 'gap' which - indicates how many input bytes are relevant. - - Returns true when the result is correct, otherwise it returns false. - - The provided in and out pointers are advanced according to how many input - bytes have been processed, upon success. -*/ -template -simdutf_really_inline bool -process_block_utf8_to_utf16(const char *&in, char16_t *&out, size_t gap) { - // constants - __m512i mask_identity = _mm512_set_epi8( - 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, - 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, - 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, - 8, 7, 6, 5, 4, 3, 2, 1, 0); - __m512i mask_c0c0c0c0 = _mm512_set1_epi32(0xc0c0c0c0); - __m512i mask_80808080 = _mm512_set1_epi32(0x80808080); - __m512i mask_f0f0f0f0 = _mm512_set1_epi32(0xf0f0f0f0); - __m512i mask_dfdfdfdf_tail = _mm512_set_epi64( - 0xffffdfdfdfdfdfdf, 0xdfdfdfdfdfdfdfdf, 0xdfdfdfdfdfdfdfdf, - 0xdfdfdfdfdfdfdfdf, 0xdfdfdfdfdfdfdfdf, 0xdfdfdfdfdfdfdfdf, - 0xdfdfdfdfdfdfdfdf, 0xdfdfdfdfdfdfdfdf); - __m512i mask_c2c2c2c2 = _mm512_set1_epi32(0xc2c2c2c2); - __m512i mask_ffffffff = _mm512_set1_epi32(0xffffffff); - __m512i mask_d7c0d7c0 = _mm512_set1_epi32(0xd7c0d7c0); - __m512i mask_dc00dc00 = _mm512_set1_epi32(0xdc00dc00); - __m512i byteflip = _mm512_setr_epi64(0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809); - // Note that 'tail' is a compile-time constant ! - __mmask64 b = - (tail == SIMDUTF_FULL) ? 0xFFFFFFFFFFFFFFFF : (uint64_t(1) << gap) - 1; - __m512i input = (tail == SIMDUTF_FULL) ? _mm512_loadu_si512(in) - : _mm512_maskz_loadu_epi8(b, in); - __mmask64 m1 = (tail == SIMDUTF_FULL) - ? _mm512_cmplt_epu8_mask(input, mask_80808080) - : _mm512_mask_cmplt_epu8_mask(b, input, mask_80808080); - if (_ktestc_mask64_u8(m1, - b)) { // NOT(m1) AND b -- if all zeroes, then all ASCII - // alternatively, we could do 'if (m1 == b) { ' - if (tail == SIMDUTF_FULL) { - in += 64; // consumed 64 bytes - // we convert a full 64-byte block, writing 128 bytes. - __m512i input1 = _mm512_cvtepu8_epi16(_mm512_castsi512_si256(input)); - if (big_endian) { - input1 = _mm512_shuffle_epi8(input1, byteflip); - } - _mm512_storeu_si512(out, input1); - out += 32; - __m512i input2 = - _mm512_cvtepu8_epi16(_mm512_extracti64x4_epi64(input, 1)); - if (big_endian) { - input2 = _mm512_shuffle_epi8(input2, byteflip); - } - _mm512_storeu_si512(out, input2); - out += 32; - return true; // we are done - } else { - in += gap; - if (gap <= 32) { - __m512i input1 = _mm512_cvtepu8_epi16(_mm512_castsi512_si256(input)); - if (big_endian) { - input1 = _mm512_shuffle_epi8(input1, byteflip); - } - _mm512_mask_storeu_epi16(out, __mmask32((uint64_t(1) << (gap)) - 1), - input1); - out += gap; - } else { - __m512i input1 = _mm512_cvtepu8_epi16(_mm512_castsi512_si256(input)); - if (big_endian) { - input1 = _mm512_shuffle_epi8(input1, byteflip); - } - _mm512_storeu_si512(out, input1); - out += 32; - __m512i input2 = - _mm512_cvtepu8_epi16(_mm512_extracti64x4_epi64(input, 1)); - if (big_endian) { - input2 = _mm512_shuffle_epi8(input2, byteflip); - } - _mm512_mask_storeu_epi16( - out, __mmask32((uint32_t(1) << (gap - 32)) - 1), input2); - out += gap - 32; - } - return true; // we are done - } - } - // classify characters further - __mmask64 m234 = _mm512_cmp_epu8_mask( - mask_c0c0c0c0, input, - _MM_CMPINT_LE); // 0xc0 <= input, 2, 3, or 4 leading byte - __mmask64 m34 = - _mm512_cmp_epu8_mask(mask_dfdfdfdf_tail, input, - _MM_CMPINT_LT); // 0xdf < input, 3 or 4 leading byte - - __mmask64 milltwobytes = _mm512_mask_cmp_epu8_mask( - m234, input, mask_c2c2c2c2, - _MM_CMPINT_LT); // 0xc0 <= input < 0xc2 (illegal two byte sequence) - // Overlong 2-byte sequence - if (_ktestz_mask64_u8(milltwobytes, milltwobytes) == 0) { - // Overlong 2-byte sequence - return false; - } - if (_ktestz_mask64_u8(m34, m34) == 0) { - // We have a 3-byte sequence and/or a 2-byte sequence, or possibly even a - // 4-byte sequence! - __mmask64 m4 = _mm512_cmp_epu8_mask( - input, mask_f0f0f0f0, - _MM_CMPINT_NLT); // 0xf0 <= zmm0 (4 byte start bytes) - - __mmask64 mask_not_ascii = (tail == SIMDUTF_FULL) - ? _knot_mask64(m1) - : _kand_mask64(_knot_mask64(m1), b); - - __mmask64 mp1 = _kshiftli_mask64(m234, 1); - __mmask64 mp2 = _kshiftli_mask64(m34, 2); - // We could do it as follows... - // if (_kortestz_mask64_u8(m4,m4)) { // compute the bitwise OR of the 64-bit - // masks a and b and return 1 if all zeroes but GCC generates better code - // when we do: - if (m4 == 0) { // compute the bitwise OR of the 64-bit masks a and b and - // return 1 if all zeroes - // Fast path with 1,2,3 bytes - __mmask64 mc = _kor_mask64(mp1, mp2); // expected continuation bytes - __mmask64 m1234 = _kor_mask64(m1, m234); - // mismatched continuation bytes: - if (tail == SIMDUTF_FULL) { - __mmask64 xnormcm1234 = _kxnor_mask64( - mc, - m1234); // XNOR of mc and m1234 should be all zero if they differ - // the presence of a 1 bit indicates that they overlap. - // _kortestz_mask64_u8: compute the bitwise OR of 64-bit masksand return - // 1 if all zeroes. - if (!_kortestz_mask64_u8(xnormcm1234, xnormcm1234)) { - return false; - } - } else { - __mmask64 bxorm1234 = _kxor_mask64(b, m1234); - if (mc != bxorm1234) { - return false; - } - } - // mend: identifying the last bytes of each sequence to be decoded - __mmask64 mend = _kshiftri_mask64(m1234, 1); - if (tail != SIMDUTF_FULL) { - mend = _kor_mask64(mend, (uint64_t(1) << (gap - 1))); - } - - __m512i last_and_third = _mm512_maskz_compress_epi8(mend, mask_identity); - __m512i last_and_thirdu16 = - _mm512_cvtepu8_epi16(_mm512_castsi512_si256(last_and_third)); - - __m512i nonasciitags = _mm512_maskz_mov_epi8( - mask_not_ascii, mask_c0c0c0c0); // ASCII: 00000000 other: 11000000 - __m512i clearedbytes = _mm512_andnot_si512( - nonasciitags, input); // high two bits cleared where not ASCII - __m512i lastbytes = _mm512_maskz_permutexvar_epi8( - 0x5555555555555555, last_and_thirdu16, - clearedbytes); // the last byte of each character - - __mmask64 mask_before_non_ascii = _kshiftri_mask64( - mask_not_ascii, 1); // bytes that precede non-ASCII bytes - __m512i indexofsecondlastbytes = _mm512_add_epi16( - mask_ffffffff, last_and_thirdu16); // indices of the second last bytes - __m512i beforeasciibytes = - _mm512_maskz_mov_epi8(mask_before_non_ascii, clearedbytes); - __m512i secondlastbytes = _mm512_maskz_permutexvar_epi8( - 0x5555555555555555, indexofsecondlastbytes, - beforeasciibytes); // the second last bytes (of two, three byte seq, - // surrogates) - secondlastbytes = - _mm512_slli_epi16(secondlastbytes, 6); // shifted into position - - __m512i indexofthirdlastbytes = _mm512_add_epi16( - mask_ffffffff, - indexofsecondlastbytes); // indices of the second last bytes - __m512i thirdlastbyte = - _mm512_maskz_mov_epi8(m34, - clearedbytes); // only those that are the third - // last byte of a sequence - __m512i thirdlastbytes = _mm512_maskz_permutexvar_epi8( - 0x5555555555555555, indexofthirdlastbytes, - thirdlastbyte); // the third last bytes (of three byte sequences, hi - // surrogate) - thirdlastbytes = - _mm512_slli_epi16(thirdlastbytes, 12); // shifted into position - __m512i Wout = _mm512_ternarylogic_epi32(lastbytes, secondlastbytes, - thirdlastbytes, 254); - // the elements of Wout excluding the last element if it happens to be a - // high surrogate: - - __mmask64 mprocessed = - (tail == SIMDUTF_FULL) - ? _pdep_u64(0xFFFFFFFF, mend) - : _pdep_u64( - 0xFFFFFFFF, - _kand_mask64( - mend, b)); // we adjust mend at the end of the output. - - // Encodings out of range... - { - // the location of 3-byte sequence start bytes in the input - __mmask64 m3 = m34 & (b ^ m4); - // code units in Wout corresponding to 3-byte sequences. - __mmask32 M3 = __mmask32(_pext_u64(m3 << 2, mend)); - __m512i mask_08000800 = _mm512_set1_epi32(0x08000800); - __mmask32 Msmall800 = - _mm512_mask_cmplt_epu16_mask(M3, Wout, mask_08000800); - __m512i mask_d800d800 = _mm512_set1_epi32(0xd800d800); - __m512i Moutminusd800 = _mm512_sub_epi16(Wout, mask_d800d800); - __mmask32 M3s = - _mm512_mask_cmplt_epu16_mask(M3, Moutminusd800, mask_08000800); - if (_kor_mask32(Msmall800, M3s)) { - return false; - } - } - int64_t nout = _mm_popcnt_u64(mprocessed); - in += 64 - _lzcnt_u64(mprocessed); - if (big_endian) { - Wout = _mm512_shuffle_epi8(Wout, byteflip); - } - _mm512_mask_storeu_epi16(out, __mmask32((uint64_t(1) << nout) - 1), Wout); - out += nout; - return true; // ok - } - // - // We have a 4-byte sequence, this is the general case. - // Slow! - __mmask64 mp3 = _kshiftli_mask64(m4, 3); - __mmask64 mc = - _kor_mask64(_kor_mask64(mp1, mp2), mp3); // expected continuation bytes - __mmask64 m1234 = _kor_mask64(m1, m234); - - // mend: identifying the last bytes of each sequence to be decoded - __mmask64 mend = - _kor_mask64(_kshiftri_mask64(_kor_mask64(mp3, m1234), 1), mp3); - if (tail != SIMDUTF_FULL) { - mend = _kor_mask64(mend, __mmask64(uint64_t(1) << (gap - 1))); - } - __m512i last_and_third = _mm512_maskz_compress_epi8(mend, mask_identity); - __m512i last_and_thirdu16 = - _mm512_cvtepu8_epi16(_mm512_castsi512_si256(last_and_third)); - - __m512i nonasciitags = _mm512_maskz_mov_epi8( - mask_not_ascii, mask_c0c0c0c0); // ASCII: 00000000 other: 11000000 - __m512i clearedbytes = _mm512_andnot_si512( - nonasciitags, input); // high two bits cleared where not ASCII - __m512i lastbytes = _mm512_maskz_permutexvar_epi8( - 0x5555555555555555, last_and_thirdu16, - clearedbytes); // the last byte of each character - - __mmask64 mask_before_non_ascii = _kshiftri_mask64( - mask_not_ascii, 1); // bytes that precede non-ASCII bytes - __m512i indexofsecondlastbytes = _mm512_add_epi16( - mask_ffffffff, last_and_thirdu16); // indices of the second last bytes - __m512i beforeasciibytes = - _mm512_maskz_mov_epi8(mask_before_non_ascii, clearedbytes); - __m512i secondlastbytes = _mm512_maskz_permutexvar_epi8( - 0x5555555555555555, indexofsecondlastbytes, - beforeasciibytes); // the second last bytes (of two, three byte seq, - // surrogates) - secondlastbytes = - _mm512_slli_epi16(secondlastbytes, 6); // shifted into position - - __m512i indexofthirdlastbytes = _mm512_add_epi16( - mask_ffffffff, - indexofsecondlastbytes); // indices of the second last bytes - __m512i thirdlastbyte = _mm512_maskz_mov_epi8( - m34, - clearedbytes); // only those that are the third last byte of a sequence - __m512i thirdlastbytes = _mm512_maskz_permutexvar_epi8( - 0x5555555555555555, indexofthirdlastbytes, - thirdlastbyte); // the third last bytes (of three byte sequences, hi - // surrogate) - thirdlastbytes = - _mm512_slli_epi16(thirdlastbytes, 12); // shifted into position - __m512i thirdsecondandlastbytes = _mm512_ternarylogic_epi32( - lastbytes, secondlastbytes, thirdlastbytes, 254); - uint64_t Mlo_uint64 = _pext_u64(mp3, mend); - __mmask32 Mlo = __mmask32(Mlo_uint64); - __mmask32 Mhi = __mmask32(Mlo_uint64 >> 1); - __m512i lo_surr_mask = _mm512_maskz_mov_epi16( - Mlo, - mask_dc00dc00); // lo surr: 1101110000000000, other: 0000000000000000 - __m512i shifted4_thirdsecondandlastbytes = - _mm512_srli_epi16(thirdsecondandlastbytes, - 4); // hi surr: 00000WVUTSRQPNML vuts = WVUTS - 1 - __m512i tagged_lo_surrogates = _mm512_or_si512( - thirdsecondandlastbytes, - lo_surr_mask); // lo surr: 110111KJHGFEDCBA, other: unchanged - __m512i Wout = _mm512_mask_add_epi16( - tagged_lo_surrogates, Mhi, shifted4_thirdsecondandlastbytes, - mask_d7c0d7c0); // hi sur: 110110vutsRQPNML, other: unchanged - // the elements of Wout excluding the last element if it happens to be a - // high surrogate: - __mmask32 Mout = ~(Mhi & 0x80000000); - __mmask64 mprocessed = - (tail == SIMDUTF_FULL) - ? _pdep_u64(Mout, mend) - : _pdep_u64( - Mout, - _kand_mask64(mend, - b)); // we adjust mend at the end of the output. - - // mismatched continuation bytes: - if (tail == SIMDUTF_FULL) { - __mmask64 xnormcm1234 = _kxnor_mask64( - mc, m1234); // XNOR of mc and m1234 should be all zero if they differ - // the presence of a 1 bit indicates that they overlap. - // _kortestz_mask64_u8: compute the bitwise OR of 64-bit masksand return 1 - // if all zeroes. - if (!_kortestz_mask64_u8(xnormcm1234, xnormcm1234)) { - return false; - } - } else { - __mmask64 bxorm1234 = _kxor_mask64(b, m1234); - if (mc != bxorm1234) { - return false; - } - } - // Encodings out of range... - { - // the location of 3-byte sequence start bytes in the input - __mmask64 m3 = m34 & (b ^ m4); - // code units in Wout corresponding to 3-byte sequences. - __mmask32 M3 = __mmask32(_pext_u64(m3 << 2, mend)); - __m512i mask_08000800 = _mm512_set1_epi32(0x08000800); - __mmask32 Msmall800 = - _mm512_mask_cmplt_epu16_mask(M3, Wout, mask_08000800); - __m512i mask_d800d800 = _mm512_set1_epi32(0xd800d800); - __m512i Moutminusd800 = _mm512_sub_epi16(Wout, mask_d800d800); - __mmask32 M3s = - _mm512_mask_cmplt_epu16_mask(M3, Moutminusd800, mask_08000800); - __m512i mask_04000400 = _mm512_set1_epi32(0x04000400); - __mmask32 M4s = - _mm512_mask_cmpge_epu16_mask(Mhi, Moutminusd800, mask_04000400); - if (!_kortestz_mask32_u8(M4s, _kor_mask32(Msmall800, M3s))) { - return false; - } - } - in += 64 - _lzcnt_u64(mprocessed); - int64_t nout = _mm_popcnt_u64(mprocessed); - if (big_endian) { - Wout = _mm512_shuffle_epi8(Wout, byteflip); - } - _mm512_mask_storeu_epi16(out, __mmask32((uint64_t(1) << nout) - 1), Wout); - out += nout; - return true; // ok - } - // Fast path 2: all ASCII or 2 byte - __mmask64 continuation_or_ascii = (tail == SIMDUTF_FULL) - ? _knot_mask64(m234) - : _kand_mask64(_knot_mask64(m234), b); - // on top of -0xc0 we subtract -2 which we get back later of the - // continuation byte tags - __m512i leading2byte = _mm512_maskz_sub_epi8(m234, input, mask_c2c2c2c2); - __mmask64 leading = tail == (tail == SIMDUTF_FULL) - ? _kor_mask64(m1, m234) - : _kand_mask64(_kor_mask64(m1, m234), - b); // first bytes of each sequence - if (tail == SIMDUTF_FULL) { - __mmask64 xnor234leading = - _kxnor_mask64(_kshiftli_mask64(m234, 1), leading); - if (!_kortestz_mask64_u8(xnor234leading, xnor234leading)) { - return false; - } - } else { - __mmask64 bxorleading = _kxor_mask64(b, leading); - if (_kshiftli_mask64(m234, 1) != bxorleading) { - return false; - } - } - // - if (tail == SIMDUTF_FULL) { - // In the two-byte/ASCII scenario, we are easily latency bound, so we want - // to increment the input buffer as quickly as possible. - // We process 32 bytes unless the byte at index 32 is a continuation byte, - // in which case we include it as well for a total of 33 bytes. - // Note that if x is an ASCII byte, then the following is false: - // int8_t(x) <= int8_t(0xc0) under two's complement. - in += 32; - if (int8_t(*in) <= int8_t(0xc0)) - in++; - // The alternative is to do - // in += 64 - _lzcnt_u64(_pdep_u64(0xFFFFFFFF, continuation_or_ascii)); - // but it requires loading the input, doing the mask computation, and - // converting back the mask to a general register. It just takes too long, - // leaving the processor likely to be idle. - } else { - in += 64 - _lzcnt_u64(_pdep_u64(0xFFFFFFFF, continuation_or_ascii)); - } - __m512i lead = _mm512_maskz_compress_epi8( - leading, leading2byte); // will contain zero for ascii, and the data - lead = _mm512_cvtepu8_epi16( - _mm512_castsi512_si256(lead)); // ... zero extended into code units - __m512i follow = _mm512_maskz_compress_epi8( - continuation_or_ascii, input); // the last bytes of each sequence - follow = _mm512_cvtepu8_epi16( - _mm512_castsi512_si256(follow)); // ... zero extended into code units - lead = _mm512_slli_epi16(lead, 6); // shifted into position - __m512i final = _mm512_add_epi16(follow, lead); // combining lead and follow - - if (big_endian) { - final = _mm512_shuffle_epi8(final, byteflip); - } - if (tail == SIMDUTF_FULL) { - // Next part is UTF-16 specific and can be generalized to UTF-32. - int nout = _mm_popcnt_u32(uint32_t(leading)); - _mm512_mask_storeu_epi16(out, __mmask32((uint64_t(1) << nout) - 1), final); - out += nout; // UTF-8 to UTF-16 is only expansionary in this case. - } else { - int nout = int(_mm_popcnt_u64(_pdep_u64(0xFFFFFFFF, leading))); - _mm512_mask_storeu_epi16(out, __mmask32((uint64_t(1) << nout) - 1), final); - out += nout; // UTF-8 to UTF-16 is only expansionary in this case. - } - - return true; // we are fine. -} - -/* - utf32_to_utf16_masked converts `count` lower UTF-32 code units - from input `utf32` into UTF-16. It differs from utf32_to_utf16 - in that it 'masks' the writes. - - Returns how many 16-bit code units were stored. - - byteflip is used for flipping 16-bit code units, and it should be - __m512i byteflip = _mm512_setr_epi64( - 0x0607040502030001, - 0x0e0f0c0d0a0b0809, - 0x0607040502030001, - 0x0e0f0c0d0a0b0809, - 0x0607040502030001, - 0x0e0f0c0d0a0b0809, - 0x0607040502030001, - 0x0e0f0c0d0a0b0809 - ); - We pass it to the (always inlined) function to encourage the compiler to - keep the value in a (constant) register. -*/ -template -simdutf_really_inline size_t utf32_to_utf16_masked(const __m512i byteflip, - __m512i utf32, - unsigned int count, - char16_t *output) { - - const __mmask16 valid = uint16_t((1 << count) - 1); - // 1. check if we have any surrogate pairs - const __m512i v_0000_ffff = _mm512_set1_epi32(0x0000ffff); - const __mmask16 sp_mask = - _mm512_mask_cmpgt_epu32_mask(valid, utf32, v_0000_ffff); - - if (sp_mask == 0) { - if (big_endian) { - _mm256_mask_storeu_epi16( - (__m256i *)output, valid, - _mm256_shuffle_epi8(_mm512_cvtepi32_epi16(utf32), - _mm512_castsi512_si256(byteflip))); - - } else { - _mm256_mask_storeu_epi16((__m256i *)output, valid, - _mm512_cvtepi32_epi16(utf32)); - } - return count; - } - - { - // build surrogate pair code units in 32-bit lanes - - // t0 = 8 x [000000000000aaaa|aaaaaabbbbbbbbbb] - const __m512i v_0001_0000 = _mm512_set1_epi32(0x00010000); - const __m512i t0 = _mm512_sub_epi32(utf32, v_0001_0000); - - // t1 = 8 x [000000aaaaaaaaaa|bbbbbbbbbb000000] - const __m512i t1 = _mm512_slli_epi32(t0, 6); - - // t2 = 8 x [000000aaaaaaaaaa|aaaaaabbbbbbbbbb] -- copy hi word from t1 - // to t0 - // 0xe4 = (t1 and v_ffff_0000) or (t0 and not v_ffff_0000) - const __m512i v_ffff_0000 = _mm512_set1_epi32(0xffff0000); - const __m512i t2 = _mm512_ternarylogic_epi32(t1, t0, v_ffff_0000, 0xe4); - - // t2 = 8 x [110110aaaaaaaaaa|110111bbbbbbbbbb] -- copy hi word from t1 - // to t0 - // 0xba = (t2 and not v_fc00_fc000) or v_d800_dc00 - const __m512i v_fc00_fc00 = _mm512_set1_epi32(0xfc00fc00); - const __m512i v_d800_dc00 = _mm512_set1_epi32(0xd800dc00); - const __m512i t3 = - _mm512_ternarylogic_epi32(t2, v_fc00_fc00, v_d800_dc00, 0xba); - const __m512i t4 = _mm512_mask_blend_epi32(sp_mask, utf32, t3); - __m512i t5 = _mm512_ror_epi32(t4, 16); - // Here we want to trim all of the upper 16-bit code units from the 2-byte - // characters represented as 4-byte values. We can compute it from - // sp_mask or the following... It can be more optimized! - const __mmask32 nonzero = _kor_mask32( - 0xaaaaaaaa, _mm512_cmpneq_epi16_mask(t5, _mm512_setzero_si512())); - const __mmask32 nonzero_masked = - _kand_mask32(nonzero, __mmask32((uint64_t(1) << (2 * count)) - 1)); - if (big_endian) { - t5 = _mm512_shuffle_epi8(t5, byteflip); - } - // we deliberately avoid _mm512_mask_compressstoreu_epi16 for portability - // (AMD Zen4 has terrible performance with it, it is effectively broken) - __m512i compressed = _mm512_maskz_compress_epi16(nonzero_masked, t5); - _mm512_mask_storeu_epi16( - output, _bzhi_u32(0xFFFFFFFF, count + _mm_popcnt_u32(sp_mask)), - compressed); - //_mm512_mask_compressstoreu_epi16(output, nonzero_masked, t5); - } - - return count + static_cast(count_ones(sp_mask)); -} - -/* - utf32_to_utf16 converts `count` lower UTF-32 code units - from input `utf32` into UTF-16. It may overflow. - - Returns how many 16-bit code units were stored. - - byteflip is used for flipping 16-bit code units, and it should be - __m512i byteflip = _mm512_setr_epi64( - 0x0607040502030001, - 0x0e0f0c0d0a0b0809, - 0x0607040502030001, - 0x0e0f0c0d0a0b0809, - 0x0607040502030001, - 0x0e0f0c0d0a0b0809, - 0x0607040502030001, - 0x0e0f0c0d0a0b0809 - ); - We pass it to the (always inlined) function to encourage the compiler to - keep the value in a (constant) register. -*/ -template -simdutf_really_inline size_t utf32_to_utf16(const __m512i byteflip, - __m512i utf32, unsigned int count, - char16_t *output) { - // check if we have any surrogate pairs - const __m512i v_0000_ffff = _mm512_set1_epi32(0x0000ffff); - const __mmask16 sp_mask = _mm512_cmpgt_epu32_mask(utf32, v_0000_ffff); - - if (sp_mask == 0) { - // technically, it should be _mm256_storeu_epi16 - if (big_endian) { - _mm256_storeu_si256( - (__m256i *)output, - _mm256_shuffle_epi8(_mm512_cvtepi32_epi16(utf32), - _mm512_castsi512_si256(byteflip))); - } else { - _mm256_storeu_si256((__m256i *)output, _mm512_cvtepi32_epi16(utf32)); - } - return count; - } - - { - // build surrogate pair code units in 32-bit lanes - - // t0 = 8 x [000000000000aaaa|aaaaaabbbbbbbbbb] - const __m512i v_0001_0000 = _mm512_set1_epi32(0x00010000); - const __m512i t0 = _mm512_sub_epi32(utf32, v_0001_0000); - - // t1 = 8 x [000000aaaaaaaaaa|bbbbbbbbbb000000] - const __m512i t1 = _mm512_slli_epi32(t0, 6); - - // t2 = 8 x [000000aaaaaaaaaa|aaaaaabbbbbbbbbb] -- copy hi word from t1 - // to t0 - // 0xe4 = (t1 and v_ffff_0000) or (t0 and not v_ffff_0000) - const __m512i v_ffff_0000 = _mm512_set1_epi32(0xffff0000); - const __m512i t2 = _mm512_ternarylogic_epi32(t1, t0, v_ffff_0000, 0xe4); - - // t2 = 8 x [110110aaaaaaaaaa|110111bbbbbbbbbb] -- copy hi word from t1 - // to t0 - // 0xba = (t2 and not v_fc00_fc000) or v_d800_dc00 - const __m512i v_fc00_fc00 = _mm512_set1_epi32(0xfc00fc00); - const __m512i v_d800_dc00 = _mm512_set1_epi32(0xd800dc00); - const __m512i t3 = - _mm512_ternarylogic_epi32(t2, v_fc00_fc00, v_d800_dc00, 0xba); - const __m512i t4 = _mm512_mask_blend_epi32(sp_mask, utf32, t3); - __m512i t5 = _mm512_ror_epi32(t4, 16); - const __mmask32 nonzero = _kor_mask32( - 0xaaaaaaaa, _mm512_cmpneq_epi16_mask(t5, _mm512_setzero_si512())); - if (big_endian) { - t5 = _mm512_shuffle_epi8(t5, byteflip); - } - // we deliberately avoid _mm512_mask_compressstoreu_epi16 for portability - // (zen4) - __m512i compressed = _mm512_maskz_compress_epi16(nonzero, t5); - _mm512_mask_storeu_epi16( - output, - (1 << (count + static_cast(count_ones(sp_mask)))) - 1, - compressed); - //_mm512_mask_compressstoreu_epi16(output, nonzero, t5); - } - - return count + static_cast(count_ones(sp_mask)); -} - -/* - expanded_utf8_to_utf32 converts expanded UTF-8 characters (`utf8`) - stored at separate 32-bit lanes. - - For each lane we have also a character class (`char_class), given in form - 0x8080800N, where N is 4 highest bits from the leading byte; 0x80 resets - corresponding bytes during pshufb. -*/ -simdutf_really_inline __m512i expanded_utf8_to_utf32(__m512i char_class, - __m512i utf8) { - /* - Input: - - utf8: bytes stored at separate 32-bit code units - - valid: which code units have valid UTF-8 characters - - Bit layout of single word. We show 4 cases for each possible - UTF-8 character encoding. The `?` denotes bits we must not - assume their value. - - |10dd.dddd|10cc.cccc|10bb.bbbb|1111.0aaa| 4-byte char - |????.????|10cc.cccc|10bb.bbbb|1110.aaaa| 3-byte char - |????.????|????.????|10bb.bbbb|110a.aaaa| 2-byte char - |????.????|????.????|????.????|0aaa.aaaa| ASCII char - byte 3 byte 2 byte 1 byte 0 - */ - - /* 1. Reset control bits of continuation bytes and the MSB - of the leading byte; this makes all bytes unsigned (and - does not alter ASCII char). - - |00dd.dddd|00cc.cccc|00bb.bbbb|0111.0aaa| 4-byte char - |00??.????|00cc.cccc|00bb.bbbb|0110.aaaa| 3-byte char - |00??.????|00??.????|00bb.bbbb|010a.aaaa| 2-byte char - |00??.????|00??.????|00??.????|0aaa.aaaa| ASCII char - ^^ ^^ ^^ ^ - */ - __m512i values; - const __m512i v_3f3f_3f7f = _mm512_set1_epi32(0x3f3f3f7f); - values = _mm512_and_si512(utf8, v_3f3f_3f7f); - - /* 2. Swap and join fields A-B and C-D - - |0000.cccc|ccdd.dddd|0001.110a|aabb.bbbb| 4-byte char - |0000.cccc|cc??.????|0001.10aa|aabb.bbbb| 3-byte char - |0000.????|????.????|0001.0aaa|aabb.bbbb| 2-byte char - |0000.????|????.????|000a.aaaa|aa??.????| ASCII char */ - const __m512i v_0140_0140 = _mm512_set1_epi32(0x01400140); - values = _mm512_maddubs_epi16(values, v_0140_0140); - - /* 3. Swap and join fields AB & CD - - |0000.0001|110a.aabb|bbbb.cccc|ccdd.dddd| 4-byte char - |0000.0001|10aa.aabb|bbbb.cccc|cc??.????| 3-byte char - |0000.0001|0aaa.aabb|bbbb.????|????.????| 2-byte char - |0000.000a|aaaa.aa??|????.????|????.????| ASCII char */ - const __m512i v_0001_1000 = _mm512_set1_epi32(0x00011000); - values = _mm512_madd_epi16(values, v_0001_1000); - - /* 4. Shift left the values by variable amounts to reset highest UTF-8 bits - |aaab.bbbb|bccc.cccd|dddd.d000|0000.0000| 4-byte char -- by 11 - |aaaa.bbbb|bbcc.cccc|????.??00|0000.0000| 3-byte char -- by 10 - |aaaa.abbb|bbb?.????|????.???0|0000.0000| 2-byte char -- by 9 - |aaaa.aaa?|????.????|????.????|?000.0000| ASCII char -- by 7 */ - { - /** pshufb - - continuation = 0 - ascii = 7 - _2_bytes = 9 - _3_bytes = 10 - _4_bytes = 11 - - shift_left_v3 = 4 * [ - ascii, # 0000 - ascii, # 0001 - ascii, # 0010 - ascii, # 0011 - ascii, # 0100 - ascii, # 0101 - ascii, # 0110 - ascii, # 0111 - continuation, # 1000 - continuation, # 1001 - continuation, # 1010 - continuation, # 1011 - _2_bytes, # 1100 - _2_bytes, # 1101 - _3_bytes, # 1110 - _4_bytes, # 1111 - ] */ - const __m512i shift_left_v3 = _mm512_setr_epi64( - 0x0707070707070707, 0x0b0a090900000000, 0x0707070707070707, - 0x0b0a090900000000, 0x0707070707070707, 0x0b0a090900000000, - 0x0707070707070707, 0x0b0a090900000000); - - const __m512i shift = _mm512_shuffle_epi8(shift_left_v3, char_class); - values = _mm512_sllv_epi32(values, shift); - } - - /* 5. Shift right the values by variable amounts to reset lowest bits - |0000.0000|000a.aabb|bbbb.cccc|ccdd.dddd| 4-byte char -- by 11 - |0000.0000|0000.0000|aaaa.bbbb|bbcc.cccc| 3-byte char -- by 16 - |0000.0000|0000.0000|0000.0aaa|aabb.bbbb| 2-byte char -- by 21 - |0000.0000|0000.0000|0000.0000|0aaa.aaaa| ASCII char -- by 25 */ - { - // 4 * [25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 21, 21, 16, 11] - const __m512i shift_right = _mm512_setr_epi64( - 0x1919191919191919, 0x0b10151500000000, 0x1919191919191919, - 0x0b10151500000000, 0x1919191919191919, 0x0b10151500000000, - 0x1919191919191919, 0x0b10151500000000); - - const __m512i shift = _mm512_shuffle_epi8(shift_right, char_class); - values = _mm512_srlv_epi32(values, shift); - } - - return values; -} - -simdutf_really_inline __m512i expand_and_identify(__m512i lane0, __m512i lane1, - int &count) { - const __m512i merged = _mm512_mask_mov_epi32(lane0, 0x1000, lane1); - const __m512i expand_ver2 = _mm512_setr_epi64( - 0x0403020103020100, 0x0605040305040302, 0x0807060507060504, - 0x0a09080709080706, 0x0c0b0a090b0a0908, 0x0e0d0c0b0d0c0b0a, - 0x000f0e0d0f0e0d0c, 0x0201000f01000f0e); - const __m512i input = _mm512_shuffle_epi8(merged, expand_ver2); - const __m512i v_0000_00c0 = _mm512_set1_epi32(0xc0); - const __m512i t0 = _mm512_and_si512(input, v_0000_00c0); - const __m512i v_0000_0080 = _mm512_set1_epi32(0x80); - const __mmask16 leading_bytes = _mm512_cmpneq_epu32_mask(t0, v_0000_0080); - count = static_cast(count_ones(leading_bytes)); - return _mm512_mask_compress_epi32(_mm512_setzero_si512(), leading_bytes, - input); -} - -simdutf_really_inline __m512i expand_utf8_to_utf32(__m512i input) { - __m512i char_class = _mm512_srli_epi32(input, 4); - /* char_class = ((input >> 4) & 0x0f) | 0x80808000 */ - const __m512i v_0000_000f = _mm512_set1_epi32(0x0f); - const __m512i v_8080_8000 = _mm512_set1_epi32(0x80808000); - char_class = - _mm512_ternarylogic_epi32(char_class, v_0000_000f, v_8080_8000, 0xea); - return expanded_utf8_to_utf32(char_class, input); -} -/* end file src/icelake/icelake_utf8_common.inl.cpp */ -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING -/* begin file src/icelake/icelake_utf8_validation.inl.cpp */ -// file included directly - -simdutf_really_inline __m512i check_special_cases(__m512i input, - const __m512i prev1) { - __m512i mask1 = _mm512_setr_epi64(0x0202020202020202, 0x4915012180808080, - 0x0202020202020202, 0x4915012180808080, - 0x0202020202020202, 0x4915012180808080, - 0x0202020202020202, 0x4915012180808080); - const __m512i v_0f = _mm512_set1_epi8(0x0f); - __m512i index1 = _mm512_and_si512(_mm512_srli_epi16(prev1, 4), v_0f); - - __m512i byte_1_high = _mm512_shuffle_epi8(mask1, index1); - __m512i mask2 = _mm512_setr_epi64(0xcbcbcb8b8383a3e7, 0xcbcbdbcbcbcbcbcb, - 0xcbcbcb8b8383a3e7, 0xcbcbdbcbcbcbcbcb, - 0xcbcbcb8b8383a3e7, 0xcbcbdbcbcbcbcbcb, - 0xcbcbcb8b8383a3e7, 0xcbcbdbcbcbcbcbcb); - __m512i index2 = _mm512_and_si512(prev1, v_0f); - - __m512i byte_1_low = _mm512_shuffle_epi8(mask2, index2); - __m512i mask3 = - _mm512_setr_epi64(0x101010101010101, 0x1010101babaaee6, 0x101010101010101, - 0x1010101babaaee6, 0x101010101010101, 0x1010101babaaee6, - 0x101010101010101, 0x1010101babaaee6); - __m512i index3 = _mm512_and_si512(_mm512_srli_epi16(input, 4), v_0f); - __m512i byte_2_high = _mm512_shuffle_epi8(mask3, index3); - return _mm512_ternarylogic_epi64(byte_1_high, byte_1_low, byte_2_high, 128); -} - -simdutf_really_inline __m512i check_multibyte_lengths(const __m512i input, - const __m512i prev_input, - const __m512i sc) { - __m512i prev2 = prev<2>(input, prev_input); - __m512i prev3 = prev<3>(input, prev_input); - __m512i is_third_byte = _mm512_subs_epu8( - prev2, _mm512_set1_epi8(0b11100000u - 1)); // Only 111_____ will be > 0 - __m512i is_fourth_byte = _mm512_subs_epu8( - prev3, _mm512_set1_epi8(0b11110000u - 1)); // Only 1111____ will be > 0 - __m512i is_third_or_fourth_byte = - _mm512_or_si512(is_third_byte, is_fourth_byte); - const __m512i v_7f = _mm512_set1_epi8(char(0x7f)); - is_third_or_fourth_byte = _mm512_adds_epu8(v_7f, is_third_or_fourth_byte); - // We want to compute (is_third_or_fourth_byte AND v80) XOR sc. - const __m512i v_80 = _mm512_set1_epi8(char(0x80)); - return _mm512_ternarylogic_epi32(is_third_or_fourth_byte, v_80, sc, - 0b1101010); - //__m512i is_third_or_fourth_byte_mask = - //_mm512_and_si512(is_third_or_fourth_byte, v_80); return - // _mm512_xor_si512(is_third_or_fourth_byte_mask, sc); -} -// -// Return nonzero if there are incomplete multibyte characters at the end of the -// block: e.g. if there is a 4-byte character, but it is 3 bytes from the end. -// -simdutf_really_inline __m512i is_incomplete(const __m512i input) { - // If the previous input's last 3 bytes match this, they're too short (they - // ended at EOF): - // ... 1111____ 111_____ 11______ - __m512i max_value = _mm512_setr_epi64(0xffffffffffffffff, 0xffffffffffffffff, - 0xffffffffffffffff, 0xffffffffffffffff, - 0xffffffffffffffff, 0xffffffffffffffff, - 0xffffffffffffffff, 0xbfdfefffffffffff); - return _mm512_subs_epu8(input, max_value); -} - -struct avx512_utf8_checker { - // If this is nonzero, there has been a UTF-8 error. - __m512i error{}; - - // The last input we received - __m512i prev_input_block{}; - // Whether the last input we received was incomplete (used for ASCII fast - // path) - __m512i prev_incomplete{}; - - // - // Check whether the current bytes are valid UTF-8. - // - simdutf_really_inline void check_utf8_bytes(const __m512i input, - const __m512i prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ - // lead bytes (2, 3, 4-byte leads become large positive numbers instead of - // small negative numbers) - __m512i prev1 = prev<1>(input, prev_input); - __m512i sc = check_special_cases(input, prev1); - this->error = _mm512_or_si512( - check_multibyte_lengths(input, prev_input, sc), this->error); - } - - // The only problem that can happen at EOF is that a multibyte character is - // too short or a byte value too large in the last bytes: check_special_cases - // only checks for bytes too large in the first of two bytes. - simdutf_really_inline void check_eof() { - // If the previous block had incomplete UTF-8 characters at the end, an - // ASCII block can't possibly finish them. - this->error = _mm512_or_si512(this->error, this->prev_incomplete); - } - - // returns true if ASCII. - simdutf_really_inline bool check_next_input(const __m512i input) { - const __m512i v_80 = _mm512_set1_epi8(char(0x80)); - const __mmask64 ascii = _mm512_test_epi8_mask(input, v_80); - if (ascii == 0) { - this->error = _mm512_or_si512(this->error, this->prev_incomplete); - return true; - } else { - this->check_utf8_bytes(input, this->prev_input_block); - this->prev_incomplete = is_incomplete(input); - this->prev_input_block = input; - return false; - } - } - // do not forget to call check_eof! - simdutf_really_inline bool errors() const { - return _mm512_test_epi8_mask(this->error, this->error) != 0; - } -}; // struct avx512_utf8_checker -/* end file src/icelake/icelake_utf8_validation.inl.cpp */ -#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 && \ - (SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_LATIN1) -/* begin file src/icelake/icelake_from_valid_utf8.inl.cpp */ -// file included directly - -// File contains conversion procedure from VALID UTF-8 strings. - -/* - valid_utf8_to_fixed_length converts a valid UTF-8 string into UTF-32. - - The `OUTPUT` template type decides what to do with UTF-32: store - it directly or convert into UTF-16 (with AVX512). - - Input: - - str - valid UTF-8 string - - len - string length - - out_buffer - output buffer - - Result: - - pair.first - the first unprocessed input byte - - pair.second - the first unprocessed output word -*/ -template -std::pair -valid_utf8_to_fixed_length(const char *str, size_t len, OUTPUT *dwords) { - constexpr bool UTF32 = std::is_same::value; - constexpr bool UTF16 = std::is_same::value; - static_assert( - UTF32 or UTF16, - "output type has to be uint32_t (for UTF-32) or char16_t (for UTF-16)"); - static_assert(!(UTF32 and big_endian), - "we do not currently support big-endian UTF-32"); - - __m512i byteflip = _mm512_setr_epi64(0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809); - const char *ptr = str; - const char *end = ptr + len; - - OUTPUT *output = dwords; - /** - * In the main loop, we consume 64 bytes per iteration, - * but we access 64 + 4 bytes. - * We check for ptr + 64 + 64 <= end because - * we want to be do maskless writes without overruns. - */ - while (end - ptr >= 64 + 4) { - const __m512i utf8 = _mm512_loadu_si512((const __m512i *)ptr); - const __m512i v_80 = _mm512_set1_epi8(char(0x80)); - const __mmask64 ascii = _mm512_test_epi8_mask(utf8, v_80); - if (ascii == 0) { - SIMDUTF_ICELAKE_STORE_ASCII(UTF32, utf8, output) - output += 64; - ptr += 64; - continue; - } - - const __m512i lane0 = broadcast_epi128<0>(utf8); - const __m512i lane1 = broadcast_epi128<1>(utf8); - int valid_count0; - __m512i vec0 = expand_and_identify(lane0, lane1, valid_count0); - const __m512i lane2 = broadcast_epi128<2>(utf8); - int valid_count1; - __m512i vec1 = expand_and_identify(lane1, lane2, valid_count1); - if (valid_count0 + valid_count1 <= 16) { - vec0 = _mm512_mask_expand_epi32( - vec0, __mmask16(((1 << valid_count1) - 1) << valid_count0), vec1); - valid_count0 += valid_count1; - vec0 = expand_utf8_to_utf32(vec0); - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) - } else { - vec0 = expand_utf8_to_utf32(vec0); - vec1 = expand_utf8_to_utf32(vec1); - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec1, valid_count1, true) - } - const __m512i lane3 = broadcast_epi128<3>(utf8); - int valid_count2; - __m512i vec2 = expand_and_identify(lane2, lane3, valid_count2); - uint32_t tmp1; - ::memcpy(&tmp1, ptr + 64, sizeof(tmp1)); - const __m512i lane4 = _mm512_set1_epi32(tmp1); - int valid_count3; - __m512i vec3 = expand_and_identify(lane3, lane4, valid_count3); - if (valid_count2 + valid_count3 <= 16) { - vec2 = _mm512_mask_expand_epi32( - vec2, __mmask16(((1 << valid_count3) - 1) << valid_count2), vec3); - valid_count2 += valid_count3; - vec2 = expand_utf8_to_utf32(vec2); - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec2, valid_count2, true) - } else { - vec2 = expand_utf8_to_utf32(vec2); - vec3 = expand_utf8_to_utf32(vec3); - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec2, valid_count2, true) - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec3, valid_count3, true) - } - ptr += 4 * 16; - } - - if (end - ptr >= 64) { - const __m512i utf8 = _mm512_loadu_si512((const __m512i *)ptr); - const __m512i v_80 = _mm512_set1_epi8(char(0x80)); - const __mmask64 ascii = _mm512_test_epi8_mask(utf8, v_80); - if (ascii == 0) { - SIMDUTF_ICELAKE_STORE_ASCII(UTF32, utf8, output) - output += 64; - ptr += 64; - } else { - const __m512i lane0 = broadcast_epi128<0>(utf8); - const __m512i lane1 = broadcast_epi128<1>(utf8); - int valid_count0; - __m512i vec0 = expand_and_identify(lane0, lane1, valid_count0); - const __m512i lane2 = broadcast_epi128<2>(utf8); - int valid_count1; - __m512i vec1 = expand_and_identify(lane1, lane2, valid_count1); - if (valid_count0 + valid_count1 <= 16) { - vec0 = _mm512_mask_expand_epi32( - vec0, __mmask16(((1 << valid_count1) - 1) << valid_count0), vec1); - valid_count0 += valid_count1; - vec0 = expand_utf8_to_utf32(vec0); - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) - } else { - vec0 = expand_utf8_to_utf32(vec0); - vec1 = expand_utf8_to_utf32(vec1); - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec1, valid_count1, true) - } - - const __m512i lane3 = broadcast_epi128<3>(utf8); - SIMDUTF_ICELAKE_TRANSCODE16(lane2, lane3, true) - - ptr += 3 * 16; - } - } - return {ptr, output}; -} - -using utf8_to_utf16_result = std::pair; -/* end file src/icelake/icelake_from_valid_utf8.inl.cpp */ -/* begin file src/icelake/icelake_from_utf8.inl.cpp */ -// file included directly - -// File contains conversion procedure from possibly invalid UTF-8 strings. - -template -// todo: replace with the utf-8 to utf-16 routine adapted to utf-32. This code -// is legacy. -std::pair -validating_utf8_to_fixed_length(const char *str, size_t len, OUTPUT *dwords) { - constexpr bool UTF32 = std::is_same::value; - constexpr bool UTF16 = std::is_same::value; - static_assert( - UTF32 or UTF16, - "output type has to be uint32_t (for UTF-32) or char16_t (for UTF-16)"); - static_assert(!(UTF32 and big_endian), - "we do not currently support big-endian UTF-32"); - - const char *ptr = str; - const char *end = ptr + len; - __m512i byteflip = _mm512_setr_epi64(0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809); - OUTPUT *output = dwords; - avx512_utf8_checker checker{}; - /** - * In the main loop, we consume 64 bytes per iteration, - * but we access 64 + 4 bytes. - * We use masked writes to avoid overruns, see - * https://github.com/simdutf/simdutf/issues/471 - */ - while (end - ptr >= 64 + 4) { - const __m512i utf8 = _mm512_loadu_si512((const __m512i *)ptr); - if (checker.check_next_input(utf8)) { - SIMDUTF_ICELAKE_STORE_ASCII(UTF32, utf8, output) - output += 64; - ptr += 64; - continue; - } - const __m512i lane0 = broadcast_epi128<0>(utf8); - const __m512i lane1 = broadcast_epi128<1>(utf8); - int valid_count0; - __m512i vec0 = expand_and_identify(lane0, lane1, valid_count0); - const __m512i lane2 = broadcast_epi128<2>(utf8); - int valid_count1; - __m512i vec1 = expand_and_identify(lane1, lane2, valid_count1); - if (valid_count0 + valid_count1 <= 16) { - vec0 = _mm512_mask_expand_epi32( - vec0, __mmask16(((1 << valid_count1) - 1) << valid_count0), vec1); - valid_count0 += valid_count1; - vec0 = expand_utf8_to_utf32(vec0); - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) - } else { - vec0 = expand_utf8_to_utf32(vec0); - vec1 = expand_utf8_to_utf32(vec1); - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec1, valid_count1, true) - } - const __m512i lane3 = broadcast_epi128<3>(utf8); - int valid_count2; - __m512i vec2 = expand_and_identify(lane2, lane3, valid_count2); - uint32_t tmp1; - ::memcpy(&tmp1, ptr + 64, sizeof(tmp1)); - const __m512i lane4 = _mm512_set1_epi32(tmp1); - int valid_count3; - __m512i vec3 = expand_and_identify(lane3, lane4, valid_count3); - if (valid_count2 + valid_count3 <= 16) { - vec2 = _mm512_mask_expand_epi32( - vec2, __mmask16(((1 << valid_count3) - 1) << valid_count2), vec3); - valid_count2 += valid_count3; - vec2 = expand_utf8_to_utf32(vec2); - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec2, valid_count2, true) - } else { - vec2 = expand_utf8_to_utf32(vec2); - vec3 = expand_utf8_to_utf32(vec3); - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec2, valid_count2, true) - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec3, valid_count3, true) - } - ptr += 4 * 16; - } - const char *validatedptr = ptr; // validated up to ptr - - // For the final pass, we validate 64 bytes, but we only transcode - // 3*16 bytes, so we may end up double-validating 16 bytes. - if (end - ptr >= 64) { - const __m512i utf8 = _mm512_loadu_si512((const __m512i *)ptr); - if (checker.check_next_input(utf8)) { - SIMDUTF_ICELAKE_STORE_ASCII(UTF32, utf8, output) - output += 64; - ptr += 64; - } else { - const __m512i lane0 = broadcast_epi128<0>(utf8); - const __m512i lane1 = broadcast_epi128<1>(utf8); - int valid_count0; - __m512i vec0 = expand_and_identify(lane0, lane1, valid_count0); - const __m512i lane2 = broadcast_epi128<2>(utf8); - int valid_count1; - __m512i vec1 = expand_and_identify(lane1, lane2, valid_count1); - if (valid_count0 + valid_count1 <= 16) { - vec0 = _mm512_mask_expand_epi32( - vec0, __mmask16(((1 << valid_count1) - 1) << valid_count0), vec1); - valid_count0 += valid_count1; - vec0 = expand_utf8_to_utf32(vec0); - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) - } else { - vec0 = expand_utf8_to_utf32(vec0); - vec1 = expand_utf8_to_utf32(vec1); - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec1, valid_count1, true) - } - - const __m512i lane3 = broadcast_epi128<3>(utf8); - SIMDUTF_ICELAKE_TRANSCODE16(lane2, lane3, true) - - ptr += 3 * 16; - } - validatedptr += 4 * 16; - } - if (end != validatedptr) { - const __m512i utf8 = - _mm512_maskz_loadu_epi8(~UINT64_C(0) >> (64 - (end - validatedptr)), - (const __m512i *)validatedptr); - checker.check_next_input(utf8); - } - checker.check_eof(); - if (checker.errors()) { - return {ptr, nullptr}; // We found an error. - } - return {ptr, output}; -} - -// Like validating_utf8_to_fixed_length but returns as soon as an error is -// identified todo: replace with the utf-8 to utf-16 routine adapted to utf-32. -// This code is legacy. -template -std::tuple -validating_utf8_to_fixed_length_with_constant_checks(const char *str, - size_t len, - OUTPUT *dwords) { - constexpr bool UTF32 = std::is_same::value; - constexpr bool UTF16 = std::is_same::value; - static_assert( - UTF32 or UTF16, - "output type has to be uint32_t (for UTF-32) or char16_t (for UTF-16)"); - static_assert(!(UTF32 and big_endian), - "we do not currently support big-endian UTF-32"); - - const char *ptr = str; - const char *end = ptr + len; - __m512i byteflip = _mm512_setr_epi64(0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809); - OUTPUT *output = dwords; - avx512_utf8_checker checker{}; - /** - * In the main loop, we consume 64 bytes per iteration, - * but we access 64 + 4 bytes. - */ - while (end - ptr >= 4 + 64) { - const __m512i utf8 = _mm512_loadu_si512((const __m512i *)ptr); - bool ascii = checker.check_next_input(utf8); - if (checker.errors()) { - return {ptr, output, false}; // We found an error. - } - if (ascii) { - SIMDUTF_ICELAKE_STORE_ASCII(UTF32, utf8, output) - output += 64; - ptr += 64; - continue; - } - const __m512i lane0 = broadcast_epi128<0>(utf8); - const __m512i lane1 = broadcast_epi128<1>(utf8); - int valid_count0; - __m512i vec0 = expand_and_identify(lane0, lane1, valid_count0); - const __m512i lane2 = broadcast_epi128<2>(utf8); - int valid_count1; - __m512i vec1 = expand_and_identify(lane1, lane2, valid_count1); - if (valid_count0 + valid_count1 <= 16) { - vec0 = _mm512_mask_expand_epi32( - vec0, __mmask16(((1 << valid_count1) - 1) << valid_count0), vec1); - valid_count0 += valid_count1; - vec0 = expand_utf8_to_utf32(vec0); - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) - } else { - vec0 = expand_utf8_to_utf32(vec0); - vec1 = expand_utf8_to_utf32(vec1); - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec1, valid_count1, true) - } - const __m512i lane3 = broadcast_epi128<3>(utf8); - int valid_count2; - __m512i vec2 = expand_and_identify(lane2, lane3, valid_count2); - uint32_t tmp1; - ::memcpy(&tmp1, ptr + 64, sizeof(tmp1)); - const __m512i lane4 = _mm512_set1_epi32(tmp1); - int valid_count3; - __m512i vec3 = expand_and_identify(lane3, lane4, valid_count3); - if (valid_count2 + valid_count3 <= 16) { - vec2 = _mm512_mask_expand_epi32( - vec2, __mmask16(((1 << valid_count3) - 1) << valid_count2), vec3); - valid_count2 += valid_count3; - vec2 = expand_utf8_to_utf32(vec2); - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec2, valid_count2, true) - } else { - vec2 = expand_utf8_to_utf32(vec2); - vec3 = expand_utf8_to_utf32(vec3); - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec2, valid_count2, true) - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec3, valid_count3, true) - } - ptr += 4 * 16; - } - const char *validatedptr = ptr; // validated up to ptr - - // For the final pass, we validate 64 bytes, but we only transcode - // 3*16 bytes, so we may end up double-validating 16 bytes. - if (end - ptr >= 64) { - const __m512i utf8 = _mm512_loadu_si512((const __m512i *)ptr); - bool ascii = checker.check_next_input(utf8); - if (checker.errors()) { - return {ptr, output, false}; // We found an error. - } - if (ascii) { - SIMDUTF_ICELAKE_STORE_ASCII(UTF32, utf8, output) - output += 64; - ptr += 64; - } else { - const __m512i lane0 = broadcast_epi128<0>(utf8); - const __m512i lane1 = broadcast_epi128<1>(utf8); - int valid_count0; - __m512i vec0 = expand_and_identify(lane0, lane1, valid_count0); - const __m512i lane2 = broadcast_epi128<2>(utf8); - int valid_count1; - __m512i vec1 = expand_and_identify(lane1, lane2, valid_count1); - if (valid_count0 + valid_count1 <= 16) { - vec0 = _mm512_mask_expand_epi32( - vec0, __mmask16(((1 << valid_count1) - 1) << valid_count0), vec1); - valid_count0 += valid_count1; - vec0 = expand_utf8_to_utf32(vec0); - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) - } else { - vec0 = expand_utf8_to_utf32(vec0); - vec1 = expand_utf8_to_utf32(vec1); - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec0, valid_count0, true) - SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec1, valid_count1, true) - } - - const __m512i lane3 = broadcast_epi128<3>(utf8); - SIMDUTF_ICELAKE_TRANSCODE16(lane2, lane3, true) - - ptr += 3 * 16; - } - validatedptr += 4 * 16; - } - if (end != validatedptr) { - const __m512i utf8 = - _mm512_maskz_loadu_epi8(~UINT64_C(0) >> (64 - (end - validatedptr)), - (const __m512i *)validatedptr); - checker.check_next_input(utf8); - } - checker.check_eof(); - if (checker.errors()) { - return {ptr, output, false}; // We found an error. - } - return {ptr, output, true}; -} -/* end file src/icelake/icelake_from_utf8.inl.cpp */ -#endif // SIMDUTF_FEATURE_UTF8 && (SIMDUTF_FEATURE_UTF16 || - // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_LATIN1) - -#if SIMDUTF_FEATURE_UTF16 -/* begin file src/icelake/icelake_utf16fix.cpp */ -/* - * Process one block of 32 characters. If in_place is false, - * copy the block from in to out. If there is a sequencing - * error in the block, overwrite the illsequenced characters - * with the replacement character. This function reads one - * character before the beginning of the buffer as a lookback. - * If that character is illsequenced, it too is overwritten. - */ -template -simdutf_really_inline void utf16fix_block(char16_t *out, const char16_t *in) { - const char16_t replacement = scalar::utf16::replacement(); - auto swap_if_needed = [](uint16_t c) -> uint16_t { - return !simdutf::match_system(big_endian) ? scalar::u16_swap_bytes(c) : c; - }; - - __m512i lookback, block, lb_masked, block_masked; - __mmask32 lb_is_high, block_is_low, illseq; - - lookback = _mm512_loadu_si512((const __m512i *)(in - 1)); - block = _mm512_loadu_si512((const __m512i *)in); - lb_masked = - _mm512_and_epi32(lookback, _mm512_set1_epi16(swap_if_needed(0xfc00U))); - block_masked = - _mm512_and_epi32(block, _mm512_set1_epi16(swap_if_needed(0xfc00U))); - - lb_is_high = _mm512_cmpeq_epi16_mask( - lb_masked, _mm512_set1_epi16(swap_if_needed(0xd800U))); - block_is_low = _mm512_cmpeq_epi16_mask( - block_masked, _mm512_set1_epi16(swap_if_needed(0xdc00U))); - illseq = _kxor_mask32(lb_is_high, block_is_low); - if (!_ktestz_mask32_u8(illseq, illseq)) { - __mmask32 lb_illseq, block_illseq; - - /* compute the cause of the illegal sequencing */ - lb_illseq = _kandn_mask32(block_is_low, lb_is_high); - block_illseq = _kor_mask32(_kandn_mask32(lb_is_high, block_is_low), - _kshiftri_mask32(lb_illseq, 1)); - - /* fix illegal sequencing in the lookback */ - lb_illseq = _kand_mask32(lb_illseq, _cvtu32_mask32(1)); - _mm512_mask_storeu_epi16(out - 1, lb_illseq, - _mm512_set1_epi16(replacement)); - - /* fix illegal sequencing in the main block */ - if (in_place) { - _mm512_mask_storeu_epi16(out, block_illseq, - _mm512_set1_epi16(replacement)); - } else { - _mm512_storeu_epi32( - out, _mm512_mask_blend_epi16(block_illseq, block, - _mm512_set1_epi16(replacement))); - } - } else if (!in_place) { - _mm512_storeu_si512((__m512i *)out, block); - } -} - -/* - * Special case for inputs of 0--32 bytes. Works for both in-place and - * out-of-place operation. - */ -template -void utf16fix_runt(const char16_t *in, size_t n, char16_t *out) { - const char16_t replacement = scalar::utf16::replacement(); - auto swap_if_needed = [](uint16_t c) -> uint16_t { - return !simdutf::match_system(big_endian) ? scalar::u16_swap_bytes(c) : c; - }; - __m512i lookback, block, lb_masked, block_masked; - __mmask32 lb_is_high, block_is_low, illseq; - uint32_t mask = 0xFFFFFFFF >> (32 - n); - lookback = _mm512_maskz_loadu_epi16(_cvtmask32_u32(mask << 1), - (const uint16_t *)(in - 1)); - block = _mm512_maskz_loadu_epi16(_cvtmask32_u32(mask), (const uint16_t *)in); - lb_masked = - _mm512_and_epi32(lookback, _mm512_set1_epi16(swap_if_needed(0xfc00u))); - block_masked = - _mm512_and_epi32(block, _mm512_set1_epi16(swap_if_needed(0xfc00u))); - - lb_is_high = _mm512_cmpeq_epi16_mask( - lb_masked, _mm512_set1_epi16(swap_if_needed(0xd800u))); - block_is_low = _mm512_cmpeq_epi16_mask( - block_masked, _mm512_set1_epi16(swap_if_needed(0xdc00u))); - illseq = _kxor_mask32(lb_is_high, block_is_low); - if (!_ktestz_mask32_u8(illseq, illseq)) { - __mmask32 lb_illseq, block_illseq; - - /* compute the cause of the illegal sequencing */ - lb_illseq = _kandn_mask32(block_is_low, lb_is_high); - block_illseq = _kor_mask32(_kandn_mask32(lb_is_high, block_is_low), - _kshiftri_mask32(lb_illseq, 1)); - - /* fix illegal sequencing in the main block */ - _mm512_mask_storeu_epi16( - (uint16_t *)out, _cvtmask32_u32(mask), - _mm512_mask_blend_epi16(block_illseq, block, - _mm512_set1_epi16(replacement))); - } else { - _mm512_mask_storeu_epi16((uint16_t *)out, _cvtmask32_u32(mask), block); - } - out[n - 1] = scalar::utf16::is_high_surrogate(out[n - 1]) - ? replacement - : out[n - 1]; -} - -template -void utf16fix_avx512(const char16_t *in, size_t n, char16_t *out) { - const char16_t replacement = scalar::utf16::replacement(); - size_t i; - - if (n == 0) - return; - else if (n < 33) { - utf16fix_runt(in, n, out); - return; - } - out[0] = - scalar::utf16::is_low_surrogate(in[0]) ? replacement : in[0]; - - /* duplicate code to have the compiler specialise utf16fix_block() */ - if (in == out) { - for (i = 1; i + 32 < n; i += 32) { - utf16fix_block(out + i, in + i); - } - - utf16fix_block(out + n - 32, in + n - 32); - } else { - for (i = 1; i + 32 < n; i += 32) { - utf16fix_block(out + i, in + i); - } - - utf16fix_block(out + n - 32, in + n - 32); - } - - out[n - 1] = scalar::utf16::is_high_surrogate(out[n - 1]) - ? replacement - : out[n - 1]; -} -/* end file src/icelake/icelake_utf16fix.cpp */ -#endif // SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/icelake/icelake_convert_utf8_to_latin1.inl.cpp */ -// file included directly - -// File contains conversion procedure from possibly invalid UTF-8 strings. - -template -simdutf_really_inline size_t process_block_from_utf8_to_latin1( - const char *buf, size_t len, char *latin_output, __m512i minus64, - __m512i one, __mmask64 *next_leading_ptr, __mmask64 *next_bit6_ptr) { - __mmask64 load_mask = - is_remaining ? _bzhi_u64(~0ULL, (unsigned int)len) : ~0ULL; - __m512i input = _mm512_maskz_loadu_epi8(load_mask, (__m512i *)buf); - __mmask64 nonascii = _mm512_movepi8_mask(input); - if (nonascii == 0) { - if (*next_leading_ptr) { // If we ended with a leading byte, it is an error. - return 0; // Indicates error - } - is_remaining - ? _mm512_mask_storeu_epi8((__m512i *)latin_output, load_mask, input) - : _mm512_storeu_si512((__m512i *)latin_output, input); - return len; - } - - const __mmask64 leading = _mm512_cmpge_epu8_mask(input, minus64); - - __m512i highbits = _mm512_xor_si512(input, _mm512_set1_epi8(-62)); - __mmask64 invalid_leading_bytes = - _mm512_mask_cmpgt_epu8_mask(leading, highbits, one); - - if (invalid_leading_bytes) { - return 0; // Indicates error - } - - __mmask64 leading_shift = (leading << 1) | *next_leading_ptr; - - if ((nonascii ^ leading) != leading_shift) { - return 0; // Indicates error - } - - const __mmask64 bit6 = _mm512_cmpeq_epi8_mask(highbits, one); - input = - _mm512_mask_sub_epi8(input, (bit6 << 1) | *next_bit6_ptr, input, minus64); - - __mmask64 retain = ~leading & load_mask; - __m512i output = _mm512_maskz_compress_epi8(retain, input); - int64_t written_out = count_ones(retain); - if (written_out == 0) { - return 0; // Indicates error - } - *next_bit6_ptr = bit6 >> 63; - *next_leading_ptr = leading >> 63; - - __mmask64 store_mask = ~UINT64_C(0) >> (64 - written_out); - - _mm512_mask_storeu_epi8((__m512i *)latin_output, store_mask, output); - - return written_out; -} - -size_t utf8_to_latin1_avx512(const char *&inbuf, size_t len, - char *&inlatin_output) { - const char *buf = inbuf; - char *latin_output = inlatin_output; - char *start = latin_output; - size_t pos = 0; - __m512i minus64 = _mm512_set1_epi8(-64); // 11111111111 ... 1100 0000 - __m512i one = _mm512_set1_epi8(1); - __mmask64 next_leading = 0; - __mmask64 next_bit6 = 0; - - while (pos + 64 <= len) { - size_t written = process_block_from_utf8_to_latin1( - buf + pos, 64, latin_output, minus64, one, &next_leading, &next_bit6); - if (written == 0) { - inlatin_output = latin_output; - inbuf = buf + pos - next_leading; - return 0; // Indicates error at pos or after, or just before pos (too - // short error) - } - latin_output += written; - pos += 64; - } - - if (pos < len) { - size_t remaining = len - pos; - size_t written = process_block_from_utf8_to_latin1( - buf + pos, remaining, latin_output, minus64, one, &next_leading, - &next_bit6); - if (written == 0) { - inbuf = buf + pos - next_leading; - inlatin_output = latin_output; - return 0; // Indicates error at pos or after, or just before pos (too - // short error) - } - latin_output += written; - } - if (next_leading) { - inbuf = buf + len - next_leading; - inlatin_output = latin_output; - return 0; // Indicates error at end of buffer - } - inlatin_output = latin_output; - inbuf += len; - return size_t(latin_output - start); -} -/* end file src/icelake/icelake_convert_utf8_to_latin1.inl.cpp */ -/* begin file src/icelake/icelake_convert_valid_utf8_to_latin1.inl.cpp */ -// file included directly - -// File contains conversion procedure from valid UTF-8 strings. - -template -simdutf_really_inline size_t process_valid_block_from_utf8_to_latin1( - const char *buf, size_t len, char *latin_output, __m512i minus64, - __m512i one, __mmask64 *next_leading_ptr, __mmask64 *next_bit6_ptr) { - __mmask64 load_mask = - is_remaining ? _bzhi_u64(~0ULL, (unsigned int)len) : ~0ULL; - __m512i input = _mm512_maskz_loadu_epi8(load_mask, (__m512i *)buf); - __mmask64 nonascii = _mm512_movepi8_mask(input); - - if (nonascii == 0) { - is_remaining - ? _mm512_mask_storeu_epi8((__m512i *)latin_output, load_mask, input) - : _mm512_storeu_si512((__m512i *)latin_output, input); - return len; - } - - __mmask64 leading = _mm512_cmpge_epu8_mask(input, minus64); - - __m512i highbits = _mm512_xor_si512(input, _mm512_set1_epi8(-62)); - - *next_leading_ptr = leading >> 63; - - __mmask64 bit6 = _mm512_cmpeq_epi8_mask(highbits, one); - input = - _mm512_mask_sub_epi8(input, (bit6 << 1) | *next_bit6_ptr, input, minus64); - *next_bit6_ptr = bit6 >> 63; - - __mmask64 retain = ~leading & load_mask; - __m512i output = _mm512_maskz_compress_epi8(retain, input); - int64_t written_out = count_ones(retain); - if (written_out == 0) { - return 0; // Indicates error - } - __mmask64 store_mask = ~UINT64_C(0) >> (64 - written_out); - // Optimization opportunity: sometimes, masked writes are not needed. - _mm512_mask_storeu_epi8((__m512i *)latin_output, store_mask, output); - return written_out; -} - -size_t valid_utf8_to_latin1_avx512(const char *buf, size_t len, - char *latin_output) { - char *start = latin_output; - size_t pos = 0; - __m512i minus64 = _mm512_set1_epi8(-64); // 11111111111 ... 1100 0000 - __m512i one = _mm512_set1_epi8(1); - __mmask64 next_leading = 0; - __mmask64 next_bit6 = 0; - - while (pos + 64 <= len) { - size_t written = process_valid_block_from_utf8_to_latin1( - buf + pos, 64, latin_output, minus64, one, &next_leading, &next_bit6); - latin_output += written; - pos += 64; - } - - if (pos < len) { - size_t remaining = len - pos; - size_t written = process_valid_block_from_utf8_to_latin1( - buf + pos, remaining, latin_output, minus64, one, &next_leading, - &next_bit6); - latin_output += written; - } - - return (size_t)(latin_output - start); -} -/* end file src/icelake/icelake_convert_valid_utf8_to_latin1.inl.cpp */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF16 -/* begin file src/icelake/icelake_convert_utf16_to_latin1.inl.cpp */ -// file included directly -template -size_t icelake_convert_utf16_to_latin1(const char16_t *buf, size_t len, - char *latin1_output) { - const char16_t *end = buf + len; - __m512i v_0xFF = _mm512_set1_epi16(0xff); - __m512i byteflip = _mm512_setr_epi64(0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809); - __m512i shufmask = _mm512_set_epi8( - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, - 36, 34, 32, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0); - while (end - buf >= 32) { - __m512i in = _mm512_loadu_si512((__m512i *)buf); - if (big_endian) { - in = _mm512_shuffle_epi8(in, byteflip); - } - if (_mm512_cmpgt_epu16_mask(in, v_0xFF)) { - return 0; - } - _mm256_storeu_si256( - (__m256i *)latin1_output, - _mm512_castsi512_si256(_mm512_permutexvar_epi8(shufmask, in))); - latin1_output += 32; - buf += 32; - } - if (buf < end) { - uint32_t mask(uint32_t(1 << (end - buf)) - 1); - __m512i in = _mm512_maskz_loadu_epi16(mask, buf); - if (big_endian) { - in = _mm512_shuffle_epi8(in, byteflip); - } - if (_mm512_cmpgt_epu16_mask(in, v_0xFF)) { - return 0; - } - _mm256_mask_storeu_epi8( - latin1_output, mask, - _mm512_castsi512_si256(_mm512_permutexvar_epi8(shufmask, in))); - } - return len; -} - -template -std::pair -icelake_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, - char *latin1_output) { - const char16_t *end = buf + len; - const char16_t *start = buf; - __m512i byteflip = _mm512_setr_epi64(0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809); - __m512i v_0xFF = _mm512_set1_epi16(0xff); - __m512i shufmask = _mm512_set_epi8( - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, - 36, 34, 32, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0); - while (end - buf >= 32) { - __m512i in = _mm512_loadu_si512((__m512i *)buf); - if (big_endian) { - in = _mm512_shuffle_epi8(in, byteflip); - } - if (_mm512_cmpgt_epu16_mask(in, v_0xFF)) { - uint16_t word; - while ((word = (big_endian ? scalar::u16_swap_bytes(uint16_t(*buf)) - : uint16_t(*buf))) <= 0xff) { - *latin1_output++ = uint8_t(word); - buf++; - } - return std::make_pair(result(error_code::TOO_LARGE, buf - start), - latin1_output); - } - _mm256_storeu_si256( - (__m256i *)latin1_output, - _mm512_castsi512_si256(_mm512_permutexvar_epi8(shufmask, in))); - latin1_output += 32; - buf += 32; - } - if (buf < end) { - uint32_t mask(uint32_t(1 << (end - buf)) - 1); - __m512i in = _mm512_maskz_loadu_epi16(mask, buf); - if (big_endian) { - in = _mm512_shuffle_epi8(in, byteflip); - } - if (_mm512_cmpgt_epu16_mask(in, v_0xFF)) { - - uint16_t word; - while ((word = (big_endian ? scalar::u16_swap_bytes(uint16_t(*buf)) - : uint16_t(*buf))) <= 0xff) { - *latin1_output++ = uint8_t(word); - buf++; - } - return std::make_pair(result(error_code::TOO_LARGE, buf - start), - latin1_output); - } - _mm256_mask_storeu_epi8( - latin1_output, mask, - _mm512_castsi512_si256(_mm512_permutexvar_epi8(shufmask, in))); - } - return std::make_pair(result(error_code::SUCCESS, len), latin1_output); -} -/* end file src/icelake/icelake_convert_utf16_to_latin1.inl.cpp */ -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -/* begin file src/icelake/icelake_convert_utf16_to_utf8.inl.cpp */ -// file included directly - -/** - * This function converts the input (inbuf, inlen), assumed to be valid - * UTF16 (little endian) into UTF-8 (to outbuf). The number of code units - * written is written to 'outlen' and the function reports the number of input - * word consumed. - */ -template -size_t utf16_to_utf8_avx512i(const char16_t *inbuf, size_t inlen, - unsigned char *outbuf, size_t *outlen) { - __m512i in; - __mmask32 inmask = _cvtu32_mask32(0x7fffffff); - __m512i byteflip = _mm512_setr_epi64(0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809); - const char16_t *const inbuf_orig = inbuf; - const unsigned char *const outbuf_orig = outbuf; - int adjust = 0; - int carry = 0; - - while (inlen >= 32) { - in = _mm512_loadu_si512(inbuf); - if (big_endian) { - in = _mm512_shuffle_epi8(in, byteflip); - } - inlen -= 31; - lastiteration: - inbuf += 31; - - failiteration: - const __mmask32 is234byte = _mm512_mask_cmp_epu16_mask( - inmask, in, _mm512_set1_epi16(0x0080), _MM_CMPINT_NLT); - - if (_ktestz_mask32_u8(inmask, is234byte)) { - // fast path for ASCII only - _mm512_mask_cvtepi16_storeu_epi8(outbuf, inmask, in); - outbuf += 31; - carry = 0; - - if (inlen < 32) { - goto tail; - } else { - continue; - } - } - - const __mmask32 is12byte = - _mm512_cmp_epu16_mask(in, _mm512_set1_epi16(0x0800), _MM_CMPINT_LT); - - if (_ktestc_mask32_u8(is12byte, inmask)) { - // fast path for 1 and 2 byte only - - const __m512i twobytes = _mm512_ternarylogic_epi32( - _mm512_slli_epi16(in, 8), _mm512_srli_epi16(in, 6), - _mm512_set1_epi16(0x3f3f), 0xa8); // (A|B)&C - in = _mm512_mask_add_epi16(in, is234byte, twobytes, - _mm512_set1_epi16(int16_t(0x80c0))); - const __m512i cmpmask = - _mm512_mask_blend_epi16(inmask, _mm512_set1_epi16(int16_t(0xffff)), - _mm512_set1_epi16(0x0800)); - const __mmask64 smoosh = - _mm512_cmp_epu8_mask(in, cmpmask, _MM_CMPINT_NLT); - const __m512i out = _mm512_maskz_compress_epi8(smoosh, in); - _mm512_mask_storeu_epi8(outbuf, - _cvtu64_mask64(_pext_u64(_cvtmask64_u64(smoosh), - _cvtmask64_u64(smoosh))), - out); - outbuf += 31 + _mm_popcnt_u32(_cvtmask32_u32(is234byte)); - carry = 0; - - if (inlen < 32) { - goto tail; - } else { - continue; - } - } - __m512i lo = _mm512_cvtepu16_epi32(_mm512_castsi512_si256(in)); - __m512i hi = _mm512_cvtepu16_epi32(_mm512_extracti32x8_epi32(in, 1)); - - __m512i taglo = _mm512_set1_epi32(0x8080e000); - __m512i taghi = taglo; - - const __m512i fc00masked = - _mm512_and_epi32(in, _mm512_set1_epi16(int16_t(0xfc00))); - const __mmask32 hisurr = _mm512_mask_cmp_epu16_mask( - inmask, fc00masked, _mm512_set1_epi16(int16_t(0xd800)), _MM_CMPINT_EQ); - const __mmask32 losurr = _mm512_cmp_epu16_mask( - fc00masked, _mm512_set1_epi16(int16_t(0xdc00)), _MM_CMPINT_EQ); - - int carryout = 0; - if (!_kortestz_mask32_u8(hisurr, losurr)) { - // handle surrogates - - __m512i los = _mm512_alignr_epi32(hi, lo, 1); - __m512i his = _mm512_alignr_epi32(lo, hi, 1); - - const __mmask32 hisurrhi = _kshiftri_mask32(hisurr, 16); - taglo = _mm512_mask_mov_epi32(taglo, __mmask16(hisurr), - _mm512_set1_epi32(0x808080f0)); - taghi = _mm512_mask_mov_epi32(taghi, __mmask16(hisurrhi), - _mm512_set1_epi32(0x808080f0)); - - lo = _mm512_mask_slli_epi32(lo, __mmask16(hisurr), lo, 10); - hi = _mm512_mask_slli_epi32(hi, __mmask16(hisurrhi), hi, 10); - los = _mm512_add_epi32(los, _mm512_set1_epi32(0xfca02400)); - his = _mm512_add_epi32(his, _mm512_set1_epi32(0xfca02400)); - lo = _mm512_mask_add_epi32(lo, __mmask16(hisurr), lo, los); - hi = _mm512_mask_add_epi32(hi, __mmask16(hisurrhi), hi, his); - - carryout = _cvtu32_mask32(_kshiftri_mask32(hisurr, 30)); - - const uint32_t h = _cvtmask32_u32(hisurr); - const uint32_t l = _cvtmask32_u32(losurr); - // check for mismatched surrogates - if ((h + h + carry) ^ l) { - const uint32_t lonohi = l & ~(h + h + carry); - const uint32_t hinolo = h & ~(l >> 1); - inlen = _tzcnt_u32(hinolo | lonohi); - inmask = __mmask32(0x7fffffff & ((1U << inlen) - 1)); - in = _mm512_maskz_mov_epi16(inmask, in); - adjust = (int)inlen - 31; - inlen = 0; - goto failiteration; - } - } - - hi = _mm512_maskz_mov_epi32(_cvtu32_mask16(0x7fff), hi); - carry = carryout; - - __m512i mslo = - _mm512_multishift_epi64_epi8(_mm512_set1_epi64(0x20262c3200060c12), lo); - - __m512i mshi = - _mm512_multishift_epi64_epi8(_mm512_set1_epi64(0x20262c3200060c12), hi); - - const __mmask32 outmask = __mmask32(_kandn_mask64(losurr, inmask)); - const __mmask64 outmhi = _kshiftri_mask64(outmask, 16); - - const __mmask32 is1byte = __mmask32(_knot_mask64(is234byte)); - const __mmask64 is1bhi = _kshiftri_mask64(is1byte, 16); - const __mmask64 is12bhi = _kshiftri_mask64(is12byte, 16); - - taglo = _mm512_mask_mov_epi32(taglo, __mmask16(is12byte), - _mm512_set1_epi32(0x80c00000)); - taghi = _mm512_mask_mov_epi32(taghi, __mmask16(is12bhi), - _mm512_set1_epi32(0x80c00000)); - __m512i magiclo = _mm512_mask_blend_epi32(__mmask16(outmask), - _mm512_set1_epi32(0xffffffff), - _mm512_set1_epi32(0x00010101)); - __m512i magichi = _mm512_mask_blend_epi32(__mmask16(outmhi), - _mm512_set1_epi32(0xffffffff), - _mm512_set1_epi32(0x00010101)); - - magiclo = _mm512_mask_blend_epi32(__mmask16(outmask), - _mm512_set1_epi32(0xffffffff), - _mm512_set1_epi32(0x00010101)); - magichi = _mm512_mask_blend_epi32(__mmask16(outmhi), - _mm512_set1_epi32(0xffffffff), - _mm512_set1_epi32(0x00010101)); - - mslo = _mm512_ternarylogic_epi32(mslo, _mm512_set1_epi32(0x3f3f3f3f), taglo, - 0xea); // A&B|C - mshi = _mm512_ternarylogic_epi32(mshi, _mm512_set1_epi32(0x3f3f3f3f), taghi, - 0xea); - mslo = _mm512_mask_slli_epi32(mslo, __mmask16(is1byte), lo, 24); - - mshi = _mm512_mask_slli_epi32(mshi, __mmask16(is1bhi), hi, 24); - - const __mmask64 wantlo = - _mm512_cmp_epu8_mask(mslo, magiclo, _MM_CMPINT_NLT); - const __mmask64 wanthi = - _mm512_cmp_epu8_mask(mshi, magichi, _MM_CMPINT_NLT); - const __m512i outlo = _mm512_maskz_compress_epi8(wantlo, mslo); - const __m512i outhi = _mm512_maskz_compress_epi8(wanthi, mshi); - const uint64_t wantlo_uint64 = _cvtmask64_u64(wantlo); - const uint64_t wanthi_uint64 = _cvtmask64_u64(wanthi); - - uint64_t advlo = _mm_popcnt_u64(wantlo_uint64); - uint64_t advhi = _mm_popcnt_u64(wanthi_uint64); - - _mm512_mask_storeu_epi8( - outbuf, _cvtu64_mask64(_pext_u64(wantlo_uint64, wantlo_uint64)), outlo); - _mm512_mask_storeu_epi8( - outbuf + advlo, _cvtu64_mask64(_pext_u64(wanthi_uint64, wanthi_uint64)), - outhi); - outbuf += advlo + advhi; - } - outbuf += -adjust; - -tail: - if (inlen != 0) { - // We must have inlen < 31. - inmask = _cvtu32_mask32((1U << inlen) - 1); - in = _mm512_maskz_loadu_epi16(inmask, inbuf); - if (big_endian) { - in = _mm512_shuffle_epi8(in, byteflip); - } - adjust = (int)inlen - 31; - inlen = 0; - goto lastiteration; - } - *outlen = (outbuf - outbuf_orig) + adjust; - return ((inbuf - inbuf_orig) + adjust); -} -/* end file src/icelake/icelake_convert_utf16_to_utf8.inl.cpp */ -/* begin file src/icelake/icelake_convert_utf8_to_utf16.inl.cpp */ -// file included directly - -// File contains conversion procedure from possibly invalid UTF-8 strings. - -/** - * Attempts to convert up to len 1-byte code units from in (in UTF-8 format) to - * out. - * Returns the position of the input and output after the processing is - * completed. Upon error, the output is set to null. - */ - -template -utf8_to_utf16_result -fast_avx512_convert_utf8_to_utf16(const char *in, size_t len, char16_t *out) { - const char *const final_in = in + len; - bool result = true; - while (result) { - if (final_in - in >= 64) { - result = process_block_utf8_to_utf16( - in, out, final_in - in); - } else if (in < final_in) { - result = process_block_utf8_to_utf16( - in, out, final_in - in); - } else { - break; - } - } - if (!result) { - out = nullptr; - } - return std::make_pair(in, out); -} - -template -simdutf::result fast_avx512_convert_utf8_to_utf16_with_errors(const char *in, - size_t len, - char16_t *out) { - const char *const init_in = in; - const char16_t *const init_out = out; - const char *const final_in = in + len; - bool result = true; - while (result) { - if (final_in - in >= 64) { - result = process_block_utf8_to_utf16( - in, out, final_in - in); - } else if (in < final_in) { - result = process_block_utf8_to_utf16( - in, out, final_in - in); - } else { - break; - } - } - if (!result) { - size_t pos = size_t(in - init_in); - if (pos < len && (init_in[pos] & 0xc0) == 0x80 && pos >= 64) { - // We must check whether we are the fourth continuation byte - bool c1 = (init_in[pos - 1] & 0xc0) == 0x80; - bool c2 = (init_in[pos - 2] & 0xc0) == 0x80; - bool c3 = (init_in[pos - 3] & 0xc0) == 0x80; - if (c1 && c2 && c3) { - return {simdutf::TOO_LONG, pos}; - } - } - // rewind_and_convert_with_errors will seek a potential error from in - // onward, with the ability to go back up to in - init_in bytes, and read - // final_in - in bytes forward. - simdutf::result res = - scalar::utf8_to_utf16::rewind_and_convert_with_errors( - in - init_in, in, final_in - in, out); - res.count += (in - init_in); - return res; - } else { - return simdutf::result(error_code::SUCCESS, out - init_out); - } -} -/* end file src/icelake/icelake_convert_utf8_to_utf16.inl.cpp */ -/* begin file src/icelake/icelake_utf8_length_from_utf16.inl.cpp */ -// This is translation of `utf8_length_from_utf16_bytemask` from -// `generic/utf16.h` -template -simdutf_really_inline size_t icelake_utf8_length_from_utf16(const char16_t *in, - size_t size) { - size_t pos = 0; - - using vector_u16 = simd16; - constexpr size_t N = vector_u16::ELEMENTS; - - const auto one = vector_u16::splat(1); - - auto v_count = vector_u16::zero(); - - // each char16 yields at least one byte - size_t count = size / N * N; - - // in a single iteration the increment is 0, 1 or 2, despite we have - // three additions - constexpr size_t max_iterations = 65535 / 2; - size_t iteration = max_iterations; - - for (; pos < size / N * N; pos += N) { - auto input = vector_u16::load(reinterpret_cast(in + pos)); - if (!match_system(big_endian)) { - input = input.swap_bytes(); - } - - // not_surrogate[i] = non-zero if i-th element is not a surrogate word - const auto not_surrogate = (input & uint16_t(0xf800)) ^ uint16_t(0xd800); - - // not_surrogate[i] = 1 if surrogate word, 0 otherwise - const auto is_surrogate = min(not_surrogate, one) ^ one; - - // c0 - chars that yield 2- or 3-byte UTF-8 codes - const auto c0 = min(input & uint16_t(0xff80), one); - - // c1 - chars that yield 3-byte UTF-8 codes (including surrogates) - const auto c1 = min(input & uint16_t(0xf800), one); - - /* - Explanation how the counting works. - - In the case of a non-surrogate character we count: - * always 1 -- see how `count` is initialized above; - * c0 = 1 if the current char yields 2 or 3 bytes; - * c1 = 1 if the current char yields 3 bytes. - - Thus, we always have correct count for the current char: - from 1, 2 or 3 bytes. - - A trickier part is how we count surrogate pairs. Whether - we encounter a surrogate (low or high), we count it as - 3 chars and then minus 1 (`is_surrogate` is -1 or 0). - Each surrogate char yields 2. A surrogate pair, that - is a low surrogate followed by a high one, yields - the expected 4 bytes. - - It also correctly handles cases when low surrogate is - processed by the this loop, but high surrogate is counted - by the scalar procedure. The scalar procedure uses exactly - the described approach, thanks to that for valid UTF-16 - strings it always count correctly. - */ - v_count += c0; - v_count += c1; - v_count -= is_surrogate; - - iteration -= 1; - if (iteration == 0) { - count += v_count.sum(); - v_count = vector_u16::zero(); - - iteration = max_iterations; - } - } - - if (iteration > 0) { - count += v_count.sum(); - } - - return count + scalar::utf16::utf8_length_from_utf16(in + pos, - size - pos); -} -/* end file src/icelake/icelake_utf8_length_from_utf16.inl.cpp */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -/* begin file src/icelake/icelake_convert_utf16_to_utf32.inl.cpp */ -// file included directly - -/* - Returns a pair: the first unprocessed byte from buf and utf32_output - A scalar routing should carry on the conversion of the tail. -*/ -template -std::tuple -convert_utf16_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_output) { - const char16_t *end = buf + len; - const __m512i v_fc00 = _mm512_set1_epi16((uint16_t)0xfc00); - const __m512i v_d800 = _mm512_set1_epi16((uint16_t)0xd800); - const __m512i v_dc00 = _mm512_set1_epi16((uint16_t)0xdc00); - __mmask32 carry{0}; - const __m512i byteflip = _mm512_setr_epi64( - 0x0607040502030001, 0x0e0f0c0d0a0b0809, 0x0607040502030001, - 0x0e0f0c0d0a0b0809, 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809); - while (std::distance(buf, end) >= 32) { - // Always safe because buf + 32 <= end so that end - buf >= 32 bytes: - __m512i in = _mm512_loadu_si512((__m512i *)buf); - if (big_endian) { - in = _mm512_shuffle_epi8(in, byteflip); - } - - // H - bitmask for high surrogates - const __mmask32 H = - _mm512_cmpeq_epi16_mask(_mm512_and_si512(in, v_fc00), v_d800); - // H - bitmask for low surrogates - const __mmask32 L = - _mm512_cmpeq_epi16_mask(_mm512_and_si512(in, v_fc00), v_dc00); - - if ((H | L)) { - // surrogate pair(s) in a register - const __mmask32 V = - (L ^ - (carry | (H << 1))); // A high surrogate must be followed by low one - // and a low one must be preceded by a high one. - // If valid, V should be equal to 0 - - if (V == 0) { - // valid case - /* - Input surrogate pair: - |1101.11aa.aaaa.aaaa|1101.10bb.bbbb.bbbb| - low surrogate high surrogate - */ - /* 1. Expand all code units to 32-bit code units - in - |0000.0000.0000.0000.1101.11aa.aaaa.aaaa|0000.0000.0000.0000.1101.10bb.bbbb.bbbb| - */ - const __m512i first = _mm512_cvtepu16_epi32(_mm512_castsi512_si256(in)); - const __m512i second = - _mm512_cvtepu16_epi32(_mm512_extracti32x8_epi32(in, 1)); - - /* 2. Shift by one 16-bit word to align low surrogates with high - surrogates in - |0000.0000.0000.0000.1101.11aa.aaaa.aaaa|0000.0000.0000.0000.1101.10bb.bbbb.bbbb| - shifted - |????.????.????.????.????.????.????.????|0000.0000.0000.0000.1101.11aa.aaaa.aaaa| - */ - const __m512i shifted_first = _mm512_alignr_epi32(second, first, 1); - const __m512i shifted_second = - _mm512_alignr_epi32(_mm512_setzero_si512(), second, 1); - - /* 3. Align all high surrogates in first and second by shifting to the - left by 10 bits - |0000.0000.0000.0000.1101.11aa.aaaa.aaaa|0000.0011.0110.bbbb.bbbb.bb00.0000.0000| - */ - const __m512i aligned_first = - _mm512_mask_slli_epi32(first, (__mmask16)H, first, 10); - const __m512i aligned_second = - _mm512_mask_slli_epi32(second, (__mmask16)(H >> 16), second, 10); - - /* 4. Remove surrogate prefixes and add offset 0x10000 by adding in, - shifted and constant in - |0000.0000.0000.0000.1101.11aa.aaaa.aaaa|0000.0011.0110.bbbb.bbbb.bb00.0000.0000| - shifted - |????.????.????.????.????.????.????.????|0000.0000.0000.0000.1101.11aa.aaaa.aaaa| - constant|1111.1100.1010.0000.0010.0100.0000.0000|1111.1100.1010.0000.0010.0100.0000.0000| - */ - const __m512i constant = _mm512_set1_epi32((uint32_t)0xfca02400); - const __m512i added_first = _mm512_mask_add_epi32( - aligned_first, (__mmask16)H, aligned_first, shifted_first); - const __m512i utf32_first = _mm512_mask_add_epi32( - added_first, (__mmask16)H, added_first, constant); - - const __m512i added_second = - _mm512_mask_add_epi32(aligned_second, (__mmask16)(H >> 16), - aligned_second, shifted_second); - const __m512i utf32_second = _mm512_mask_add_epi32( - added_second, (__mmask16)(H >> 16), added_second, constant); - - // 5. Store all valid UTF-32 code units (low surrogate positions and - // 32nd word are invalid) - const __mmask32 valid = ~L & 0x7fffffff; - // We deliberately do a _mm512_maskz_compress_epi32 followed by - // storeu_epi32 to ease performance portability to Zen 4. - const __m512i compressed_first = - _mm512_maskz_compress_epi32((__mmask16)(valid), utf32_first); - const size_t howmany1 = count_ones((uint16_t)(valid)); - _mm512_storeu_si512((__m512i *)utf32_output, compressed_first); - utf32_output += howmany1; - const __m512i compressed_second = - _mm512_maskz_compress_epi32((__mmask16)(valid >> 16), utf32_second); - const size_t howmany2 = count_ones((uint16_t)(valid >> 16)); - // The following could be unsafe in some cases? - //_mm512_storeu_epi32((__m512i *) utf32_output, compressed_second); - _mm512_mask_storeu_epi32((__m512i *)utf32_output, - __mmask16((1 << howmany2) - 1), - compressed_second); - utf32_output += howmany2; - // Only process 31 code units, but keep track if the 31st word is a high - // surrogate as a carry - buf += 31; - carry = (H >> 30) & 0x1; - } else { - // invalid case - return std::make_tuple(buf + carry, utf32_output, false); - } - } else { - // no surrogates - // extend all thirty-two 16-bit code units to thirty-two 32-bit code units - _mm512_storeu_si512((__m512i *)(utf32_output), - _mm512_cvtepu16_epi32(_mm512_castsi512_si256(in))); - _mm512_storeu_si512( - (__m512i *)(utf32_output) + 1, - _mm512_cvtepu16_epi32(_mm512_extracti32x8_epi32(in, 1))); - utf32_output += 32; - buf += 32; - carry = 0; - } - } // while - return std::make_tuple(buf + carry, utf32_output, true); -} -/* end file src/icelake/icelake_convert_utf16_to_utf32.inl.cpp */ -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF32 -/* begin file src/icelake/icelake_convert_utf32_to_latin1.inl.cpp */ -// file included directly -size_t icelake_convert_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) { - const char32_t *end = buf + len; - __m512i v_0xFF = _mm512_set1_epi32(0xff); - __m512i shufmask = _mm512_set_epi8( - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60, - 56, 52, 48, 44, 40, 36, 32, 28, 24, 20, 16, 12, 8, 4, 0); - while (end - buf >= 16) { - __m512i in = _mm512_loadu_si512((__m512i *)buf); - if (_mm512_cmpgt_epu32_mask(in, v_0xFF)) { - return 0; - } - _mm_storeu_si128( - (__m128i *)latin1_output, - _mm512_castsi512_si128(_mm512_permutexvar_epi8(shufmask, in))); - latin1_output += 16; - buf += 16; - } - if (buf < end) { - uint16_t mask = uint16_t((1 << (end - buf)) - 1); - __m512i in = _mm512_maskz_loadu_epi32(mask, buf); - if (_mm512_cmpgt_epu32_mask(in, v_0xFF)) { - return 0; - } - _mm_mask_storeu_epi8( - latin1_output, mask, - _mm512_castsi512_si128(_mm512_permutexvar_epi8(shufmask, in))); - } - return len; -} - -std::pair -icelake_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, - char *latin1_output) { - const char32_t *end = buf + len; - const char32_t *start = buf; - __m512i v_0xFF = _mm512_set1_epi32(0xff); - __m512i shufmask = _mm512_set_epi8( - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60, - 56, 52, 48, 44, 40, 36, 32, 28, 24, 20, 16, 12, 8, 4, 0); - while (end - buf >= 16) { - __m512i in = _mm512_loadu_si512((__m512i *)buf); - if (_mm512_cmpgt_epu32_mask(in, v_0xFF)) { - while (uint32_t(*buf) <= 0xff) { - *latin1_output++ = uint8_t(*buf++); - } - return std::make_pair(result(error_code::TOO_LARGE, buf - start), - latin1_output); - } - _mm_storeu_si128( - (__m128i *)latin1_output, - _mm512_castsi512_si128(_mm512_permutexvar_epi8(shufmask, in))); - latin1_output += 16; - buf += 16; - } - if (buf < end) { - uint16_t mask = uint16_t((1 << (end - buf)) - 1); - __m512i in = _mm512_maskz_loadu_epi32(mask, buf); - if (_mm512_cmpgt_epu32_mask(in, v_0xFF)) { - while (uint32_t(*buf) <= 0xff) { - *latin1_output++ = uint8_t(*buf++); - } - return std::make_pair(result(error_code::TOO_LARGE, buf - start), - latin1_output); - } - _mm_mask_storeu_epi8( - latin1_output, mask, - _mm512_castsi512_si128(_mm512_permutexvar_epi8(shufmask, in))); - } - return std::make_pair(result(error_code::SUCCESS, len), latin1_output); -} -/* end file src/icelake/icelake_convert_utf32_to_latin1.inl.cpp */ -#endif // SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -/* begin file src/icelake/icelake_convert_utf32_to_utf8.inl.cpp */ -// file included directly - -// Todo: currently, this is just the haswell code, optimize for icelake kernel. -std::pair -avx512_convert_utf32_to_utf8(const char32_t *buf, size_t len, - char *utf8_output) { - const char32_t *end = buf + len; - const __m256i v_0000 = _mm256_setzero_si256(); - const __m256i v_ffff0000 = _mm256_set1_epi32((uint32_t)0xffff0000); - const __m256i v_ff80 = _mm256_set1_epi16((uint16_t)0xff80); - const __m256i v_f800 = _mm256_set1_epi16((uint16_t)0xf800); - const __m256i v_c080 = _mm256_set1_epi16((uint16_t)0xc080); - const __m256i v_7fffffff = _mm256_set1_epi32((uint32_t)0x7fffffff); - __m256i running_max = _mm256_setzero_si256(); - __m256i forbidden_bytemask = _mm256_setzero_si256(); - - const size_t safety_margin = - 12; // to avoid overruns, see issue - // https://github.com/simdutf/simdutf/issues/92 - - while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { - __m256i in = _mm256_loadu_si256((__m256i *)buf); - __m256i nextin = _mm256_loadu_si256((__m256i *)buf + 1); - running_max = _mm256_max_epu32(_mm256_max_epu32(in, running_max), nextin); - - // Pack 32-bit UTF-32 code units to 16-bit UTF-16 code units with unsigned - // saturation - __m256i in_16 = _mm256_packus_epi32(_mm256_and_si256(in, v_7fffffff), - _mm256_and_si256(nextin, v_7fffffff)); - in_16 = _mm256_permute4x64_epi64(in_16, 0b11011000); - - // Try to apply UTF-16 => UTF-8 routine on 256 bits - // (haswell/avx2_convert_utf16_to_utf8.cpp) - - if (_mm256_testz_si256(in_16, v_ff80)) { // ASCII fast path!!!! - // 1. pack the bytes - const __m128i utf8_packed = _mm_packus_epi16( - _mm256_castsi256_si128(in_16), _mm256_extractf128_si256(in_16, 1)); - // 2. store (16 bytes) - _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); - // 3. adjust pointers - buf += 16; - utf8_output += 16; - continue; // we are done for this round! - } - // no bits set above 7th bit - const __m256i one_byte_bytemask = - _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_ff80), v_0000); - const uint32_t one_byte_bitmask = - static_cast(_mm256_movemask_epi8(one_byte_bytemask)); - - // no bits set above 11th bit - const __m256i one_or_two_bytes_bytemask = - _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_0000); - const uint32_t one_or_two_bytes_bitmask = - static_cast(_mm256_movemask_epi8(one_or_two_bytes_bytemask)); - if (one_or_two_bytes_bitmask == 0xffffffff) { - // 1. prepare 2-byte values - // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 - // expected output : [110a|aaaa|10bb|bbbb] x 8 - const __m256i v_1f00 = _mm256_set1_epi16((int16_t)0x1f00); - const __m256i v_003f = _mm256_set1_epi16((int16_t)0x003f); - - // t0 = [000a|aaaa|bbbb|bb00] - const __m256i t0 = _mm256_slli_epi16(in_16, 2); - // t1 = [000a|aaaa|0000|0000] - const __m256i t1 = _mm256_and_si256(t0, v_1f00); - // t2 = [0000|0000|00bb|bbbb] - const __m256i t2 = _mm256_and_si256(in_16, v_003f); - // t3 = [000a|aaaa|00bb|bbbb] - const __m256i t3 = _mm256_or_si256(t1, t2); - // t4 = [110a|aaaa|10bb|bbbb] - const __m256i t4 = _mm256_or_si256(t3, v_c080); - - // 2. merge ASCII and 2-byte codewords - const __m256i utf8_unpacked = - _mm256_blendv_epi8(t4, in_16, one_byte_bytemask); - - // 3. prepare bitmask for 8-bit lookup - const uint32_t M0 = one_byte_bitmask & 0x55555555; - const uint32_t M1 = M0 >> 7; - const uint32_t M2 = (M1 | M0) & 0x00ff00ff; - // 4. pack the bytes - - const uint8_t *row = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0]; - const uint8_t *row_2 = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2 >> - 16)][0]; - - const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1)); - const __m128i shuffle_2 = _mm_loadu_si128((__m128i *)(row_2 + 1)); - - const __m256i utf8_packed = _mm256_shuffle_epi8( - utf8_unpacked, _mm256_setr_m128i(shuffle, shuffle_2)); - // 5. store bytes - _mm_storeu_si128((__m128i *)utf8_output, - _mm256_castsi256_si128(utf8_packed)); - utf8_output += row[0]; - _mm_storeu_si128((__m128i *)utf8_output, - _mm256_extractf128_si256(utf8_packed, 1)); - utf8_output += row_2[0]; - - // 6. adjust pointers - buf += 16; - continue; - } - // Must check for overflow in packing - const __m256i saturation_bytemask = _mm256_cmpeq_epi32( - _mm256_and_si256(_mm256_or_si256(in, nextin), v_ffff0000), v_0000); - const uint32_t saturation_bitmask = - static_cast(_mm256_movemask_epi8(saturation_bytemask)); - if (saturation_bitmask == 0xffffffff) { - // case: code units from register produce either 1, 2 or 3 UTF-8 bytes - const __m256i v_d800 = _mm256_set1_epi16((uint16_t)0xd800); - forbidden_bytemask = _mm256_or_si256( - forbidden_bytemask, - _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_d800)); - - const __m256i dup_even = _mm256_setr_epi16( - 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e, - 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); - - /* In this branch we handle three cases: - 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - - single UFT-8 byte - 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two - UTF-8 bytes - 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - - three UTF-8 bytes - - We expand the input word (16-bit) into two code units (32-bit), thus - we have room for four bytes. However, we need five distinct bit - layouts. Note that the last byte in cases #2 and #3 is the same. - - We precompute byte 1 for case #1 and the common byte for cases #2 & #3 - in register t2. - - We precompute byte 1 for case #3 and -- **conditionally** -- precompute - either byte 1 for case #2 or byte 2 for case #3. Note that they - differ by exactly one bit. - - Finally from these two code units we build proper UTF-8 sequence, taking - into account the case (i.e, the number of bytes to write). - */ - /** - * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: - * t2 => [0ccc|cccc] [10cc|cccc] - * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) - */ -#define simdutf_vec(x) _mm256_set1_epi16(static_cast(x)) - // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - const __m256i t0 = _mm256_shuffle_epi8(in_16, dup_even); - // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] - const __m256i t1 = _mm256_and_si256(t0, simdutf_vec(0b0011111101111111)); - // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - const __m256i t2 = _mm256_or_si256(t1, simdutf_vec(0b1000000000000000)); - - // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] - const __m256i s0 = _mm256_srli_epi16(in_16, 4); - // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] - const __m256i s1 = _mm256_and_si256(s0, simdutf_vec(0b0000111111111100)); - // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] - const __m256i s2 = _mm256_maddubs_epi16(s1, simdutf_vec(0x0140)); - // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - const __m256i s3 = _mm256_or_si256(s2, simdutf_vec(0b1100000011100000)); - const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, - simdutf_vec(0b0100000000000000)); - const __m256i s4 = _mm256_xor_si256(s3, m0); -#undef simdutf_vec - - // 4. expand code units 16-bit => 32-bit - const __m256i out0 = _mm256_unpacklo_epi16(t2, s4); - const __m256i out1 = _mm256_unpackhi_epi16(t2, s4); - - // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - const uint32_t mask = (one_byte_bitmask & 0x55555555) | - (one_or_two_bytes_bitmask & 0xaaaaaaaa); - // Due to the wider registers, the following path is less likely to be - // useful. - /*if(mask == 0) { - // We only have three-byte code units. Use fast path. - const __m256i shuffle = - _mm256_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1, - 2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1); const __m256i utf8_0 = - _mm256_shuffle_epi8(out0, shuffle); const __m256i utf8_1 = - _mm256_shuffle_epi8(out1, shuffle); - _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_0)); - utf8_output += 12; - _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_1)); - utf8_output += 12; - _mm_storeu_si128((__m128i*)utf8_output, - _mm256_extractf128_si256(utf8_0,1)); utf8_output += 12; - _mm_storeu_si128((__m128i*)utf8_output, - _mm256_extractf128_si256(utf8_1,1)); utf8_output += 12; buf += 16; - continue; - }*/ - const uint8_t mask0 = uint8_t(mask); - const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; - const __m128i shuffle0 = _mm_loadu_si128((__m128i *)(row0 + 1)); - const __m128i utf8_0 = - _mm_shuffle_epi8(_mm256_castsi256_si128(out0), shuffle0); - - const uint8_t mask1 = static_cast(mask >> 8); - const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; - const __m128i shuffle1 = _mm_loadu_si128((__m128i *)(row1 + 1)); - const __m128i utf8_1 = - _mm_shuffle_epi8(_mm256_castsi256_si128(out1), shuffle1); - - const uint8_t mask2 = static_cast(mask >> 16); - const uint8_t *row2 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask2][0]; - const __m128i shuffle2 = _mm_loadu_si128((__m128i *)(row2 + 1)); - const __m128i utf8_2 = - _mm_shuffle_epi8(_mm256_extractf128_si256(out0, 1), shuffle2); - - const uint8_t mask3 = static_cast(mask >> 24); - const uint8_t *row3 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask3][0]; - const __m128i shuffle3 = _mm_loadu_si128((__m128i *)(row3 + 1)); - const __m128i utf8_3 = - _mm_shuffle_epi8(_mm256_extractf128_si256(out1, 1), shuffle3); - - _mm_storeu_si128((__m128i *)utf8_output, utf8_0); - utf8_output += row0[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_1); - utf8_output += row1[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_2); - utf8_output += row2[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_3); - utf8_output += row3[0]; - buf += 16; - } else { - // case: at least one 32-bit word is larger than 0xFFFF <=> it will - // produce four UTF-8 bytes. Let us do a scalar fallback. It may seem - // wasteful to use scalar code, but being efficient with SIMD may require - // large, non-trivial tables? - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint32_t word = buf[k]; - if ((word & 0xFFFFFF80) == 0) { // 1-byte (ASCII) - *utf8_output++ = char(word); - } else if ((word & 0xFFFFF800) == 0) { // 2-byte - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else if ((word & 0xFFFF0000) == 0) { // 3-byte - if (word >= 0xD800 && word <= 0xDFFF) { - return std::make_pair(nullptr, utf8_output); - } - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else { // 4-byte - if (word > 0x10FFFF) { - return std::make_pair(nullptr, utf8_output); - } - *utf8_output++ = char((word >> 18) | 0b11110000); - *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } - } - buf += k; - } - } // while - - // check for invalid input - const __m256i v_10ffff = _mm256_set1_epi32((uint32_t)0x10ffff); - if (static_cast(_mm256_movemask_epi8(_mm256_cmpeq_epi32( - _mm256_max_epu32(running_max, v_10ffff), v_10ffff))) != 0xffffffff) { - return std::make_pair(nullptr, utf8_output); - } - - if (static_cast(_mm256_movemask_epi8(forbidden_bytemask)) != 0) { - return std::make_pair(nullptr, utf8_output); - } - - return std::make_pair(buf, utf8_output); -} - -// Todo: currently, this is just the haswell code, optimize for icelake kernel. -std::pair -avx512_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, - char *utf8_output) { - const char32_t *end = buf + len; - const char32_t *start = buf; - - const __m256i v_0000 = _mm256_setzero_si256(); - const __m256i v_ffff0000 = _mm256_set1_epi32((uint32_t)0xffff0000); - const __m256i v_ff80 = _mm256_set1_epi16((uint16_t)0xff80); - const __m256i v_f800 = _mm256_set1_epi16((uint16_t)0xf800); - const __m256i v_c080 = _mm256_set1_epi16((uint16_t)0xc080); - const __m256i v_7fffffff = _mm256_set1_epi32((uint32_t)0x7fffffff); - const __m256i v_10ffff = _mm256_set1_epi32((uint32_t)0x10ffff); - - const size_t safety_margin = - 12; // to avoid overruns, see issue - // https://github.com/simdutf/simdutf/issues/92 - - while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { - __m256i in = _mm256_loadu_si256((__m256i *)buf); - __m256i nextin = _mm256_loadu_si256((__m256i *)buf + 1); - // Check for too large input - const __m256i max_input = - _mm256_max_epu32(_mm256_max_epu32(in, nextin), v_10ffff); - if (static_cast(_mm256_movemask_epi8( - _mm256_cmpeq_epi32(max_input, v_10ffff))) != 0xffffffff) { - return std::make_pair(result(error_code::TOO_LARGE, buf - start), - utf8_output); - } - - // Pack 32-bit UTF-32 code units to 16-bit UTF-16 code units with unsigned - // saturation - __m256i in_16 = _mm256_packus_epi32(_mm256_and_si256(in, v_7fffffff), - _mm256_and_si256(nextin, v_7fffffff)); - in_16 = _mm256_permute4x64_epi64(in_16, 0b11011000); - - // Try to apply UTF-16 => UTF-8 routine on 256 bits - // (haswell/avx2_convert_utf16_to_utf8.cpp) - - if (_mm256_testz_si256(in_16, v_ff80)) { // ASCII fast path!!!! - // 1. pack the bytes - const __m128i utf8_packed = _mm_packus_epi16( - _mm256_castsi256_si128(in_16), _mm256_extractf128_si256(in_16, 1)); - // 2. store (16 bytes) - _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); - // 3. adjust pointers - buf += 16; - utf8_output += 16; - continue; // we are done for this round! - } - // no bits set above 7th bit - const __m256i one_byte_bytemask = - _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_ff80), v_0000); - const uint32_t one_byte_bitmask = - static_cast(_mm256_movemask_epi8(one_byte_bytemask)); - - // no bits set above 11th bit - const __m256i one_or_two_bytes_bytemask = - _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_0000); - const uint32_t one_or_two_bytes_bitmask = - static_cast(_mm256_movemask_epi8(one_or_two_bytes_bytemask)); - if (one_or_two_bytes_bitmask == 0xffffffff) { - // 1. prepare 2-byte values - // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 - // expected output : [110a|aaaa|10bb|bbbb] x 8 - const __m256i v_1f00 = _mm256_set1_epi16((int16_t)0x1f00); - const __m256i v_003f = _mm256_set1_epi16((int16_t)0x003f); - - // t0 = [000a|aaaa|bbbb|bb00] - const __m256i t0 = _mm256_slli_epi16(in_16, 2); - // t1 = [000a|aaaa|0000|0000] - const __m256i t1 = _mm256_and_si256(t0, v_1f00); - // t2 = [0000|0000|00bb|bbbb] - const __m256i t2 = _mm256_and_si256(in_16, v_003f); - // t3 = [000a|aaaa|00bb|bbbb] - const __m256i t3 = _mm256_or_si256(t1, t2); - // t4 = [110a|aaaa|10bb|bbbb] - const __m256i t4 = _mm256_or_si256(t3, v_c080); - - // 2. merge ASCII and 2-byte codewords - const __m256i utf8_unpacked = - _mm256_blendv_epi8(t4, in_16, one_byte_bytemask); - - // 3. prepare bitmask for 8-bit lookup - const uint32_t M0 = one_byte_bitmask & 0x55555555; - const uint32_t M1 = M0 >> 7; - const uint32_t M2 = (M1 | M0) & 0x00ff00ff; - // 4. pack the bytes - - const uint8_t *row = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0]; - const uint8_t *row_2 = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2 >> - 16)][0]; - - const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1)); - const __m128i shuffle_2 = _mm_loadu_si128((__m128i *)(row_2 + 1)); - - const __m256i utf8_packed = _mm256_shuffle_epi8( - utf8_unpacked, _mm256_setr_m128i(shuffle, shuffle_2)); - // 5. store bytes - _mm_storeu_si128((__m128i *)utf8_output, - _mm256_castsi256_si128(utf8_packed)); - utf8_output += row[0]; - _mm_storeu_si128((__m128i *)utf8_output, - _mm256_extractf128_si256(utf8_packed, 1)); - utf8_output += row_2[0]; - - // 6. adjust pointers - buf += 16; - continue; - } - // Must check for overflow in packing - const __m256i saturation_bytemask = _mm256_cmpeq_epi32( - _mm256_and_si256(_mm256_or_si256(in, nextin), v_ffff0000), v_0000); - const uint32_t saturation_bitmask = - static_cast(_mm256_movemask_epi8(saturation_bytemask)); - if (saturation_bitmask == 0xffffffff) { - // case: code units from register produce either 1, 2 or 3 UTF-8 bytes - - // Check for illegal surrogate code units - const __m256i v_d800 = _mm256_set1_epi16((uint16_t)0xd800); - const __m256i forbidden_bytemask = - _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_d800); - if (static_cast(_mm256_movemask_epi8(forbidden_bytemask)) != - 0x0) { - return std::make_pair(result(error_code::SURROGATE, buf - start), - utf8_output); - } - - const __m256i dup_even = _mm256_setr_epi16( - 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e, - 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); - - /* In this branch we handle three cases: - 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - - single UFT-8 byte - 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two - UTF-8 bytes - 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - - three UTF-8 bytes - - We expand the input word (16-bit) into two code units (32-bit), thus - we have room for four bytes. However, we need five distinct bit - layouts. Note that the last byte in cases #2 and #3 is the same. - - We precompute byte 1 for case #1 and the common byte for cases #2 & #3 - in register t2. - - We precompute byte 1 for case #3 and -- **conditionally** -- precompute - either byte 1 for case #2 or byte 2 for case #3. Note that they - differ by exactly one bit. - - Finally from these two code units we build proper UTF-8 sequence, taking - into account the case (i.e, the number of bytes to write). - */ - /** - * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: - * t2 => [0ccc|cccc] [10cc|cccc] - * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) - */ -#define simdutf_vec(x) _mm256_set1_epi16(static_cast(x)) - // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - const __m256i t0 = _mm256_shuffle_epi8(in_16, dup_even); - // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] - const __m256i t1 = _mm256_and_si256(t0, simdutf_vec(0b0011111101111111)); - // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - const __m256i t2 = _mm256_or_si256(t1, simdutf_vec(0b1000000000000000)); - - // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] - const __m256i s0 = _mm256_srli_epi16(in_16, 4); - // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] - const __m256i s1 = _mm256_and_si256(s0, simdutf_vec(0b0000111111111100)); - // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] - const __m256i s2 = _mm256_maddubs_epi16(s1, simdutf_vec(0x0140)); - // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - const __m256i s3 = _mm256_or_si256(s2, simdutf_vec(0b1100000011100000)); - const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, - simdutf_vec(0b0100000000000000)); - const __m256i s4 = _mm256_xor_si256(s3, m0); -#undef simdutf_vec - - // 4. expand code units 16-bit => 32-bit - const __m256i out0 = _mm256_unpacklo_epi16(t2, s4); - const __m256i out1 = _mm256_unpackhi_epi16(t2, s4); - - // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - const uint32_t mask = (one_byte_bitmask & 0x55555555) | - (one_or_two_bytes_bitmask & 0xaaaaaaaa); - // Due to the wider registers, the following path is less likely to be - // useful. - /*if(mask == 0) { - // We only have three-byte code units. Use fast path. - const __m256i shuffle = - _mm256_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1, - 2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1); const __m256i utf8_0 = - _mm256_shuffle_epi8(out0, shuffle); const __m256i utf8_1 = - _mm256_shuffle_epi8(out1, shuffle); - _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_0)); - utf8_output += 12; - _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_1)); - utf8_output += 12; - _mm_storeu_si128((__m128i*)utf8_output, - _mm256_extractf128_si256(utf8_0,1)); utf8_output += 12; - _mm_storeu_si128((__m128i*)utf8_output, - _mm256_extractf128_si256(utf8_1,1)); utf8_output += 12; buf += 16; - continue; - }*/ - const uint8_t mask0 = uint8_t(mask); - const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; - const __m128i shuffle0 = _mm_loadu_si128((__m128i *)(row0 + 1)); - const __m128i utf8_0 = - _mm_shuffle_epi8(_mm256_castsi256_si128(out0), shuffle0); - - const uint8_t mask1 = static_cast(mask >> 8); - const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; - const __m128i shuffle1 = _mm_loadu_si128((__m128i *)(row1 + 1)); - const __m128i utf8_1 = - _mm_shuffle_epi8(_mm256_castsi256_si128(out1), shuffle1); - - const uint8_t mask2 = static_cast(mask >> 16); - const uint8_t *row2 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask2][0]; - const __m128i shuffle2 = _mm_loadu_si128((__m128i *)(row2 + 1)); - const __m128i utf8_2 = - _mm_shuffle_epi8(_mm256_extractf128_si256(out0, 1), shuffle2); - - const uint8_t mask3 = static_cast(mask >> 24); - const uint8_t *row3 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask3][0]; - const __m128i shuffle3 = _mm_loadu_si128((__m128i *)(row3 + 1)); - const __m128i utf8_3 = - _mm_shuffle_epi8(_mm256_extractf128_si256(out1, 1), shuffle3); - - _mm_storeu_si128((__m128i *)utf8_output, utf8_0); - utf8_output += row0[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_1); - utf8_output += row1[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_2); - utf8_output += row2[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_3); - utf8_output += row3[0]; - buf += 16; - } else { - // case: at least one 32-bit word is larger than 0xFFFF <=> it will - // produce four UTF-8 bytes. Let us do a scalar fallback. It may seem - // wasteful to use scalar code, but being efficient with SIMD may require - // large, non-trivial tables? - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint32_t word = buf[k]; - if ((word & 0xFFFFFF80) == 0) { // 1-byte (ASCII) - *utf8_output++ = char(word); - } else if ((word & 0xFFFFF800) == 0) { // 2-byte - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else if ((word & 0xFFFF0000) == 0) { // 3-byte - if (word >= 0xD800 && word <= 0xDFFF) { - return std::make_pair( - result(error_code::SURROGATE, buf - start + k), utf8_output); - } - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else { // 4-byte - if (word > 0x10FFFF) { - return std::make_pair( - result(error_code::TOO_LARGE, buf - start + k), utf8_output); - } - *utf8_output++ = char((word >> 18) | 0b11110000); - *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } - } - buf += k; - } - } // while - - return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output); -} -/* end file src/icelake/icelake_convert_utf32_to_utf8.inl.cpp */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -/* begin file src/icelake/icelake_convert_utf32_to_utf16.inl.cpp */ -// file included directly - -template -std::pair -avx512_convert_utf32_to_utf16(const char32_t *buf, size_t len, - char16_t *utf16_output) { - const char32_t *end = buf + len; - __mmask32 forbidden_bytemask = 0; - const __m512i v_00000000 = _mm512_setzero_si512(); - const __m512i v_ffff0000 = _mm512_set1_epi32((int32_t)0xffff0000); - const __m512i v_f800 = _mm512_set1_epi32((uint32_t)0xf800); - const __m512i v_d800 = _mm512_set1_epi32((uint32_t)0xd800); - const __m512i v_10ffff = _mm512_set1_epi32(0x10FFFF); - const __m512i v_10000 = _mm512_set1_epi32(0x10000); - const __m512i v_3ff0000 = _mm512_set1_epi32(0x3FF0000); - const __m512i v_3ff = _mm512_set1_epi32(0x3FF); - const __m512i v_dc00d800 = _mm512_set1_epi32((int32_t)0xDC00D800); - - while (end - buf >= std::ptrdiff_t(16)) { - __m512i in = _mm512_loadu_si512(buf); - - // no bits set above 16th bit <=> can pack to UTF16 without surrogate pairs - const __mmask16 saturation_bitmask = - _mm512_cmpeq_epi32_mask(_mm512_and_si512(in, v_ffff0000), v_00000000); - - if (saturation_bitmask == 0xffff) { - forbidden_bytemask |= - _mm512_cmpeq_epi32_mask(_mm512_and_si512(in, v_f800), v_d800); - - __m256i utf16_packed = _mm512_cvtepi32_epi16(in); - if (big_endian) { - const __m256i swap = _mm256_setr_epi8( - 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 1, 0, 3, 2, 5, - 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - utf16_packed = _mm256_shuffle_epi8(utf16_packed, swap); - } - _mm256_storeu_si256((__m256i *)utf16_output, utf16_packed); - utf16_output += 16; - buf += 16; - } else { - // saturation_bitmask == 1 words will generate 1 utf16 char, - // and saturation_bitmask == 0 words will generate 2 utf16 chars assuming - // no errors. Thus we need a output_mask which has the structure b_2i = 1, - // b_2i+1 = !saturation_bitmask_i - const __mmask32 output_mask = ~_pdep_u32(saturation_bitmask, 0xAAAAAAAA); - const __mmask16 surrogate_bitmask = __mmask16(~saturation_bitmask); - __mmask32 error = _mm512_mask_cmpeq_epi32_mask( - saturation_bitmask, _mm512_and_si512(in, v_f800), v_d800); - error |= _mm512_mask_cmpgt_epu32_mask(surrogate_bitmask, in, v_10ffff); - if (simdutf_unlikely(error)) { - return std::make_pair(nullptr, utf16_output); - } - __m512i v1, v2, v; - // for the bits saturation_bitmask == 0, we need to unpack the 32-bit word - // into two 16 bit words corresponding to high_surrogate and - // low_surrogate. Once the bits are unpacked and merged, the output will - // be compressed as per output_mask. - in = _mm512_mask_sub_epi32(in, surrogate_bitmask, in, v_10000); - v1 = _mm512_mask_slli_epi32(in, surrogate_bitmask, in, 16); - v1 = _mm512_mask_and_epi32(in, surrogate_bitmask, v1, v_3ff0000); - v2 = _mm512_mask_srli_epi32(in, surrogate_bitmask, in, 10); - v2 = _mm512_mask_and_epi32(in, surrogate_bitmask, v2, v_3ff); - v = _mm512_or_si512(v1, v2); - in = _mm512_mask_add_epi32(in, surrogate_bitmask, v, v_dc00d800); - if (big_endian) { - const __m512i swap_512 = _mm512_set_epi8( - 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, - 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, - 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, - 2, 3, 0, 1); - in = _mm512_shuffle_epi8(in, swap_512); - } - // we deliberately avoid _mm512_mask_compressstoreu_epi16 for portability - // (AMD Zen4 has terrible performance with it, it is effectively broken) - __m512i compressed = _mm512_maskz_compress_epi16(output_mask, in); - auto written_out = _mm_popcnt_u32(output_mask); - _mm512_mask_storeu_epi16(utf16_output, _bzhi_u32(0xFFFFFFFF, written_out), - compressed); - //_mm512_mask_compressstoreu_epi16(utf16_output, output_mask, in); - utf16_output += written_out; - buf += 16; - } - } - - size_t remaining_len = size_t(end - buf); - if (remaining_len) { - __mmask16 input_mask = __mmask16((1 << remaining_len) - 1); - __m512i in = _mm512_maskz_loadu_epi32(input_mask, buf); - const __mmask16 saturation_bitmask = - _mm512_cmpeq_epi32_mask(_mm512_and_si512(in, v_ffff0000), v_00000000) & - input_mask; - if (saturation_bitmask == input_mask) { - forbidden_bytemask |= - _mm512_cmpeq_epi32_mask(_mm512_and_si512(in, v_f800), v_d800); - - __m256i utf16_packed = _mm512_cvtepi32_epi16(in); - if (big_endian) { - const __m256i swap = _mm256_setr_epi8( - 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 1, 0, 3, 2, 5, - 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - utf16_packed = _mm256_shuffle_epi8(utf16_packed, swap); - } - _mm256_mask_storeu_epi16(utf16_output, input_mask, utf16_packed); - utf16_output += remaining_len; - buf += remaining_len; - } else { - const __mmask32 output_max_mask = (1 << (remaining_len * 2)) - 1; - const __mmask32 output_mask = - (~_pdep_u32(saturation_bitmask, 0xAAAAAAAA)) & output_max_mask; - const __mmask16 surrogate_bitmask = - __mmask16(~saturation_bitmask) & input_mask; - __mmask32 error = _mm512_mask_cmpeq_epi32_mask( - saturation_bitmask, _mm512_and_si512(in, v_f800), v_d800); - error |= _mm512_mask_cmpgt_epu32_mask(surrogate_bitmask, in, v_10ffff); - if (simdutf_unlikely(error)) { - return std::make_pair(nullptr, utf16_output); - } - __m512i v1, v2, v; - in = _mm512_mask_sub_epi32(in, surrogate_bitmask, in, v_10000); - v1 = _mm512_mask_slli_epi32(in, surrogate_bitmask, in, 16); - v1 = _mm512_mask_and_epi32(in, surrogate_bitmask, v1, v_3ff0000); - v2 = _mm512_mask_srli_epi32(in, surrogate_bitmask, in, 10); - v2 = _mm512_mask_and_epi32(in, surrogate_bitmask, v2, v_3ff); - v = _mm512_or_si512(v1, v2); - in = _mm512_mask_add_epi32(in, surrogate_bitmask, v, v_dc00d800); - if (big_endian) { - const __m512i swap_512 = _mm512_set_epi8( - 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, - 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, - 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, - 2, 3, 0, 1); - in = _mm512_shuffle_epi8(in, swap_512); - } - // we deliberately avoid _mm512_mask_compressstoreu_epi16 for portability - // (AMD Zen4 has terrible performance with it, it is effectively broken) - __m512i compressed = _mm512_maskz_compress_epi16(output_mask, in); - auto written_out = _mm_popcnt_u32(output_mask); - _mm512_mask_storeu_epi16(utf16_output, _bzhi_u32(0xFFFFFFFF, written_out), - compressed); - //_mm512_mask_compressstoreu_epi16(utf16_output, output_mask, in); - utf16_output += written_out; - buf += remaining_len; - } - } - - // check for invalid input - if (forbidden_bytemask != 0) { - return std::make_pair(nullptr, utf16_output); - } - - return std::make_pair(buf, utf16_output); -} - -template -std::pair -avx512_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, - char16_t *utf16_output) { - const char32_t *start = buf; - const char32_t *end = buf + len; - const __m512i v_00000000 = _mm512_setzero_si512(); - const __m512i v_ffff0000 = _mm512_set1_epi32((int32_t)0xffff0000); - const __m512i v_f800 = _mm512_set1_epi32((uint32_t)0xf800); - const __m512i v_d800 = _mm512_set1_epi32((uint32_t)0xd800); - const __m512i v_10ffff = _mm512_set1_epi32(0x10FFFF); - const __m512i v_10000 = _mm512_set1_epi32(0x10000); - const __m512i v_3ff0000 = _mm512_set1_epi32(0x3FF0000); - const __m512i v_3ff = _mm512_set1_epi32(0x3FF); - const __m512i v_dc00d800 = _mm512_set1_epi32((int32_t)0xDC00D800); - int error_idx = 0; - error_code code = error_code::SUCCESS; - bool err = false; - - while (end - buf >= std::ptrdiff_t(16)) { - __m512i in = _mm512_loadu_si512(buf); - - // no bits set above 16th bit <=> can pack to UTF16 without surrogate pairs - const __mmask16 saturation_bitmask = - _mm512_cmpeq_epi32_mask(_mm512_and_si512(in, v_ffff0000), v_00000000); - - if (saturation_bitmask == 0xffff) { - __mmask32 forbidden_bytemask = - _mm512_cmpeq_epi32_mask(_mm512_and_si512(in, v_f800), v_d800); - - __m256i utf16_packed = _mm512_cvtepi32_epi16(in); - if (big_endian) { - const __m256i swap = _mm256_setr_epi8( - 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 1, 0, 3, 2, 5, - 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - utf16_packed = _mm256_shuffle_epi8(utf16_packed, swap); - } - if (simdutf_unlikely(forbidden_bytemask)) { - int idx = _tzcnt_u32(forbidden_bytemask); - _mm256_mask_storeu_epi16( - utf16_output, __mmask16(_blsmsk_u32(forbidden_bytemask) >> 1), - utf16_packed); - return std::make_pair(result(error_code::SURROGATE, buf - start + idx), - utf16_output + idx); - } - _mm256_storeu_si256((__m256i *)utf16_output, utf16_packed); - utf16_output += 16; - } else { - __mmask32 output_mask = ~_pdep_u32(saturation_bitmask, 0xAAAAAAAA); - const __mmask16 surrogate_bitmask = __mmask16(~saturation_bitmask); - __mmask32 error_surrogate = _mm512_mask_cmpeq_epi32_mask( - saturation_bitmask, _mm512_and_si512(in, v_f800), v_d800); - __mmask32 error_too_large = - _mm512_mask_cmpgt_epu32_mask(surrogate_bitmask, in, v_10ffff); - if (simdutf_unlikely(error_surrogate || error_too_large)) { - // Need to find the lowest set bit between the two error masks - // Need to also write the partial chunk until the error index to output. - int large_idx = _tzcnt_u32(error_too_large); - int surrogate_idx = _tzcnt_u32(error_surrogate); - err = true; - if (large_idx < surrogate_idx) { - code = error_code::TOO_LARGE; - error_idx = large_idx; - } else { - code = error_code::SURROGATE; - error_idx = surrogate_idx; - } - output_mask &= ((1 << (2 * error_idx)) - 1); - } - __m512i v1, v2, v; - in = _mm512_mask_sub_epi32(in, surrogate_bitmask, in, v_10000); - v1 = _mm512_mask_slli_epi32(in, surrogate_bitmask, in, 16); - v1 = _mm512_mask_and_epi32(in, surrogate_bitmask, v1, v_3ff0000); - v2 = _mm512_mask_srli_epi32(in, surrogate_bitmask, in, 10); - v2 = _mm512_mask_and_epi32(in, surrogate_bitmask, v2, v_3ff); - v = _mm512_or_si512(v1, v2); - in = _mm512_mask_add_epi32(in, surrogate_bitmask, v, v_dc00d800); - if (big_endian) { - const __m512i swap_512 = _mm512_set_epi8( - 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, - 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, - 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, - 2, 3, 0, 1); - in = _mm512_shuffle_epi8(in, swap_512); - } - // we deliberately avoid _mm512_mask_compressstoreu_epi16 for portability - // (AMD Zen4 has terrible performance with it, it is effectively broken) - __m512i compressed = _mm512_maskz_compress_epi16(output_mask, in); - auto written_out = _mm_popcnt_u32(output_mask); - _mm512_mask_storeu_epi16(utf16_output, _bzhi_u32(0xFFFFFFFF, written_out), - compressed); - //_mm512_mask_compressstoreu_epi16(utf16_output, output_mask, in); - utf16_output += written_out; - if (simdutf_unlikely(err)) { - return std::make_pair(result(code, buf - start + error_idx), - utf16_output); - } - } - buf += 16; - } - - size_t remaining_len = size_t(end - buf); - if (remaining_len) { - __mmask16 input_mask = __mmask16((1 << remaining_len) - 1); - __m512i in = _mm512_maskz_loadu_epi32(input_mask, buf); - const __mmask16 saturation_bitmask = - _mm512_cmpeq_epi32_mask(_mm512_and_si512(in, v_ffff0000), v_00000000) & - input_mask; - if (saturation_bitmask == input_mask) { - __mmask32 forbidden_bytemask = - _mm512_cmpeq_epi32_mask(_mm512_and_si512(in, v_f800), v_d800); - __m256i utf16_packed = _mm512_cvtepi32_epi16(in); - if (big_endian) { - const __m256i swap = _mm256_setr_epi8( - 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 1, 0, 3, 2, 5, - 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - utf16_packed = _mm256_shuffle_epi8(utf16_packed, swap); - } - if (simdutf_unlikely(forbidden_bytemask)) { - int idx = _tzcnt_u32(forbidden_bytemask); - _mm256_mask_storeu_epi16( - utf16_output, __mmask16(_blsmsk_u32(forbidden_bytemask) >> 1), - utf16_packed); - return std::make_pair(result(error_code::SURROGATE, buf - start + idx), - utf16_output + idx); - } - _mm256_mask_storeu_epi16(utf16_output, input_mask, utf16_packed); - utf16_output += remaining_len; - } else { - const __mmask32 output_max_mask = (1 << (remaining_len * 2)) - 1; - __mmask32 output_mask = - (~_pdep_u32(saturation_bitmask, 0xAAAAAAAA)) & output_max_mask; - const __mmask16 surrogate_bitmask = - __mmask16(~saturation_bitmask) & input_mask; - __mmask32 error_surrogate = _mm512_mask_cmpeq_epi32_mask( - saturation_bitmask, _mm512_and_si512(in, v_f800), v_d800); - __mmask32 error_too_large = - _mm512_mask_cmpgt_epu32_mask(surrogate_bitmask, in, v_10ffff); - if (simdutf_unlikely(error_surrogate || error_too_large)) { - int large_idx = _tzcnt_u32(error_too_large); - int surrogate_idx = _tzcnt_u32(error_surrogate); - err = true; - if (large_idx < surrogate_idx) { - code = error_code::TOO_LARGE; - error_idx = large_idx; - } else { - code = error_code::SURROGATE; - error_idx = surrogate_idx; - } - output_mask &= ((1 << (2 * error_idx)) - 1); - } - __m512i v1, v2, v; - in = _mm512_mask_sub_epi32(in, surrogate_bitmask, in, v_10000); - v1 = _mm512_mask_slli_epi32(in, surrogate_bitmask, in, 16); - v1 = _mm512_mask_and_epi32(in, surrogate_bitmask, v1, v_3ff0000); - v2 = _mm512_mask_srli_epi32(in, surrogate_bitmask, in, 10); - v2 = _mm512_mask_and_epi32(in, surrogate_bitmask, v2, v_3ff); - v = _mm512_or_si512(v1, v2); - in = _mm512_mask_add_epi32(in, surrogate_bitmask, v, v_dc00d800); - if (big_endian) { - const __m512i swap_512 = _mm512_set_epi8( - 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, - 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, - 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, - 2, 3, 0, 1); - in = _mm512_shuffle_epi8(in, swap_512); - } - // we deliberately avoid _mm512_mask_compressstoreu_epi16 for portability - // (AMD Zen4 has terrible performance with it, it is effectively broken) - __m512i compressed = _mm512_maskz_compress_epi16(output_mask, in); - auto written_out = _mm_popcnt_u32(output_mask); - _mm512_mask_storeu_epi16(utf16_output, _bzhi_u32(0xFFFFFFFF, written_out), - compressed); - //_mm512_mask_compressstoreu_epi16(utf16_output, output_mask, in); - utf16_output += written_out; - if (simdutf_unlikely(err)) { - return std::make_pair(result(code, buf - start + error_idx), - utf16_output); - } - } - buf += remaining_len; - } - - return std::make_pair(result(error_code::SUCCESS, buf - start), utf16_output); -} -/* end file src/icelake/icelake_convert_utf32_to_utf16.inl.cpp */ -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_ASCII -/* begin file src/icelake/icelake_ascii_validation.inl.cpp */ -// file included directly - -bool validate_ascii(const char *buf, size_t len) { - const char *end = buf + len; - const __m512i ascii = _mm512_set1_epi8((uint8_t)0x80); - __m512i running_or = _mm512_setzero_si512(); - for (; end - buf >= 64; buf += 64) { - const __m512i utf8 = _mm512_loadu_si512((const __m512i *)buf); - running_or = _mm512_ternarylogic_epi32(running_or, utf8, ascii, - 0xf8); // running_or | (utf8 & ascii) - } - if (buf < end) { - const __m512i utf8 = _mm512_maskz_loadu_epi8( - (uint64_t(1) << (end - buf)) - 1, (const __m512i *)buf); - running_or = _mm512_ternarylogic_epi32(running_or, utf8, ascii, - 0xf8); // running_or | (utf8 & ascii) - } - return (_mm512_test_epi8_mask(running_or, running_or) == 0); -} -/* end file src/icelake/icelake_ascii_validation.inl.cpp */ -#endif // SIMDUTF_FEATURE_ASCII -#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING -/* begin file src/icelake/icelake_utf32_validation.inl.cpp */ -// file included directly - -bool validate_utf32(const char32_t *buf, size_t len) { - if (simdutf_unlikely(len == 0)) { - return true; - } - const char32_t *end = buf + len; - - const __m512i offset = _mm512_set1_epi32((uint32_t)0xffff2000); - __m512i currentmax = _mm512_setzero_si512(); - __m512i currentoffsetmax = _mm512_setzero_si512(); - - while (buf < end - 16) { - __m512i utf32 = _mm512_loadu_si512((const __m512i *)buf); - buf += 16; - currentoffsetmax = - _mm512_max_epu32(_mm512_add_epi32(utf32, offset), currentoffsetmax); - currentmax = _mm512_max_epu32(utf32, currentmax); - } - - __m512i utf32 = - _mm512_maskz_loadu_epi32(__mmask16((1 << (end - buf)) - 1), buf); - currentoffsetmax = - _mm512_max_epu32(_mm512_add_epi32(utf32, offset), currentoffsetmax); - currentmax = _mm512_max_epu32(utf32, currentmax); - - const __m512i standardmax = _mm512_set1_epi32((uint32_t)0x10ffff); - const __m512i standardoffsetmax = _mm512_set1_epi32((uint32_t)0xfffff7ff); - const auto outside_range = _mm512_cmpgt_epu32_mask(currentmax, standardmax); - if (outside_range != 0) { - return false; - } - - const auto surrogate = - _mm512_cmpgt_epu32_mask(currentoffsetmax, standardoffsetmax); - if (surrogate != 0) { - return false; - } - - return true; -} -/* end file src/icelake/icelake_utf32_validation.inl.cpp */ -#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING -#if SIMDUTF_FEATURE_UTF8 -/* begin file src/icelake/icelake_convert_latin1_to_utf8.inl.cpp */ -// file included directly - -static inline size_t latin1_to_utf8_avx512_vec(__m512i input, size_t input_len, - char *utf8_output, - int mask_output) { - __mmask64 nonascii = _mm512_movepi8_mask(input); - size_t output_size = input_len + (size_t)count_ones(nonascii); - - // Mask to denote whether the byte is a leading byte that is not ascii - __mmask64 sixth = _mm512_cmpge_epu8_mask( - input, _mm512_set1_epi8(-64)); // binary representation of -64: 1100 0000 - - const uint64_t alternate_bits = UINT64_C(0x5555555555555555); - uint64_t ascii = ~nonascii; - // the bits in ascii are inverted and zeros are interspersed in between them - uint64_t maskA = ~_pdep_u64(ascii, alternate_bits); - uint64_t maskB = ~_pdep_u64(ascii >> 32, alternate_bits); - - // interleave bytes from top and bottom halves (abcd...ABCD -> aAbBcCdD) - __m512i input_interleaved = _mm512_permutexvar_epi8( - _mm512_set_epi32(0x3f1f3e1e, 0x3d1d3c1c, 0x3b1b3a1a, 0x39193818, - 0x37173616, 0x35153414, 0x33133212, 0x31113010, - 0x2f0f2e0e, 0x2d0d2c0c, 0x2b0b2a0a, 0x29092808, - 0x27072606, 0x25052404, 0x23032202, 0x21012000), - input); - - // double size of each byte, and insert the leading byte 1100 0010 - - /* - upscale the bytes to 16-bit value, adding the 0b11000000 leading byte in the - process. We adjust for the bytes that have their two most significant bits. - This takes care of the first 32 bytes, assuming we interleaved the bytes. */ - __m512i outputA = - _mm512_shldi_epi16(input_interleaved, _mm512_set1_epi8(-62), 8); - outputA = _mm512_mask_add_epi16( - outputA, (__mmask32)sixth, outputA, - _mm512_set1_epi16(1 - 0x4000)); // 1- 0x4000 = 1100 0000 0000 0001???? - - // in the second 32-bit half, set first or second option based on whether - // original input is leading byte (second case) or not (first case) - __m512i leadingB = - _mm512_mask_blend_epi16((__mmask32)(sixth >> 32), - _mm512_set1_epi16(0x00c2), // 0000 0000 1101 0010 - _mm512_set1_epi16(0x40c3)); // 0100 0000 1100 0011 - __m512i outputB = _mm512_ternarylogic_epi32( - input_interleaved, leadingB, _mm512_set1_epi16((short)0xff00), - (240 & 170) ^ 204); // (input_interleaved & 0xff00) ^ leadingB - - // prune redundant bytes - outputA = _mm512_maskz_compress_epi8(maskA, outputA); - outputB = _mm512_maskz_compress_epi8(maskB, outputB); - - size_t output_sizeA = (size_t)count_ones((uint32_t)nonascii) + 32; - - if (mask_output) { - if (input_len > 32) { // is the second half of the input vector used? - __mmask64 write_mask = _bzhi_u64(~0ULL, (unsigned int)output_sizeA); - _mm512_mask_storeu_epi8(utf8_output, write_mask, outputA); - utf8_output += output_sizeA; - write_mask = _bzhi_u64(~0ULL, (unsigned int)(output_size - output_sizeA)); - _mm512_mask_storeu_epi8(utf8_output, write_mask, outputB); - } else { - __mmask64 write_mask = _bzhi_u64(~0ULL, (unsigned int)output_size); - _mm512_mask_storeu_epi8(utf8_output, write_mask, outputA); - } - } else { - _mm512_storeu_si512(utf8_output, outputA); - utf8_output += output_sizeA; - _mm512_storeu_si512(utf8_output, outputB); - } - return output_size; -} - -static inline size_t latin1_to_utf8_avx512_branch(__m512i input, - char *utf8_output) { - __mmask64 nonascii = _mm512_movepi8_mask(input); - if (nonascii) { - return latin1_to_utf8_avx512_vec(input, 64, utf8_output, 0); - } else { - _mm512_storeu_si512(utf8_output, input); - return 64; - } -} - -size_t latin1_to_utf8_avx512_start(const char *buf, size_t len, - char *utf8_output) { - char *start = utf8_output; - size_t pos = 0; - // if there's at least 128 bytes remaining, we don't need to mask the output - for (; pos + 128 <= len; pos += 64) { - __m512i input = _mm512_loadu_si512((__m512i *)(buf + pos)); - utf8_output += latin1_to_utf8_avx512_branch(input, utf8_output); - } - // in the last 128 bytes, the first 64 may require masking the output - if (pos + 64 <= len) { - __m512i input = _mm512_loadu_si512((__m512i *)(buf + pos)); - utf8_output += latin1_to_utf8_avx512_vec(input, 64, utf8_output, 1); - pos += 64; - } - // with the last 64 bytes, the input also needs to be masked - if (pos < len) { - __mmask64 load_mask = _bzhi_u64(~0ULL, (unsigned int)(len - pos)); - __m512i input = _mm512_maskz_loadu_epi8(load_mask, (__m512i *)(buf + pos)); - utf8_output += latin1_to_utf8_avx512_vec(input, len - pos, utf8_output, 1); - } - return (size_t)(utf8_output - start); -} -/* end file src/icelake/icelake_convert_latin1_to_utf8.inl.cpp */ -#endif // SIMDUTF_FEATURE_UTF8 -#if SIMDUTF_FEATURE_UTF16 -/* begin file src/icelake/icelake_convert_latin1_to_utf16.inl.cpp */ -// file included directly -template -size_t icelake_convert_latin1_to_utf16(const char *latin1_input, size_t len, - char16_t *utf16_output) { - size_t rounded_len = len & ~0x1F; // Round down to nearest multiple of 32 - - __m512i byteflip = _mm512_setr_epi64(0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809); - for (size_t i = 0; i < rounded_len; i += 32) { - // Load 32 Latin1 characters into a 256-bit register - __m256i in = _mm256_loadu_si256((__m256i *)&latin1_input[i]); - // Zero extend each set of 8 Latin1 characters to 32 16-bit integers - __m512i out = _mm512_cvtepu8_epi16(in); - if (big_endian) { - out = _mm512_shuffle_epi8(out, byteflip); - } - // Store the results back to memory - _mm512_storeu_si512((__m512i *)&utf16_output[i], out); - } - if (rounded_len != len) { - uint32_t mask = uint32_t(1 << (len - rounded_len)) - 1; - __m256i in = _mm256_maskz_loadu_epi8(mask, latin1_input + rounded_len); - - // Zero extend each set of 8 Latin1 characters to 32 16-bit integers - __m512i out = _mm512_cvtepu8_epi16(in); - if (big_endian) { - out = _mm512_shuffle_epi8(out, byteflip); - } - // Store the results back to memory - _mm512_mask_storeu_epi16(utf16_output + rounded_len, mask, out); - } - - return len; -} -/* end file src/icelake/icelake_convert_latin1_to_utf16.inl.cpp */ -#endif // SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_UTF32 -/* begin file src/icelake/icelake_convert_latin1_to_utf32.inl.cpp */ -void avx512_convert_latin1_to_utf32(const char *buf, size_t len, - char32_t *utf32_output) { - while (len >= 16) { - // Load 16 Latin1 characters into a 128-bit register - __m128i in = _mm_loadu_si128((__m128i *)buf); - - // Zero extend each set of 8 Latin1 characters to 16 32-bit integers using - // vpmovzxbd - __m512i out = _mm512_cvtepu8_epi32(in); - - // Store the results back to memory - _mm512_storeu_si512((__m512i *)utf32_output, out); - - len -= 16; - buf += 16; - utf32_output += 16; - } - - __mmask16 mask = __mmask16((1 << len) - 1); - __m128i in = _mm_maskz_loadu_epi8(mask, buf); - __m512i out = _mm512_cvtepu8_epi32(in); - _mm512_mask_storeu_epi32((__m512i *)utf32_output, mask, out); -} -/* end file src/icelake/icelake_convert_latin1_to_utf32.inl.cpp */ -#endif // SIMDUTF_FEATURE_UTF32 -#if SIMDUTF_FEATURE_BASE64 -/* begin file src/icelake/icelake_base64.inl.cpp */ -// file included directly -/** - * References and further reading: - * - * Wojciech Muła, Daniel Lemire, Base64 encoding and decoding at almost the - * speed of a memory copy, Software: Practice and Experience 50 (2), 2020. - * https://arxiv.org/abs/1910.05109 - * - * Wojciech Muła, Daniel Lemire, Faster Base64 Encoding and Decoding using AVX2 - * Instructions, ACM Transactions on the Web 12 (3), 2018. - * https://arxiv.org/abs/1704.00605 - * - * Simon Josefsson. 2006. The Base16, Base32, and Base64 Data Encodings. - * https://tools.ietf.org/html/rfc4648. (2006). Internet Engineering Task Force, - * Request for Comments: 4648. - * - * Alfred Klomp. 2014a. Fast Base64 encoding/decoding with SSE vectorization. - * http://www.alfredklomp.com/programming/sse-base64/. (2014). - * - * Alfred Klomp. 2014b. Fast Base64 stream encoder/decoder in C99, with SIMD - * acceleration. https://github.com/aklomp/base64. (2014). - * - * Hanson Char. 2014. A Fast and Correct Base 64 Codec. (2014). - * https://aws.amazon.com/blogs/developer/a-fast-and-correct-base-64-codec/ - * - * Nick Kopp. 2013. Base64 Encoding on a GPU. - * https://www.codeproject.com/Articles/276993/Base-Encoding-on-a-GPU. (2013). - */ - -struct block64 { - __m512i chunks[1]; -}; - -template -size_t encode_base64(char *dst, const char *src, size_t srclen, - base64_options options) { - // credit: Wojciech Muła - const uint8_t *input = (const uint8_t *)src; - - uint8_t *out = (uint8_t *)dst; - static const char *lookup_tbl = - base64_url - ? "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" - : "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; - - const __m512i shuffle_input = _mm512_setr_epi32( - 0x01020001, 0x04050304, 0x07080607, 0x0a0b090a, 0x0d0e0c0d, 0x10110f10, - 0x13141213, 0x16171516, 0x191a1819, 0x1c1d1b1c, 0x1f201e1f, 0x22232122, - 0x25262425, 0x28292728, 0x2b2c2a2b, 0x2e2f2d2e); - const __m512i lookup = - _mm512_loadu_si512(reinterpret_cast(lookup_tbl)); - const __m512i multi_shifts = _mm512_set1_epi64(UINT64_C(0x3036242a1016040a)); - size_t size = srclen; - __mmask64 input_mask = 0xffffffffffff; // (1 << 48) - 1 - while (size >= 48) { - const __m512i v = _mm512_maskz_loadu_epi8( - input_mask, reinterpret_cast(input)); - const __m512i in = _mm512_permutexvar_epi8(shuffle_input, v); - const __m512i indices = _mm512_multishift_epi64_epi8(multi_shifts, in); - const __m512i result = _mm512_permutexvar_epi8(indices, lookup); - _mm512_storeu_si512(reinterpret_cast<__m512i *>(out), result); - out += 64; - input += 48; - size -= 48; - } - input_mask = ((__mmask64)1 << size) - 1; - const __m512i v = _mm512_maskz_loadu_epi8( - input_mask, reinterpret_cast(input)); - const __m512i in = _mm512_permutexvar_epi8(shuffle_input, v); - const __m512i indices = _mm512_multishift_epi64_epi8(multi_shifts, in); - bool padding_needed = - (((options & base64_url) == 0) ^ - ((options & base64_reverse_padding) == base64_reverse_padding)); - size_t padding_amount = ((size % 3) > 0) ? (3 - (size % 3)) : 0; - size_t output_len = ((size + 2) / 3) * 4; - size_t non_padded_output_len = output_len - padding_amount; - if (!padding_needed) { - output_len = non_padded_output_len; - } - __mmask64 output_mask = output_len == 64 ? (__mmask64)UINT64_MAX - : ((__mmask64)1 << output_len) - 1; - __m512i result = _mm512_mask_permutexvar_epi8( - _mm512_set1_epi8('='), ((__mmask64)1 << non_padded_output_len) - 1, - indices, lookup); - _mm512_mask_storeu_epi8(reinterpret_cast<__m512i *>(out), output_mask, - result); - return (size_t)(out - (uint8_t *)dst) + output_len; -} - -template -static inline uint64_t to_base64_mask(block64 *b, uint64_t *error, - uint64_t input_mask = UINT64_MAX) { - __m512i input = b->chunks[0]; - const __m512i ascii_space_tbl = _mm512_set_epi8( - 0, 0, 13, 12, 0, 10, 9, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 13, 12, 0, 10, - 9, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 13, 12, 0, 10, 9, 0, 0, 0, 0, 0, 0, - 0, 0, 32, 0, 0, 13, 12, 0, 10, 9, 0, 0, 0, 0, 0, 0, 0, 0, 32); - __m512i lookup0; - if (base64_url) { - lookup0 = _mm512_set_epi8( - -128, -128, -128, -128, -128, -128, 61, 60, 59, 58, 57, 56, 55, 54, 53, - 52, -128, -128, 62, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -1, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -1, - -128, -128, -1, -1, -128, -128, -128, -128, -128, -128, -128, -128, -1); - } else { - lookup0 = _mm512_set_epi8( - -128, -128, -128, -128, -128, -128, 61, 60, 59, 58, 57, 56, 55, 54, 53, - 52, 63, -128, -128, -128, 62, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -1, -128, -128, -128, -128, -128, -128, -128, -128, - -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -1, -128, - -128, -1, -1, -128, -128, -128, -128, -128, -128, -128, -128, -128); - } - __m512i lookup1; - if (base64_url) { - lookup1 = _mm512_set_epi8( - -128, -128, -128, -128, -128, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, - 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, -128, - 63, -128, -128, -128, -128, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, - 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, -128); - } else { - lookup1 = _mm512_set_epi8( - -128, -128, -128, -128, -128, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, - 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, -128, - -128, -128, -128, -128, -128, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, - 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, -128); - } - - const __m512i translated = _mm512_permutex2var_epi8(lookup0, input, lookup1); - const __m512i combined = _mm512_or_si512(translated, input); - const __mmask64 mask = _mm512_movepi8_mask(combined) & input_mask; - if (!ignore_garbage && mask) { - const __mmask64 spaces = - _mm512_cmpeq_epi8_mask(_mm512_shuffle_epi8(ascii_space_tbl, input), - input) & - input_mask; - *error = (mask ^ spaces); - } - b->chunks[0] = translated; - - return mask | (~input_mask); -} - -static inline void copy_block(block64 *b, char *output) { - _mm512_storeu_si512(reinterpret_cast<__m512i *>(output), b->chunks[0]); -} - -static inline uint64_t compress_block(block64 *b, uint64_t mask, char *output) { - uint64_t nmask = ~mask; - __m512i c = _mm512_maskz_compress_epi8(nmask, b->chunks[0]); - _mm512_storeu_si512(reinterpret_cast<__m512i *>(output), c); - return _mm_popcnt_u64(nmask); -} - -// The caller of this function is responsible to ensure that there are 64 bytes -// available from reading at src. The data is read into a block64 structure. -static inline void load_block(block64 *b, const char *src) { - b->chunks[0] = _mm512_loadu_si512(reinterpret_cast(src)); -} - -static inline void load_block_partial(block64 *b, const char *src, - __mmask64 input_mask) { - b->chunks[0] = _mm512_maskz_loadu_epi8( - input_mask, reinterpret_cast(src)); -} - -// The caller of this function is responsible to ensure that there are 128 bytes -// available from reading at src. The data is read into a block64 structure. -static inline void load_block(block64 *b, const char16_t *src) { - __m512i m1 = _mm512_loadu_si512(reinterpret_cast(src)); - __m512i m2 = _mm512_loadu_si512(reinterpret_cast(src + 32)); - __m512i p = _mm512_packus_epi16(m1, m2); - b->chunks[0] = - _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 2, 4, 6, 1, 3, 5, 7), p); -} - -static inline void load_block_partial(block64 *b, const char16_t *src, - __mmask64 input_mask) { - __m512i m1 = _mm512_maskz_loadu_epi16((__mmask32)input_mask, - reinterpret_cast(src)); - __m512i m2 = - _mm512_maskz_loadu_epi16((__mmask32)(input_mask >> 32), - reinterpret_cast(src + 32)); - __m512i p = _mm512_packus_epi16(m1, m2); - b->chunks[0] = - _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 2, 4, 6, 1, 3, 5, 7), p); -} - -static inline void base64_decode(char *out, __m512i str) { - const __m512i merge_ab_and_bc = - _mm512_maddubs_epi16(str, _mm512_set1_epi32(0x01400140)); - const __m512i merged = - _mm512_madd_epi16(merge_ab_and_bc, _mm512_set1_epi32(0x00011000)); - const __m512i pack = _mm512_set_epi8( - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60, 61, 62, 56, 57, 58, - 52, 53, 54, 48, 49, 50, 44, 45, 46, 40, 41, 42, 36, 37, 38, 32, 33, 34, - 28, 29, 30, 24, 25, 26, 20, 21, 22, 16, 17, 18, 12, 13, 14, 8, 9, 10, 4, - 5, 6, 0, 1, 2); - const __m512i shuffled = _mm512_permutexvar_epi8(pack, merged); - _mm512_mask_storeu_epi8( - (__m512i *)out, 0xffffffffffff, - shuffled); // mask would be 0xffffffffffff since we write 48 bytes. -} -// decode 64 bytes and output 48 bytes -static inline void base64_decode_block(char *out, const char *src) { - base64_decode(out, - _mm512_loadu_si512(reinterpret_cast(src))); -} -static inline void base64_decode_block(char *out, block64 *b) { - base64_decode(out, b->chunks[0]); -} - -template -full_result -compress_decode_base64(char *dst, const chartype *src, size_t srclen, - base64_options options, - last_chunk_handling_options last_chunk_options) { - (void)options; - const uint8_t *to_base64 = base64_url ? tables::base64::to_base64_url_value - : tables::base64::to_base64_value; - size_t equallocation = - srclen; // location of the first padding character if any - size_t equalsigns = 0; - // skip trailing spaces - while (!ignore_garbage && srclen > 0 && - scalar::base64::is_eight_byte(src[srclen - 1]) && - to_base64[uint8_t(src[srclen - 1])] == 64) { - srclen--; - } - if (!ignore_garbage && srclen > 0 && src[srclen - 1] == '=') { - equallocation = srclen - 1; - srclen--; - equalsigns = 1; - // skip trailing spaces - while (srclen > 0 && scalar::base64::is_eight_byte(src[srclen - 1]) && - to_base64[uint8_t(src[srclen - 1])] == 64) { - srclen--; - } - if (srclen > 0 && src[srclen - 1] == '=') { - equallocation = srclen - 1; - srclen--; - equalsigns = 2; - } - } - if (srclen == 0) { - if (!ignore_garbage && equalsigns > 0) { - if (last_chunk_options == last_chunk_handling_options::strict) { - return {BASE64_INPUT_REMAINDER, 0, 0}; - } else if (last_chunk_options == - last_chunk_handling_options::stop_before_partial) { - return {SUCCESS, 0, 0}; - } - return {INVALID_BASE64_CHARACTER, equallocation, 0}; - } - return {SUCCESS, 0, 0}; - } - const chartype *const srcinit = src; - const char *const dstinit = dst; - const chartype *const srcend = src + srclen; - - // figure out why block_size == 2 is sometimes best??? - constexpr size_t block_size = 6; - char buffer[block_size * 64]; - char *bufferptr = buffer; - if (srclen >= 64) { - const chartype *const srcend64 = src + srclen - 64; - while (src <= srcend64) { - block64 b; - load_block(&b, src); - src += 64; - uint64_t error = 0; - uint64_t badcharmask = - to_base64_mask(&b, &error); - if (!ignore_garbage && error) { - src -= 64; - size_t error_offset = _tzcnt_u64(error); - return {error_code::INVALID_BASE64_CHARACTER, - size_t(src - srcinit + error_offset), size_t(dst - dstinit)}; - } - if (badcharmask != 0) { - // optimization opportunity: check for simple masks like those made of - // continuous 1s followed by continuous 0s. And masks containing a - // single bad character. - bufferptr += compress_block(&b, badcharmask, bufferptr); - } else if (bufferptr != buffer) { - copy_block(&b, bufferptr); - bufferptr += 64; - } else { - base64_decode_block(dst, &b); - dst += 48; - } - if (bufferptr >= (block_size - 1) * 64 + buffer) { - for (size_t i = 0; i < (block_size - 1); i++) { - base64_decode_block(dst, buffer + i * 64); - dst += 48; - } - std::memcpy(buffer, buffer + (block_size - 1) * 64, - 64); // 64 might be too much - bufferptr -= (block_size - 1) * 64; - } - } - } - - int last_block_len = (int)(srcend - src); - if (last_block_len != 0) { - __mmask64 input_mask = ((__mmask64)1 << last_block_len) - 1; - block64 b; - load_block_partial(&b, src, input_mask); - uint64_t error = 0; - uint64_t badcharmask = - to_base64_mask(&b, &error, input_mask); - if (!ignore_garbage && error) { - size_t error_offset = _tzcnt_u64(error); - return {error_code::INVALID_BASE64_CHARACTER, - size_t(src - srcinit + error_offset), size_t(dst - dstinit)}; - } - src += last_block_len; - bufferptr += compress_block(&b, badcharmask, bufferptr); - } - - char *buffer_start = buffer; - for (; buffer_start + 64 <= bufferptr; buffer_start += 64) { - base64_decode_block(dst, buffer_start); - dst += 48; - } - - if ((bufferptr - buffer_start) != 0) { - size_t rem = (bufferptr - buffer_start); - int idx = rem % 4; - __mmask64 mask = ((__mmask64)1 << rem) - 1; - __m512i input = _mm512_maskz_loadu_epi8(mask, buffer_start); - size_t output_len = (rem / 4) * 3; - __mmask64 output_mask = mask >> (rem - output_len); - const __m512i merge_ab_and_bc = - _mm512_maddubs_epi16(input, _mm512_set1_epi32(0x01400140)); - const __m512i merged = - _mm512_madd_epi16(merge_ab_and_bc, _mm512_set1_epi32(0x00011000)); - const __m512i pack = _mm512_set_epi8( - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60, 61, 62, 56, 57, 58, - 52, 53, 54, 48, 49, 50, 44, 45, 46, 40, 41, 42, 36, 37, 38, 32, 33, 34, - 28, 29, 30, 24, 25, 26, 20, 21, 22, 16, 17, 18, 12, 13, 14, 8, 9, 10, 4, - 5, 6, 0, 1, 2); - const __m512i shuffled = _mm512_permutexvar_epi8(pack, merged); - - if (!ignore_garbage && - last_chunk_options == last_chunk_handling_options::strict && - (idx != 1) && ((idx + equalsigns) & 3) != 0) { - // The partial chunk was at src - idx - _mm512_mask_storeu_epi8((__m512i *)dst, output_mask, shuffled); - dst += output_len; - return {BASE64_INPUT_REMAINDER, size_t(src - srcinit), - size_t(dst - dstinit)}; - } else if (!ignore_garbage && - last_chunk_options == - last_chunk_handling_options::stop_before_partial && - (idx != 1) && ((idx + equalsigns) & 3) != 0) { - // Rewind src to before partial chunk - _mm512_mask_storeu_epi8((__m512i *)dst, output_mask, shuffled); - dst += output_len; - src -= idx; - } else { - if (idx == 2) { - if (!ignore_garbage && - last_chunk_options == last_chunk_handling_options::strict) { - uint32_t triple = (uint32_t(bufferptr[-2]) << 3 * 6) + - (uint32_t(bufferptr[-1]) << 2 * 6); - if (triple & 0xffff) { - _mm512_mask_storeu_epi8((__m512i *)dst, output_mask, shuffled); - dst += output_len; - return {BASE64_EXTRA_BITS, size_t(src - srcinit), - size_t(dst - dstinit)}; - } - } - output_mask = (output_mask << 1) | 1; - output_len += 1; - _mm512_mask_storeu_epi8((__m512i *)dst, output_mask, shuffled); - dst += output_len; - } else if (idx == 3) { - if (!ignore_garbage && - last_chunk_options == last_chunk_handling_options::strict) { - uint32_t triple = (uint32_t(bufferptr[-3]) << 3 * 6) + - (uint32_t(bufferptr[-2]) << 2 * 6) + - (uint32_t(bufferptr[-1]) << 1 * 6); - if (triple & 0xff) { - _mm512_mask_storeu_epi8((__m512i *)dst, output_mask, shuffled); - dst += output_len; - return {BASE64_EXTRA_BITS, size_t(src - srcinit), - size_t(dst - dstinit)}; - } - } - output_mask = (output_mask << 2) | 3; - output_len += 2; - _mm512_mask_storeu_epi8((__m512i *)dst, output_mask, shuffled); - dst += output_len; - } else if (!ignore_garbage && idx == 1) { - _mm512_mask_storeu_epi8((__m512i *)dst, output_mask, shuffled); - dst += output_len; - return {BASE64_INPUT_REMAINDER, size_t(src - srcinit), - size_t(dst - dstinit)}; - } else { - _mm512_mask_storeu_epi8((__m512i *)dst, output_mask, shuffled); - dst += output_len; - } - } - - if (!ignore_garbage && last_chunk_options != stop_before_partial && - equalsigns > 0) { - size_t output_count = size_t(dst - dstinit); - if ((output_count % 3 == 0) || - ((output_count % 3) + 1 + equalsigns != 4)) { - return {INVALID_BASE64_CHARACTER, equallocation, output_count}; - } - } - - return {SUCCESS, srclen, size_t(dst - dstinit)}; - } - - if (!ignore_garbage && equalsigns > 0) { - if (last_chunk_options == last_chunk_handling_options::strict) { - return {BASE64_INPUT_REMAINDER, size_t(src - srcinit), - size_t(dst - dstinit)}; - } - if (last_chunk_options == - last_chunk_handling_options::stop_before_partial) { - return {SUCCESS, size_t(src - srcinit), size_t(dst - dstinit)}; - } - if ((size_t(dst - dstinit) % 3 == 0) || - ((size_t(dst - dstinit) % 3) + 1 + equalsigns != 4)) { - return {INVALID_BASE64_CHARACTER, equallocation, size_t(dst - dstinit)}; - } - } - return {SUCCESS, srclen, size_t(dst - dstinit)}; -} -/* end file src/icelake/icelake_base64.inl.cpp */ -#endif // SIMDUTF_FEATURE_BASE64 - -#include - -} // namespace -} // namespace icelake -} // namespace simdutf - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -/* begin file src/generic/utf32.h */ -#include - -namespace simdutf { -namespace icelake { -namespace { -namespace utf32 { - -template T min(T a, T b) { return a <= b ? a : b; } - -simdutf_really_inline size_t utf8_length_from_utf32(const char32_t *input, - size_t length) { - using vector_u32 = simd32; - - const char32_t *start = input; - - // we add up to three ones in a single iteration (see the vectorized loop in - // section #2 below) - const size_t max_increment = 3; - - const size_t N = vector_u32::ELEMENTS; - -#if SIMDUTF_SIMD_HAS_UNSIGNED_CMP - const auto v_0000007f = vector_u32::splat(0x0000007f); - const auto v_000007ff = vector_u32::splat(0x000007ff); - const auto v_0000ffff = vector_u32::splat(0x0000ffff); -#else - const auto v_ffffff80 = vector_u32::splat(0xffffff80); - const auto v_fffff800 = vector_u32::splat(0xfffff800); - const auto v_ffff0000 = vector_u32::splat(0xffff0000); - const auto one = vector_u32::splat(1); -#endif // SIMDUTF_SIMD_HAS_UNSIGNED_CMP - - size_t counter = 0; - - // 1. vectorized loop unrolled 4 times - { - // we use vector of uint32 counters, this is why this limit is used - const size_t max_iterations = - std::numeric_limits::max() / (max_increment * 4); - size_t blocks = length / (N * 4); - length -= blocks * (N * 4); - while (blocks != 0) { - const size_t iterations = min(blocks, max_iterations); - blocks -= iterations; - - simd32 acc = vector_u32::zero(); - for (size_t i = 0; i < iterations; i++) { - const auto in0 = vector_u32(input + 0 * N); - const auto in1 = vector_u32(input + 1 * N); - const auto in2 = vector_u32(input + 2 * N); - const auto in3 = vector_u32(input + 3 * N); - -#if SIMDUTF_SIMD_HAS_UNSIGNED_CMP - acc -= as_vector_u32(in0 > v_0000007f); - acc -= as_vector_u32(in1 > v_0000007f); - acc -= as_vector_u32(in2 > v_0000007f); - acc -= as_vector_u32(in3 > v_0000007f); - - acc -= as_vector_u32(in0 > v_000007ff); - acc -= as_vector_u32(in1 > v_000007ff); - acc -= as_vector_u32(in2 > v_000007ff); - acc -= as_vector_u32(in3 > v_000007ff); - - acc -= as_vector_u32(in0 > v_0000ffff); - acc -= as_vector_u32(in1 > v_0000ffff); - acc -= as_vector_u32(in2 > v_0000ffff); - acc -= as_vector_u32(in3 > v_0000ffff); -#else - acc += min(one, in0 & v_ffffff80); - acc += min(one, in1 & v_ffffff80); - acc += min(one, in2 & v_ffffff80); - acc += min(one, in3 & v_ffffff80); - - acc += min(one, in0 & v_fffff800); - acc += min(one, in1 & v_fffff800); - acc += min(one, in2 & v_fffff800); - acc += min(one, in3 & v_fffff800); - - acc += min(one, in0 & v_ffff0000); - acc += min(one, in1 & v_ffff0000); - acc += min(one, in2 & v_ffff0000); - acc += min(one, in3 & v_ffff0000); -#endif // SIMDUTF_SIMD_HAS_UNSIGNED_CMP - - input += 4 * N; - } - - counter += acc.sum(); - } - } - - // 2. vectorized loop for tail - { - const size_t max_iterations = - std::numeric_limits::max() / max_increment; - size_t blocks = length / N; - length -= blocks * N; - while (blocks != 0) { - const size_t iterations = min(blocks, max_iterations); - blocks -= iterations; - - auto acc = vector_u32::zero(); - for (size_t i = 0; i < iterations; i++) { - const auto in = vector_u32(input); - -#if SIMDUTF_SIMD_HAS_UNSIGNED_CMP - acc -= as_vector_u32(in > v_0000007f); - acc -= as_vector_u32(in > v_000007ff); - acc -= as_vector_u32(in > v_0000ffff); -#else - acc += min(one, in & v_ffffff80); - acc += min(one, in & v_fffff800); - acc += min(one, in & v_ffff0000); -#endif // SIMDUTF_SIMD_HAS_UNSIGNED_CMP - - input += N; - } - - counter += acc.sum(); - } - } - - const size_t consumed = input - start; - if (consumed != 0) { - // We don't count 0th bytes in the vectorized loops above, this - // is why we need to count them in the end. - counter += consumed; - } - - return counter + scalar::utf32::utf8_length_from_utf32(input, length); -} - -} // namespace utf32 -} // unnamed namespace -} // namespace icelake -} // namespace simdutf -/* end file src/generic/utf32.h */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -namespace simdutf { -namespace icelake { - -#if SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_warn_unused int -implementation::detect_encodings(const char *input, - size_t length) const noexcept { - // If there is a BOM, then we trust it. - auto bom_encoding = simdutf::BOM::check_bom(input, length); - if (bom_encoding != encoding_type::unspecified) { - return bom_encoding; - } - - int out = 0; - uint32_t utf16_err = (length % 2); - uint32_t utf32_err = (length % 4); - uint32_t ends_with_high = 0; - avx512_utf8_checker checker{}; - const __m512i offset = _mm512_set1_epi32((uint32_t)0xffff2000); - __m512i currentmax = _mm512_setzero_si512(); - __m512i currentoffsetmax = _mm512_setzero_si512(); - const char *ptr = input; - const char *end = ptr + length; - for (; end - ptr >= 64; ptr += 64) { - // utf8 checks - const __m512i data = _mm512_loadu_si512((const __m512i *)ptr); - checker.check_next_input(data); - - // utf16le_checks - __m512i diff = _mm512_sub_epi16(data, _mm512_set1_epi16(uint16_t(0xD800))); - __mmask32 surrogates = - _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); - __mmask32 highsurrogates = - _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400))); - __mmask32 lowsurrogates = surrogates ^ highsurrogates; - utf16_err |= (((highsurrogates << 1) | ends_with_high) != lowsurrogates); - ends_with_high = ((highsurrogates & 0x80000000) != 0); - - // utf32le checks - currentoffsetmax = - _mm512_max_epu32(_mm512_add_epi32(data, offset), currentoffsetmax); - currentmax = _mm512_max_epu32(data, currentmax); - } - - // last block with 0 <= len < 64 - __mmask64 read_mask = (__mmask64(1) << (end - ptr)) - 1; - const __m512i data = _mm512_maskz_loadu_epi8(read_mask, (const __m512i *)ptr); - checker.check_next_input(data); - - __m512i diff = _mm512_sub_epi16(data, _mm512_set1_epi16(uint16_t(0xD800))); - __mmask32 surrogates = - _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); - __mmask32 highsurrogates = - _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400))); - __mmask32 lowsurrogates = surrogates ^ highsurrogates; - utf16_err |= (((highsurrogates << 1) | ends_with_high) != lowsurrogates); - - currentoffsetmax = - _mm512_max_epu32(_mm512_add_epi32(data, offset), currentoffsetmax); - currentmax = _mm512_max_epu32(data, currentmax); - - const __m512i standardmax = _mm512_set1_epi32((uint32_t)0x10ffff); - const __m512i standardoffsetmax = _mm512_set1_epi32((uint32_t)0xfffff7ff); - __m512i is_zero = - _mm512_xor_si512(_mm512_max_epu32(currentmax, standardmax), standardmax); - utf32_err |= (_mm512_test_epi8_mask(is_zero, is_zero) != 0); - is_zero = _mm512_xor_si512( - _mm512_max_epu32(currentoffsetmax, standardoffsetmax), standardoffsetmax); - utf32_err |= (_mm512_test_epi8_mask(is_zero, is_zero) != 0); - checker.check_eof(); - bool is_valid_utf8 = !checker.errors(); - if (is_valid_utf8) { - out |= encoding_type::UTF8; - } - if (utf16_err == 0) { - out |= encoding_type::UTF16_LE; - } - if (utf32_err == 0) { - out |= encoding_type::UTF32_LE; - } - return out; -} -#endif // SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_warn_unused bool -implementation::validate_utf8(const char *buf, size_t len) const noexcept { - if (simdutf_unlikely(len == 0)) { - return true; - } - avx512_utf8_checker checker{}; - const char *ptr = buf; - const char *end = ptr + len; - for (; end - ptr >= 64; ptr += 64) { - const __m512i utf8 = _mm512_loadu_si512((const __m512i *)ptr); - checker.check_next_input(utf8); - } - if (end != ptr) { - const __m512i utf8 = _mm512_maskz_loadu_epi8( - ~UINT64_C(0) >> (64 - (end - ptr)), (const __m512i *)ptr); - checker.check_next_input(utf8); - } - checker.check_eof(); - return !checker.errors(); -} -#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 -simdutf_warn_unused result implementation::validate_utf8_with_errors( - const char *buf, size_t len) const noexcept { - if (simdutf_unlikely(len == 0)) { - return result(error_code::SUCCESS, len); - } - avx512_utf8_checker checker{}; - const char *ptr = buf; - const char *end = ptr + len; - size_t count{0}; - for (; end - ptr >= 64; ptr += 64) { - const __m512i utf8 = _mm512_loadu_si512((const __m512i *)ptr); - checker.check_next_input(utf8); - if (checker.errors()) { - if (count != 0) { - count--; - } // Sometimes the error is only detected in the next chunk - result res = scalar::utf8::rewind_and_validate_with_errors( - reinterpret_cast(buf), - reinterpret_cast(buf + count), len - count); - res.count += count; - return res; - } - count += 64; - } - if (end != ptr) { - const __m512i utf8 = _mm512_maskz_loadu_epi8( - ~UINT64_C(0) >> (64 - (end - ptr)), (const __m512i *)ptr); - checker.check_next_input(utf8); - } - checker.check_eof(); - if (checker.errors()) { - if (count != 0) { - count--; - } // Sometimes the error is only detected in the next chunk - result res = scalar::utf8::rewind_and_validate_with_errors( - reinterpret_cast(buf), - reinterpret_cast(buf + count), len - count); - res.count += count; - return res; - } - return result(error_code::SUCCESS, len); -} -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_ASCII -simdutf_warn_unused bool -implementation::validate_ascii(const char *buf, size_t len) const noexcept { - return icelake::validate_ascii(buf, len); -} - -simdutf_warn_unused result implementation::validate_ascii_with_errors( - const char *buf, size_t len) const noexcept { - const char *buf_orig = buf; - const char *end = buf + len; - const __m512i ascii = _mm512_set1_epi8((uint8_t)0x80); - for (; end - buf >= 64; buf += 64) { - const __m512i input = _mm512_loadu_si512((const __m512i *)buf); - __mmask64 notascii = _mm512_cmp_epu8_mask(input, ascii, _MM_CMPINT_NLT); - if (notascii) { - return result(error_code::TOO_LARGE, - buf - buf_orig + _tzcnt_u64(notascii)); - } - } - if (end != buf) { - const __m512i input = _mm512_maskz_loadu_epi8( - ~UINT64_C(0) >> (64 - (end - buf)), (const __m512i *)buf); - __mmask64 notascii = _mm512_cmp_epu8_mask(input, ascii, _MM_CMPINT_NLT); - if (notascii) { - return result(error_code::TOO_LARGE, - buf - buf_orig + _tzcnt_u64(notascii)); - } - } - return result(error_code::SUCCESS, len); -} -#endif // SIMDUTF_FEATURE_ASCII - -#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_warn_unused bool -implementation::validate_utf16le(const char16_t *buf, - size_t len) const noexcept { - const char16_t *end = buf + len; - - for (; end - buf >= 32;) { - __m512i in = _mm512_loadu_si512((__m512i *)buf); - __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800))); - __mmask32 surrogates = - _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); - if (surrogates) { - __mmask32 highsurrogates = - _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400))); - __mmask32 lowsurrogates = surrogates ^ highsurrogates; - // high must be followed by low - if ((highsurrogates << 1) != lowsurrogates) { - return false; - } - bool ends_with_high = ((highsurrogates & 0x80000000) != 0); - if (ends_with_high) { - buf += 31; // advance only by 31 code units so that we start with the - // high surrogate on the next round. - } else { - buf += 32; - } - } else { - buf += 32; - } - } - if (buf < end) { - __m512i in = - _mm512_maskz_loadu_epi16((1U << (end - buf)) - 1, (__m512i *)buf); - __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800))); - __mmask32 surrogates = - _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); - if (surrogates) { - __mmask32 highsurrogates = - _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400))); - __mmask32 lowsurrogates = surrogates ^ highsurrogates; - // high must be followed by low - if ((highsurrogates << 1) != lowsurrogates) { - return false; - } - } - } - return true; -} -#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused bool -implementation::validate_utf16be(const char16_t *buf, - size_t len) const noexcept { - const char16_t *end = buf + len; - const __m512i byteflip = _mm512_setr_epi64( - 0x0607040502030001, 0x0e0f0c0d0a0b0809, 0x0607040502030001, - 0x0e0f0c0d0a0b0809, 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809); - for (; end - buf >= 32;) { - __m512i in = - _mm512_shuffle_epi8(_mm512_loadu_si512((__m512i *)buf), byteflip); - __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800))); - __mmask32 surrogates = - _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); - if (surrogates) { - __mmask32 highsurrogates = - _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400))); - __mmask32 lowsurrogates = surrogates ^ highsurrogates; - // high must be followed by low - if ((highsurrogates << 1) != lowsurrogates) { - return false; - } - bool ends_with_high = ((highsurrogates & 0x80000000) != 0); - if (ends_with_high) { - buf += 31; // advance only by 31 code units so that we start with the - // high surrogate on the next round. - } else { - buf += 32; - } - } else { - buf += 32; - } - } - if (buf < end) { - __m512i in = _mm512_shuffle_epi8( - _mm512_maskz_loadu_epi16((1U << (end - buf)) - 1, (__m512i *)buf), - byteflip); - __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800))); - __mmask32 surrogates = - _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); - if (surrogates) { - __mmask32 highsurrogates = - _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400))); - __mmask32 lowsurrogates = surrogates ^ highsurrogates; - // high must be followed by low - if ((highsurrogates << 1) != lowsurrogates) { - return false; - } - } - } - return true; -} - -simdutf_warn_unused result implementation::validate_utf16le_with_errors( - const char16_t *buf, size_t len) const noexcept { - const char16_t *start_buf = buf; - const char16_t *end = buf + len; - for (; end - buf >= 32;) { - __m512i in = _mm512_loadu_si512((__m512i *)buf); - __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800))); - __mmask32 surrogates = - _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); - if (surrogates) { - __mmask32 highsurrogates = - _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400))); - __mmask32 lowsurrogates = surrogates ^ highsurrogates; - // high must be followed by low - if ((highsurrogates << 1) != lowsurrogates) { - uint32_t extra_low = _tzcnt_u32(lowsurrogates & ~(highsurrogates << 1)); - uint32_t extra_high = - _tzcnt_u32(highsurrogates & ~(lowsurrogates >> 1)); - return result(error_code::SURROGATE, - (buf - start_buf) + - (extra_low < extra_high ? extra_low : extra_high)); - } - bool ends_with_high = ((highsurrogates & 0x80000000) != 0); - if (ends_with_high) { - buf += 31; // advance only by 31 code units so that we start with the - // high surrogate on the next round. - } else { - buf += 32; - } - } else { - buf += 32; - } - } - if (buf < end) { - __m512i in = - _mm512_maskz_loadu_epi16((1U << (end - buf)) - 1, (__m512i *)buf); - __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800))); - __mmask32 surrogates = - _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); - if (surrogates) { - __mmask32 highsurrogates = - _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400))); - __mmask32 lowsurrogates = surrogates ^ highsurrogates; - // high must be followed by low - if ((highsurrogates << 1) != lowsurrogates) { - uint32_t extra_low = _tzcnt_u32(lowsurrogates & ~(highsurrogates << 1)); - uint32_t extra_high = - _tzcnt_u32(highsurrogates & ~(lowsurrogates >> 1)); - return result(error_code::SURROGATE, - (buf - start_buf) + - (extra_low < extra_high ? extra_low : extra_high)); - } - } - } - return result(error_code::SUCCESS, len); -} - -simdutf_warn_unused result implementation::validate_utf16be_with_errors( - const char16_t *buf, size_t len) const noexcept { - const char16_t *start_buf = buf; - const char16_t *end = buf + len; - const __m512i byteflip = _mm512_setr_epi64( - 0x0607040502030001, 0x0e0f0c0d0a0b0809, 0x0607040502030001, - 0x0e0f0c0d0a0b0809, 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809); - for (; end - buf >= 32;) { - __m512i in = - _mm512_shuffle_epi8(_mm512_loadu_si512((__m512i *)buf), byteflip); - __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800))); - __mmask32 surrogates = - _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); - if (surrogates) { - __mmask32 highsurrogates = - _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400))); - __mmask32 lowsurrogates = surrogates ^ highsurrogates; - // high must be followed by low - if ((highsurrogates << 1) != lowsurrogates) { - uint32_t extra_low = _tzcnt_u32(lowsurrogates & ~(highsurrogates << 1)); - uint32_t extra_high = - _tzcnt_u32(highsurrogates & ~(lowsurrogates >> 1)); - return result(error_code::SURROGATE, - (buf - start_buf) + - (extra_low < extra_high ? extra_low : extra_high)); - } - bool ends_with_high = ((highsurrogates & 0x80000000) != 0); - if (ends_with_high) { - buf += 31; // advance only by 31 code units so that we start with the - // high surrogate on the next round. - } else { - buf += 32; - } - } else { - buf += 32; - } - } - if (buf < end) { - __m512i in = _mm512_shuffle_epi8( - _mm512_maskz_loadu_epi16((1U << (end - buf)) - 1, (__m512i *)buf), - byteflip); - __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800))); - __mmask32 surrogates = - _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); - if (surrogates) { - __mmask32 highsurrogates = - _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400))); - __mmask32 lowsurrogates = surrogates ^ highsurrogates; - // high must be followed by low - if ((highsurrogates << 1) != lowsurrogates) { - uint32_t extra_low = _tzcnt_u32(lowsurrogates & ~(highsurrogates << 1)); - uint32_t extra_high = - _tzcnt_u32(highsurrogates & ~(lowsurrogates >> 1)); - return result(error_code::SURROGATE, - (buf - start_buf) + - (extra_low < extra_high ? extra_low : extra_high)); - } - } - } - return result(error_code::SUCCESS, len); -} - -void implementation::to_well_formed_utf16le(const char16_t *input, size_t len, - char16_t *output) const noexcept { - return utf16fix_avx512(input, len, output); -} - -void implementation::to_well_formed_utf16be(const char16_t *input, size_t len, - char16_t *output) const noexcept { - return utf16fix_avx512(input, len, output); -} -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_warn_unused bool -implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept { - return icelake::validate_utf32(buf, len); -} -#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused result implementation::validate_utf32_with_errors( - const char32_t *buf, size_t len) const noexcept { - const char32_t *buf_orig = buf; - if (len >= 16) { - const char32_t *end = buf + len - 16; - while (buf <= end) { - __m512i utf32 = _mm512_loadu_si512((const __m512i *)buf); - __mmask16 outside_range = _mm512_cmp_epu32_mask( - utf32, _mm512_set1_epi32(0x10ffff), _MM_CMPINT_GT); - - __m512i utf32_off = - _mm512_add_epi32(utf32, _mm512_set1_epi32(0xffff2000)); - - __mmask16 surrogate_range = _mm512_cmp_epu32_mask( - utf32_off, _mm512_set1_epi32(0xfffff7ff), _MM_CMPINT_GT); - if ((outside_range | surrogate_range)) { - auto outside_idx = _tzcnt_u32(outside_range); - auto surrogate_idx = _tzcnt_u32(surrogate_range); - - if (outside_idx < surrogate_idx) { - return result(error_code::TOO_LARGE, buf - buf_orig + outside_idx); - } - - return result(error_code::SURROGATE, buf - buf_orig + surrogate_idx); - } - - buf += 16; - } - } - if (len > 0) { - __m512i utf32 = _mm512_maskz_loadu_epi32( - __mmask16((1U << (buf_orig + len - buf)) - 1), (const __m512i *)buf); - __mmask16 outside_range = _mm512_cmp_epu32_mask( - utf32, _mm512_set1_epi32(0x10ffff), _MM_CMPINT_GT); - __m512i utf32_off = _mm512_add_epi32(utf32, _mm512_set1_epi32(0xffff2000)); - - __mmask16 surrogate_range = _mm512_cmp_epu32_mask( - utf32_off, _mm512_set1_epi32(0xfffff7ff), _MM_CMPINT_GT); - if ((outside_range | surrogate_range)) { - auto outside_idx = _tzcnt_u32(outside_range); - auto surrogate_idx = _tzcnt_u32(surrogate_range); - - if (outside_idx < surrogate_idx) { - return result(error_code::TOO_LARGE, buf - buf_orig + outside_idx); - } - - return result(error_code::SURROGATE, buf - buf_orig + surrogate_idx); - } - } - - return result(error_code::SUCCESS, len); -} -#endif // SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( - const char *buf, size_t len, char *utf8_output) const noexcept { - return icelake::latin1_to_utf8_avx512_start(buf, len, utf8_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - return icelake_convert_latin1_to_utf16(buf, len, - utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - return icelake_convert_latin1_to_utf16(buf, len, - utf16_output); -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept { - avx512_convert_latin1_to_utf32(buf, len, utf32_output); - return len; -} -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept { - return icelake::utf8_to_latin1_avx512(buf, len, latin1_output); -} - -simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors( - const char *buf, size_t len, char *latin1_output) const noexcept { - // First, try to convert as much as possible using the SIMD implementation. - const char *obuf = buf; - char *olatin1_output = latin1_output; - size_t written = icelake::utf8_to_latin1_avx512(obuf, len, olatin1_output); - - // If we have completely converted the string - if (obuf == buf + len) { - return {simdutf::SUCCESS, written}; - } - size_t pos = obuf - buf; - result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( - pos, buf + pos, len - pos, latin1_output); - res.count += pos; - return res; -} - -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept { - return icelake::valid_utf8_to_latin1_avx512(buf, len, latin1_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - utf8_to_utf16_result ret = - fast_avx512_convert_utf8_to_utf16(buf, len, - utf16_output); - if (ret.second == nullptr) { - return 0; - } - return ret.second - utf16_output; -} - -simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - utf8_to_utf16_result ret = fast_avx512_convert_utf8_to_utf16( - buf, len, utf16_output); - if (ret.second == nullptr) { - return 0; - } - return ret.second - utf16_output; -} - -simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - return fast_avx512_convert_utf8_to_utf16_with_errors( - buf, len, utf16_output); -} - -simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - return fast_avx512_convert_utf8_to_utf16_with_errors( - buf, len, utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - utf8_to_utf16_result ret = - icelake::valid_utf8_to_fixed_length( - buf, len, utf16_output); - size_t saved_bytes = ret.second - utf16_output; - const char *end = buf + len; - if (ret.first == end) { - return saved_bytes; - } - - // Note: AVX512 procedure looks up 4 bytes forward, and - // correctly converts multi-byte chars even if their - // continuation bytes lie outsiede 16-byte window. - // It meas, we have to skip continuation bytes from - // the beginning ret.first, as they were already consumed. - while (ret.first != end && ((uint8_t(*ret.first) & 0xc0) == 0x80)) { - ret.first += 1; - } - - if (ret.first != end) { - const size_t scalar_saved_bytes = - scalar::utf8_to_utf16::convert_valid( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - - return saved_bytes; -} - -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - utf8_to_utf16_result ret = - icelake::valid_utf8_to_fixed_length( - buf, len, utf16_output); - size_t saved_bytes = ret.second - utf16_output; - const char *end = buf + len; - if (ret.first == end) { - return saved_bytes; - } - - // Note: AVX512 procedure looks up 4 bytes forward, and - // correctly converts multi-byte chars even if their - // continuation bytes lie outsiede 16-byte window. - // It meas, we have to skip continuation bytes from - // the beginning ret.first, as they were already consumed. - while (ret.first != end && ((uint8_t(*ret.first) & 0xc0) == 0x80)) { - ret.first += 1; - } - - if (ret.first != end) { - const size_t scalar_saved_bytes = - scalar::utf8_to_utf16::convert_valid( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - - return saved_bytes; -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::convert_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_out) const noexcept { - uint32_t *utf32_output = reinterpret_cast(utf32_out); - utf8_to_utf32_result ret = - icelake::validating_utf8_to_fixed_length( - buf, len, utf32_output); - if (ret.second == nullptr) - return 0; - - size_t saved_bytes = ret.second - utf32_output; - const char *end = buf + len; - if (ret.first == end) { - return saved_bytes; - } - - // Note: the AVX512 procedure looks up 4 bytes forward, and - // correctly converts multi-byte chars even if their - // continuation bytes lie outside 16-byte window. - // It means, we have to skip continuation bytes from - // the beginning ret.first, as they were already consumed. - while (ret.first != end && ((uint8_t(*ret.first) & 0xc0) == 0x80)) { - ret.first += 1; - } - if (ret.first != end) { - const size_t scalar_saved_bytes = scalar::utf8_to_utf32::convert( - ret.first, len - (ret.first - buf), utf32_out + saved_bytes); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - - return saved_bytes; -} - -simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors( - const char *buf, size_t len, char32_t *utf32) const noexcept { - if (simdutf_unlikely(len == 0)) { - return {error_code::SUCCESS, 0}; - } - uint32_t *utf32_output = reinterpret_cast(utf32); - auto ret = icelake::validating_utf8_to_fixed_length_with_constant_checks< - endianness::LITTLE, uint32_t>(buf, len, utf32_output); - - if (!std::get<2>(ret)) { - size_t pos = std::get<0>(ret) - buf; - // We might have an error that occurs right before pos. - // This is only a concern if buf[pos] is not a continuation byte. - if ((buf[pos] & 0xc0) != 0x80 && pos >= 64) { - pos -= 1; - } else if ((buf[pos] & 0xc0) == 0x80 && pos >= 64) { - // We must check whether we are the fourth continuation byte - bool c1 = (buf[pos - 1] & 0xc0) == 0x80; - bool c2 = (buf[pos - 2] & 0xc0) == 0x80; - bool c3 = (buf[pos - 3] & 0xc0) == 0x80; - if (c1 && c2 && c3) { - return {simdutf::TOO_LONG, pos}; - } - } - // todo: we reset the output to utf32 instead of using std::get<2.(ret) as - // you'd expect. that is because - // validating_utf8_to_fixed_length_with_constant_checks may have processed - // data beyond the error. - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( - pos, buf + pos, len - pos, utf32); - res.count += pos; - return res; - } - size_t saved_bytes = std::get<1>(ret) - utf32_output; - const char *end = buf + len; - if (std::get<0>(ret) == end) { - return {simdutf::SUCCESS, saved_bytes}; - } - - // Note: the AVX512 procedure looks up 4 bytes forward, and - // correctly converts multi-byte chars even if their - // continuation bytes lie outside 16-byte window. - // It means, we have to skip continuation bytes from - // the beginning ret.first, as they were already consumed. - while (std::get<0>(ret) != end and - ((uint8_t(*std::get<0>(ret)) & 0xc0) == 0x80)) { - std::get<0>(ret) += 1; - } - - if (std::get<0>(ret) != end) { - auto scalar_result = scalar::utf8_to_utf32::convert_with_errors( - std::get<0>(ret), len - (std::get<0>(ret) - buf), - reinterpret_cast(utf32_output) + saved_bytes); - if (scalar_result.error != simdutf::SUCCESS) { - scalar_result.count += (std::get<0>(ret) - buf); - } else { - scalar_result.count += saved_bytes; - } - return scalar_result; - } - - return {simdutf::SUCCESS, size_t(std::get<1>(ret) - utf32_output)}; -} - -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_out) const noexcept { - uint32_t *utf32_output = reinterpret_cast(utf32_out); - utf8_to_utf32_result ret = - icelake::valid_utf8_to_fixed_length( - buf, len, utf32_output); - size_t saved_bytes = ret.second - utf32_output; - const char *end = buf + len; - if (ret.first == end) { - return saved_bytes; - } - - // Note: AVX512 procedure looks up 4 bytes forward, and - // correctly converts multi-byte chars even if their - // continuation bytes lie outsiede 16-byte window. - // It meas, we have to skip continuation bytes from - // the beginning ret.first, as they were already consumed. - while (ret.first != end && ((uint8_t(*ret.first) & 0xc0) == 0x80)) { - ret.first += 1; - } - - if (ret.first != end) { - const size_t scalar_saved_bytes = scalar::utf8_to_utf32::convert_valid( - ret.first, len - (ret.first - buf), utf32_out + saved_bytes); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - - return saved_bytes; -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - return icelake_convert_utf16_to_latin1(buf, len, - latin1_output); -} - -simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - return icelake_convert_utf16_to_latin1(buf, len, - latin1_output); -} - -simdutf_warn_unused result -implementation::convert_utf16le_to_latin1_with_errors( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - return icelake_convert_utf16_to_latin1_with_errors( - buf, len, latin1_output) - .first; -} - -simdutf_warn_unused result -implementation::convert_utf16be_to_latin1_with_errors( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - return icelake_convert_utf16_to_latin1_with_errors( - buf, len, latin1_output) - .first; -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - // optimization opportunity: implement custom function - return convert_utf16be_to_latin1(buf, len, latin1_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - // optimization opportunity: implement custom function - return convert_utf16le_to_latin1(buf, len, latin1_output); -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - size_t outlen; - size_t inlen = utf16_to_utf8_avx512i( - buf, len, (unsigned char *)utf8_output, &outlen); - if (inlen != len) { - return 0; - } - return outlen; -} - -simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - size_t outlen; - size_t inlen = utf16_to_utf8_avx512i( - buf, len, (unsigned char *)utf8_output, &outlen); - if (inlen != len) { - return 0; - } - return outlen; -} - -simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - size_t outlen; - size_t inlen = utf16_to_utf8_avx512i( - buf, len, (unsigned char *)utf8_output, &outlen); - if (inlen != len) { - result res = scalar::utf16_to_utf8::convert_with_errors( - buf + inlen, len - inlen, utf8_output + outlen); - res.count += inlen; - return res; - } - return {simdutf::SUCCESS, outlen}; -} - -simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - size_t outlen; - size_t inlen = utf16_to_utf8_avx512i( - buf, len, (unsigned char *)utf8_output, &outlen); - if (inlen != len) { - result res = scalar::utf16_to_utf8::convert_with_errors( - buf + inlen, len - inlen, utf8_output + outlen); - res.count += inlen; - return res; - } - return {simdutf::SUCCESS, outlen}; -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return convert_utf16le_to_utf8(buf, len, utf8_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return convert_utf16be_to_utf8(buf, len, utf8_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( - const char32_t *buf, size_t len, char *latin1_output) const noexcept { - return icelake_convert_utf32_to_latin1(buf, len, latin1_output); -} - -simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors( - const char32_t *buf, size_t len, char *latin1_output) const noexcept { - return icelake_convert_utf32_to_latin1_with_errors(buf, len, latin1_output) - .first; -} - -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1( - const char32_t *buf, size_t len, char *latin1_output) const noexcept { - return icelake_convert_utf32_to_latin1(buf, len, latin1_output); -} -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_output) const noexcept { - std::pair ret = - avx512_convert_utf32_to_utf8(buf, len, utf8_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf8_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = scalar::utf32_to_utf8::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( - const char32_t *buf, size_t len, char *utf8_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - icelake::avx512_convert_utf32_to_utf8_with_errors(buf, len, utf8_output); - if (ret.first.count != len) { - result scalar_res = scalar::utf32_to_utf8::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - utf8_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_output) const noexcept { - return convert_utf32_to_utf8(buf, len, utf8_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - std::pair ret = - avx512_convert_utf32_to_utf16(buf, len, utf16_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf16_output; - return saved_bytes; -} - -simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - std::pair ret = - avx512_convert_utf32_to_utf16(buf, len, utf16_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf16_output; - return saved_bytes; -} - -simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - avx512_convert_utf32_to_utf16_with_errors( - buf, len, utf16_output); - if (ret.first.error) { - return ret.first; - } - ret.first.count = - ret.second - - utf16_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - avx512_convert_utf32_to_utf16_with_errors(buf, len, - utf16_output); - if (ret.first.error) { - return ret.first; - } - ret.first.count = - ret.second - - utf16_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - return convert_utf32_to_utf16le(buf, len, utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - return convert_utf32_to_utf16be(buf, len, utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - std::tuple ret = - icelake::convert_utf16_to_utf32(buf, len, - utf32_output); - if (!std::get<2>(ret)) { - return 0; - } - size_t saved_bytes = std::get<1>(ret) - utf32_output; - if (std::get<0>(ret) != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_utf32::convert( - std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret)); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - std::tuple ret = - icelake::convert_utf16_to_utf32(buf, len, utf32_output); - if (!std::get<2>(ret)) { - return 0; - } - size_t saved_bytes = std::get<1>(ret) - utf32_output; - if (std::get<0>(ret) != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_utf32::convert( - std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret)); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - std::tuple ret = - icelake::convert_utf16_to_utf32(buf, len, - utf32_output); - if (!std::get<2>(ret)) { - result scalar_res = - scalar::utf16_to_utf32::convert_with_errors( - std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret)); - scalar_res.count += (std::get<0>(ret) - buf); - return scalar_res; - } - size_t saved_bytes = std::get<1>(ret) - utf32_output; - if (std::get<0>(ret) != buf + len) { - result scalar_res = - scalar::utf16_to_utf32::convert_with_errors( - std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret)); - if (scalar_res.error) { - scalar_res.count += (std::get<0>(ret) - buf); - return scalar_res; - } else { - scalar_res.count += saved_bytes; - return scalar_res; - } - } - return simdutf::result(simdutf::SUCCESS, saved_bytes); -} - -simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - std::tuple ret = - icelake::convert_utf16_to_utf32(buf, len, utf32_output); - if (!std::get<2>(ret)) { - result scalar_res = - scalar::utf16_to_utf32::convert_with_errors( - std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret)); - scalar_res.count += (std::get<0>(ret) - buf); - return scalar_res; - } - size_t saved_bytes = std::get<1>(ret) - utf32_output; - if (std::get<0>(ret) != buf + len) { - result scalar_res = - scalar::utf16_to_utf32::convert_with_errors( - std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret)); - if (scalar_res.error) { - scalar_res.count += (std::get<0>(ret) - buf); - return scalar_res; - } else { - scalar_res.count += saved_bytes; - return scalar_res; - } - } - return simdutf::result(simdutf::SUCCESS, saved_bytes); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - std::tuple ret = - icelake::convert_utf16_to_utf32(buf, len, - utf32_output); - if (!std::get<2>(ret)) { - return 0; - } - size_t saved_bytes = std::get<1>(ret) - utf32_output; - if (std::get<0>(ret) != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_utf32::convert( - std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret)); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - std::tuple ret = - icelake::convert_utf16_to_utf32(buf, len, utf32_output); - if (!std::get<2>(ret)) { - return 0; - } - size_t saved_bytes = std::get<1>(ret) - utf32_output; - if (std::get<0>(ret) != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_utf32::convert( - std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret)); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 -void implementation::change_endianness_utf16(const char16_t *input, - size_t length, - char16_t *output) const noexcept { - size_t pos = 0; - const __m512i byteflip = _mm512_setr_epi64( - 0x0607040502030001, 0x0e0f0c0d0a0b0809, 0x0607040502030001, - 0x0e0f0c0d0a0b0809, 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809); - while (pos + 32 <= length) { - __m512i utf16 = _mm512_loadu_si512((const __m512i *)(input + pos)); - utf16 = _mm512_shuffle_epi8(utf16, byteflip); - _mm512_storeu_si512(output + pos, utf16); - pos += 32; - } - if (pos < length) { - __mmask32 m((1U << (length - pos)) - 1); - __m512i utf16 = _mm512_maskz_loadu_epi16(m, (const __m512i *)(input + pos)); - utf16 = _mm512_shuffle_epi8(utf16, byteflip); - _mm512_mask_storeu_epi16(output + pos, m, utf16); - } -} - -simdutf_warn_unused size_t implementation::count_utf16le( - const char16_t *input, size_t length) const noexcept { - const char16_t *ptr = input; - size_t count{0}; - - if (length >= 32) { - const char16_t *end = input + length - 32; - - const __m512i low = _mm512_set1_epi16((uint16_t)0xdc00); - const __m512i high = _mm512_set1_epi16((uint16_t)0xdfff); - - while (ptr <= end) { - __m512i utf16 = _mm512_loadu_si512((const __m512i *)ptr); - ptr += 32; - uint64_t not_high_surrogate = - static_cast(_mm512_cmpgt_epu16_mask(utf16, high) | - _mm512_cmplt_epu16_mask(utf16, low)); - count += count_ones(not_high_surrogate); - } - } - - return count + scalar::utf16::count_code_points( - ptr, length - (ptr - input)); -} - -simdutf_warn_unused size_t implementation::count_utf16be( - const char16_t *input, size_t length) const noexcept { - const char16_t *ptr = input; - size_t count{0}; - if (length >= 32) { - - const char16_t *end = input + length - 32; - - const __m512i low = _mm512_set1_epi16((uint16_t)0xdc00); - const __m512i high = _mm512_set1_epi16((uint16_t)0xdfff); - - const __m512i byteflip = _mm512_setr_epi64( - 0x0607040502030001, 0x0e0f0c0d0a0b0809, 0x0607040502030001, - 0x0e0f0c0d0a0b0809, 0x0607040502030001, 0x0e0f0c0d0a0b0809, - 0x0607040502030001, 0x0e0f0c0d0a0b0809); - while (ptr <= end) { - __m512i utf16 = - _mm512_shuffle_epi8(_mm512_loadu_si512((__m512i *)ptr), byteflip); - ptr += 32; - uint64_t not_high_surrogate = - static_cast(_mm512_cmpgt_epu16_mask(utf16, high) | - _mm512_cmplt_epu16_mask(utf16, low)); - count += count_ones(not_high_surrogate); - } - } - - return count + scalar::utf16::count_code_points( - ptr, length - (ptr - input)); -} -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 -simdutf_warn_unused size_t -implementation::count_utf8(const char *input, size_t length) const noexcept { - const uint8_t *str = reinterpret_cast(input); - size_t answer = - length / sizeof(__m512i) * - sizeof(__m512i); // Number of 512-bit chunks that fits into the length. - size_t i = 0; - __m512i unrolled_popcount{0}; - - const __m512i continuation = _mm512_set1_epi8(char(0b10111111)); - - while (i + sizeof(__m512i) <= length) { - size_t iterations = (length - i) / sizeof(__m512i); - - size_t max_i = i + iterations * sizeof(__m512i) - sizeof(__m512i); - for (; i + 8 * sizeof(__m512i) <= max_i; i += 8 * sizeof(__m512i)) { - __m512i input1 = _mm512_loadu_si512((const __m512i *)(str + i)); - __m512i input2 = - _mm512_loadu_si512((const __m512i *)(str + i + sizeof(__m512i))); - __m512i input3 = - _mm512_loadu_si512((const __m512i *)(str + i + 2 * sizeof(__m512i))); - __m512i input4 = - _mm512_loadu_si512((const __m512i *)(str + i + 3 * sizeof(__m512i))); - __m512i input5 = - _mm512_loadu_si512((const __m512i *)(str + i + 4 * sizeof(__m512i))); - __m512i input6 = - _mm512_loadu_si512((const __m512i *)(str + i + 5 * sizeof(__m512i))); - __m512i input7 = - _mm512_loadu_si512((const __m512i *)(str + i + 6 * sizeof(__m512i))); - __m512i input8 = - _mm512_loadu_si512((const __m512i *)(str + i + 7 * sizeof(__m512i))); - - __mmask64 mask1 = _mm512_cmple_epi8_mask(input1, continuation); - __mmask64 mask2 = _mm512_cmple_epi8_mask(input2, continuation); - __mmask64 mask3 = _mm512_cmple_epi8_mask(input3, continuation); - __mmask64 mask4 = _mm512_cmple_epi8_mask(input4, continuation); - __mmask64 mask5 = _mm512_cmple_epi8_mask(input5, continuation); - __mmask64 mask6 = _mm512_cmple_epi8_mask(input6, continuation); - __mmask64 mask7 = _mm512_cmple_epi8_mask(input7, continuation); - __mmask64 mask8 = _mm512_cmple_epi8_mask(input8, continuation); - - __m512i mask_register = _mm512_set_epi64(mask8, mask7, mask6, mask5, - mask4, mask3, mask2, mask1); - - unrolled_popcount = _mm512_add_epi64(unrolled_popcount, - _mm512_popcnt_epi64(mask_register)); - } - - for (; i <= max_i; i += sizeof(__m512i)) { - __m512i more_input = _mm512_loadu_si512((const __m512i *)(str + i)); - uint64_t continuation_bitmask = static_cast( - _mm512_cmple_epi8_mask(more_input, continuation)); - answer -= count_ones(continuation_bitmask); - } - } - - answer -= _mm512_reduce_add_epi64(unrolled_popcount); - - return answer + scalar::utf8::count_code_points( - reinterpret_cast(str + i), length - i); -} -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::latin1_length_from_utf8( - const char *buf, size_t len) const noexcept { - return count_utf8(buf, len); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t implementation::utf8_length_from_utf16le( - const char16_t *input, size_t length) const noexcept { - return icelake_utf8_length_from_utf16(input, length); -} - -simdutf_warn_unused size_t implementation::utf8_length_from_utf16be( - const char16_t *input, size_t length) const noexcept { - return icelake_utf8_length_from_utf16(input, length); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::utf32_length_from_utf16le( - const char16_t *input, size_t length) const noexcept { - return implementation::count_utf16le(input, length); -} - -simdutf_warn_unused size_t implementation::utf32_length_from_utf16be( - const char16_t *input, size_t length) const noexcept { - return implementation::count_utf16be(input, length); -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::utf8_length_from_latin1( - const char *input, size_t length) const noexcept { - const uint8_t *str = reinterpret_cast(input); - size_t answer = length / sizeof(__m512i) * sizeof(__m512i); - size_t i = 0; - if (answer >= 2048) { // long strings optimization - unsigned char v_0xFF = 0xff; - __m512i eight_64bits = _mm512_setzero_si512(); - while (i + sizeof(__m512i) <= length) { - __m512i runner = _mm512_setzero_si512(); - size_t iterations = (length - i) / sizeof(__m512i); - if (iterations > 255) { - iterations = 255; - } - size_t max_i = i + iterations * sizeof(__m512i) - sizeof(__m512i); - for (; i + 4 * sizeof(__m512i) <= max_i; i += 4 * sizeof(__m512i)) { - // Load four __m512i vectors - __m512i input1 = _mm512_loadu_si512((const __m512i *)(str + i)); - __m512i input2 = - _mm512_loadu_si512((const __m512i *)(str + i + sizeof(__m512i))); - __m512i input3 = _mm512_loadu_si512( - (const __m512i *)(str + i + 2 * sizeof(__m512i))); - __m512i input4 = _mm512_loadu_si512( - (const __m512i *)(str + i + 3 * sizeof(__m512i))); - - // Generate four masks - __mmask64 mask1 = - _mm512_cmpgt_epi8_mask(_mm512_setzero_si512(), input1); - __mmask64 mask2 = - _mm512_cmpgt_epi8_mask(_mm512_setzero_si512(), input2); - __mmask64 mask3 = - _mm512_cmpgt_epi8_mask(_mm512_setzero_si512(), input3); - __mmask64 mask4 = - _mm512_cmpgt_epi8_mask(_mm512_setzero_si512(), input4); - // Apply the masks and subtract from the runner - __m512i not_ascii1 = - _mm512_mask_set1_epi8(_mm512_setzero_si512(), mask1, v_0xFF); - __m512i not_ascii2 = - _mm512_mask_set1_epi8(_mm512_setzero_si512(), mask2, v_0xFF); - __m512i not_ascii3 = - _mm512_mask_set1_epi8(_mm512_setzero_si512(), mask3, v_0xFF); - __m512i not_ascii4 = - _mm512_mask_set1_epi8(_mm512_setzero_si512(), mask4, v_0xFF); - - runner = _mm512_sub_epi8(runner, not_ascii1); - runner = _mm512_sub_epi8(runner, not_ascii2); - runner = _mm512_sub_epi8(runner, not_ascii3); - runner = _mm512_sub_epi8(runner, not_ascii4); - } - - for (; i <= max_i; i += sizeof(__m512i)) { - __m512i more_input = _mm512_loadu_si512((const __m512i *)(str + i)); - - __mmask64 mask = - _mm512_cmpgt_epi8_mask(_mm512_setzero_si512(), more_input); - __m512i not_ascii = - _mm512_mask_set1_epi8(_mm512_setzero_si512(), mask, v_0xFF); - runner = _mm512_sub_epi8(runner, not_ascii); - } - - eight_64bits = _mm512_add_epi64( - eight_64bits, _mm512_sad_epu8(runner, _mm512_setzero_si512())); - } - - answer += _mm512_reduce_add_epi64(eight_64bits); - } else if (answer > 0) { - for (; i + sizeof(__m512i) <= length; i += sizeof(__m512i)) { - __m512i latin = _mm512_loadu_si512((const __m512i *)(str + i)); - uint64_t non_ascii = _mm512_movepi8_mask(latin); - answer += count_ones(non_ascii); - } - } - return answer + scalar::latin1::utf8_length_from_latin1( - reinterpret_cast(str + i), length - i); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t implementation::utf16_length_from_utf8( - const char *input, size_t length) const noexcept { - size_t pos = 0; - - // UTF-16 char length based on the four most significant bits of UTF-8 bytes - const __m128i utf8_length_128 = _mm_setr_epi8( - // ASCII chars - /* 0000 */ 1, - /* 0001 */ 1, - /* 0010 */ 1, - /* 0011 */ 1, - /* 0100 */ 1, - /* 0101 */ 1, - /* 0110 */ 1, - /* 0111 */ 1, - - // continutation bytes - /* 1000 */ 0, - /* 1001 */ 0, - /* 1010 */ 0, - /* 1011 */ 0, - - // leading bytes - /* 1100 */ 1, // 2-byte UTF-8 char => 1 UTF-16 word - /* 1101 */ 1, // 2-byte UTF-8 char => 1 UTF-16 word - /* 1110 */ 1, // 3-byte UTF-8 char => 1 UTF-16 word - /* 1111 */ 2 // 4-byte UTF-8 char => 2 UTF-16 words (surrogate pair) - ); - - const __m512i char_length = broadcast_128bit_lane(utf8_length_128); - - constexpr size_t max_iterations = 255 / 2; - - size_t iterations = 0; - const auto zero = _mm512_setzero_si512(); - __m512i local = _mm512_setzero_si512(); // byte-wise counters - __m512i counters = _mm512_setzero_si512(); // 64-bit counters - for (; pos + 64 <= length; pos += 64) { - __m512i utf8 = _mm512_loadu_si512((const __m512i *)(input + pos)); - const auto t0 = _mm512_srli_epi32(utf8, 4); - const auto t1 = _mm512_and_si512(t0, _mm512_set1_epi8(0xf)); - const auto t2 = _mm512_shuffle_epi8(char_length, t1); - local = _mm512_add_epi8(local, t2); - - iterations += 1; - if (iterations == max_iterations) { - counters = _mm512_add_epi64(counters, _mm512_sad_epu8(local, zero)); - local = zero; - iterations = 0; - } - } - - size_t count = 0; - - if (pos > 0) { - // don't waste time for short strings - if (iterations > 0) { - counters = _mm512_add_epi64(counters, _mm512_sad_epu8(local, zero)); - } - - const auto l0 = _mm512_extracti32x4_epi32(counters, 0); - const auto l1 = _mm512_extracti32x4_epi32(counters, 1); - const auto l2 = _mm512_extracti32x4_epi32(counters, 2); - const auto l3 = _mm512_extracti32x4_epi32(counters, 3); - - const auto sum = - _mm_add_epi64(_mm_add_epi64(l0, l1), _mm_add_epi64(l2, l3)); - - count = uint64_t(_mm_extract_epi64(sum, 0)) + - uint64_t(_mm_extract_epi64(sum, 1)); - } - - return count + - scalar::utf8::utf16_length_from_utf8(input + pos, length - pos); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::utf8_length_from_utf32( - const char32_t *input, size_t length) const noexcept { - return utf32::utf8_length_from_utf32(input, length); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::utf16_length_from_utf32( - const char32_t *input, size_t length) const noexcept { - const char32_t *ptr = input; - size_t count{0}; - - if (length >= 16) { - const char32_t *end = input + length - 16; - - const __m512i v_0000_ffff = _mm512_set1_epi32((uint32_t)0x0000ffff); - - while (ptr <= end) { - __m512i utf32 = _mm512_loadu_si512((const __m512i *)ptr); - ptr += 16; - __mmask16 surrogates_bitmask = - _mm512_cmpgt_epu32_mask(utf32, v_0000_ffff); - - count += 16 + count_ones(surrogates_bitmask); - } - } - - return count + - scalar::utf32::utf16_length_from_utf32(ptr, length - (ptr - input)); -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::utf32_length_from_utf8( - const char *input, size_t length) const noexcept { - return implementation::count_utf8(input, length); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_BASE64 -simdutf_warn_unused result implementation::base64_to_binary( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - if (options & base64_url) { - if (options == base64_options::base64_url_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } else { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } - } else { - if (options == base64_options::base64_default_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } else { - return compress_decode_base64(output, input, length, - options, last_chunk_options); - } - } -} - -simdutf_warn_unused full_result implementation::base64_to_binary_details( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - if (options & base64_url) { - if (options == base64_options::base64_url_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } else { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } - } else { - if (options == base64_options::base64_default_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } else { - return compress_decode_base64(output, input, length, - options, last_chunk_options); - } - } -} - -simdutf_warn_unused result implementation::base64_to_binary( - const char16_t *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - if (options & base64_url) { - if (options == base64_options::base64_url_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } else { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } - } else { - if (options == base64_options::base64_default_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } else { - return compress_decode_base64(output, input, length, - options, last_chunk_options); - } - } -} - -simdutf_warn_unused full_result implementation::base64_to_binary_details( - const char16_t *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - if (options & base64_url) { - if (options == base64_options::base64_url_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } else { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } - } else { - if (options == base64_options::base64_default_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } else { - return compress_decode_base64(output, input, length, - options, last_chunk_options); - } - } -} - -size_t implementation::binary_to_base64(const char *input, size_t length, - char *output, - base64_options options) const noexcept { - if (options & base64_url) { - return encode_base64(output, input, length, options); - } else { - return encode_base64(output, input, length, options); - } -} -#endif // SIMDUTF_FEATURE_BASE64 - -} // namespace icelake -} // namespace simdutf - -/* begin file src/simdutf/icelake/end.h */ -#if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE -// nothing needed. -#else -SIMDUTF_UNTARGET_REGION -#endif - - -#if SIMDUTF_GCC11ORMORE // workaround for - // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 -SIMDUTF_POP_DISABLE_WARNINGS -#endif // end of workaround -/* end file src/simdutf/icelake/end.h */ -/* end file src/icelake/implementation.cpp */ -#endif -#if SIMDUTF_IMPLEMENTATION_HASWELL -/* begin file src/haswell/implementation.cpp */ -/* begin file src/simdutf/haswell/begin.h */ -// redefining SIMDUTF_IMPLEMENTATION to "haswell" -// #define SIMDUTF_IMPLEMENTATION haswell -#define SIMDUTF_SIMD_HAS_BYTEMASK 1 - -#if SIMDUTF_CAN_ALWAYS_RUN_HASWELL -// nothing needed. -#else -SIMDUTF_TARGET_HASWELL -#endif - -#if SIMDUTF_GCC11ORMORE // workaround for - // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 -// clang-format off -SIMDUTF_DISABLE_GCC_WARNING(-Wmaybe-uninitialized) -// clang-format on -#endif // end of workaround -/* end file src/simdutf/haswell/begin.h */ - -namespace simdutf { -namespace haswell { -namespace { -#ifndef SIMDUTF_HASWELL_H - #error "haswell.h must be included" -#endif -using namespace simd; - -#if SIMDUTF_FEATURE_ASCII || SIMDUTF_FEATURE_DETECT_ENCODING || \ - SIMDUTF_FEATURE_UTF8 -simdutf_really_inline bool is_ascii(const simd8x64 &input) { - return input.reduce_or().is_ascii(); -} -#endif // SIMDUTF_FEATURE_ASCII || SIMDUTF_FEATURE_DETECT_ENCODING || - // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_really_inline simd8 -must_be_2_3_continuation(const simd8 prev2, - const simd8 prev3) { - simd8 is_third_byte = - prev2.saturating_sub(0xe0u - 0x80); // Only 111_____ will be > 0x80 - simd8 is_fourth_byte = - prev3.saturating_sub(0xf0u - 0x80); // Only 1111____ will be > 0x80 - return simd8(is_third_byte | is_fourth_byte); -} -#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING -namespace utf16 { -/* begin file src/haswell/avx2_validate_utf16.cpp */ -template -simd8 utf16_gather_high_bytes(const simd16 &in0, - const simd16 &in1) { - if (big_endian) { - // we want lower bytes - const auto mask = simd16(0x00ff); - const auto t0 = in0 & mask; - const auto t1 = in1 & mask; - - return simd16::pack(t0, t1); - } else { - const auto t0 = in0.shr<8>(); - const auto t1 = in1.shr<8>(); - - return simd16::pack(t0, t1); - } -} -/* end file src/haswell/avx2_validate_utf16.cpp */ -} -#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF16 -/* begin file src/haswell/avx2_utf16fix.cpp */ -/* - * Process one block of 16 characters. If in_place is false, - * copy the block from in to out. If there is a sequencing - * error in the block, overwrite the illsequenced characters - * with the replacement character. This function reads one - * character before the beginning of the buffer as a lookback. - * If that character is illsequenced, it too is overwritten. - */ -template -void utf16fix_block(char16_t *out, const char16_t *in) { - const char16_t replacement = scalar::utf16::replacement(); - auto swap_if_needed = [](uint16_t c) -> uint16_t { - return !simdutf::match_system(big_endian) ? scalar::u16_swap_bytes(c) : c; - }; - __m256i lookback, block, lb_masked, block_masked, lb_is_high, block_is_low; - __m256i illseq, lb_illseq, block_illseq, lb_illseq_shifted; - - lookback = _mm256_loadu_si256((const __m256i *)(in - 1)); - block = _mm256_loadu_si256((const __m256i *)in); - lb_masked = - _mm256_and_si256(lookback, _mm256_set1_epi16(swap_if_needed(0xfc00u))); - block_masked = - _mm256_and_si256(block, _mm256_set1_epi16(swap_if_needed(0xfc00u))); - lb_is_high = - _mm256_cmpeq_epi16(lb_masked, _mm256_set1_epi16(swap_if_needed(0xd800u))); - block_is_low = _mm256_cmpeq_epi16(block_masked, - _mm256_set1_epi16(swap_if_needed(0xdc00u))); - - illseq = _mm256_xor_si256(lb_is_high, block_is_low); - if (!_mm256_testz_si256(illseq, illseq)) { - int lb; - - /* compute the cause of the illegal sequencing */ - lb_illseq = _mm256_andnot_si256(block_is_low, lb_is_high); - lb_illseq_shifted = - _mm256_or_si256(_mm256_bsrli_epi128(lb_illseq, 2), - _mm256_zextsi128_si256(_mm_bslli_si128( - _mm256_extracti128_si256(lb_illseq, 1), 14))); - block_illseq = _mm256_or_si256( - _mm256_andnot_si256(lb_is_high, block_is_low), lb_illseq_shifted); - - /* fix illegal sequencing in the lookback */ - lb = _mm256_cvtsi256_si32(lb_illseq); - lb = (lb & replacement) | (~lb & out[-1]); - out[-1] = char16_t(lb); - - /* fix illegal sequencing in the main block */ - block = - _mm256_blendv_epi8(block, _mm256_set1_epi16(replacement), block_illseq); - _mm256_storeu_si256((__m256i *)out, block); - } else if (!in_place) { - _mm256_storeu_si256((__m256i *)out, block); - } -} - -template -void utf16fix_block_sse(char16_t *out, const char16_t *in) { - const char16_t replacement = scalar::utf16::replacement(); - auto swap_if_needed = [](uint16_t c) -> uint16_t { - return !simdutf::match_system(big_endian) ? scalar::u16_swap_bytes(c) : c; - }; - - __m128i lookback, block, lb_masked, block_masked, lb_is_high, block_is_low; - __m128i illseq, lb_illseq, block_illseq; - - lookback = _mm_loadu_si128((const __m128i *)(in - 1)); - block = _mm_loadu_si128((const __m128i *)in); - lb_masked = _mm_and_si128(lookback, _mm_set1_epi16(swap_if_needed(0xfc00U))); - block_masked = _mm_and_si128(block, _mm_set1_epi16(swap_if_needed(0xfc00U))); - lb_is_high = - _mm_cmpeq_epi16(lb_masked, _mm_set1_epi16(swap_if_needed(0xd800U))); - block_is_low = - _mm_cmpeq_epi16(block_masked, _mm_set1_epi16(swap_if_needed(0xdc00U))); - - illseq = _mm_xor_si128(lb_is_high, block_is_low); - if (_mm_movemask_epi8(illseq) != 0) { - /* compute the cause of the illegal sequencing */ - lb_illseq = _mm_andnot_si128(block_is_low, lb_is_high); - block_illseq = _mm_or_si128(_mm_andnot_si128(lb_is_high, block_is_low), - _mm_bsrli_si128(lb_illseq, 2)); - /* fix illegal sequencing in the lookback */ - int lb = _mm_cvtsi128_si32(lb_illseq); - lb = (lb & replacement) | (~lb & out[-1]); - out[-1] = char16_t(lb); - /* fix illegal sequencing in the main block */ - block = - _mm_or_si128(_mm_andnot_si128(block_illseq, block), - _mm_and_si128(block_illseq, _mm_set1_epi16(replacement))); - _mm_storeu_si128((__m128i *)out, block); - } else if (!in_place) { - _mm_storeu_si128((__m128i *)out, block); - } -} - -template -void utf16fix_sse(const char16_t *in, size_t n, char16_t *out) { - const char16_t replacement = scalar::utf16::replacement(); - size_t i; - - if (n < 9) { - scalar::utf16::to_well_formed_utf16(in, n, out); - return; - } - - out[0] = - scalar::utf16::is_low_surrogate(in[0]) ? replacement : in[0]; - - /* duplicate code to have the compiler specialise utf16fix_block() */ - if (in == out) { - for (i = 1; i + 8 < n; i += 8) { - utf16fix_block_sse(out + i, in + i); - } - - utf16fix_block_sse(out + n - 8, in + n - 8); - } else { - for (i = 1; i + 8 < n; i += 8) { - utf16fix_block_sse(out + i, in + i); - } - - utf16fix_block_sse(out + n - 8, in + n - 8); - } - - out[n - 1] = scalar::utf16::is_high_surrogate(out[n - 1]) - ? replacement - : out[n - 1]; -} - -template -void utf16fix_avx(const char16_t *in, size_t n, char16_t *out) { - const char16_t replacement = scalar::utf16::replacement(); - size_t i; - - if (n < 17) { - utf16fix_sse(in, n, out); - return; - } - - out[0] = - scalar::utf16::is_low_surrogate(in[0]) ? replacement : in[0]; - - /* duplicate code to have the compiler specialise utf16fix_block() */ - if (in == out) { - for (i = 1; i + 16 < n; i += 16) { - utf16fix_block(out + i, in + i); - } - - utf16fix_block(out + n - 16, in + n - 16); - } else { - for (i = 1; i + 16 < n; i += 16) { - utf16fix_block(out + i, in + i); - } - - utf16fix_block(out + n - 16, in + n - 16); - } - - out[n - 1] = scalar::utf16::is_high_surrogate(out[n - 1]) - ? replacement - : out[n - 1]; -} -/* end file src/haswell/avx2_utf16fix.cpp */ -#endif // SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/haswell/avx2_convert_latin1_to_utf8.cpp */ -std::pair -avx2_convert_latin1_to_utf8(const char *latin1_input, size_t len, - char *utf8_output) { - const char *end = latin1_input + len; - const __m256i v_0000 = _mm256_setzero_si256(); - const __m256i v_c080 = _mm256_set1_epi16((int16_t)0xc080); - const __m256i v_ff80 = _mm256_set1_epi16((int16_t)0xff80); - const size_t safety_margin = 12; - - while (end - latin1_input >= std::ptrdiff_t(16 + safety_margin)) { - __m128i in8 = _mm_loadu_si128((__m128i *)latin1_input); - // a single 16-bit UTF-16 word can yield 1, 2 or 3 UTF-8 bytes - const __m128i v_80 = _mm_set1_epi8((char)0x80); - if (_mm_testz_si128(in8, v_80)) { // ASCII fast path!!!! - // 1. store (16 bytes) - _mm_storeu_si128((__m128i *)utf8_output, in8); - // 2. adjust pointers - latin1_input += 16; - utf8_output += 16; - continue; // we are done for this round! - } - // We proceed only with the first 16 bytes. - const __m256i in = _mm256_cvtepu8_epi16((in8)); - - // 1. prepare 2-byte values - // input 16-bit word : [0000|0000|aabb|bbbb] x 8 - // expected output : [1100|00aa|10bb|bbbb] x 8 - const __m256i v_1f00 = _mm256_set1_epi16((int16_t)0x1f00); - const __m256i v_003f = _mm256_set1_epi16((int16_t)0x003f); - - // t0 = [0000|00aa|bbbb|bb00] - const __m256i t0 = _mm256_slli_epi16(in, 2); - // t1 = [0000|00aa|0000|0000] - const __m256i t1 = _mm256_and_si256(t0, v_1f00); - // t2 = [0000|0000|00bb|bbbb] - const __m256i t2 = _mm256_and_si256(in, v_003f); - // t3 = [000a|aaaa|00bb|bbbb] - const __m256i t3 = _mm256_or_si256(t1, t2); - // t4 = [1100|00aa|10bb|bbbb] - const __m256i t4 = _mm256_or_si256(t3, v_c080); - - // 2. merge ASCII and 2-byte codewords - - // no bits set above 7th bit - const __m256i one_byte_bytemask = - _mm256_cmpeq_epi16(_mm256_and_si256(in, v_ff80), v_0000); - const uint32_t one_byte_bitmask = - static_cast(_mm256_movemask_epi8(one_byte_bytemask)); - - const __m256i utf8_unpacked = _mm256_blendv_epi8(t4, in, one_byte_bytemask); - - // 3. prepare bitmask for 8-bit lookup - const uint32_t M0 = one_byte_bitmask & 0x55555555; - const uint32_t M1 = M0 >> 7; - const uint32_t M2 = (M1 | M0) & 0x00ff00ff; - // 4. pack the bytes - - const uint8_t *row = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0]; - const uint8_t *row_2 = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2 >> 16)] - [0]; - - const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1)); - const __m128i shuffle_2 = _mm_loadu_si128((__m128i *)(row_2 + 1)); - - const __m256i utf8_packed = _mm256_shuffle_epi8( - utf8_unpacked, _mm256_setr_m128i(shuffle, shuffle_2)); - // 5. store bytes - _mm_storeu_si128((__m128i *)utf8_output, - _mm256_castsi256_si128(utf8_packed)); - utf8_output += row[0]; - _mm_storeu_si128((__m128i *)utf8_output, - _mm256_extractf128_si256(utf8_packed, 1)); - utf8_output += row_2[0]; - - // 6. adjust pointers - latin1_input += 16; - continue; - - } // while - return std::make_pair(latin1_input, utf8_output); -} -/* end file src/haswell/avx2_convert_latin1_to_utf8.cpp */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/haswell/avx2_convert_latin1_to_utf16.cpp */ -template -std::pair -avx2_convert_latin1_to_utf16(const char *latin1_input, size_t len, - char16_t *utf16_output) { - size_t rounded_len = len & ~0xF; // Round down to nearest multiple of 16 - - size_t i = 0; - for (; i < rounded_len; i += 16) { - // Load 16 bytes from the address (input + i) into a xmm register - const __m128i latin1 = - _mm_loadu_si128(reinterpret_cast(latin1_input + i)); - - // Zero extend each byte in `in` to word - __m256i utf16 = _mm256_cvtepu8_epi16(latin1); - - if (big_endian) { - const __m128i swap128 = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - const __m256i swap = _mm256_set_m128i(swap128, swap128); - utf16 = _mm256_shuffle_epi8(utf16, swap); - } - - // Store the contents of xmm1 into the address pointed by (output + i) - _mm256_storeu_si256(reinterpret_cast<__m256i *>(utf16_output + i), utf16); - } - - return std::make_pair(latin1_input + rounded_len, utf16_output + rounded_len); -} -/* end file src/haswell/avx2_convert_latin1_to_utf16.cpp */ -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/haswell/avx2_convert_latin1_to_utf32.cpp */ -std::pair -avx2_convert_latin1_to_utf32(const char *buf, size_t len, - char32_t *utf32_output) { - size_t rounded_len = ((len | 7) ^ 7); // Round down to nearest multiple of 8 - - for (size_t i = 0; i < rounded_len; i += 8) { - // Load 8 Latin1 characters into a 64-bit register - __m128i in = _mm_loadl_epi64((__m128i *)&buf[i]); - - // Zero extend each set of 8 Latin1 characters to 8 32-bit integers using - // vpmovzxbd - __m256i out = _mm256_cvtepu8_epi32(in); - - // Store the results back to memory - _mm256_storeu_si256((__m256i *)&utf32_output[i], out); - } - - // return pointers pointing to where we left off - return std::make_pair(buf + rounded_len, utf32_output + rounded_len); -} -/* end file src/haswell/avx2_convert_latin1_to_utf32.cpp */ -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -/* begin file src/haswell/avx2_convert_utf8_to_utf16.cpp */ -// depends on "tables/utf8_to_utf16_tables.h" - -// Convert up to 12 bytes from utf8 to utf16 using a mask indicating the -// end of the code points. Only the least significant 12 bits of the mask -// are accessed. -// It returns how many bytes were consumed (up to 12). -template -size_t convert_masked_utf8_to_utf16(const char *input, - uint64_t utf8_end_of_code_point_mask, - char16_t *&utf16_output) { - // we use an approach where we try to process up to 12 input bytes. - // Why 12 input bytes and not 16? Because we are concerned with the size of - // the lookup tables. Also 12 is nicely divisible by two and three. - // - // - // Optimization note: our main path below is load-latency dependent. Thus it - // is maybe beneficial to have fast paths that depend on branch prediction but - // have less latency. This results in more instructions but, potentially, also - // higher speeds. - // - // We first try a few fast paths. - const __m128i swap = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - const __m128i in = _mm_loadu_si128((__m128i *)input); - const uint16_t input_utf8_end_of_code_point_mask = - utf8_end_of_code_point_mask & 0xfff; - if (utf8_end_of_code_point_mask == 0xfff) { - // We process the data in chunks of 12 bytes. - __m256i ascii = _mm256_cvtepu8_epi16(in); - if (big_endian) { - const __m256i swap256 = _mm256_setr_epi8( - 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18, - 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30); - ascii = _mm256_shuffle_epi8(ascii, swap256); - } - _mm256_storeu_si256(reinterpret_cast<__m256i *>(utf16_output), ascii); - utf16_output += 12; // We wrote 12 16-bit characters. - return 12; // We consumed 12 bytes. - } - if (((utf8_end_of_code_point_mask & 0xffff) == 0xaaaa)) { - // We want to take 8 2-byte UTF-8 code units and turn them into 8 2-byte - // UTF-16 code units. There is probably a more efficient sequence, but the - // following might do. - const __m128i sh = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - const __m128i perm = _mm_shuffle_epi8(in, sh); - const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f)); - const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00)); - __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2)); - if (big_endian) - composed = _mm_shuffle_epi8(composed, swap); - _mm_storeu_si128((__m128i *)utf16_output, composed); - utf16_output += 8; // We wrote 16 bytes, 8 code points. - return 16; - } - if (input_utf8_end_of_code_point_mask == 0x924) { - // We want to take 4 3-byte UTF-8 code units and turn them into 4 2-byte - // UTF-16 code units. There is probably a more efficient sequence, but the - // following might do. - const __m128i sh = - _mm_setr_epi8(2, 1, 0, -1, 5, 4, 3, -1, 8, 7, 6, -1, 11, 10, 9, -1); - const __m128i perm = _mm_shuffle_epi8(in, sh); - const __m128i ascii = - _mm_and_si128(perm, _mm_set1_epi32(0x7f)); // 7 or 6 bits - const __m128i middlebyte = - _mm_and_si128(perm, _mm_set1_epi32(0x3f00)); // 5 or 6 bits - const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2); - const __m128i highbyte = - _mm_and_si128(perm, _mm_set1_epi32(0x0f0000)); // 4 bits - const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 4); - const __m128i composed = - _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), highbyte_shifted); - __m128i composed_repacked = _mm_packus_epi32(composed, composed); - if (big_endian) - composed_repacked = _mm_shuffle_epi8(composed_repacked, swap); - _mm_storeu_si128((__m128i *)utf16_output, composed_repacked); - utf16_output += 4; - return 12; - } - - const uint8_t idx = simdutf::tables::utf8_to_utf16::utf8bigindex - [input_utf8_end_of_code_point_mask][0]; - const uint8_t consumed = simdutf::tables::utf8_to_utf16::utf8bigindex - [input_utf8_end_of_code_point_mask][1]; - if (idx < 64) { - // SIX (6) input code-code units - // this is a relatively easy scenario - // we process SIX (6) input code-code units. The max length in bytes of six - // code code units spanning between 1 and 2 bytes each is 12 bytes. On - // processors where pdep/pext is fast, we might be able to use a small - // lookup table. - const __m128i sh = _mm_loadu_si128( - (const __m128i *)simdutf::tables::utf8_to_utf16::shufutf8[idx]); - const __m128i perm = _mm_shuffle_epi8(in, sh); - const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f)); - const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00)); - __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2)); - if (big_endian) - composed = _mm_shuffle_epi8(composed, swap); - _mm_storeu_si128((__m128i *)utf16_output, composed); - utf16_output += 6; // We wrote 12 bytes, 6 code points. There is a potential - // overflow of 4 bytes. - } else if (idx < 145) { - // FOUR (4) input code-code units - const __m128i sh = _mm_loadu_si128( - (const __m128i *)simdutf::tables::utf8_to_utf16::shufutf8[idx]); - const __m128i perm = _mm_shuffle_epi8(in, sh); - const __m128i ascii = - _mm_and_si128(perm, _mm_set1_epi32(0x7f)); // 7 or 6 bits - const __m128i middlebyte = - _mm_and_si128(perm, _mm_set1_epi32(0x3f00)); // 5 or 6 bits - const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2); - const __m128i highbyte = - _mm_and_si128(perm, _mm_set1_epi32(0x0f0000)); // 4 bits - const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 4); - const __m128i composed = - _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), highbyte_shifted); - __m128i composed_repacked = _mm_packus_epi32(composed, composed); - if (big_endian) - composed_repacked = _mm_shuffle_epi8(composed_repacked, swap); - _mm_storeu_si128((__m128i *)utf16_output, composed_repacked); - utf16_output += 4; // Here we overflow by 8 bytes. - } else if (idx < 209) { - // TWO (2) input code-code units - ////////////// - // There might be garbage inputs where a leading byte mascarades as a - // four-byte leading byte (by being followed by 3 continuation byte), but is - // not greater than 0xf0. This could trigger a buffer overflow if we only - // counted leading bytes of the form 0xf0 as generating surrogate pairs, - // without further UTF-8 validation. Thus we must be careful to ensure that - // only leading bytes at least as large as 0xf0 generate surrogate pairs. We - // do as at the cost of an extra mask. - ///////////// - const __m128i sh = _mm_loadu_si128( - (const __m128i *)simdutf::tables::utf8_to_utf16::shufutf8[idx]); - const __m128i perm = _mm_shuffle_epi8(in, sh); - const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi32(0x7f)); - const __m128i middlebyte = _mm_and_si128(perm, _mm_set1_epi32(0x3f00)); - const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2); - __m128i middlehighbyte = _mm_and_si128(perm, _mm_set1_epi32(0x3f0000)); - // correct for spurious high bit - const __m128i correct = - _mm_srli_epi32(_mm_and_si128(perm, _mm_set1_epi32(0x400000)), 1); - middlehighbyte = _mm_xor_si128(correct, middlehighbyte); - const __m128i middlehighbyte_shifted = _mm_srli_epi32(middlehighbyte, 4); - // We deliberately carry the leading four bits in highbyte if they are - // present, we remove them later when computing hightenbits. - const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi32(0xff000000)); - const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 6); - // When we need to generate a surrogate pair (leading byte > 0xF0), then - // the corresponding 32-bit value in 'composed' will be greater than - // > (0xff00000>>6) or > 0x3c00000. This can be used later to identify the - // location of the surrogate pairs. - const __m128i composed = - _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), - _mm_or_si128(highbyte_shifted, middlehighbyte_shifted)); - const __m128i composedminus = - _mm_sub_epi32(composed, _mm_set1_epi32(0x10000)); - const __m128i lowtenbits = - _mm_and_si128(composedminus, _mm_set1_epi32(0x3ff)); - // Notice the 0x3ff mask: - const __m128i hightenbits = - _mm_and_si128(_mm_srli_epi32(composedminus, 10), _mm_set1_epi32(0x3ff)); - const __m128i lowtenbitsadd = - _mm_add_epi32(lowtenbits, _mm_set1_epi32(0xDC00)); - const __m128i hightenbitsadd = - _mm_add_epi32(hightenbits, _mm_set1_epi32(0xD800)); - const __m128i lowtenbitsaddshifted = _mm_slli_epi32(lowtenbitsadd, 16); - __m128i surrogates = _mm_or_si128(hightenbitsadd, lowtenbitsaddshifted); - uint32_t basic_buffer[4]; - uint32_t basic_buffer_swap[4]; - if (big_endian) { - _mm_storeu_si128((__m128i *)basic_buffer_swap, - _mm_shuffle_epi8(composed, swap)); - surrogates = _mm_shuffle_epi8(surrogates, swap); - } - _mm_storeu_si128((__m128i *)basic_buffer, composed); - uint32_t surrogate_buffer[4]; - _mm_storeu_si128((__m128i *)surrogate_buffer, surrogates); - for (size_t i = 0; i < 3; i++) { - if (basic_buffer[i] > 0x3c00000) { - utf16_output[0] = uint16_t(surrogate_buffer[i] & 0xffff); - utf16_output[1] = uint16_t(surrogate_buffer[i] >> 16); - utf16_output += 2; - } else { - utf16_output[0] = big_endian ? uint16_t(basic_buffer_swap[i]) - : uint16_t(basic_buffer[i]); - utf16_output++; - } - } - } else { - // here we know that there is an error but we do not handle errors - } - return consumed; -} -/* end file src/haswell/avx2_convert_utf8_to_utf16.cpp */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -/* begin file src/haswell/avx2_convert_utf8_to_utf32.cpp */ -// depends on "tables/utf8_to_utf16_tables.h" - -// Convert up to 12 bytes from utf8 to utf32 using a mask indicating the -// end of the code points. Only the least significant 12 bits of the mask -// are accessed. -// It returns how many bytes were consumed (up to 12). -size_t convert_masked_utf8_to_utf32(const char *input, - uint64_t utf8_end_of_code_point_mask, - char32_t *&utf32_output) { - // we use an approach where we try to process up to 12 input bytes. - // Why 12 input bytes and not 16? Because we are concerned with the size of - // the lookup tables. Also 12 is nicely divisible by two and three. - // - // - // Optimization note: our main path below is load-latency dependent. Thus it - // is maybe beneficial to have fast paths that depend on branch prediction but - // have less latency. This results in more instructions but, potentially, also - // higher speeds. - // - // We first try a few fast paths. - const __m128i in = _mm_loadu_si128((__m128i *)input); - const uint16_t input_utf8_end_of_code_point_mask = - utf8_end_of_code_point_mask & 0xfff; - if (utf8_end_of_code_point_mask == 0xfff) { - // We process the data in chunks of 12 bytes. - _mm256_storeu_si256(reinterpret_cast<__m256i *>(utf32_output), - _mm256_cvtepu8_epi32(in)); - _mm256_storeu_si256(reinterpret_cast<__m256i *>(utf32_output + 8), - _mm256_cvtepu8_epi32(_mm_srli_si128(in, 8))); - utf32_output += 12; // We wrote 12 32-bit characters. - return 12; // We consumed 12 bytes. - } - if (((utf8_end_of_code_point_mask & 0xffff) == 0xaaaa)) { - // We want to take 8 2-byte UTF-8 code units and turn them into 8 4-byte - // UTF-32 code units. There is probably a more efficient sequence, but the - // following might do. - const __m128i sh = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - const __m128i perm = _mm_shuffle_epi8(in, sh); - const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f)); - const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00)); - const __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2)); - _mm256_storeu_si256((__m256i *)utf32_output, - _mm256_cvtepu16_epi32(composed)); - utf32_output += 8; // We wrote 16 bytes, 8 code points. - return 16; - } - if (input_utf8_end_of_code_point_mask == 0x924) { - // We want to take 4 3-byte UTF-8 code units and turn them into 4 4-byte - // UTF-32 code units. There is probably a more efficient sequence, but the - // following might do. - const __m128i sh = - _mm_setr_epi8(2, 1, 0, -1, 5, 4, 3, -1, 8, 7, 6, -1, 11, 10, 9, -1); - const __m128i perm = _mm_shuffle_epi8(in, sh); - const __m128i ascii = - _mm_and_si128(perm, _mm_set1_epi32(0x7f)); // 7 or 6 bits - const __m128i middlebyte = - _mm_and_si128(perm, _mm_set1_epi32(0x3f00)); // 5 or 6 bits - const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2); - const __m128i highbyte = - _mm_and_si128(perm, _mm_set1_epi32(0x0f0000)); // 4 bits - const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 4); - const __m128i composed = - _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), highbyte_shifted); - _mm_storeu_si128((__m128i *)utf32_output, composed); - utf32_output += 4; - return 12; - } - /// We do not have a fast path available, so we fallback. - - const uint8_t idx = - tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][0]; - const uint8_t consumed = - tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1]; - if (idx < 64) { - // SIX (6) input code-code units - // this is a relatively easy scenario - // we process SIX (6) input code-code units. The max length in bytes of six - // code code units spanning between 1 and 2 bytes each is 12 bytes. On - // processors where pdep/pext is fast, we might be able to use a small - // lookup table. - const __m128i sh = - _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]); - const __m128i perm = _mm_shuffle_epi8(in, sh); - const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f)); - const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00)); - const __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2)); - _mm256_storeu_si256((__m256i *)utf32_output, - _mm256_cvtepu16_epi32(composed)); - utf32_output += 6; // We wrote 24 bytes, 6 code points. There is a potential - // overflow of 32 - 24 = 8 bytes. - } else if (idx < 145) { - // FOUR (4) input code-code units - const __m128i sh = - _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]); - const __m128i perm = _mm_shuffle_epi8(in, sh); - const __m128i ascii = - _mm_and_si128(perm, _mm_set1_epi32(0x7f)); // 7 or 6 bits - const __m128i middlebyte = - _mm_and_si128(perm, _mm_set1_epi32(0x3f00)); // 5 or 6 bits - const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2); - const __m128i highbyte = - _mm_and_si128(perm, _mm_set1_epi32(0x0f0000)); // 4 bits - const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 4); - const __m128i composed = - _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), highbyte_shifted); - _mm_storeu_si128((__m128i *)utf32_output, composed); - utf32_output += 4; - } else if (idx < 209) { - // TWO (2) input code-code units - const __m128i sh = - _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]); - const __m128i perm = _mm_shuffle_epi8(in, sh); - const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi32(0x7f)); - const __m128i middlebyte = _mm_and_si128(perm, _mm_set1_epi32(0x3f00)); - const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2); - __m128i middlehighbyte = _mm_and_si128(perm, _mm_set1_epi32(0x3f0000)); - // correct for spurious high bit - const __m128i correct = - _mm_srli_epi32(_mm_and_si128(perm, _mm_set1_epi32(0x400000)), 1); - middlehighbyte = _mm_xor_si128(correct, middlehighbyte); - const __m128i middlehighbyte_shifted = _mm_srli_epi32(middlehighbyte, 4); - const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi32(0x07000000)); - const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 6); - const __m128i composed = - _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), - _mm_or_si128(highbyte_shifted, middlehighbyte_shifted)); - _mm_storeu_si128((__m128i *)utf32_output, composed); - utf32_output += - 3; // We wrote 3 * 4 bytes, there is a potential overflow of 4 bytes. - } else { - // here we know that there is an error but we do not handle errors - } - return consumed; -} -/* end file src/haswell/avx2_convert_utf8_to_utf32.cpp */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/haswell/avx2_convert_utf16_to_latin1.cpp */ -template -std::pair -avx2_convert_utf16_to_latin1(const char16_t *buf, size_t len, - char *latin1_output) { - const char16_t *end = buf + len; - while (end - buf >= 32) { - // Load 16 UTF-16 characters into 256-bit AVX2 register - __m256i in0 = _mm256_loadu_si256(reinterpret_cast(buf)); - __m256i in1 = - _mm256_loadu_si256(reinterpret_cast(buf + 16)); - - if (!match_system(big_endian)) { - const __m256i swap = _mm256_setr_epi8( - 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18, - 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30); - in0 = _mm256_shuffle_epi8(in0, swap); - in1 = _mm256_shuffle_epi8(in1, swap); - } - - __m256i high_byte_mask = _mm256_set1_epi16((int16_t)0xFF00); - if (_mm256_testz_si256(_mm256_or_si256(in0, in1), high_byte_mask)) { - // Pack 16-bit characters into 8-bit and store in latin1_output - const __m256i packed = _mm256_packus_epi16(in0, in1); - - const __m256i result = _mm256_permute4x64_epi64(packed, 0b11011000); - - _mm256_storeu_si256(reinterpret_cast<__m256i *>(latin1_output), result); - // Adjust pointers for the next iteration - buf += 32; - latin1_output += 32; - } else { - return std::make_pair(nullptr, reinterpret_cast(latin1_output)); - } - } // while - return std::make_pair(buf, latin1_output); -} - -template -std::pair -avx2_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, - char *latin1_output) { - const char16_t *start = buf; - const char16_t *end = buf + len; - while (end - buf >= 16) { - __m256i in = _mm256_loadu_si256(reinterpret_cast(buf)); - - if (!match_system(big_endian)) { - const __m256i swap = _mm256_setr_epi8( - 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18, - 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30); - in = _mm256_shuffle_epi8(in, swap); - } - - __m256i high_byte_mask = _mm256_set1_epi16((int16_t)0xFF00); - if (_mm256_testz_si256(in, high_byte_mask)) { - __m128i lo = _mm256_extractf128_si256(in, 0); - __m128i hi = _mm256_extractf128_si256(in, 1); - __m128i latin1_packed_lo = _mm_packus_epi16(lo, lo); - __m128i latin1_packed_hi = _mm_packus_epi16(hi, hi); - _mm_storel_epi64(reinterpret_cast<__m128i *>(latin1_output), - latin1_packed_lo); - _mm_storel_epi64(reinterpret_cast<__m128i *>(latin1_output + 8), - latin1_packed_hi); - buf += 16; - latin1_output += 16; - } else { - // Fallback to scalar code for handling errors - for (int k = 0; k < 16; k++) { - uint16_t word = - !match_system(big_endian) ? scalar::u16_swap_bytes(buf[k]) : buf[k]; - if (word <= 0xff) { - *latin1_output++ = char(word); - } else { - return std::make_pair( - result{error_code::TOO_LARGE, (size_t)(buf - start + k)}, - latin1_output); - } - } - buf += 16; - } - } // while - return std::make_pair(result{error_code::SUCCESS, (size_t)(buf - start)}, - latin1_output); -} -/* end file src/haswell/avx2_convert_utf16_to_latin1.cpp */ -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -/* begin file src/haswell/avx2_convert_utf16_to_utf8.cpp */ -/* - The vectorized algorithm works on single SSE register i.e., it - loads eight 16-bit code units. - - We consider three cases: - 1. an input register contains no surrogates and each value - is in range 0x0000 .. 0x07ff. - 2. an input register contains no surrogates and values are - is in range 0x0000 .. 0xffff. - 3. an input register contains surrogates --- i.e. codepoints - can have 16 or 32 bits. - - Ad 1. - - When values are less than 0x0800, it means that a 16-bit code unit - can be converted into: 1) single UTF8 byte (when it is an ASCII - char) or 2) two UTF8 bytes. - - For this case we do only some shuffle to obtain these 2-byte - codes and finally compress the whole SSE register with a single - shuffle. - - We need 256-entry lookup table to get a compression pattern - and the number of output bytes in the compressed vector register. - Each entry occupies 17 bytes. - - Ad 2. - - When values fit in 16-bit code units, but are above 0x07ff, then - a single word may produce one, two or three UTF8 bytes. - - We prepare data for all these three cases in two registers. - The first register contains lower two UTF8 bytes (used in all - cases), while the second one contains just the third byte for - the three-UTF8-bytes case. - - Finally these two registers are interleaved forming eight-element - array of 32-bit values. The array spans two SSE registers. - The bytes from the registers are compressed using two shuffles. - - We need 256-entry lookup table to get a compression pattern - and the number of output bytes in the compressed vector register. - Each entry occupies 17 bytes. - - - To summarize: - - We need two 256-entry tables that have 8704 bytes in total. -*/ - -/* - Returns a pair: the first unprocessed byte from buf and utf8_output - A scalar routing should carry on the conversion of the tail. -*/ -template -std::pair -avx2_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_output) { - const char16_t *end = buf + len; - const __m256i v_0000 = _mm256_setzero_si256(); - const __m256i v_f800 = _mm256_set1_epi16((int16_t)0xf800); - const __m256i v_d800 = _mm256_set1_epi16((int16_t)0xd800); - const __m256i v_c080 = _mm256_set1_epi16((int16_t)0xc080); - const size_t safety_margin = - 12; // to avoid overruns, see issue - // https://github.com/simdutf/simdutf/issues/92 - - while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { - __m256i in = _mm256_loadu_si256((__m256i *)buf); - if (big_endian) { - const __m256i swap = _mm256_setr_epi8( - 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18, - 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30); - in = _mm256_shuffle_epi8(in, swap); - } - // a single 16-bit UTF-16 word can yield 1, 2 or 3 UTF-8 bytes - const __m256i v_ff80 = _mm256_set1_epi16((int16_t)0xff80); - if (_mm256_testz_si256(in, v_ff80)) { // ASCII fast path!!!! - // 1. pack the bytes - const __m128i utf8_packed = _mm_packus_epi16( - _mm256_castsi256_si128(in), _mm256_extractf128_si256(in, 1)); - // 2. store (16 bytes) - _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); - // 3. adjust pointers - buf += 16; - utf8_output += 16; - continue; // we are done for this round! - } - // no bits set above 7th bit - const __m256i one_byte_bytemask = - _mm256_cmpeq_epi16(_mm256_and_si256(in, v_ff80), v_0000); - const uint32_t one_byte_bitmask = - static_cast(_mm256_movemask_epi8(one_byte_bytemask)); - - // no bits set above 11th bit - const __m256i one_or_two_bytes_bytemask = - _mm256_cmpeq_epi16(_mm256_and_si256(in, v_f800), v_0000); - const uint32_t one_or_two_bytes_bitmask = - static_cast(_mm256_movemask_epi8(one_or_two_bytes_bytemask)); - if (one_or_two_bytes_bitmask == 0xffffffff) { - - // 1. prepare 2-byte values - // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 - // expected output : [110a|aaaa|10bb|bbbb] x 8 - const __m256i v_1f00 = _mm256_set1_epi16((int16_t)0x1f00); - const __m256i v_003f = _mm256_set1_epi16((int16_t)0x003f); - - // t0 = [000a|aaaa|bbbb|bb00] - const __m256i t0 = _mm256_slli_epi16(in, 2); - // t1 = [000a|aaaa|0000|0000] - const __m256i t1 = _mm256_and_si256(t0, v_1f00); - // t2 = [0000|0000|00bb|bbbb] - const __m256i t2 = _mm256_and_si256(in, v_003f); - // t3 = [000a|aaaa|00bb|bbbb] - const __m256i t3 = _mm256_or_si256(t1, t2); - // t4 = [110a|aaaa|10bb|bbbb] - const __m256i t4 = _mm256_or_si256(t3, v_c080); - - // 2. merge ASCII and 2-byte codewords - const __m256i utf8_unpacked = - _mm256_blendv_epi8(t4, in, one_byte_bytemask); - - // 3. prepare bitmask for 8-bit lookup - const uint32_t M0 = one_byte_bitmask & 0x55555555; - const uint32_t M1 = M0 >> 7; - const uint32_t M2 = (M1 | M0) & 0x00ff00ff; - // 4. pack the bytes - - const uint8_t *row = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0]; - const uint8_t *row_2 = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2 >> - 16)][0]; - - const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1)); - const __m128i shuffle_2 = _mm_loadu_si128((__m128i *)(row_2 + 1)); - - const __m256i utf8_packed = _mm256_shuffle_epi8( - utf8_unpacked, _mm256_setr_m128i(shuffle, shuffle_2)); - // 5. store bytes - _mm_storeu_si128((__m128i *)utf8_output, - _mm256_castsi256_si128(utf8_packed)); - utf8_output += row[0]; - _mm_storeu_si128((__m128i *)utf8_output, - _mm256_extractf128_si256(utf8_packed, 1)); - utf8_output += row_2[0]; - - // 6. adjust pointers - buf += 16; - continue; - } - // 1. Check if there are any surrogate word in the input chunk. - // We have also deal with situation when there is a surrogate word - // at the end of a chunk. - const __m256i surrogates_bytemask = - _mm256_cmpeq_epi16(_mm256_and_si256(in, v_f800), v_d800); - - // bitmask = 0x0000 if there are no surrogates - // = 0xc000 if the last word is a surrogate - const uint32_t surrogates_bitmask = - static_cast(_mm256_movemask_epi8(surrogates_bytemask)); - // It might seem like checking for surrogates_bitmask == 0xc000 could help. - // However, it is likely an uncommon occurrence. - if (surrogates_bitmask == 0x00000000) { - // case: code units from register produce either 1, 2 or 3 UTF-8 bytes - const __m256i dup_even = _mm256_setr_epi16( - 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e, - 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); - - /* In this branch we handle three cases: - 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - - single UFT-8 byte - 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two - UTF-8 bytes - 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - - three UTF-8 bytes - - We expand the input word (16-bit) into two code units (32-bit), thus - we have room for four bytes. However, we need five distinct bit - layouts. Note that the last byte in cases #2 and #3 is the same. - - We precompute byte 1 for case #1 and the common byte for cases #2 & #3 - in register t2. - - We precompute byte 1 for case #3 and -- **conditionally** -- precompute - either byte 1 for case #2 or byte 2 for case #3. Note that they - differ by exactly one bit. - - Finally from these two code units we build proper UTF-8 sequence, taking - into account the case (i.e, the number of bytes to write). - */ - /** - * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: - * t2 => [0ccc|cccc] [10cc|cccc] - * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) - */ -#define simdutf_vec(x) _mm256_set1_epi16(static_cast(x)) - // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - const __m256i t0 = _mm256_shuffle_epi8(in, dup_even); - // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] - const __m256i t1 = _mm256_and_si256(t0, simdutf_vec(0b0011111101111111)); - // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - const __m256i t2 = _mm256_or_si256(t1, simdutf_vec(0b1000000000000000)); - - // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] - const __m256i s0 = _mm256_srli_epi16(in, 4); - // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] - const __m256i s1 = _mm256_and_si256(s0, simdutf_vec(0b0000111111111100)); - // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] - const __m256i s2 = _mm256_maddubs_epi16(s1, simdutf_vec(0x0140)); - // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - const __m256i s3 = _mm256_or_si256(s2, simdutf_vec(0b1100000011100000)); - const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, - simdutf_vec(0b0100000000000000)); - const __m256i s4 = _mm256_xor_si256(s3, m0); -#undef simdutf_vec - - // 4. expand code units 16-bit => 32-bit - const __m256i out0 = _mm256_unpacklo_epi16(t2, s4); - const __m256i out1 = _mm256_unpackhi_epi16(t2, s4); - - // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - const uint32_t mask = (one_byte_bitmask & 0x55555555) | - (one_or_two_bytes_bitmask & 0xaaaaaaaa); - // Due to the wider registers, the following path is less likely to be - // useful. - /*if(mask == 0) { - // We only have three-byte code units. Use fast path. - const __m256i shuffle = - _mm256_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1, - 2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1); const __m256i utf8_0 = - _mm256_shuffle_epi8(out0, shuffle); const __m256i utf8_1 = - _mm256_shuffle_epi8(out1, shuffle); - _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_0)); - utf8_output += 12; - _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_1)); - utf8_output += 12; - _mm_storeu_si128((__m128i*)utf8_output, - _mm256_extractf128_si256(utf8_0,1)); utf8_output += 12; - _mm_storeu_si128((__m128i*)utf8_output, - _mm256_extractf128_si256(utf8_1,1)); utf8_output += 12; buf += 16; - continue; - }*/ - const uint8_t mask0 = uint8_t(mask); - const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; - const __m128i shuffle0 = _mm_loadu_si128((__m128i *)(row0 + 1)); - const __m128i utf8_0 = - _mm_shuffle_epi8(_mm256_castsi256_si128(out0), shuffle0); - - const uint8_t mask1 = static_cast(mask >> 8); - const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; - const __m128i shuffle1 = _mm_loadu_si128((__m128i *)(row1 + 1)); - const __m128i utf8_1 = - _mm_shuffle_epi8(_mm256_castsi256_si128(out1), shuffle1); - - const uint8_t mask2 = static_cast(mask >> 16); - const uint8_t *row2 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask2][0]; - const __m128i shuffle2 = _mm_loadu_si128((__m128i *)(row2 + 1)); - const __m128i utf8_2 = - _mm_shuffle_epi8(_mm256_extractf128_si256(out0, 1), shuffle2); - - const uint8_t mask3 = static_cast(mask >> 24); - const uint8_t *row3 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask3][0]; - const __m128i shuffle3 = _mm_loadu_si128((__m128i *)(row3 + 1)); - const __m128i utf8_3 = - _mm_shuffle_epi8(_mm256_extractf128_si256(out1, 1), shuffle3); - - _mm_storeu_si128((__m128i *)utf8_output, utf8_0); - utf8_output += row0[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_1); - utf8_output += row1[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_2); - utf8_output += row2[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_3); - utf8_output += row3[0]; - buf += 16; - // surrogate pair(s) in a register - } else { - // Let us do a scalar fallback. - // It may seem wasteful to use scalar code, but being efficient with SIMD - // in the presence of surrogate pairs may require non-trivial tables. - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint16_t word = big_endian ? scalar::u16_swap_bytes(buf[k]) : buf[k]; - if ((word & 0xFF80) == 0) { - *utf8_output++ = char(word); - } else if ((word & 0xF800) == 0) { - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else if ((word & 0xF800) != 0xD800) { - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else { - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - uint16_t next_word = - big_endian ? scalar::u16_swap_bytes(buf[k + 1]) : buf[k + 1]; - k++; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if ((diff | diff2) > 0x3FF) { - return std::make_pair(nullptr, utf8_output); - } - uint32_t value = (diff << 10) + diff2 + 0x10000; - *utf8_output++ = char((value >> 18) | 0b11110000); - *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((value & 0b111111) | 0b10000000); - } - } - buf += k; - } - } // while - return std::make_pair(buf, utf8_output); -} - -/* - Returns a pair: a result struct and utf8_output. - If there is an error, the count field of the result is the position of the - error. Otherwise, it is the position of the first unprocessed byte in buf - (even if finished). A scalar routing should carry on the conversion of the - tail if needed. -*/ -template -std::pair -avx2_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, - char *utf8_output) { - const char16_t *start = buf; - const char16_t *end = buf + len; - - const __m256i v_0000 = _mm256_setzero_si256(); - const __m256i v_f800 = _mm256_set1_epi16((int16_t)0xf800); - const __m256i v_d800 = _mm256_set1_epi16((int16_t)0xd800); - const __m256i v_c080 = _mm256_set1_epi16((int16_t)0xc080); - const size_t safety_margin = - 12; // to avoid overruns, see issue - // https://github.com/simdutf/simdutf/issues/92 - - while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { - __m256i in = _mm256_loadu_si256((__m256i *)buf); - if (big_endian) { - const __m256i swap = _mm256_setr_epi8( - 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18, - 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30); - in = _mm256_shuffle_epi8(in, swap); - } - // a single 16-bit UTF-16 word can yield 1, 2 or 3 UTF-8 bytes - const __m256i v_ff80 = _mm256_set1_epi16((int16_t)0xff80); - if (_mm256_testz_si256(in, v_ff80)) { // ASCII fast path!!!! - // 1. pack the bytes - const __m128i utf8_packed = _mm_packus_epi16( - _mm256_castsi256_si128(in), _mm256_extractf128_si256(in, 1)); - // 2. store (16 bytes) - _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); - // 3. adjust pointers - buf += 16; - utf8_output += 16; - continue; // we are done for this round! - } - // no bits set above 7th bit - const __m256i one_byte_bytemask = - _mm256_cmpeq_epi16(_mm256_and_si256(in, v_ff80), v_0000); - const uint32_t one_byte_bitmask = - static_cast(_mm256_movemask_epi8(one_byte_bytemask)); - - // no bits set above 11th bit - const __m256i one_or_two_bytes_bytemask = - _mm256_cmpeq_epi16(_mm256_and_si256(in, v_f800), v_0000); - const uint32_t one_or_two_bytes_bitmask = - static_cast(_mm256_movemask_epi8(one_or_two_bytes_bytemask)); - if (one_or_two_bytes_bitmask == 0xffffffff) { - - // 1. prepare 2-byte values - // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 - // expected output : [110a|aaaa|10bb|bbbb] x 8 - const __m256i v_1f00 = _mm256_set1_epi16((int16_t)0x1f00); - const __m256i v_003f = _mm256_set1_epi16((int16_t)0x003f); - - // t0 = [000a|aaaa|bbbb|bb00] - const __m256i t0 = _mm256_slli_epi16(in, 2); - // t1 = [000a|aaaa|0000|0000] - const __m256i t1 = _mm256_and_si256(t0, v_1f00); - // t2 = [0000|0000|00bb|bbbb] - const __m256i t2 = _mm256_and_si256(in, v_003f); - // t3 = [000a|aaaa|00bb|bbbb] - const __m256i t3 = _mm256_or_si256(t1, t2); - // t4 = [110a|aaaa|10bb|bbbb] - const __m256i t4 = _mm256_or_si256(t3, v_c080); - - // 2. merge ASCII and 2-byte codewords - const __m256i utf8_unpacked = - _mm256_blendv_epi8(t4, in, one_byte_bytemask); - - // 3. prepare bitmask for 8-bit lookup - const uint32_t M0 = one_byte_bitmask & 0x55555555; - const uint32_t M1 = M0 >> 7; - const uint32_t M2 = (M1 | M0) & 0x00ff00ff; - // 4. pack the bytes - - const uint8_t *row = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0]; - const uint8_t *row_2 = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2 >> - 16)][0]; - - const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1)); - const __m128i shuffle_2 = _mm_loadu_si128((__m128i *)(row_2 + 1)); - - const __m256i utf8_packed = _mm256_shuffle_epi8( - utf8_unpacked, _mm256_setr_m128i(shuffle, shuffle_2)); - // 5. store bytes - _mm_storeu_si128((__m128i *)utf8_output, - _mm256_castsi256_si128(utf8_packed)); - utf8_output += row[0]; - _mm_storeu_si128((__m128i *)utf8_output, - _mm256_extractf128_si256(utf8_packed, 1)); - utf8_output += row_2[0]; - - // 6. adjust pointers - buf += 16; - continue; - } - // 1. Check if there are any surrogate word in the input chunk. - // We have also deal with situation when there is a surrogate word - // at the end of a chunk. - const __m256i surrogates_bytemask = - _mm256_cmpeq_epi16(_mm256_and_si256(in, v_f800), v_d800); - - // bitmask = 0x0000 if there are no surrogates - // = 0xc000 if the last word is a surrogate - const uint32_t surrogates_bitmask = - static_cast(_mm256_movemask_epi8(surrogates_bytemask)); - // It might seem like checking for surrogates_bitmask == 0xc000 could help. - // However, it is likely an uncommon occurrence. - if (surrogates_bitmask == 0x00000000) { - // case: code units from register produce either 1, 2 or 3 UTF-8 bytes - const __m256i dup_even = _mm256_setr_epi16( - 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e, - 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); - - /* In this branch we handle three cases: - 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - - single UFT-8 byte - 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two - UTF-8 bytes - 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - - three UTF-8 bytes - - We expand the input word (16-bit) into two code units (32-bit), thus - we have room for four bytes. However, we need five distinct bit - layouts. Note that the last byte in cases #2 and #3 is the same. - - We precompute byte 1 for case #1 and the common byte for cases #2 & #3 - in register t2. - - We precompute byte 1 for case #3 and -- **conditionally** -- precompute - either byte 1 for case #2 or byte 2 for case #3. Note that they - differ by exactly one bit. - - Finally from these two code units we build proper UTF-8 sequence, taking - into account the case (i.e, the number of bytes to write). - */ - /** - * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: - * t2 => [0ccc|cccc] [10cc|cccc] - * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) - */ -#define simdutf_vec(x) _mm256_set1_epi16(static_cast(x)) - // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - const __m256i t0 = _mm256_shuffle_epi8(in, dup_even); - // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] - const __m256i t1 = _mm256_and_si256(t0, simdutf_vec(0b0011111101111111)); - // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - const __m256i t2 = _mm256_or_si256(t1, simdutf_vec(0b1000000000000000)); - - // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] - const __m256i s0 = _mm256_srli_epi16(in, 4); - // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] - const __m256i s1 = _mm256_and_si256(s0, simdutf_vec(0b0000111111111100)); - // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] - const __m256i s2 = _mm256_maddubs_epi16(s1, simdutf_vec(0x0140)); - // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - const __m256i s3 = _mm256_or_si256(s2, simdutf_vec(0b1100000011100000)); - const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, - simdutf_vec(0b0100000000000000)); - const __m256i s4 = _mm256_xor_si256(s3, m0); -#undef simdutf_vec - - // 4. expand code units 16-bit => 32-bit - const __m256i out0 = _mm256_unpacklo_epi16(t2, s4); - const __m256i out1 = _mm256_unpackhi_epi16(t2, s4); - - // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - const uint32_t mask = (one_byte_bitmask & 0x55555555) | - (one_or_two_bytes_bitmask & 0xaaaaaaaa); - // Due to the wider registers, the following path is less likely to be - // useful. - /*if(mask == 0) { - // We only have three-byte code units. Use fast path. - const __m256i shuffle = - _mm256_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1, - 2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1); const __m256i utf8_0 = - _mm256_shuffle_epi8(out0, shuffle); const __m256i utf8_1 = - _mm256_shuffle_epi8(out1, shuffle); - _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_0)); - utf8_output += 12; - _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_1)); - utf8_output += 12; - _mm_storeu_si128((__m128i*)utf8_output, - _mm256_extractf128_si256(utf8_0,1)); utf8_output += 12; - _mm_storeu_si128((__m128i*)utf8_output, - _mm256_extractf128_si256(utf8_1,1)); utf8_output += 12; buf += 16; - continue; - }*/ - const uint8_t mask0 = uint8_t(mask); - const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; - const __m128i shuffle0 = _mm_loadu_si128((__m128i *)(row0 + 1)); - const __m128i utf8_0 = - _mm_shuffle_epi8(_mm256_castsi256_si128(out0), shuffle0); - - const uint8_t mask1 = static_cast(mask >> 8); - const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; - const __m128i shuffle1 = _mm_loadu_si128((__m128i *)(row1 + 1)); - const __m128i utf8_1 = - _mm_shuffle_epi8(_mm256_castsi256_si128(out1), shuffle1); - - const uint8_t mask2 = static_cast(mask >> 16); - const uint8_t *row2 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask2][0]; - const __m128i shuffle2 = _mm_loadu_si128((__m128i *)(row2 + 1)); - const __m128i utf8_2 = - _mm_shuffle_epi8(_mm256_extractf128_si256(out0, 1), shuffle2); - - const uint8_t mask3 = static_cast(mask >> 24); - const uint8_t *row3 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask3][0]; - const __m128i shuffle3 = _mm_loadu_si128((__m128i *)(row3 + 1)); - const __m128i utf8_3 = - _mm_shuffle_epi8(_mm256_extractf128_si256(out1, 1), shuffle3); - - _mm_storeu_si128((__m128i *)utf8_output, utf8_0); - utf8_output += row0[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_1); - utf8_output += row1[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_2); - utf8_output += row2[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_3); - utf8_output += row3[0]; - buf += 16; - // surrogate pair(s) in a register - } else { - // Let us do a scalar fallback. - // It may seem wasteful to use scalar code, but being efficient with SIMD - // in the presence of surrogate pairs may require non-trivial tables. - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint16_t word = big_endian ? scalar::u16_swap_bytes(buf[k]) : buf[k]; - if ((word & 0xFF80) == 0) { - *utf8_output++ = char(word); - } else if ((word & 0xF800) == 0) { - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else if ((word & 0xF800) != 0xD800) { - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else { - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - uint16_t next_word = - big_endian ? scalar::u16_swap_bytes(buf[k + 1]) : buf[k + 1]; - k++; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if ((diff | diff2) > 0x3FF) { - return std::make_pair( - result(error_code::SURROGATE, buf - start + k - 1), - utf8_output); - } - uint32_t value = (diff << 10) + diff2 + 0x10000; - *utf8_output++ = char((value >> 18) | 0b11110000); - *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((value & 0b111111) | 0b10000000); - } - } - buf += k; - } - } // while - return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output); -} -/* end file src/haswell/avx2_convert_utf16_to_utf8.cpp */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -/* begin file src/haswell/avx2_convert_utf16_to_utf32.cpp */ -/* - The vectorized algorithm works on single SSE register i.e., it - loads eight 16-bit code units. - - We consider three cases: - 1. an input register contains no surrogates and each value - is in range 0x0000 .. 0x07ff. - 2. an input register contains no surrogates and values are - in range 0x0000 .. 0xffff. - 3. an input register contains surrogates --- i.e. codepoints - can have 16 or 32 bits. - - Ad 1. - - When values are less than 0x0800, it means that a 16-bit code unit - can be converted into: 1) single UTF8 byte (when it is an ASCII - char) or 2) two UTF8 bytes. - - For this case we do only some shuffle to obtain these 2-byte - codes and finally compress the whole SSE register with a single - shuffle. - - We need 256-entry lookup table to get a compression pattern - and the number of output bytes in the compressed vector register. - Each entry occupies 17 bytes. - - Ad 2. - - When values fit in 16-bit code units, but are above 0x07ff, then - a single word may produce one, two or three UTF8 bytes. - - We prepare data for all these three cases in two registers. - The first register contains lower two UTF8 bytes (used in all - cases), while the second one contains just the third byte for - the three-UTF8-bytes case. - - Finally these two registers are interleaved forming eight-element - array of 32-bit values. The array spans two SSE registers. - The bytes from the registers are compressed using two shuffles. - - We need 256-entry lookup table to get a compression pattern - and the number of output bytes in the compressed vector register. - Each entry occupies 17 bytes. - - - To summarize: - - We need two 256-entry tables that have 8704 bytes in total. -*/ - -/* - Returns a pair: the first unprocessed byte from buf and utf32_output - A scalar routing should carry on the conversion of the tail. -*/ -template -std::pair -avx2_convert_utf16_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_output) { - const char16_t *end = buf + len; - const __m256i v_f800 = _mm256_set1_epi16((int16_t)0xf800); - const __m256i v_d800 = _mm256_set1_epi16((int16_t)0xd800); - - while (end - buf >= 16) { - __m256i in = _mm256_loadu_si256((__m256i *)buf); - if (big_endian) { - const __m256i swap = _mm256_setr_epi8( - 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18, - 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30); - in = _mm256_shuffle_epi8(in, swap); - } - - // 1. Check if there are any surrogate word in the input chunk. - // We have also deal with situation when there is a surrogate word - // at the end of a chunk. - const __m256i surrogates_bytemask = - _mm256_cmpeq_epi16(_mm256_and_si256(in, v_f800), v_d800); - - // bitmask = 0x0000 if there are no surrogates - // = 0xc000 if the last word is a surrogate - const uint32_t surrogates_bitmask = - static_cast(_mm256_movemask_epi8(surrogates_bytemask)); - // It might seem like checking for surrogates_bitmask == 0xc000 could help. - // However, it is likely an uncommon occurrence. - if (surrogates_bitmask == 0x00000000) { - // case: we extend all sixteen 16-bit code units to sixteen 32-bit code - // units - _mm256_storeu_si256(reinterpret_cast<__m256i *>(utf32_output), - _mm256_cvtepu16_epi32(_mm256_castsi256_si128(in))); - _mm256_storeu_si256( - reinterpret_cast<__m256i *>(utf32_output + 8), - _mm256_cvtepu16_epi32(_mm256_extractf128_si256(in, 1))); - utf32_output += 16; - buf += 16; - // surrogate pair(s) in a register - } else { - // Let us do a scalar fallback. - // It may seem wasteful to use scalar code, but being efficient with SIMD - // in the presence of surrogate pairs may require non-trivial tables. - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint16_t word = big_endian ? scalar::u16_swap_bytes(buf[k]) : buf[k]; - if ((word & 0xF800) != 0xD800) { - // No surrogate pair - *utf32_output++ = char32_t(word); - } else { - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - uint16_t next_word = - big_endian ? scalar::u16_swap_bytes(buf[k + 1]) : buf[k + 1]; - k++; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if ((diff | diff2) > 0x3FF) { - return std::make_pair(nullptr, utf32_output); - } - uint32_t value = (diff << 10) + diff2 + 0x10000; - *utf32_output++ = char32_t(value); - } - } - buf += k; - } - } // while - return std::make_pair(buf, utf32_output); -} - -/* - Returns a pair: a result struct and utf8_output. - If there is an error, the count field of the result is the position of the - error. Otherwise, it is the position of the first unprocessed byte in buf - (even if finished). A scalar routing should carry on the conversion of the - tail if needed. -*/ -template -std::pair -avx2_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, - char32_t *utf32_output) { - const char16_t *start = buf; - const char16_t *end = buf + len; - const __m256i v_f800 = _mm256_set1_epi16((int16_t)0xf800); - const __m256i v_d800 = _mm256_set1_epi16((int16_t)0xd800); - - while (end - buf >= 16) { - __m256i in = _mm256_loadu_si256((__m256i *)buf); - if (big_endian) { - const __m256i swap = _mm256_setr_epi8( - 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18, - 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30); - in = _mm256_shuffle_epi8(in, swap); - } - - // 1. Check if there are any surrogate word in the input chunk. - // We have also deal with situation when there is a surrogate word - // at the end of a chunk. - const __m256i surrogates_bytemask = - _mm256_cmpeq_epi16(_mm256_and_si256(in, v_f800), v_d800); - - // bitmask = 0x0000 if there are no surrogates - // = 0xc000 if the last word is a surrogate - const uint32_t surrogates_bitmask = - static_cast(_mm256_movemask_epi8(surrogates_bytemask)); - // It might seem like checking for surrogates_bitmask == 0xc000 could help. - // However, it is likely an uncommon occurrence. - if (surrogates_bitmask == 0x00000000) { - // case: we extend all sixteen 16-bit code units to sixteen 32-bit code - // units - _mm256_storeu_si256(reinterpret_cast<__m256i *>(utf32_output), - _mm256_cvtepu16_epi32(_mm256_castsi256_si128(in))); - _mm256_storeu_si256( - reinterpret_cast<__m256i *>(utf32_output + 8), - _mm256_cvtepu16_epi32(_mm256_extractf128_si256(in, 1))); - utf32_output += 16; - buf += 16; - // surrogate pair(s) in a register - } else { - // Let us do a scalar fallback. - // It may seem wasteful to use scalar code, but being efficient with SIMD - // in the presence of surrogate pairs may require non-trivial tables. - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint16_t word = big_endian ? scalar::u16_swap_bytes(buf[k]) : buf[k]; - if ((word & 0xF800) != 0xD800) { - // No surrogate pair - *utf32_output++ = char32_t(word); - } else { - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - uint16_t next_word = - big_endian ? scalar::u16_swap_bytes(buf[k + 1]) : buf[k + 1]; - k++; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if ((diff | diff2) > 0x3FF) { - return std::make_pair( - result(error_code::SURROGATE, buf - start + k - 1), - utf32_output); - } - uint32_t value = (diff << 10) + diff2 + 0x10000; - *utf32_output++ = char32_t(value); - } - } - buf += k; - } - } // while - return std::make_pair(result(error_code::SUCCESS, buf - start), utf32_output); -} -/* end file src/haswell/avx2_convert_utf16_to_utf32.cpp */ -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/haswell/avx2_convert_utf32_to_latin1.cpp */ -std::pair -avx2_convert_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) { - const size_t rounded_len = - len & ~0x1F; // Round down to nearest multiple of 32 - - const __m256i high_bytes_mask = _mm256_set1_epi32(0xFFFFFF00); - - for (size_t i = 0; i < rounded_len; i += 4 * 8) { - __m256i a = _mm256_loadu_si256((__m256i *)(buf + 0 * 8)); - __m256i b = _mm256_loadu_si256((__m256i *)(buf + 1 * 8)); - __m256i c = _mm256_loadu_si256((__m256i *)(buf + 2 * 8)); - __m256i d = _mm256_loadu_si256((__m256i *)(buf + 3 * 8)); - - const __m256i check_combined = - _mm256_or_si256(_mm256_or_si256(a, b), _mm256_or_si256(c, d)); - - if (!_mm256_testz_si256(check_combined, high_bytes_mask)) { - return std::make_pair(nullptr, latin1_output); - } - - b = _mm256_slli_epi32(b, 1 * 8); - c = _mm256_slli_epi32(c, 2 * 8); - d = _mm256_slli_epi32(d, 3 * 8); - - // clang-format off - - // a = [.. .. .. a7|.. .. .. a6|.. .. .. a5|.. .. .. a4||.. .. .. a3|.. .. .. a2|.. .. .. a1|.. .. .. a0] - // b = [.. .. b7 ..|.. .. b6 ..|.. .. b5 ..|.. .. b4 ..||.. .. b3 ..|.. .. b2 ..|.. .. b1 ..|.. .. b0 ..] - // c = [.. c7 .. ..|.. c6 .. ..|.. c5 .. ..|.. c4 .. ..||.. c3 .. ..|.. c2 .. ..|.. c1 .. ..|.. c0 .. ..] - // d = [d7 .. .. ..|d6 .. .. ..|d5 .. .. ..|d4 .. .. ..||d3 .. .. ..|d2 .. .. ..|d1 .. .. ..|d0 .. .. ..] - - // t0 = [d7 c7 b7 a7|d6 c6 b6 a6|d5 c5 b5 a5|d4 c4 b4 a4||d3 c3 b3 a3|d2 c2 b2 a2|d1 c1 b1 a1|d0 c0 b0 a0] - const __m256i t0 = - _mm256_or_si256(_mm256_or_si256(a, b), _mm256_or_si256(c, d)); - - // shuffle bytes within 128-bit lanes - // t1 = [d7 d6 d5 d4|c7 c6 c5 c4|b7 b6 b5 b4|a7 a6 a5 a4||d3 d2 d1 d0|c3 c2 c1 c0|b3 b2 b1 b0|a3 a2 a1 a0] - const __m256i shuffle_bytes = - _mm256_setr_epi8(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15, - 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15); - - const __m256i t1 = _mm256_shuffle_epi8(t0, shuffle_bytes); - - // reshuffle dwords - // t2 = [d7 d6 d5 d4|d3 d2 d1 d0|c7 c6 c5 c4|c3 c2 c1 c0||b7 b6 b5 b4|b3 b2 b1 b0|a7 a6 a5 a4|a3 a2 a1 a0] - const __m256i shuffle_dwords = _mm256_setr_epi32(0, 4, 1, 5, 2, 6, 3, 7); - const __m256i t2 = _mm256_permutevar8x32_epi32(t1, shuffle_dwords); -// clang format on - - _mm256_storeu_si256((__m256i *)latin1_output, t2); - - latin1_output += 32; - buf += 32; - } - - return std::make_pair(buf, latin1_output); -} - -std::pair -avx2_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, - char *latin1_output) { - const size_t rounded_len = - len & ~0x1F; // Round down to nearest multiple of 32 - - const char32_t *start = buf; - - const __m256i high_bytes_mask = _mm256_set1_epi32(0xFFFFFF00); - - for (size_t i = 0; i < rounded_len; i += 4 * 8) { - __m256i a = _mm256_loadu_si256((__m256i *)(buf + 0 * 8)); - __m256i b = _mm256_loadu_si256((__m256i *)(buf + 1 * 8)); - __m256i c = _mm256_loadu_si256((__m256i *)(buf + 2 * 8)); - __m256i d = _mm256_loadu_si256((__m256i *)(buf + 3 * 8)); - - const __m256i check_combined = - _mm256_or_si256(_mm256_or_si256(a, b), _mm256_or_si256(c, d)); - - if (!_mm256_testz_si256(check_combined, high_bytes_mask)) { - // Fallback to scalar code for handling errors - for (int k = 0; k < 4 * 8; k++) { - char32_t codepoint = buf[k]; - if (codepoint <= 0xFF) { - *latin1_output++ = static_cast(codepoint); - } else { - return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), - latin1_output); - } - } - } - - b = _mm256_slli_epi32(b, 1 * 8); - c = _mm256_slli_epi32(c, 2 * 8); - d = _mm256_slli_epi32(d, 3 * 8); - - const __m256i t0 = - _mm256_or_si256(_mm256_or_si256(a, b), _mm256_or_si256(c, d)); - - const __m256i shuffle_bytes = - _mm256_setr_epi8(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15, - 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15); - - const __m256i t1 = _mm256_shuffle_epi8(t0, shuffle_bytes); - - const __m256i shuffle_dwords = _mm256_setr_epi32(0, 4, 1, 5, 2, 6, 3, 7); - const __m256i t2 = _mm256_permutevar8x32_epi32(t1, shuffle_dwords); - - _mm256_storeu_si256((__m256i *)latin1_output, t2); - - latin1_output += 32; - buf += 32; - } - - return std::make_pair(result(error_code::SUCCESS, buf - start), - latin1_output); -} -/* end file src/haswell/avx2_convert_utf32_to_latin1.cpp */ -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -/* begin file src/haswell/avx2_convert_utf32_to_utf8.cpp */ -std::pair -avx2_convert_utf32_to_utf8(const char32_t *buf, size_t len, char *utf8_output) { - const char32_t *end = buf + len; - const __m256i v_0000 = _mm256_setzero_si256(); - const __m256i v_ffff0000 = _mm256_set1_epi32((uint32_t)0xffff0000); - const __m256i v_ff80 = _mm256_set1_epi16((uint16_t)0xff80); - const __m256i v_f800 = _mm256_set1_epi16((uint16_t)0xf800); - const __m256i v_c080 = _mm256_set1_epi16((uint16_t)0xc080); - const __m256i v_7fffffff = _mm256_set1_epi32((uint32_t)0x7fffffff); - __m256i running_max = _mm256_setzero_si256(); - __m256i forbidden_bytemask = _mm256_setzero_si256(); - - const size_t safety_margin = - 12; // to avoid overruns, see issue - // https://github.com/simdutf/simdutf/issues/92 - - while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { - __m256i in = _mm256_loadu_si256((__m256i *)buf); - __m256i nextin = _mm256_loadu_si256((__m256i *)buf + 1); - running_max = _mm256_max_epu32(_mm256_max_epu32(in, running_max), nextin); - - // Pack 32-bit UTF-32 code units to 16-bit UTF-16 code units with unsigned - // saturation - __m256i in_16 = _mm256_packus_epi32(_mm256_and_si256(in, v_7fffffff), - _mm256_and_si256(nextin, v_7fffffff)); - in_16 = _mm256_permute4x64_epi64(in_16, 0b11011000); - - // Try to apply UTF-16 => UTF-8 routine on 256 bits - // (haswell/avx2_convert_utf16_to_utf8.cpp) - - if (_mm256_testz_si256(in_16, v_ff80)) { // ASCII fast path!!!! - // 1. pack the bytes - const __m128i utf8_packed = _mm_packus_epi16( - _mm256_castsi256_si128(in_16), _mm256_extractf128_si256(in_16, 1)); - // 2. store (16 bytes) - _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); - // 3. adjust pointers - buf += 16; - utf8_output += 16; - continue; // we are done for this round! - } - // no bits set above 7th bit - const __m256i one_byte_bytemask = - _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_ff80), v_0000); - const uint32_t one_byte_bitmask = - static_cast(_mm256_movemask_epi8(one_byte_bytemask)); - - // no bits set above 11th bit - const __m256i one_or_two_bytes_bytemask = - _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_0000); - const uint32_t one_or_two_bytes_bitmask = - static_cast(_mm256_movemask_epi8(one_or_two_bytes_bytemask)); - if (one_or_two_bytes_bitmask == 0xffffffff) { - // 1. prepare 2-byte values - // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 - // expected output : [110a|aaaa|10bb|bbbb] x 8 - const __m256i v_1f00 = _mm256_set1_epi16((int16_t)0x1f00); - const __m256i v_003f = _mm256_set1_epi16((int16_t)0x003f); - - // t0 = [000a|aaaa|bbbb|bb00] - const __m256i t0 = _mm256_slli_epi16(in_16, 2); - // t1 = [000a|aaaa|0000|0000] - const __m256i t1 = _mm256_and_si256(t0, v_1f00); - // t2 = [0000|0000|00bb|bbbb] - const __m256i t2 = _mm256_and_si256(in_16, v_003f); - // t3 = [000a|aaaa|00bb|bbbb] - const __m256i t3 = _mm256_or_si256(t1, t2); - // t4 = [110a|aaaa|10bb|bbbb] - const __m256i t4 = _mm256_or_si256(t3, v_c080); - - // 2. merge ASCII and 2-byte codewords - const __m256i utf8_unpacked = - _mm256_blendv_epi8(t4, in_16, one_byte_bytemask); - - // 3. prepare bitmask for 8-bit lookup - const uint32_t M0 = one_byte_bitmask & 0x55555555; - const uint32_t M1 = M0 >> 7; - const uint32_t M2 = (M1 | M0) & 0x00ff00ff; - // 4. pack the bytes - - const uint8_t *row = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0]; - const uint8_t *row_2 = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2 >> - 16)][0]; - - const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1)); - const __m128i shuffle_2 = _mm_loadu_si128((__m128i *)(row_2 + 1)); - - const __m256i utf8_packed = _mm256_shuffle_epi8( - utf8_unpacked, _mm256_setr_m128i(shuffle, shuffle_2)); - // 5. store bytes - _mm_storeu_si128((__m128i *)utf8_output, - _mm256_castsi256_si128(utf8_packed)); - utf8_output += row[0]; - _mm_storeu_si128((__m128i *)utf8_output, - _mm256_extractf128_si256(utf8_packed, 1)); - utf8_output += row_2[0]; - - // 6. adjust pointers - buf += 16; - continue; - } - // Must check for overflow in packing - const __m256i saturation_bytemask = _mm256_cmpeq_epi32( - _mm256_and_si256(_mm256_or_si256(in, nextin), v_ffff0000), v_0000); - const uint32_t saturation_bitmask = - static_cast(_mm256_movemask_epi8(saturation_bytemask)); - if (saturation_bitmask == 0xffffffff) { - // case: code units from register produce either 1, 2 or 3 UTF-8 bytes - const __m256i v_d800 = _mm256_set1_epi16((uint16_t)0xd800); - forbidden_bytemask = _mm256_or_si256( - forbidden_bytemask, - _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_d800)); - - const __m256i dup_even = _mm256_setr_epi16( - 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e, - 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); - - /* In this branch we handle three cases: - 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - - single UFT-8 byte - 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two - UTF-8 bytes - 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - - three UTF-8 bytes - - We expand the input word (16-bit) into two code units (32-bit), thus - we have room for four bytes. However, we need five distinct bit - layouts. Note that the last byte in cases #2 and #3 is the same. - - We precompute byte 1 for case #1 and the common byte for cases #2 & #3 - in register t2. - - We precompute byte 1 for case #3 and -- **conditionally** -- precompute - either byte 1 for case #2 or byte 2 for case #3. Note that they - differ by exactly one bit. - - Finally from these two code units we build proper UTF-8 sequence, taking - into account the case (i.e, the number of bytes to write). - */ - /** - * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: - * t2 => [0ccc|cccc] [10cc|cccc] - * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) - */ -#define simdutf_vec(x) _mm256_set1_epi16(static_cast(x)) - // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - const __m256i t0 = _mm256_shuffle_epi8(in_16, dup_even); - // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] - const __m256i t1 = _mm256_and_si256(t0, simdutf_vec(0b0011111101111111)); - // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - const __m256i t2 = _mm256_or_si256(t1, simdutf_vec(0b1000000000000000)); - - // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] - const __m256i s0 = _mm256_srli_epi16(in_16, 4); - // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] - const __m256i s1 = _mm256_and_si256(s0, simdutf_vec(0b0000111111111100)); - // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] - const __m256i s2 = _mm256_maddubs_epi16(s1, simdutf_vec(0x0140)); - // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - const __m256i s3 = _mm256_or_si256(s2, simdutf_vec(0b1100000011100000)); - const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, - simdutf_vec(0b0100000000000000)); - const __m256i s4 = _mm256_xor_si256(s3, m0); -#undef simdutf_vec - - // 4. expand code units 16-bit => 32-bit - const __m256i out0 = _mm256_unpacklo_epi16(t2, s4); - const __m256i out1 = _mm256_unpackhi_epi16(t2, s4); - - // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - const uint32_t mask = (one_byte_bitmask & 0x55555555) | - (one_or_two_bytes_bitmask & 0xaaaaaaaa); - // Due to the wider registers, the following path is less likely to be - // useful. - /*if(mask == 0) { - // We only have three-byte code units. Use fast path. - const __m256i shuffle = - _mm256_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1, - 2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1); const __m256i utf8_0 = - _mm256_shuffle_epi8(out0, shuffle); const __m256i utf8_1 = - _mm256_shuffle_epi8(out1, shuffle); - _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_0)); - utf8_output += 12; - _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_1)); - utf8_output += 12; - _mm_storeu_si128((__m128i*)utf8_output, - _mm256_extractf128_si256(utf8_0,1)); utf8_output += 12; - _mm_storeu_si128((__m128i*)utf8_output, - _mm256_extractf128_si256(utf8_1,1)); utf8_output += 12; buf += 16; - continue; - }*/ - const uint8_t mask0 = uint8_t(mask); - const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; - const __m128i shuffle0 = _mm_loadu_si128((__m128i *)(row0 + 1)); - const __m128i utf8_0 = - _mm_shuffle_epi8(_mm256_castsi256_si128(out0), shuffle0); - - const uint8_t mask1 = static_cast(mask >> 8); - const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; - const __m128i shuffle1 = _mm_loadu_si128((__m128i *)(row1 + 1)); - const __m128i utf8_1 = - _mm_shuffle_epi8(_mm256_castsi256_si128(out1), shuffle1); - - const uint8_t mask2 = static_cast(mask >> 16); - const uint8_t *row2 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask2][0]; - const __m128i shuffle2 = _mm_loadu_si128((__m128i *)(row2 + 1)); - const __m128i utf8_2 = - _mm_shuffle_epi8(_mm256_extractf128_si256(out0, 1), shuffle2); - - const uint8_t mask3 = static_cast(mask >> 24); - const uint8_t *row3 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask3][0]; - const __m128i shuffle3 = _mm_loadu_si128((__m128i *)(row3 + 1)); - const __m128i utf8_3 = - _mm_shuffle_epi8(_mm256_extractf128_si256(out1, 1), shuffle3); - - _mm_storeu_si128((__m128i *)utf8_output, utf8_0); - utf8_output += row0[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_1); - utf8_output += row1[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_2); - utf8_output += row2[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_3); - utf8_output += row3[0]; - buf += 16; - } else { - // case: at least one 32-bit word is larger than 0xFFFF <=> it will - // produce four UTF-8 bytes. Let us do a scalar fallback. It may seem - // wasteful to use scalar code, but being efficient with SIMD may require - // large, non-trivial tables? - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint32_t word = buf[k]; - if ((word & 0xFFFFFF80) == 0) { // 1-byte (ASCII) - *utf8_output++ = char(word); - } else if ((word & 0xFFFFF800) == 0) { // 2-byte - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else if ((word & 0xFFFF0000) == 0) { // 3-byte - if (word >= 0xD800 && word <= 0xDFFF) { - return std::make_pair(nullptr, utf8_output); - } - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else { // 4-byte - if (word > 0x10FFFF) { - return std::make_pair(nullptr, utf8_output); - } - *utf8_output++ = char((word >> 18) | 0b11110000); - *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } - } - buf += k; - } - } // while - - // check for invalid input - const __m256i v_10ffff = _mm256_set1_epi32((uint32_t)0x10ffff); - if (static_cast(_mm256_movemask_epi8(_mm256_cmpeq_epi32( - _mm256_max_epu32(running_max, v_10ffff), v_10ffff))) != 0xffffffff) { - return std::make_pair(nullptr, utf8_output); - } - - if (static_cast(_mm256_movemask_epi8(forbidden_bytemask)) != 0) { - return std::make_pair(nullptr, utf8_output); - } - - return std::make_pair(buf, utf8_output); -} - -std::pair -avx2_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, - char *utf8_output) { - const char32_t *end = buf + len; - const char32_t *start = buf; - - const __m256i v_0000 = _mm256_setzero_si256(); - const __m256i v_ffff0000 = _mm256_set1_epi32((uint32_t)0xffff0000); - const __m256i v_ff80 = _mm256_set1_epi16((uint16_t)0xff80); - const __m256i v_f800 = _mm256_set1_epi16((uint16_t)0xf800); - const __m256i v_c080 = _mm256_set1_epi16((uint16_t)0xc080); - const __m256i v_7fffffff = _mm256_set1_epi32((uint32_t)0x7fffffff); - const __m256i v_10ffff = _mm256_set1_epi32((uint32_t)0x10ffff); - - const size_t safety_margin = - 12; // to avoid overruns, see issue - // https://github.com/simdutf/simdutf/issues/92 - - while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { - __m256i in = _mm256_loadu_si256((__m256i *)buf); - __m256i nextin = _mm256_loadu_si256((__m256i *)buf + 1); - // Check for too large input - const __m256i max_input = - _mm256_max_epu32(_mm256_max_epu32(in, nextin), v_10ffff); - if (static_cast(_mm256_movemask_epi8( - _mm256_cmpeq_epi32(max_input, v_10ffff))) != 0xffffffff) { - return std::make_pair(result(error_code::TOO_LARGE, buf - start), - utf8_output); - } - - // Pack 32-bit UTF-32 code units to 16-bit UTF-16 code units with unsigned - // saturation - __m256i in_16 = _mm256_packus_epi32(_mm256_and_si256(in, v_7fffffff), - _mm256_and_si256(nextin, v_7fffffff)); - in_16 = _mm256_permute4x64_epi64(in_16, 0b11011000); - - // Try to apply UTF-16 => UTF-8 routine on 256 bits - // (haswell/avx2_convert_utf16_to_utf8.cpp) - - if (_mm256_testz_si256(in_16, v_ff80)) { // ASCII fast path!!!! - // 1. pack the bytes - const __m128i utf8_packed = _mm_packus_epi16( - _mm256_castsi256_si128(in_16), _mm256_extractf128_si256(in_16, 1)); - // 2. store (16 bytes) - _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); - // 3. adjust pointers - buf += 16; - utf8_output += 16; - continue; // we are done for this round! - } - // no bits set above 7th bit - const __m256i one_byte_bytemask = - _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_ff80), v_0000); - const uint32_t one_byte_bitmask = - static_cast(_mm256_movemask_epi8(one_byte_bytemask)); - - // no bits set above 11th bit - const __m256i one_or_two_bytes_bytemask = - _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_0000); - const uint32_t one_or_two_bytes_bitmask = - static_cast(_mm256_movemask_epi8(one_or_two_bytes_bytemask)); - if (one_or_two_bytes_bitmask == 0xffffffff) { - // 1. prepare 2-byte values - // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 - // expected output : [110a|aaaa|10bb|bbbb] x 8 - const __m256i v_1f00 = _mm256_set1_epi16((int16_t)0x1f00); - const __m256i v_003f = _mm256_set1_epi16((int16_t)0x003f); - - // t0 = [000a|aaaa|bbbb|bb00] - const __m256i t0 = _mm256_slli_epi16(in_16, 2); - // t1 = [000a|aaaa|0000|0000] - const __m256i t1 = _mm256_and_si256(t0, v_1f00); - // t2 = [0000|0000|00bb|bbbb] - const __m256i t2 = _mm256_and_si256(in_16, v_003f); - // t3 = [000a|aaaa|00bb|bbbb] - const __m256i t3 = _mm256_or_si256(t1, t2); - // t4 = [110a|aaaa|10bb|bbbb] - const __m256i t4 = _mm256_or_si256(t3, v_c080); - - // 2. merge ASCII and 2-byte codewords - const __m256i utf8_unpacked = - _mm256_blendv_epi8(t4, in_16, one_byte_bytemask); - - // 3. prepare bitmask for 8-bit lookup - const uint32_t M0 = one_byte_bitmask & 0x55555555; - const uint32_t M1 = M0 >> 7; - const uint32_t M2 = (M1 | M0) & 0x00ff00ff; - // 4. pack the bytes - - const uint8_t *row = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0]; - const uint8_t *row_2 = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2 >> - 16)][0]; - - const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1)); - const __m128i shuffle_2 = _mm_loadu_si128((__m128i *)(row_2 + 1)); - - const __m256i utf8_packed = _mm256_shuffle_epi8( - utf8_unpacked, _mm256_setr_m128i(shuffle, shuffle_2)); - // 5. store bytes - _mm_storeu_si128((__m128i *)utf8_output, - _mm256_castsi256_si128(utf8_packed)); - utf8_output += row[0]; - _mm_storeu_si128((__m128i *)utf8_output, - _mm256_extractf128_si256(utf8_packed, 1)); - utf8_output += row_2[0]; - - // 6. adjust pointers - buf += 16; - continue; - } - // Must check for overflow in packing - const __m256i saturation_bytemask = _mm256_cmpeq_epi32( - _mm256_and_si256(_mm256_or_si256(in, nextin), v_ffff0000), v_0000); - const uint32_t saturation_bitmask = - static_cast(_mm256_movemask_epi8(saturation_bytemask)); - if (saturation_bitmask == 0xffffffff) { - // case: code units from register produce either 1, 2 or 3 UTF-8 bytes - - // Check for illegal surrogate code units - const __m256i v_d800 = _mm256_set1_epi16((uint16_t)0xd800); - const __m256i forbidden_bytemask = - _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_d800); - if (static_cast(_mm256_movemask_epi8(forbidden_bytemask)) != - 0x0) { - return std::make_pair(result(error_code::SURROGATE, buf - start), - utf8_output); - } - - const __m256i dup_even = _mm256_setr_epi16( - 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e, - 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); - - /* In this branch we handle three cases: - 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - - single UFT-8 byte - 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two - UTF-8 bytes - 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - - three UTF-8 bytes - - We expand the input word (16-bit) into two code units (32-bit), thus - we have room for four bytes. However, we need five distinct bit - layouts. Note that the last byte in cases #2 and #3 is the same. - - We precompute byte 1 for case #1 and the common byte for cases #2 & #3 - in register t2. - - We precompute byte 1 for case #3 and -- **conditionally** -- precompute - either byte 1 for case #2 or byte 2 for case #3. Note that they - differ by exactly one bit. - - Finally from these two code units we build proper UTF-8 sequence, taking - into account the case (i.e, the number of bytes to write). - */ - /** - * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: - * t2 => [0ccc|cccc] [10cc|cccc] - * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) - */ -#define simdutf_vec(x) _mm256_set1_epi16(static_cast(x)) - // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - const __m256i t0 = _mm256_shuffle_epi8(in_16, dup_even); - // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] - const __m256i t1 = _mm256_and_si256(t0, simdutf_vec(0b0011111101111111)); - // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - const __m256i t2 = _mm256_or_si256(t1, simdutf_vec(0b1000000000000000)); - - // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] - const __m256i s0 = _mm256_srli_epi16(in_16, 4); - // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] - const __m256i s1 = _mm256_and_si256(s0, simdutf_vec(0b0000111111111100)); - // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] - const __m256i s2 = _mm256_maddubs_epi16(s1, simdutf_vec(0x0140)); - // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - const __m256i s3 = _mm256_or_si256(s2, simdutf_vec(0b1100000011100000)); - const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, - simdutf_vec(0b0100000000000000)); - const __m256i s4 = _mm256_xor_si256(s3, m0); -#undef simdutf_vec - - // 4. expand code units 16-bit => 32-bit - const __m256i out0 = _mm256_unpacklo_epi16(t2, s4); - const __m256i out1 = _mm256_unpackhi_epi16(t2, s4); - - // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - const uint32_t mask = (one_byte_bitmask & 0x55555555) | - (one_or_two_bytes_bitmask & 0xaaaaaaaa); - // Due to the wider registers, the following path is less likely to be - // useful. - /*if(mask == 0) { - // We only have three-byte code units. Use fast path. - const __m256i shuffle = - _mm256_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1, - 2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1); const __m256i utf8_0 = - _mm256_shuffle_epi8(out0, shuffle); const __m256i utf8_1 = - _mm256_shuffle_epi8(out1, shuffle); - _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_0)); - utf8_output += 12; - _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_1)); - utf8_output += 12; - _mm_storeu_si128((__m128i*)utf8_output, - _mm256_extractf128_si256(utf8_0,1)); utf8_output += 12; - _mm_storeu_si128((__m128i*)utf8_output, - _mm256_extractf128_si256(utf8_1,1)); utf8_output += 12; buf += 16; - continue; - }*/ - const uint8_t mask0 = uint8_t(mask); - const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; - const __m128i shuffle0 = _mm_loadu_si128((__m128i *)(row0 + 1)); - const __m128i utf8_0 = - _mm_shuffle_epi8(_mm256_castsi256_si128(out0), shuffle0); - - const uint8_t mask1 = static_cast(mask >> 8); - const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; - const __m128i shuffle1 = _mm_loadu_si128((__m128i *)(row1 + 1)); - const __m128i utf8_1 = - _mm_shuffle_epi8(_mm256_castsi256_si128(out1), shuffle1); - - const uint8_t mask2 = static_cast(mask >> 16); - const uint8_t *row2 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask2][0]; - const __m128i shuffle2 = _mm_loadu_si128((__m128i *)(row2 + 1)); - const __m128i utf8_2 = - _mm_shuffle_epi8(_mm256_extractf128_si256(out0, 1), shuffle2); - - const uint8_t mask3 = static_cast(mask >> 24); - const uint8_t *row3 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask3][0]; - const __m128i shuffle3 = _mm_loadu_si128((__m128i *)(row3 + 1)); - const __m128i utf8_3 = - _mm_shuffle_epi8(_mm256_extractf128_si256(out1, 1), shuffle3); - - _mm_storeu_si128((__m128i *)utf8_output, utf8_0); - utf8_output += row0[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_1); - utf8_output += row1[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_2); - utf8_output += row2[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_3); - utf8_output += row3[0]; - buf += 16; - } else { - // case: at least one 32-bit word is larger than 0xFFFF <=> it will - // produce four UTF-8 bytes. Let us do a scalar fallback. It may seem - // wasteful to use scalar code, but being efficient with SIMD may require - // large, non-trivial tables? - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint32_t word = buf[k]; - if ((word & 0xFFFFFF80) == 0) { // 1-byte (ASCII) - *utf8_output++ = char(word); - } else if ((word & 0xFFFFF800) == 0) { // 2-byte - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else if ((word & 0xFFFF0000) == 0) { // 3-byte - if (word >= 0xD800 && word <= 0xDFFF) { - return std::make_pair( - result(error_code::SURROGATE, buf - start + k), utf8_output); - } - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else { // 4-byte - if (word > 0x10FFFF) { - return std::make_pair( - result(error_code::TOO_LARGE, buf - start + k), utf8_output); - } - *utf8_output++ = char((word >> 18) | 0b11110000); - *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } - } - buf += k; - } - } // while - - return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output); -} -/* end file src/haswell/avx2_convert_utf32_to_utf8.cpp */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -/* begin file src/haswell/avx2_convert_utf32_to_utf16.cpp */ -template -std::pair -avx2_convert_utf32_to_utf16(const char32_t *buf, size_t len, - char16_t *utf16_output) { - const char32_t *end = buf + len; - - const size_t safety_margin = - 12; // to avoid overruns, see issue - // https://github.com/simdutf/simdutf/issues/92 - __m256i forbidden_bytemask = _mm256_setzero_si256(); - - const __m256i v_ffff0000 = _mm256_set1_epi32((int32_t)0xffff0000); - const __m256i v_f800 = _mm256_set1_epi32((uint32_t)0xf800); - const __m256i v_d800 = _mm256_set1_epi32((uint32_t)0xd800); - - while (end - buf >= std::ptrdiff_t(8 + safety_margin)) { - const __m256i in = _mm256_loadu_si256((__m256i *)buf); - - if (simdutf_likely(_mm256_testz_si256(in, v_ffff0000))) { - // no bits set above 16th bit <=> can pack to UTF16 - // without surrogate pairs - forbidden_bytemask = _mm256_or_si256( - forbidden_bytemask, - _mm256_cmpeq_epi32(_mm256_and_si256(in, v_f800), v_d800)); - - __m128i utf16_packed = _mm_packus_epi32(_mm256_castsi256_si128(in), - _mm256_extractf128_si256(in, 1)); - if (big_endian) { - const __m128i swap = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - utf16_packed = _mm_shuffle_epi8(utf16_packed, swap); - } - _mm_storeu_si128((__m128i *)utf16_output, utf16_packed); - utf16_output += 8; - buf += 8; - } else { - size_t forward = 7; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint32_t word = buf[k]; - if ((word & 0xFFFF0000) == 0) { - // will not generate a surrogate pair - if (word >= 0xD800 && word <= 0xDFFF) { - return std::make_pair(nullptr, utf16_output); - } - *utf16_output++ = - big_endian - ? char16_t((uint16_t(word) >> 8) | (uint16_t(word) << 8)) - : char16_t(word); - } else { - // will generate a surrogate pair - if (word > 0x10FFFF) { - return std::make_pair(nullptr, utf16_output); - } - word -= 0x10000; - uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); - uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); - if (big_endian) { - high_surrogate = - uint16_t((high_surrogate >> 8) | (high_surrogate << 8)); - low_surrogate = - uint16_t((low_surrogate >> 8) | (low_surrogate << 8)); - } - *utf16_output++ = char16_t(high_surrogate); - *utf16_output++ = char16_t(low_surrogate); - } - } - buf += k; - } - } - - // check for invalid input - if (static_cast(_mm256_movemask_epi8(forbidden_bytemask)) != 0) { - return std::make_pair(nullptr, utf16_output); - } - - return std::make_pair(buf, utf16_output); -} - -template -std::pair -avx2_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, - char16_t *utf16_output) { - const char32_t *start = buf; - const char32_t *end = buf + len; - - const size_t safety_margin = - 12; // to avoid overruns, see issue - // https://github.com/simdutf/simdutf/issues/92 - - const __m256i v_ffff0000 = _mm256_set1_epi32((int32_t)0xffff0000); - const __m256i v_f800 = _mm256_set1_epi32((uint32_t)0xf800); - const __m256i v_d800 = _mm256_set1_epi32((uint32_t)0xd800); - - while (end - buf >= std::ptrdiff_t(8 + safety_margin)) { - const __m256i in = _mm256_loadu_si256((__m256i *)buf); - - if (simdutf_likely(_mm256_testz_si256(in, v_ffff0000))) { - // no bits set above 16th bit <=> can pack to UTF16 without surrogate - // pairs - const __m256i forbidden_bytemask = - _mm256_cmpeq_epi32(_mm256_and_si256(in, v_f800), v_d800); - if (static_cast(_mm256_movemask_epi8(forbidden_bytemask)) != - 0x0) { - return std::make_pair(result(error_code::SURROGATE, buf - start), - utf16_output); - } - - __m128i utf16_packed = _mm_packus_epi32(_mm256_castsi256_si128(in), - _mm256_extractf128_si256(in, 1)); - if (big_endian) { - const __m128i swap = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - utf16_packed = _mm_shuffle_epi8(utf16_packed, swap); - } - _mm_storeu_si128((__m128i *)utf16_output, utf16_packed); - utf16_output += 8; - buf += 8; - } else { - size_t forward = 7; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint32_t word = buf[k]; - if ((word & 0xFFFF0000) == 0) { - // will not generate a surrogate pair - if (word >= 0xD800 && word <= 0xDFFF) { - return std::make_pair( - result(error_code::SURROGATE, buf - start + k), utf16_output); - } - *utf16_output++ = - big_endian - ? char16_t((uint16_t(word) >> 8) | (uint16_t(word) << 8)) - : char16_t(word); - } else { - // will generate a surrogate pair - if (word > 0x10FFFF) { - return std::make_pair( - result(error_code::TOO_LARGE, buf - start + k), utf16_output); - } - word -= 0x10000; - uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); - uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); - if (big_endian) { - high_surrogate = - uint16_t((high_surrogate >> 8) | (high_surrogate << 8)); - low_surrogate = - uint16_t((low_surrogate >> 8) | (low_surrogate << 8)); - } - *utf16_output++ = char16_t(high_surrogate); - *utf16_output++ = char16_t(low_surrogate); - } - } - buf += k; - } - } - - return std::make_pair(result(error_code::SUCCESS, buf - start), utf16_output); -} -/* end file src/haswell/avx2_convert_utf32_to_utf16.cpp */ -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/haswell/avx2_convert_utf8_to_latin1.cpp */ -// depends on "tables/utf8_to_utf16_tables.h" - -// Convert up to 12 bytes from utf8 to latin1 using a mask indicating the -// end of the code points. Only the least significant 12 bits of the mask -// are accessed. -// It returns how many bytes were consumed (up to 12). -size_t convert_masked_utf8_to_latin1(const char *input, - uint64_t utf8_end_of_code_point_mask, - char *&latin1_output) { - // we use an approach where we try to process up to 12 input bytes. - // Why 12 input bytes and not 16? Because we are concerned with the size of - // the lookup tables. Also 12 is nicely divisible by two and three. - // - // - // Optimization note: our main path below is load-latency dependent. Thus it - // is maybe beneficial to have fast paths that depend on branch prediction but - // have less latency. This results in more instructions but, potentially, also - // higher speeds. - // - const __m128i in = _mm_loadu_si128((__m128i *)input); - - const uint16_t input_utf8_end_of_code_point_mask = - utf8_end_of_code_point_mask & - 0xfff; // we are only processing 12 bytes in case it is not all ASCII - - if (utf8_end_of_code_point_mask == 0xfff) { - // We process the data in chunks of 12 bytes. - _mm_storeu_si128(reinterpret_cast<__m128i *>(latin1_output), in); - latin1_output += 12; // We wrote 12 characters. - return 12; // We consumed 1 bytes. - } - /// We do not have a fast path available, so we fallback. - const uint8_t idx = - tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][0]; - const uint8_t consumed = - tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1]; - // this indicates an invalid input: - if (idx >= 64) { - return consumed; - } - // Here we should have (idx < 64), if not, there is a bug in the validation or - // elsewhere. SIX (6) input code-code units this is a relatively easy scenario - // we process SIX (6) input code-code units. The max length in bytes of six - // code code units spanning between 1 and 2 bytes each is 12 bytes. On - // processors where pdep/pext is fast, we might be able to use a small lookup - // table. - const __m128i sh = - _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]); - const __m128i perm = _mm_shuffle_epi8(in, sh); - const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f)); - const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00)); - __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2)); - const __m128i latin1_packed = _mm_packus_epi16(composed, composed); - // writing 8 bytes even though we only care about the first 6 bytes. - // performance note: it would be faster to use _mm_storeu_si128, we should - // investigate. - _mm_storel_epi64((__m128i *)latin1_output, latin1_packed); - latin1_output += 6; // We wrote 6 bytes. - return consumed; -} -/* end file src/haswell/avx2_convert_utf8_to_latin1.cpp */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_BASE64 -/* begin file src/haswell/avx2_base64.cpp */ -/** - * References and further reading: - * - * Wojciech Muła, Daniel Lemire, Base64 encoding and decoding at almost the - * speed of a memory copy, Software: Practice and Experience 50 (2), 2020. - * https://arxiv.org/abs/1910.05109 - * - * Wojciech Muła, Daniel Lemire, Faster Base64 Encoding and Decoding using AVX2 - * Instructions, ACM Transactions on the Web 12 (3), 2018. - * https://arxiv.org/abs/1704.00605 - * - * Simon Josefsson. 2006. The Base16, Base32, and Base64 Data Encodings. - * https://tools.ietf.org/html/rfc4648. (2006). Internet Engineering Task Force, - * Request for Comments: 4648. - * - * Alfred Klomp. 2014a. Fast Base64 encoding/decoding with SSE vectorization. - * http://www.alfredklomp.com/programming/sse-base64/. (2014). - * - * Alfred Klomp. 2014b. Fast Base64 stream encoder/decoder in C99, with SIMD - * acceleration. https://github.com/aklomp/base64. (2014). - * - * Hanson Char. 2014. A Fast and Correct Base 64 Codec. (2014). - * https://aws.amazon.com/blogs/developer/a-fast-and-correct-base-64-codec/ - * - * Nick Kopp. 2013. Base64 Encoding on a GPU. - * https://www.codeproject.com/Articles/276993/Base-Encoding-on-a-GPU. (2013). - */ - -template -simdutf_really_inline __m256i lookup_pshufb_improved(const __m256i input) { - // credit: Wojciech Muła - __m256i result = _mm256_subs_epu8(input, _mm256_set1_epi8(51)); - const __m256i less = _mm256_cmpgt_epi8(_mm256_set1_epi8(26), input); - result = - _mm256_or_si256(result, _mm256_and_si256(less, _mm256_set1_epi8(13))); - __m256i shift_LUT; - if (base64_url) { - shift_LUT = _mm256_setr_epi8( - 'a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, - '0' - 52, '0' - 52, '0' - 52, '0' - 52, '-' - 62, '_' - 63, 'A', 0, 0, - - 'a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, - '0' - 52, '0' - 52, '0' - 52, '0' - 52, '-' - 62, '_' - 63, 'A', 0, 0); - } else { - shift_LUT = _mm256_setr_epi8( - 'a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, - '0' - 52, '0' - 52, '0' - 52, '0' - 52, '+' - 62, '/' - 63, 'A', 0, 0, - - 'a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, - '0' - 52, '0' - 52, '0' - 52, '0' - 52, '+' - 62, '/' - 63, 'A', 0, 0); - } - - result = _mm256_shuffle_epi8(shift_LUT, result); - return _mm256_add_epi8(result, input); -} - -template -size_t encode_base64(char *dst, const char *src, size_t srclen, - base64_options options) { - // credit: Wojciech Muła - const uint8_t *input = (const uint8_t *)src; - - uint8_t *out = (uint8_t *)dst; - const __m256i shuf = - _mm256_set_epi8(10, 11, 9, 10, 7, 8, 6, 7, 4, 5, 3, 4, 1, 2, 0, 1, - - 10, 11, 9, 10, 7, 8, 6, 7, 4, 5, 3, 4, 1, 2, 0, 1); - size_t i = 0; - for (; i + 100 <= srclen; i += 96) { - const __m128i lo0 = _mm_loadu_si128( - reinterpret_cast(input + i + 4 * 3 * 0)); - const __m128i hi0 = _mm_loadu_si128( - reinterpret_cast(input + i + 4 * 3 * 1)); - const __m128i lo1 = _mm_loadu_si128( - reinterpret_cast(input + i + 4 * 3 * 2)); - const __m128i hi1 = _mm_loadu_si128( - reinterpret_cast(input + i + 4 * 3 * 3)); - const __m128i lo2 = _mm_loadu_si128( - reinterpret_cast(input + i + 4 * 3 * 4)); - const __m128i hi2 = _mm_loadu_si128( - reinterpret_cast(input + i + 4 * 3 * 5)); - const __m128i lo3 = _mm_loadu_si128( - reinterpret_cast(input + i + 4 * 3 * 6)); - const __m128i hi3 = _mm_loadu_si128( - reinterpret_cast(input + i + 4 * 3 * 7)); - - __m256i in0 = _mm256_shuffle_epi8(_mm256_set_m128i(hi0, lo0), shuf); - __m256i in1 = _mm256_shuffle_epi8(_mm256_set_m128i(hi1, lo1), shuf); - __m256i in2 = _mm256_shuffle_epi8(_mm256_set_m128i(hi2, lo2), shuf); - __m256i in3 = _mm256_shuffle_epi8(_mm256_set_m128i(hi3, lo3), shuf); - - const __m256i t0_0 = _mm256_and_si256(in0, _mm256_set1_epi32(0x0fc0fc00)); - const __m256i t0_1 = _mm256_and_si256(in1, _mm256_set1_epi32(0x0fc0fc00)); - const __m256i t0_2 = _mm256_and_si256(in2, _mm256_set1_epi32(0x0fc0fc00)); - const __m256i t0_3 = _mm256_and_si256(in3, _mm256_set1_epi32(0x0fc0fc00)); - - const __m256i t1_0 = - _mm256_mulhi_epu16(t0_0, _mm256_set1_epi32(0x04000040)); - const __m256i t1_1 = - _mm256_mulhi_epu16(t0_1, _mm256_set1_epi32(0x04000040)); - const __m256i t1_2 = - _mm256_mulhi_epu16(t0_2, _mm256_set1_epi32(0x04000040)); - const __m256i t1_3 = - _mm256_mulhi_epu16(t0_3, _mm256_set1_epi32(0x04000040)); - - const __m256i t2_0 = _mm256_and_si256(in0, _mm256_set1_epi32(0x003f03f0)); - const __m256i t2_1 = _mm256_and_si256(in1, _mm256_set1_epi32(0x003f03f0)); - const __m256i t2_2 = _mm256_and_si256(in2, _mm256_set1_epi32(0x003f03f0)); - const __m256i t2_3 = _mm256_and_si256(in3, _mm256_set1_epi32(0x003f03f0)); - - const __m256i t3_0 = - _mm256_mullo_epi16(t2_0, _mm256_set1_epi32(0x01000010)); - const __m256i t3_1 = - _mm256_mullo_epi16(t2_1, _mm256_set1_epi32(0x01000010)); - const __m256i t3_2 = - _mm256_mullo_epi16(t2_2, _mm256_set1_epi32(0x01000010)); - const __m256i t3_3 = - _mm256_mullo_epi16(t2_3, _mm256_set1_epi32(0x01000010)); - - const __m256i input0 = _mm256_or_si256(t1_0, t3_0); - const __m256i input1 = _mm256_or_si256(t1_1, t3_1); - const __m256i input2 = _mm256_or_si256(t1_2, t3_2); - const __m256i input3 = _mm256_or_si256(t1_3, t3_3); - - _mm256_storeu_si256(reinterpret_cast<__m256i *>(out), - lookup_pshufb_improved(input0)); - out += 32; - - _mm256_storeu_si256(reinterpret_cast<__m256i *>(out), - lookup_pshufb_improved(input1)); - out += 32; - - _mm256_storeu_si256(reinterpret_cast<__m256i *>(out), - lookup_pshufb_improved(input2)); - out += 32; - _mm256_storeu_si256(reinterpret_cast<__m256i *>(out), - lookup_pshufb_improved(input3)); - out += 32; - } - for (; i + 28 <= srclen; i += 24) { - // lo = [xxxx|DDDC|CCBB|BAAA] - // hi = [xxxx|HHHG|GGFF|FEEE] - const __m128i lo = - _mm_loadu_si128(reinterpret_cast(input + i)); - const __m128i hi = - _mm_loadu_si128(reinterpret_cast(input + i + 4 * 3)); - - // bytes from groups A, B and C are needed in separate 32-bit lanes - // in = [0HHH|0GGG|0FFF|0EEE[0DDD|0CCC|0BBB|0AAA] - __m256i in = _mm256_shuffle_epi8(_mm256_set_m128i(hi, lo), shuf); - - // this part is well commented in encode.sse.cpp - - const __m256i t0 = _mm256_and_si256(in, _mm256_set1_epi32(0x0fc0fc00)); - const __m256i t1 = _mm256_mulhi_epu16(t0, _mm256_set1_epi32(0x04000040)); - const __m256i t2 = _mm256_and_si256(in, _mm256_set1_epi32(0x003f03f0)); - const __m256i t3 = _mm256_mullo_epi16(t2, _mm256_set1_epi32(0x01000010)); - const __m256i indices = _mm256_or_si256(t1, t3); - - _mm256_storeu_si256(reinterpret_cast<__m256i *>(out), - lookup_pshufb_improved(indices)); - out += 32; - } - return i / 3 * 4 + scalar::base64::tail_encode_base64((char *)out, src + i, - srclen - i, options); -} - -static inline void compress(__m128i data, uint16_t mask, char *output) { - if (mask == 0) { - _mm_storeu_si128(reinterpret_cast<__m128i *>(output), data); - return; - } - // this particular implementation was inspired by work done by @animetosho - // we do it in two steps, first 8 bytes and then second 8 bytes - uint8_t mask1 = uint8_t(mask); // least significant 8 bits - uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits - // next line just loads the 64-bit values thintable_epi8[mask1] and - // thintable_epi8[mask2] into a 128-bit register, using only - // two instructions on most compilers. - - __m128i shufmask = _mm_set_epi64x(tables::base64::thintable_epi8[mask2], - tables::base64::thintable_epi8[mask1]); - // we increment by 0x08 the second half of the mask - shufmask = - _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); - // this is the version "nearly pruned" - __m128i pruned = _mm_shuffle_epi8(data, shufmask); - // we still need to put the two halves together. - // we compute the popcount of the first half: - int pop1 = tables::base64::BitsSetTable256mul2[mask1]; - // then load the corresponding mask, what it does is to write - // only the first pop1 bytes from the first 8 bytes, and then - // it fills in with the bytes from the second 8 bytes + some filling - // at the end. - __m128i compactmask = _mm_loadu_si128(reinterpret_cast( - tables::base64::pshufb_combine_table + pop1 * 8)); - __m128i answer = _mm_shuffle_epi8(pruned, compactmask); - - _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); -} - -// --- decoding ----------------------------------------------- - -template -simdutf_really_inline void compress(__m256i data, uint32_t mask, char *output) { - if (mask == 0) { - _mm256_storeu_si256(reinterpret_cast<__m256i *>(output), data); - return; - } - compress(_mm256_castsi256_si128(data), uint16_t(mask), output); - compress(_mm256_extracti128_si256(data, 1), uint16_t(mask >> 16), - output + count_ones(~mask & 0xFFFF)); -} - -template -simdutf_really_inline void base64_decode(char *out, __m256i str) { - // credit: aqrit - const __m256i pack_shuffle = - _mm256_setr_epi8(2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1, - 2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1); - const __m256i t0 = _mm256_maddubs_epi16(str, _mm256_set1_epi32(0x01400140)); - const __m256i t1 = _mm256_madd_epi16(t0, _mm256_set1_epi32(0x00011000)); - const __m256i t2 = _mm256_shuffle_epi8(t1, pack_shuffle); - - // Store the output: - _mm_storeu_si128((__m128i *)out, _mm256_castsi256_si128(t2)); - _mm_storeu_si128((__m128i *)(out + 12), _mm256_extracti128_si256(t2, 1)); -} - -template -simdutf_really_inline void base64_decode_block(char *out, const char *src) { - base64_decode(out, - _mm256_loadu_si256(reinterpret_cast(src))); - base64_decode(out + 24, _mm256_loadu_si256( - reinterpret_cast(src + 32))); -} - -template -simdutf_really_inline void base64_decode_block_safe(char *out, - const char *src) { - base64_decode(out, - _mm256_loadu_si256(reinterpret_cast(src))); - char buffer[32]; // We enforce safety with a buffer. - base64_decode( - buffer, _mm256_loadu_si256(reinterpret_cast(src + 32))); - std::memcpy(out + 24, buffer, 24); -} - -// --- decoding - base64 class -------------------------------- - -class block64 { - __m256i chunks[2]; - -public: - // The caller of this function is responsible to ensure that there are 64 - // bytes available from reading at src. - simdutf_really_inline block64(const char *src) { - chunks[0] = _mm256_loadu_si256(reinterpret_cast(src)); - chunks[1] = _mm256_loadu_si256(reinterpret_cast(src + 32)); - } - - // The caller of this function is responsible to ensure that there are 128 - // bytes available from reading at src. - simdutf_really_inline block64(const char16_t *src) { - const auto m1 = _mm256_loadu_si256(reinterpret_cast(src)); - const auto m2 = - _mm256_loadu_si256(reinterpret_cast(src + 16)); - const auto m3 = - _mm256_loadu_si256(reinterpret_cast(src + 32)); - const auto m4 = - _mm256_loadu_si256(reinterpret_cast(src + 48)); - - const auto m1p = _mm256_permute2x128_si256(m1, m2, 0x20); - const auto m2p = _mm256_permute2x128_si256(m1, m2, 0x31); - const auto m3p = _mm256_permute2x128_si256(m3, m4, 0x20); - const auto m4p = _mm256_permute2x128_si256(m3, m4, 0x31); - - chunks[0] = _mm256_packus_epi16(m1p, m2p); - chunks[1] = _mm256_packus_epi16(m3p, m4p); - } - - simdutf_really_inline void copy_block(char *output) { - _mm256_storeu_si256(reinterpret_cast<__m256i *>(output), chunks[0]); - _mm256_storeu_si256(reinterpret_cast<__m256i *>(output + 32), chunks[1]); - } - - // decode 64 bytes and output 48 bytes - simdutf_really_inline void base64_decode_block(char *out) { - base64_decode(out, chunks[0]); - base64_decode(out + 24, chunks[1]); - } - - simdutf_really_inline void base64_decode_block_safe(char *out) { - base64_decode(out, chunks[0]); - char buffer[32]; // We enforce safety with a buffer. - base64_decode(buffer, chunks[1]); - std::memcpy(out + 24, buffer, 24); - } - - template - simdutf_really_inline uint64_t to_base64_mask(uint64_t *error) { - uint32_t err0 = 0; - uint32_t err1 = 0; - uint64_t m0 = to_base64_mask(&chunks[0], &err0); - uint64_t m1 = to_base64_mask(&chunks[1], &err1); - if (!ignore_garbage) { - *error = err0 | ((uint64_t)err1 << 32); - } - return m0 | (m1 << 32); - } - - template - simdutf_really_inline uint32_t to_base64_mask(__m256i *src, uint32_t *error) { - const __m256i ascii_space_tbl = - _mm256_setr_epi8(0x20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x9, 0xa, - 0x0, 0xc, 0xd, 0x0, 0x0, 0x20, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x9, 0xa, 0x0, 0xc, 0xd, 0x0, 0x0); - // credit: aqrit - __m256i delta_asso; - if (base64_url) { - delta_asso = _mm256_setr_epi8(0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x0, - 0x0, 0x0, 0x0, 0x0, 0xF, 0x0, 0xF, 0x1, 0x1, - 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x0, 0x0, 0x0, - 0x0, 0x0, 0xF, 0x0, 0xF); - } else { - delta_asso = _mm256_setr_epi8( - 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x0F, 0x00, 0x0F, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x0F); - } - - __m256i delta_values; - if (base64_url) { - delta_values = _mm256_setr_epi8( - 0x0, 0x0, 0x0, 0x13, 0x4, uint8_t(0xBF), uint8_t(0xBF), uint8_t(0xB9), - uint8_t(0xB9), 0x0, 0x11, uint8_t(0xC3), uint8_t(0xBF), uint8_t(0xE0), - uint8_t(0xB9), uint8_t(0xB9), 0x0, 0x0, 0x0, 0x13, 0x4, uint8_t(0xBF), - uint8_t(0xBF), uint8_t(0xB9), uint8_t(0xB9), 0x0, 0x11, uint8_t(0xC3), - uint8_t(0xBF), uint8_t(0xE0), uint8_t(0xB9), uint8_t(0xB9)); - } else { - delta_values = _mm256_setr_epi8( - int8_t(0x00), int8_t(0x00), int8_t(0x00), int8_t(0x13), int8_t(0x04), - int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), int8_t(0xB9), int8_t(0x00), - int8_t(0x10), int8_t(0xC3), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), - int8_t(0xB9), int8_t(0x00), int8_t(0x00), int8_t(0x00), int8_t(0x13), - int8_t(0x04), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), int8_t(0xB9), - int8_t(0x00), int8_t(0x10), int8_t(0xC3), int8_t(0xBF), int8_t(0xBF), - int8_t(0xB9), int8_t(0xB9)); - } - - __m256i check_asso; - if (base64_url) { - check_asso = _mm256_setr_epi8(0xD, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, - 0x1, 0x3, 0x7, 0xB, 0xE, 0xB, 0x6, 0xD, 0x1, - 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x3, - 0x7, 0xB, 0xE, 0xB, 0x6); - } else { - check_asso = _mm256_setr_epi8( - 0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x03, - 0x07, 0x0B, 0x0B, 0x0B, 0x0F, 0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x01, 0x01, 0x01, 0x01, 0x03, 0x07, 0x0B, 0x0B, 0x0B, 0x0F); - } - __m256i check_values; - if (base64_url) { - check_values = _mm256_setr_epi8( - uint8_t(0x80), uint8_t(0x80), uint8_t(0x80), uint8_t(0x80), - uint8_t(0xCF), uint8_t(0xBF), uint8_t(0xB6), uint8_t(0xA6), - uint8_t(0xB5), uint8_t(0xA1), 0x0, uint8_t(0x80), 0x0, uint8_t(0x80), - 0x0, uint8_t(0x80), uint8_t(0x80), uint8_t(0x80), uint8_t(0x80), - uint8_t(0x80), uint8_t(0xCF), uint8_t(0xBF), uint8_t(0xB6), - uint8_t(0xA6), uint8_t(0xB5), uint8_t(0xA1), 0x0, uint8_t(0x80), 0x0, - uint8_t(0x80), 0x0, uint8_t(0x80)); - } else { - check_values = _mm256_setr_epi8( - int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0xCF), - int8_t(0xBF), int8_t(0xD5), int8_t(0xA6), int8_t(0xB5), int8_t(0x86), - int8_t(0xD1), int8_t(0x80), int8_t(0xB1), int8_t(0x80), int8_t(0x91), - int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), - int8_t(0xCF), int8_t(0xBF), int8_t(0xD5), int8_t(0xA6), int8_t(0xB5), - int8_t(0x86), int8_t(0xD1), int8_t(0x80), int8_t(0xB1), int8_t(0x80), - int8_t(0x91), int8_t(0x80)); - } - const __m256i shifted = _mm256_srli_epi32(*src, 3); - const __m256i delta_hash = - _mm256_avg_epu8(_mm256_shuffle_epi8(delta_asso, *src), shifted); - const __m256i check_hash = - _mm256_avg_epu8(_mm256_shuffle_epi8(check_asso, *src), shifted); - const __m256i out = - _mm256_adds_epi8(_mm256_shuffle_epi8(delta_values, delta_hash), *src); - const __m256i chk = - _mm256_adds_epi8(_mm256_shuffle_epi8(check_values, check_hash), *src); - const int mask = _mm256_movemask_epi8(chk); - if (!ignore_garbage && mask) { - __m256i ascii_space = - _mm256_cmpeq_epi8(_mm256_shuffle_epi8(ascii_space_tbl, *src), *src); - *error = (mask ^ _mm256_movemask_epi8(ascii_space)); - } - *src = out; - return (uint32_t)mask; - } - - simdutf_really_inline uint64_t compress_block(uint64_t mask, char *output) { - if (is_power_of_two(mask)) { - return compress_block_single(mask, output); - } - - uint64_t nmask = ~mask; - compress(chunks[0], uint32_t(mask), output); - compress(chunks[1], uint32_t(mask >> 32), - output + count_ones(nmask & 0xFFFFFFFF)); - return count_ones(nmask); - } - - simdutf_really_inline size_t compress_block_single(uint64_t mask, - char *output) { - const size_t pos64 = trailing_zeroes(mask); - const int8_t pos = pos64 & 0xf; - switch (pos64 >> 4) { - case 0b00: { - const __m128i lane0 = _mm256_extracti128_si256(chunks[0], 0); - const __m128i lane1 = _mm256_extracti128_si256(chunks[0], 1); - - const __m128i v0 = _mm_set1_epi8(char(pos - 1)); - const __m128i v1 = - _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - const __m128i v2 = _mm_cmpgt_epi8(v1, v0); - const __m128i sh = _mm_sub_epi8(v1, v2); - const __m128i compressed = _mm_shuffle_epi8(lane0, sh); - - _mm_storeu_si128((__m128i *)(output + 0 * 16), compressed); - _mm_storeu_si128((__m128i *)(output + 1 * 16 - 1), lane1); - _mm256_storeu_si256((__m256i *)(output + 2 * 16 - 1), chunks[1]); - } break; - case 0b01: { - const __m128i lane0 = _mm256_extracti128_si256(chunks[0], 0); - const __m128i lane1 = _mm256_extracti128_si256(chunks[0], 1); - _mm_storeu_si128((__m128i *)(output + 0 * 16), lane0); - - const __m128i v0 = _mm_set1_epi8(char(pos - 1)); - const __m128i v1 = - _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - const __m128i v2 = _mm_cmpgt_epi8(v1, v0); - const __m128i sh = _mm_sub_epi8(v1, v2); - const __m128i compressed = _mm_shuffle_epi8(lane1, sh); - - _mm_storeu_si128((__m128i *)(output + 1 * 16), compressed); - _mm256_storeu_si256((__m256i *)(output + 2 * 16 - 1), chunks[1]); - } break; - case 0b10: { - const __m128i lane2 = _mm256_extracti128_si256(chunks[1], 0); - const __m128i lane3 = _mm256_extracti128_si256(chunks[1], 1); - - _mm256_storeu_si256((__m256i *)(output + 0 * 16), chunks[0]); - - const __m128i v0 = _mm_set1_epi8(char(pos - 1)); - const __m128i v1 = - _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - const __m128i v2 = _mm_cmpgt_epi8(v1, v0); - const __m128i sh = _mm_sub_epi8(v1, v2); - const __m128i compressed = _mm_shuffle_epi8(lane2, sh); - - _mm_storeu_si128((__m128i *)(output + 2 * 16), compressed); - _mm_storeu_si128((__m128i *)(output + 3 * 16 - 1), lane3); - } break; - case 0b11: { - const __m128i lane2 = _mm256_extracti128_si256(chunks[1], 0); - const __m128i lane3 = _mm256_extracti128_si256(chunks[1], 1); - - _mm256_storeu_si256((__m256i *)(output + 0 * 16), chunks[0]); - _mm_storeu_si128((__m128i *)(output + 2 * 16), lane2); - - const __m128i v0 = _mm_set1_epi8(char(pos - 1)); - const __m128i v1 = - _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - const __m128i v2 = _mm_cmpgt_epi8(v1, v0); - const __m128i sh = _mm_sub_epi8(v1, v2); - const __m128i compressed = _mm_shuffle_epi8(lane3, sh); - - _mm_storeu_si128((__m128i *)(output + 3 * 16), compressed); - } break; - } - - return 63; - } -}; -/* end file src/haswell/avx2_base64.cpp */ -#endif // SIMDUTF_FEATURE_BASE64 - -} // unnamed namespace -} // namespace haswell -} // namespace simdutf - -/* begin file src/generic/buf_block_reader.h */ -namespace simdutf { -namespace haswell { -namespace { - -// Walks through a buffer in block-sized increments, loading the last part with -// spaces -template struct buf_block_reader { -public: - simdutf_really_inline buf_block_reader(const uint8_t *_buf, size_t _len); - simdutf_really_inline size_t block_index(); - simdutf_really_inline bool has_full_block() const; - simdutf_really_inline const uint8_t *full_block() const; - /** - * Get the last block, padded with spaces. - * - * There will always be a last block, with at least 1 byte, unless len == 0 - * (in which case this function fills the buffer with spaces and returns 0. In - * particular, if len == STEP_SIZE there will be 0 full_blocks and 1 remainder - * block with STEP_SIZE bytes and no spaces for padding. - * - * @return the number of effective characters in the last block. - */ - simdutf_really_inline size_t get_remainder(uint8_t *dst) const; - simdutf_really_inline void advance(); - -private: - const uint8_t *buf; - const size_t len; - const size_t lenminusstep; - size_t idx; -}; - -// Routines to print masks and text for debugging bitmask operations -simdutf_unused static char *format_input_text_64(const uint8_t *text) { - static char *buf = - reinterpret_cast(malloc(sizeof(simd8x64) + 1)); - for (size_t i = 0; i < sizeof(simd8x64); i++) { - buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); - } - buf[sizeof(simd8x64)] = '\0'; - return buf; -} - -// Routines to print masks and text for debugging bitmask operations -simdutf_unused static char *format_input_text(const simd8x64 &in) { - static char *buf = - reinterpret_cast(malloc(sizeof(simd8x64) + 1)); - in.store(reinterpret_cast(buf)); - for (size_t i = 0; i < sizeof(simd8x64); i++) { - if (buf[i] < ' ') { - buf[i] = '_'; - } - } - buf[sizeof(simd8x64)] = '\0'; - return buf; -} - -simdutf_unused static char *format_mask(uint64_t mask) { - static char *buf = reinterpret_cast(malloc(64 + 1)); - for (size_t i = 0; i < 64; i++) { - buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; - } - buf[64] = '\0'; - return buf; -} - -template -simdutf_really_inline -buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) - : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, - idx{0} {} - -template -simdutf_really_inline size_t buf_block_reader::block_index() { - return idx; -} - -template -simdutf_really_inline bool buf_block_reader::has_full_block() const { - return idx < lenminusstep; -} - -template -simdutf_really_inline const uint8_t * -buf_block_reader::full_block() const { - return &buf[idx]; -} - -template -simdutf_really_inline size_t -buf_block_reader::get_remainder(uint8_t *dst) const { - if (len == idx) { - return 0; - } // memcpy(dst, null, 0) will trigger an error with some sanitizers - std::memset(dst, 0x20, - STEP_SIZE); // std::memset STEP_SIZE because it is more efficient - // to write out 8 or 16 bytes at once. - std::memcpy(dst, buf + idx, len - idx); - return len - idx; -} - -template -simdutf_really_inline void buf_block_reader::advance() { - idx += STEP_SIZE; -} - -} // unnamed namespace -} // namespace haswell -} // namespace simdutf -/* end file src/generic/buf_block_reader.h */ -#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING -/* begin file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ -namespace simdutf { -namespace haswell { -namespace { -namespace utf8_validation { - -using namespace simd; - -simdutf_really_inline simd8 -check_special_cases(const simd8 input, const simd8 prev1) { - // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) - // Bit 1 = Too Long (ASCII followed by continuation) - // Bit 2 = Overlong 3-byte - // Bit 4 = Surrogate - // Bit 5 = Overlong 2-byte - // Bit 7 = Two Continuations - constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ - // 11______ 11______ - constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ - constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ - constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ - constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ - constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ - constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ - constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; - // 11110101 1000____ - // 1111011_ 1000____ - // 11111___ 1000____ - constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ - - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // 0_______ ________ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - TOO_LONG, - // 10______ ________ - TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, - // 1100____ ________ - TOO_SHORT | OVERLONG_2, - // 1101____ ________ - TOO_SHORT, - // 1110____ ________ - TOO_SHORT | OVERLONG_3 | SURROGATE, - // 1111____ ________ - TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); - constexpr const uint8_t CARRY = - TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . - const simd8 byte_1_low = - (prev1 & 0x0F) - .lookup_16( - // ____0000 ________ - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - // ____0001 ________ - CARRY | OVERLONG_2, - // ____001_ ________ - CARRY, CARRY, - - // ____0100 ________ - CARRY | TOO_LARGE, - // ____0101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____011_ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - - // ____1___ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____1101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000); - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ________ 0_______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - TOO_SHORT, TOO_SHORT, - - // ________ 1000____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | - OVERLONG_4, - // ________ 1001____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - // ________ 101_____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - - // ________ 11______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); - return (byte_1_high & byte_1_low & byte_2_high); -} -simdutf_really_inline simd8 -check_multibyte_lengths(const simd8 input, - const simd8 prev_input, - const simd8 sc) { - simd8 prev2 = input.prev<2>(prev_input); - simd8 prev3 = input.prev<3>(prev_input); - simd8 must23 = - simd8(must_be_2_3_continuation(prev2, prev3)); - simd8 must23_80 = must23 & uint8_t(0x80); - return must23_80 ^ sc; -} - -// -// Return nonzero if there are incomplete multibyte characters at the end of the -// block: e.g. if there is a 4-byte character, but it is 3 bytes from the end. -// -simdutf_really_inline simd8 is_incomplete(const simd8 input) { - // If the previous input's last 3 bytes match this, they're too short (they - // ended at EOF): - // ... 1111____ 111_____ 11______ - static const uint8_t max_array[32] = {255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 0b11110000u - 1, - 0b11100000u - 1, - 0b11000000u - 1}; - const simd8 max_value( - &max_array[sizeof(max_array) - sizeof(simd8)]); - return input.gt_bits(max_value); -} - -struct utf8_checker { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; - // The last input we received - simd8 prev_input_block; - // Whether the last input we received was incomplete (used for ASCII fast - // path) - simd8 prev_incomplete; - - // - // Check whether the current bytes are valid UTF-8. - // - simdutf_really_inline void check_utf8_bytes(const simd8 input, - const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ - // lead bytes (2, 3, 4-byte leads become large positive numbers instead of - // small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - simd8 sc = check_special_cases(input, prev1); - this->error |= check_multibyte_lengths(input, prev_input, sc); - } - - // The only problem that can happen at EOF is that a multibyte character is - // too short or a byte value too large in the last bytes: check_special_cases - // only checks for bytes too large in the first of two bytes. - simdutf_really_inline void check_eof() { - // If the previous block had incomplete UTF-8 characters at the end, an - // ASCII block can't possibly finish them. - this->error |= this->prev_incomplete; - } - - simdutf_really_inline void check_next_input(const simd8x64 &input) { - if (simdutf_likely(is_ascii(input))) { - this->error |= this->prev_incomplete; - } else { - // you might think that a for-loop would work, but under Visual Studio, it - // is not good enough. - static_assert((simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - this->prev_incomplete = - is_incomplete(input.chunks[simd8x64::NUM_CHUNKS - 1]); - this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS - 1]; - } - } - - // do not forget to call check_eof! - simdutf_really_inline bool errors() const { - return this->error.any_bits_set_anywhere(); - } - -}; // struct utf8_checker -} // namespace utf8_validation - -using utf8_validation::utf8_checker; - -} // unnamed namespace -} // namespace haswell -} // namespace simdutf -/* end file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ -/* begin file src/generic/utf8_validation/utf8_validator.h */ -namespace simdutf { -namespace haswell { -namespace { -namespace utf8_validation { - -/** - * Validates that the string is actual UTF-8. - */ -template -bool generic_validate_utf8(const uint8_t *input, size_t length) { - checker c{}; - buf_block_reader<64> reader(input, length); - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - c.check_next_input(in); - reader.advance(); - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - c.check_next_input(in); - reader.advance(); - c.check_eof(); - return !c.errors(); -} - -bool generic_validate_utf8(const char *input, size_t length) { - return generic_validate_utf8( - reinterpret_cast(input), length); -} - -/** - * Validates that the string is actual UTF-8 and stops on errors. - */ -template -result generic_validate_utf8_with_errors(const uint8_t *input, size_t length) { - checker c{}; - buf_block_reader<64> reader(input, length); - size_t count{0}; - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - c.check_next_input(in); - if (c.errors()) { - if (count != 0) { - count--; - } // Sometimes the error is only detected in the next chunk - result res = scalar::utf8::rewind_and_validate_with_errors( - reinterpret_cast(input), - reinterpret_cast(input + count), length - count); - res.count += count; - return res; - } - reader.advance(); - count += 64; - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - c.check_next_input(in); - reader.advance(); - c.check_eof(); - if (c.errors()) { - if (count != 0) { - count--; - } // Sometimes the error is only detected in the next chunk - result res = scalar::utf8::rewind_and_validate_with_errors( - reinterpret_cast(input), - reinterpret_cast(input) + count, length - count); - res.count += count; - return res; - } else { - return result(error_code::SUCCESS, length); - } -} - -result generic_validate_utf8_with_errors(const char *input, size_t length) { - return generic_validate_utf8_with_errors( - reinterpret_cast(input), length); -} - -} // namespace utf8_validation -} // unnamed namespace -} // namespace haswell -} // namespace simdutf -/* end file src/generic/utf8_validation/utf8_validator.h */ -#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_ASCII -/* begin file src/generic/ascii_validation.h */ -namespace simdutf { -namespace haswell { -namespace { -namespace ascii_validation { - -bool generic_validate_ascii(const char *input, size_t length) { - buf_block_reader<64> reader(reinterpret_cast(input), length); - uint8_t blocks[64]{}; - simd::simd8x64 running_or(blocks); - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - running_or |= in; - reader.advance(); - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - running_or |= in; - return running_or.is_ascii(); -} - -result generic_validate_ascii_with_errors(const char *input, size_t length) { - buf_block_reader<64> reader(reinterpret_cast(input), length); - size_t count{0}; - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - if (!in.is_ascii()) { - result res = scalar::ascii::validate_with_errors( - reinterpret_cast(input + count), length - count); - return result(res.error, count + res.count); - } - reader.advance(); - - count += 64; - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - if (!in.is_ascii()) { - result res = scalar::ascii::validate_with_errors( - reinterpret_cast(input + count), length - count); - return result(res.error, count + res.count); - } else { - return result(error_code::SUCCESS, length); - } -} - -} // namespace ascii_validation -} // unnamed namespace -} // namespace haswell -} // namespace simdutf -/* end file src/generic/ascii_validation.h */ -#endif // SIMDUTF_FEATURE_ASCII - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - // transcoding from UTF-8 to UTF-16 -/* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ -namespace simdutf { -namespace haswell { -namespace { -namespace utf8_to_utf16 { - -using namespace simd; - -template -simdutf_warn_unused size_t convert_valid(const char *input, size_t size, - char16_t *utf16_output) noexcept { - // The implementation is not specific to haswell and should be moved to the - // generic directory. - size_t pos = 0; - char16_t *start{utf16_output}; - const size_t safety_margin = 16; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - // this loop could be unrolled further. For example, we could process the - // mask far more than 64 bytes. - simd8x64 in(reinterpret_cast(input + pos)); - if (in.is_ascii()) { - in.store_ascii_as_utf16(utf16_output); - utf16_output += 64; - pos += 64; - } else { - // Slow path. We hope that the compiler will recognize that this is a slow - // path. Anything that is not a continuation mask is a 'leading byte', - // that is, the start of a new code point. - uint64_t utf8_continuation_mask = in.lt(-65 + 1); - // -65 is 0b10111111 in two-complement's, so largest possible continuation - // byte - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - // The *start* of code points is not so useful, rather, we want the *end* - // of code points. - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times when using solely - // the slow/regular path, and at least four times if there are fast paths. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - // - // Thus we may allow convert_masked_utf8_to_utf16 to process - // more bytes at a time under a fast-path mode where 16 bytes - // are consumed at once (e.g., when encountering ASCII). - size_t consumed = convert_masked_utf8_to_utf16( - input + pos, utf8_end_of_code_point_mask, utf16_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - utf16_output += scalar::utf8_to_utf16::convert_valid( - input + pos, size - pos, utf16_output); - return utf16_output - start; -} - -} // namespace utf8_to_utf16 -} // unnamed namespace -} // namespace haswell -} // namespace simdutf -/* end file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ -/* begin file src/generic/utf8_to_utf16/utf8_to_utf16.h */ -namespace simdutf { -namespace haswell { -namespace { -namespace utf8_to_utf16 { -using namespace simd; - -simdutf_really_inline simd8 -check_special_cases(const simd8 input, const simd8 prev1) { - // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) - // Bit 1 = Too Long (ASCII followed by continuation) - // Bit 2 = Overlong 3-byte - // Bit 4 = Surrogate - // Bit 5 = Overlong 2-byte - // Bit 7 = Two Continuations - constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ - // 11______ 11______ - constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ - constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ - constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ - constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ - constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ - constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ - constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; - // 11110101 1000____ - // 1111011_ 1000____ - // 11111___ 1000____ - constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ - - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // 0_______ ________ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - TOO_LONG, - // 10______ ________ - TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, - // 1100____ ________ - TOO_SHORT | OVERLONG_2, - // 1101____ ________ - TOO_SHORT, - // 1110____ ________ - TOO_SHORT | OVERLONG_3 | SURROGATE, - // 1111____ ________ - TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); - constexpr const uint8_t CARRY = - TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . - const simd8 byte_1_low = - (prev1 & 0x0F) - .lookup_16( - // ____0000 ________ - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - // ____0001 ________ - CARRY | OVERLONG_2, - // ____001_ ________ - CARRY, CARRY, - - // ____0100 ________ - CARRY | TOO_LARGE, - // ____0101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____011_ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - - // ____1___ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____1101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000); - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ________ 0_______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - TOO_SHORT, TOO_SHORT, - - // ________ 1000____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | - OVERLONG_4, - // ________ 1001____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - // ________ 101_____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - - // ________ 11______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); - return (byte_1_high & byte_1_low & byte_2_high); -} -simdutf_really_inline simd8 -check_multibyte_lengths(const simd8 input, - const simd8 prev_input, - const simd8 sc) { - simd8 prev2 = input.prev<2>(prev_input); - simd8 prev3 = input.prev<3>(prev_input); - simd8 must23 = - simd8(must_be_2_3_continuation(prev2, prev3)); - simd8 must23_80 = must23 & uint8_t(0x80); - return must23_80 ^ sc; -} - -struct validating_transcoder { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; - - validating_transcoder() : error(uint8_t(0)) {} - // - // Check whether the current bytes are valid UTF-8. - // - simdutf_really_inline void check_utf8_bytes(const simd8 input, - const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ - // lead bytes (2, 3, 4-byte leads become large positive numbers instead of - // small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - simd8 sc = check_special_cases(input, prev1); - this->error |= check_multibyte_lengths(input, prev_input, sc); - } - - template - simdutf_really_inline size_t convert(const char *in, size_t size, - char16_t *utf16_output) { - size_t pos = 0; - char16_t *start{utf16_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 8 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > -65); - } - // If the input is long enough, then we have that margin-1 is the eight last - // leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store_ascii_as_utf16(utf16_output); - utf16_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - if (utf8_continuation_mask & 1) { - return 0; // error - } - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_utf16( - in + pos, utf8_end_of_code_point_mask, utf16_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - return 0; - } - if (pos < size) { - size_t howmany = scalar::utf8_to_utf16::convert( - in + pos, size - pos, utf16_output); - if (howmany == 0) { - return 0; - } - utf16_output += howmany; - } - return utf16_output - start; - } - - template - simdutf_really_inline result convert_with_errors(const char *in, size_t size, - char16_t *utf16_output) { - size_t pos = 0; - char16_t *start{utf16_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 8 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > -65); - } - // If the input is long enough, then we have that margin-1 is the eight last - // leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store_ascii_as_utf16(utf16_output); - utf16_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - if (errors() || (utf8_continuation_mask & 1)) { - // rewind_and_convert_with_errors will seek a potential error from - // in+pos onward, with the ability to go back up to pos bytes, and - // read size-pos bytes forward. - result res = - scalar::utf8_to_utf16::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf16_output); - res.count += pos; - return res; - } - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_utf16( - in + pos, utf8_end_of_code_point_mask, utf16_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - // rewind_and_convert_with_errors will seek a potential error from in+pos - // onward, with the ability to go back up to pos bytes, and read size-pos - // bytes forward. - result res = - scalar::utf8_to_utf16::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf16_output); - res.count += pos; - return res; - } - if (pos < size) { - // rewind_and_convert_with_errors will seek a potential error from in+pos - // onward, with the ability to go back up to pos bytes, and read size-pos - // bytes forward. - result res = - scalar::utf8_to_utf16::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf16_output); - if (res.error) { // In case of error, we want the error position - res.count += pos; - return res; - } else { // In case of success, we want the number of word written - utf16_output += res.count; - } - } - return result(error_code::SUCCESS, utf16_output - start); - } - - simdutf_really_inline bool errors() const { - return this->error.any_bits_set_anywhere(); - } - -}; // struct utf8_checker -} // namespace utf8_to_utf16 -} // unnamed namespace -} // namespace haswell -} // namespace simdutf -/* end file src/generic/utf8_to_utf16/utf8_to_utf16.h */ -/* begin file src/generic/utf8/utf16_length_from_utf8_bytemask.h */ -namespace simdutf { -namespace haswell { -namespace { -namespace utf8 { - -using namespace simd; - -simdutf_really_inline size_t utf16_length_from_utf8_bytemask(const char *in, - size_t size) { - using vector_i8 = simd8; - using vector_u8 = simd8; - using vector_u64 = simd64; - - constexpr size_t N = vector_i8::SIZE; - constexpr size_t max_iterations = 255 / 2; - - auto counters = vector_u64::zero(); - auto local = vector_u8::zero(); - - size_t iterations = 0; - size_t pos = 0; - size_t count = 0; - for (; pos + N <= size; pos += N) { - const auto input = - vector_i8::load(reinterpret_cast(in + pos)); - - const auto continuation = input > int8_t(-65); - const auto utf_4bytes = vector_u8(input.value) >= uint8_t(240); - - local -= vector_u8(continuation); - local -= vector_u8(utf_4bytes); - - iterations += 1; - if (iterations == max_iterations) { - counters += sum_8bytes(local); - local = vector_u8::zero(); - iterations = 0; - } - } - - if (iterations > 0) { - count += local.sum_bytes(); - } - - count += counters.sum(); - - return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos); -} - -} // namespace utf8 -} // unnamed namespace -} // namespace haswell -} // namespace simdutf -/* end file src/generic/utf8/utf16_length_from_utf8_bytemask.h */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - // transcoding from UTF-8 to UTF-32 -/* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ -namespace simdutf { -namespace haswell { -namespace { -namespace utf8_to_utf32 { - -using namespace simd; - -simdutf_warn_unused size_t convert_valid(const char *input, size_t size, - char32_t *utf32_output) noexcept { - size_t pos = 0; - char32_t *start{utf32_output}; - const size_t safety_margin = 16; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 in(reinterpret_cast(input + pos)); - if (in.is_ascii()) { - in.store_ascii_as_utf32(utf32_output); - utf32_output += 64; - pos += 64; - } else { - // -65 is 0b10111111 in two-complement's, so largest possible continuation - // byte - uint64_t utf8_continuation_mask = in.lt(-65 + 1); - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - size_t max_starting_point = (pos + 64) - 12; - while (pos < max_starting_point) { - size_t consumed = convert_masked_utf8_to_utf32( - input + pos, utf8_end_of_code_point_mask, utf32_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - } - } - utf32_output += scalar::utf8_to_utf32::convert_valid(input + pos, size - pos, - utf32_output); - return utf32_output - start; -} - -} // namespace utf8_to_utf32 -} // unnamed namespace -} // namespace haswell -} // namespace simdutf -/* end file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ -/* begin file src/generic/utf8_to_utf32/utf8_to_utf32.h */ -namespace simdutf { -namespace haswell { -namespace { -namespace utf8_to_utf32 { -using namespace simd; - -simdutf_really_inline simd8 -check_special_cases(const simd8 input, const simd8 prev1) { - // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) - // Bit 1 = Too Long (ASCII followed by continuation) - // Bit 2 = Overlong 3-byte - // Bit 4 = Surrogate - // Bit 5 = Overlong 2-byte - // Bit 7 = Two Continuations - constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ - // 11______ 11______ - constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ - constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ - constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ - constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ - constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ - constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ - constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; - // 11110101 1000____ - // 1111011_ 1000____ - // 11111___ 1000____ - constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ - - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // 0_______ ________ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - TOO_LONG, - // 10______ ________ - TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, - // 1100____ ________ - TOO_SHORT | OVERLONG_2, - // 1101____ ________ - TOO_SHORT, - // 1110____ ________ - TOO_SHORT | OVERLONG_3 | SURROGATE, - // 1111____ ________ - TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); - constexpr const uint8_t CARRY = - TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . - const simd8 byte_1_low = - (prev1 & 0x0F) - .lookup_16( - // ____0000 ________ - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - // ____0001 ________ - CARRY | OVERLONG_2, - // ____001_ ________ - CARRY, CARRY, - - // ____0100 ________ - CARRY | TOO_LARGE, - // ____0101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____011_ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - - // ____1___ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____1101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000); - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ________ 0_______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - TOO_SHORT, TOO_SHORT, - - // ________ 1000____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | - OVERLONG_4, - // ________ 1001____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - // ________ 101_____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - - // ________ 11______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); - return (byte_1_high & byte_1_low & byte_2_high); -} -simdutf_really_inline simd8 -check_multibyte_lengths(const simd8 input, - const simd8 prev_input, - const simd8 sc) { - simd8 prev2 = input.prev<2>(prev_input); - simd8 prev3 = input.prev<3>(prev_input); - simd8 must23 = - simd8(must_be_2_3_continuation(prev2, prev3)); - simd8 must23_80 = must23 & uint8_t(0x80); - return must23_80 ^ sc; -} - -struct validating_transcoder { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; - - validating_transcoder() : error(uint8_t(0)) {} - // - // Check whether the current bytes are valid UTF-8. - // - simdutf_really_inline void check_utf8_bytes(const simd8 input, - const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ - // lead bytes (2, 3, 4-byte leads become large positive numbers instead of - // small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - simd8 sc = check_special_cases(input, prev1); - this->error |= check_multibyte_lengths(input, prev_input, sc); - } - - simdutf_really_inline size_t convert(const char *in, size_t size, - char32_t *utf32_output) { - size_t pos = 0; - char32_t *start{utf32_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 words when calling convert_masked_utf8_to_utf32. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 16 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > -65); - } - // If the input is long enough, then we have that margin-1 is the fourth - // last leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store_ascii_as_utf32(utf32_output); - utf32_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - if (utf8_continuation_mask & 1) { - return 0; // we have an error - } - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_utf32( - in + pos, utf8_end_of_code_point_mask, utf32_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - return 0; - } - if (pos < size) { - size_t howmany = - scalar::utf8_to_utf32::convert(in + pos, size - pos, utf32_output); - if (howmany == 0) { - return 0; - } - utf32_output += howmany; - } - return utf32_output - start; - } - - simdutf_really_inline result convert_with_errors(const char *in, size_t size, - char32_t *utf32_output) { - size_t pos = 0; - char32_t *start{utf32_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 8 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > -65); - } - // If the input is long enough, then we have that margin-1 is the fourth - // last leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store_ascii_as_utf32(utf32_output); - utf32_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - if (errors() || (utf8_continuation_mask & 1)) { - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf32_output); - res.count += pos; - return res; - } - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_utf32( - in + pos, utf8_end_of_code_point_mask, utf32_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf32_output); - res.count += pos; - return res; - } - if (pos < size) { - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf32_output); - if (res.error) { // In case of error, we want the error position - res.count += pos; - return res; - } else { // In case of success, we want the number of word written - utf32_output += res.count; - } - } - return result(error_code::SUCCESS, utf32_output - start); - } - - simdutf_really_inline bool errors() const { - return this->error.any_bits_set_anywhere(); - } - -}; // struct utf8_checker -} // namespace utf8_to_utf32 -} // unnamed namespace -} // namespace haswell -} // namespace simdutf -/* end file src/generic/utf8_to_utf32/utf8_to_utf32.h */ -/* begin file src/generic/utf32.h */ -#include - -namespace simdutf { -namespace haswell { -namespace { -namespace utf32 { - -template T min(T a, T b) { return a <= b ? a : b; } - -simdutf_really_inline size_t utf8_length_from_utf32(const char32_t *input, - size_t length) { - using vector_u32 = simd32; - - const char32_t *start = input; - - // we add up to three ones in a single iteration (see the vectorized loop in - // section #2 below) - const size_t max_increment = 3; - - const size_t N = vector_u32::ELEMENTS; - -#if SIMDUTF_SIMD_HAS_UNSIGNED_CMP - const auto v_0000007f = vector_u32::splat(0x0000007f); - const auto v_000007ff = vector_u32::splat(0x000007ff); - const auto v_0000ffff = vector_u32::splat(0x0000ffff); -#else - const auto v_ffffff80 = vector_u32::splat(0xffffff80); - const auto v_fffff800 = vector_u32::splat(0xfffff800); - const auto v_ffff0000 = vector_u32::splat(0xffff0000); - const auto one = vector_u32::splat(1); -#endif // SIMDUTF_SIMD_HAS_UNSIGNED_CMP - - size_t counter = 0; - - // 1. vectorized loop unrolled 4 times - { - // we use vector of uint32 counters, this is why this limit is used - const size_t max_iterations = - std::numeric_limits::max() / (max_increment * 4); - size_t blocks = length / (N * 4); - length -= blocks * (N * 4); - while (blocks != 0) { - const size_t iterations = min(blocks, max_iterations); - blocks -= iterations; - - simd32 acc = vector_u32::zero(); - for (size_t i = 0; i < iterations; i++) { - const auto in0 = vector_u32(input + 0 * N); - const auto in1 = vector_u32(input + 1 * N); - const auto in2 = vector_u32(input + 2 * N); - const auto in3 = vector_u32(input + 3 * N); - -#if SIMDUTF_SIMD_HAS_UNSIGNED_CMP - acc -= as_vector_u32(in0 > v_0000007f); - acc -= as_vector_u32(in1 > v_0000007f); - acc -= as_vector_u32(in2 > v_0000007f); - acc -= as_vector_u32(in3 > v_0000007f); - - acc -= as_vector_u32(in0 > v_000007ff); - acc -= as_vector_u32(in1 > v_000007ff); - acc -= as_vector_u32(in2 > v_000007ff); - acc -= as_vector_u32(in3 > v_000007ff); - - acc -= as_vector_u32(in0 > v_0000ffff); - acc -= as_vector_u32(in1 > v_0000ffff); - acc -= as_vector_u32(in2 > v_0000ffff); - acc -= as_vector_u32(in3 > v_0000ffff); -#else - acc += min(one, in0 & v_ffffff80); - acc += min(one, in1 & v_ffffff80); - acc += min(one, in2 & v_ffffff80); - acc += min(one, in3 & v_ffffff80); - - acc += min(one, in0 & v_fffff800); - acc += min(one, in1 & v_fffff800); - acc += min(one, in2 & v_fffff800); - acc += min(one, in3 & v_fffff800); - - acc += min(one, in0 & v_ffff0000); - acc += min(one, in1 & v_ffff0000); - acc += min(one, in2 & v_ffff0000); - acc += min(one, in3 & v_ffff0000); -#endif // SIMDUTF_SIMD_HAS_UNSIGNED_CMP - - input += 4 * N; - } - - counter += acc.sum(); - } - } - - // 2. vectorized loop for tail - { - const size_t max_iterations = - std::numeric_limits::max() / max_increment; - size_t blocks = length / N; - length -= blocks * N; - while (blocks != 0) { - const size_t iterations = min(blocks, max_iterations); - blocks -= iterations; - - auto acc = vector_u32::zero(); - for (size_t i = 0; i < iterations; i++) { - const auto in = vector_u32(input); - -#if SIMDUTF_SIMD_HAS_UNSIGNED_CMP - acc -= as_vector_u32(in > v_0000007f); - acc -= as_vector_u32(in > v_000007ff); - acc -= as_vector_u32(in > v_0000ffff); -#else - acc += min(one, in & v_ffffff80); - acc += min(one, in & v_fffff800); - acc += min(one, in & v_ffff0000); -#endif // SIMDUTF_SIMD_HAS_UNSIGNED_CMP - - input += N; - } - - counter += acc.sum(); - } - } - - const size_t consumed = input - start; - if (consumed != 0) { - // We don't count 0th bytes in the vectorized loops above, this - // is why we need to count them in the end. - counter += consumed; - } - - return counter + scalar::utf32::utf8_length_from_utf32(input, length); -} - -} // namespace utf32 -} // unnamed namespace -} // namespace haswell -} // namespace simdutf -/* end file src/generic/utf32.h */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -// other functions -#if SIMDUTF_FEATURE_UTF8 -/* begin file src/generic/utf8.h */ -namespace simdutf { -namespace haswell { -namespace { -namespace utf8 { - -using namespace simd; - -simdutf_really_inline size_t count_code_points(const char *in, size_t size) { - size_t pos = 0; - size_t count = 0; - for (; pos + 64 <= size; pos += 64) { - simd8x64 input(reinterpret_cast(in + pos)); - uint64_t utf8_continuation_mask = input.gt(-65); - count += count_ones(utf8_continuation_mask); - } - return count + scalar::utf8::count_code_points(in + pos, size - pos); -} - -#ifdef SIMDUTF_SIMD_HAS_BYTEMASK -simdutf_really_inline size_t count_code_points_bytemask(const char *in, - size_t size) { - using vector_i8 = simd8; - using vector_u8 = simd8; - using vector_u64 = simd64; - - constexpr size_t N = vector_i8::SIZE; - constexpr size_t max_iterations = 255 / 4; - - size_t pos = 0; - size_t count = 0; - - auto counters = vector_u64::zero(); - auto local = vector_u8::zero(); - size_t iterations = 0; - for (; pos + 4 * N <= size; pos += 4 * N) { - const auto input0 = - simd8::load(reinterpret_cast(in + pos + 0 * N)); - const auto input1 = - simd8::load(reinterpret_cast(in + pos + 1 * N)); - const auto input2 = - simd8::load(reinterpret_cast(in + pos + 2 * N)); - const auto input3 = - simd8::load(reinterpret_cast(in + pos + 3 * N)); - const auto mask0 = input0 > int8_t(-65); - const auto mask1 = input1 > int8_t(-65); - const auto mask2 = input2 > int8_t(-65); - const auto mask3 = input3 > int8_t(-65); - - local -= vector_u8(mask0); - local -= vector_u8(mask1); - local -= vector_u8(mask2); - local -= vector_u8(mask3); - - iterations += 1; - if (iterations == max_iterations) { - counters += sum_8bytes(local); - local = vector_u8::zero(); - iterations = 0; - } - } - - if (iterations > 0) { - count += local.sum_bytes(); - } - - count += counters.sum(); - - return count + scalar::utf8::count_code_points(in + pos, size - pos); -} -#endif // SIMDUTF_SIMD_HAS_BYTEMASK - -simdutf_really_inline size_t utf16_length_from_utf8(const char *in, - size_t size) { - size_t pos = 0; - size_t count = 0; - // This algorithm could no doubt be improved! - for (; pos + 64 <= size; pos += 64) { - simd8x64 input(reinterpret_cast(in + pos)); - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - // We count one word for anything that is not a continuation (so - // leading bytes). - count += 64 - count_ones(utf8_continuation_mask); - int64_t utf8_4byte = input.gteq_unsigned(240); - count += count_ones(utf8_4byte); - } - return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos); -} - -} // namespace utf8 -} // unnamed namespace -} // namespace haswell -} // namespace simdutf -/* end file src/generic/utf8.h */ -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_UTF16 -/* begin file src/generic/utf16.h */ -namespace simdutf { -namespace haswell { -namespace { -namespace utf16 { - -template -simdutf_really_inline size_t count_code_points(const char16_t *in, - size_t size) { - size_t pos = 0; - size_t count = 0; - for (; pos < size / 32 * 32; pos += 32) { - simd16x32 input(reinterpret_cast(in + pos)); - if (!match_system(big_endian)) { - input.swap_bytes(); - } - uint64_t not_pair = input.not_in_range(0xDC00, 0xDFFF); - count += count_ones(not_pair) / 2; - } - return count + - scalar::utf16::count_code_points(in + pos, size - pos); -} - -template -simdutf_really_inline size_t utf8_length_from_utf16(const char16_t *in, - size_t size) { - size_t pos = 0; - size_t count = 0; - // This algorithm could no doubt be improved! - for (; pos < size / 32 * 32; pos += 32) { - simd16x32 input(reinterpret_cast(in + pos)); - if (!match_system(big_endian)) { - input.swap_bytes(); - } - uint64_t ascii_mask = input.lteq(0x7F); - uint64_t twobyte_mask = input.lteq(0x7FF); - uint64_t not_pair_mask = input.not_in_range(0xD800, 0xDFFF); - - size_t ascii_count = count_ones(ascii_mask) / 2; - size_t twobyte_count = count_ones(twobyte_mask & ~ascii_mask) / 2; - size_t threebyte_count = count_ones(not_pair_mask & ~twobyte_mask) / 2; - size_t fourbyte_count = 32 - count_ones(not_pair_mask) / 2; - count += 2 * fourbyte_count + 3 * threebyte_count + 2 * twobyte_count + - ascii_count; - } - return count + scalar::utf16::utf8_length_from_utf16(in + pos, - size - pos); -} - -template -simdutf_really_inline size_t utf32_length_from_utf16(const char16_t *in, - size_t size) { - return count_code_points(in, size); -} - -simdutf_really_inline void -change_endianness_utf16(const char16_t *in, size_t size, char16_t *output) { - size_t pos = 0; - - while (pos < size / 32 * 32) { - simd16x32 input(reinterpret_cast(in + pos)); - input.swap_bytes(); - input.store(reinterpret_cast(output)); - pos += 32; - output += 32; - } - - scalar::utf16::change_endianness_utf16(in + pos, size - pos, output); -} - -} // namespace utf16 -} // unnamed namespace -} // namespace haswell -} // namespace simdutf -/* end file src/generic/utf16.h */ -/* begin file src/generic/utf16/utf8_length_from_utf16_bytemask.h */ -namespace simdutf { -namespace haswell { -namespace { -namespace utf16 { - -using namespace simd; - -template -simdutf_really_inline size_t utf8_length_from_utf16_bytemask(const char16_t *in, - size_t size) { - size_t pos = 0; - - using vector_u16 = simd16; - constexpr size_t N = vector_u16::ELEMENTS; - - const auto one = vector_u16::splat(1); - - auto v_count = vector_u16::zero(); - - // each char16 yields at least one byte - size_t count = size / N * N; - - // in a single iteration the increment is 0, 1 or 2, despite we have - // three additions - constexpr size_t max_iterations = 65535 / 2; - size_t iteration = max_iterations; - - for (; pos < size / N * N; pos += N) { - auto input = vector_u16::load(reinterpret_cast(in + pos)); - if (!match_system(big_endian)) { - input = input.swap_bytes(); - } - // 0xd800 .. 0xdbff - low surrogate - // 0xdc00 .. 0xdfff - high surrogate - const auto is_surrogate = ((input & uint16_t(0xf800)) == uint16_t(0xd800)); - - // c0 - chars that yield 2- or 3-byte UTF-8 codes - const auto c0 = min(input & uint16_t(0xff80), one); - - // c1 - chars that yield 3-byte UTF-8 codes (including surrogates) - const auto c1 = min(input & uint16_t(0xf800), one); - - /* - Explanation how the counting works. - - In the case of a non-surrogate character we count: - * always 1 -- see how `count` is initialized above; - * c0 = 1 if the current char yields 2 or 3 bytes; - * c1 = 1 if the current char yields 3 bytes. - - Thus, we always have correct count for the current char: - from 1, 2 or 3 bytes. - - A trickier part is how we count surrogate pairs. Whether - we encounter a surrogate (low or high), we count it as - 3 chars and then minus 1 (`is_surrogate` is -1 or 0). - Each surrogate char yields 2. A surrogate pair, that - is a low surrogate followed by a high one, yields - the expected 4 bytes. - - It also correctly handles cases when low surrogate is - processed by the this loop, but high surrogate is counted - by the scalar procedure. The scalar procedure uses exactly - the described approach, thanks to that for valid UTF-16 - strings it always count correctly. - */ - v_count += c0; - v_count += c1; - v_count += vector_u16(is_surrogate); - - iteration -= 1; - if (iteration == 0) { - count += v_count.sum(); - v_count = vector_u16::zero(); - iteration = max_iterations; - } - } - - if (iteration > 0) { - count += v_count.sum(); - } - - return count + scalar::utf16::utf8_length_from_utf16(in + pos, - size - pos); -} - -} // namespace utf16 -} // unnamed namespace -} // namespace haswell -} // namespace simdutf -/* end file src/generic/utf16/utf8_length_from_utf16_bytemask.h */ -#endif // SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING -/* begin file src/generic/validate_utf16.h */ -namespace simdutf { -namespace haswell { -namespace { -namespace utf16 { -/* - UTF-16 validation - -------------------------------------------------- - - In UTF-16 code units in range 0xD800 to 0xDFFF have special meaning. - - In a vectorized algorithm we want to examine the most significant - nibble in order to select a fast path. If none of highest nibbles - are 0xD (13), than we are sure that UTF-16 chunk in a vector - register is valid. - - Let us analyze what we need to check if the nibble is 0xD. The - value of the preceding nibble determines what we have: - - 0xd000 .. 0xd7ff - a valid word - 0xd800 .. 0xdbff - low surrogate - 0xdc00 .. 0xdfff - high surrogate - - Other constraints we have to consider: - - there must not be two consecutive low surrogates (0xd800 .. 0xdbff) - - there must not be two consecutive high surrogates (0xdc00 .. 0xdfff) - - there must not be sole low surrogate nor high surrogate - - We are going to build three bitmasks based on the 3rd nibble: - - V = valid word, - - L = low surrogate (0xd800 .. 0xdbff) - - H = high surrogate (0xdc00 .. 0xdfff) - - 0 1 2 3 4 5 6 7 <--- word index - [ V | L | H | L | H | V | V | L ] - 1 0 0 0 0 1 1 0 - V = valid masks - 0 1 0 1 0 0 0 1 - L = low surrogate - 0 0 1 0 1 0 0 0 - H high surrogate - - - 1 0 0 0 0 1 1 0 V = valid masks - 0 1 0 1 0 0 0 0 a = L & (H >> 1) - 0 0 1 0 1 0 0 0 b = a << 1 - 1 1 1 1 1 1 1 0 c = V | a | b - ^ - the last bit can be zero, we just consume 7 - code units and recheck this word in the next iteration -*/ -template -const result validate_utf16_with_errors(const char16_t *input, size_t size) { - if (simdutf_unlikely(size == 0)) { - return result(error_code::SUCCESS, 0); - } - - const char16_t *start = input; - const char16_t *end = input + size; - - const auto v_d8 = simd8::splat(0xd8); - const auto v_f8 = simd8::splat(0xf8); - const auto v_fc = simd8::splat(0xfc); - const auto v_dc = simd8::splat(0xdc); - - while (input + simd16::SIZE * 2 < end) { - // 0. Load data: since the validation takes into account only higher - // byte of each word, we compress the two vectors into one which - // consists only the higher bytes. - auto in0 = simd16(input); - auto in1 = - simd16(input + simd16::SIZE / sizeof(char16_t)); - - // Function `utf16_gather_high_bytes` consumes two vectors of UTF-16 - // and yields a single vector having only higher bytes of characters. - const auto in = utf16_gather_high_bytes(in0, in1); - - // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy). - const auto surrogates_wordmask = (in & v_f8) == v_d8; - const uint16_t surrogates_bitmask = - static_cast(surrogates_wordmask.to_bitmask()); - if (surrogates_bitmask == 0x0000) { - input += 16; - } else { - // 2. We have some surrogates that have to be distinguished: - // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF) - // - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF) - // - // Fact: high surrogate has 11th bit set (3rd bit in the higher byte) - - // V - non-surrogate code units - // V = not surrogates_wordmask - const uint16_t V = static_cast(~surrogates_bitmask); - - // H - word-mask for high surrogates: the six highest bits are 0b1101'11 - const auto vH = (in & v_fc) == v_dc; - const uint16_t H = static_cast(vH.to_bitmask()); - - // L - word mask for low surrogates - // L = not H and surrogates_wordmask - const uint16_t L = static_cast(~H & surrogates_bitmask); - - const uint16_t a = static_cast( - L & (H >> 1)); // A low surrogate must be followed by high one. - // (A low surrogate placed in the 7th register's word - // is an exception we handle.) - const uint16_t b = static_cast( - a << 1); // Just mark that the opinput - startite fact is hold, - // thanks to that we have only two masks for valid case. - const uint16_t c = static_cast( - V | a | b); // Combine all the masks into the final one. - - if (c == 0xffff) { - // The whole input register contains valid UTF-16, i.e., - // either single code units or proper surrogate pairs. - input += 16; - } else if (c == 0x7fff) { - // The 15 lower code units of the input register contains valid UTF-16. - // The 15th word may be either a low or high surrogate. It the next - // iteration we 1) check if the low surrogate is followed by a high - // one, 2) reject sole high surrogate. - input += 15; - } else { - return result(error_code::SURROGATE, input - start); - } - } - } - - return result(error_code::SUCCESS, input - start); -} - -} // namespace utf16 -} // unnamed namespace -} // namespace haswell -} // namespace simdutf -/* end file src/generic/validate_utf16.h */ -#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - // transcoding from UTF-8 to Latin 1 -/* begin file src/generic/utf8_to_latin1/utf8_to_latin1.h */ -namespace simdutf { -namespace haswell { -namespace { -namespace utf8_to_latin1 { -using namespace simd; - -simdutf_really_inline simd8 -check_special_cases(const simd8 input, const simd8 prev1) { - // For UTF-8 to Latin 1, we can allow any ASCII character, and any - // continuation byte, but the non-ASCII leading bytes must be 0b11000011 or - // 0b11000010 and nothing else. - // - // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) - // Bit 1 = Too Long (ASCII followed by continuation) - // Bit 2 = Overlong 3-byte - // Bit 4 = Surrogate - // Bit 5 = Overlong 2-byte - // Bit 7 = Two Continuations - constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ - // 11______ 11______ - constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ - constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ - constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ - constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ - constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ - constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ - constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; - // 11110101 1000____ - // 1111011_ 1000____ - // 11111___ 1000____ - constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ - constexpr const uint8_t FORBIDDEN = 0xff; - - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // 0_______ ________ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - TOO_LONG, - // 10______ ________ - TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, - // 1100____ ________ - TOO_SHORT | OVERLONG_2, - // 1101____ ________ - FORBIDDEN, - // 1110____ ________ - FORBIDDEN, - // 1111____ ________ - FORBIDDEN); - constexpr const uint8_t CARRY = - TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . - const simd8 byte_1_low = - (prev1 & 0x0F) - .lookup_16( - // ____0000 ________ - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - // ____0001 ________ - CARRY | OVERLONG_2, - // ____001_ ________ - CARRY, CARRY, - - // ____0100 ________ - FORBIDDEN, - // ____0101 ________ - FORBIDDEN, - // ____011_ ________ - FORBIDDEN, FORBIDDEN, - - // ____1___ ________ - FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN, - // ____1101 ________ - FORBIDDEN, FORBIDDEN, FORBIDDEN); - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ________ 0_______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - TOO_SHORT, TOO_SHORT, - - // ________ 1000____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | - OVERLONG_4, - // ________ 1001____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - // ________ 101_____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - - // ________ 11______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); - return (byte_1_high & byte_1_low & byte_2_high); -} - -struct validating_transcoder { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; - - validating_transcoder() : error(uint8_t(0)) {} - // - // Check whether the current bytes are valid UTF-8. - // - simdutf_really_inline void check_utf8_bytes(const simd8 input, - const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ - // lead bytes (2, 3, 4-byte leads become large positive numbers instead of - // small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - this->error |= check_special_cases(input, prev1); - } - - simdutf_really_inline size_t convert(const char *in, size_t size, - char *latin1_output) { - size_t pos = 0; - char *start{latin1_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 16 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 16; margin--) { - leading_byte += (int8_t(in[margin - 1]) > - -65); // twos complement of -65 is 1011 1111 ... - } - // If the input is long enough, then we have that margin-1 is the eight last - // leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store((int8_t *)latin1_output); - latin1_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - uint64_t utf8_continuation_mask = - input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in - // this case, we also have ASCII to account for. - if (utf8_continuation_mask & 1) { - return 0; // error - } - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_latin1( - in + pos, utf8_end_of_code_point_mask, latin1_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - return 0; - } - if (pos < size) { - size_t howmany = - scalar::utf8_to_latin1::convert(in + pos, size - pos, latin1_output); - if (howmany == 0) { - return 0; - } - latin1_output += howmany; - } - return latin1_output - start; - } - - simdutf_really_inline result convert_with_errors(const char *in, size_t size, - char *latin1_output) { - size_t pos = 0; - char *start{latin1_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 8 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > -65); - } - // If the input is long enough, then we have that margin-1 is the eight last - // leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store((int8_t *)latin1_output); - latin1_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - if (errors()) { - // rewind_and_convert_with_errors will seek a potential error from - // in+pos onward, with the ability to go back up to pos bytes, and - // read size-pos bytes forward. - result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( - pos, in + pos, size - pos, latin1_output); - res.count += pos; - return res; - } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_latin1( - in + pos, utf8_end_of_code_point_mask, latin1_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - // rewind_and_convert_with_errors will seek a potential error from in+pos - // onward, with the ability to go back up to pos bytes, and read size-pos - // bytes forward. - result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( - pos, in + pos, size - pos, latin1_output); - res.count += pos; - return res; - } - if (pos < size) { - // rewind_and_convert_with_errors will seek a potential error from in+pos - // onward, with the ability to go back up to pos bytes, and read size-pos - // bytes forward. - result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( - pos, in + pos, size - pos, latin1_output); - if (res.error) { // In case of error, we want the error position - res.count += pos; - return res; - } else { // In case of success, we want the number of word written - latin1_output += res.count; - } - } - return result(error_code::SUCCESS, latin1_output - start); - } - - simdutf_really_inline bool errors() const { - return this->error.any_bits_set_anywhere(); - } - -}; // struct utf8_checker -} // namespace utf8_to_latin1 -} // unnamed namespace -} // namespace haswell -} // namespace simdutf -/* end file src/generic/utf8_to_latin1/utf8_to_latin1.h */ -/* begin file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ -namespace simdutf { -namespace haswell { -namespace { -namespace utf8_to_latin1 { -using namespace simd; - -simdutf_really_inline size_t convert_valid(const char *in, size_t size, - char *latin1_output) { - size_t pos = 0; - char *start{latin1_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the last - // 16 bytes, and if the data is valid, then it is entirely safe because 16 - // UTF-8 bytes generate much more than 8 bytes. However, you cannot generally - // assume that you have valid UTF-8 input, so we are going to go back from the - // end counting 8 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > - -65); // twos complement of -65 is 1011 1111 ... - } - // If the input is long enough, then we have that margin-1 is the eight last - // leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store((int8_t *)latin1_output); - latin1_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, it - // is not good enough. - uint64_t utf8_continuation_mask = - input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in - // this case, we also have ASCII to account for. - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_latin1( - in + pos, utf8_end_of_code_point_mask, latin1_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (pos < size) { - size_t howmany = scalar::utf8_to_latin1::convert_valid(in + pos, size - pos, - latin1_output); - latin1_output += howmany; - } - return latin1_output - start; -} - -} // namespace utf8_to_latin1 -} // namespace -} // namespace haswell -} // namespace simdutf - // namespace simdutf -/* end file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING -/* begin file src/generic/validate_utf32.h */ -namespace simdutf { -namespace haswell { -namespace { -namespace utf32 { - -simdutf_really_inline bool validate(const char32_t *input, size_t size) { - if (simdutf_unlikely(size == 0)) { - // empty input is valid UTF-32. protect the implementation from - // handling nullptr - return true; - } - - const char32_t *end = input + size; - - using vector_u32 = simd32; - - const auto standardmax = vector_u32::splat(0x10ffff); - const auto offset = vector_u32::splat(0xffff2000); - const auto standardoffsetmax = vector_u32::splat(0xfffff7ff); - auto currentmax = vector_u32::zero(); - auto currentoffsetmax = vector_u32::zero(); - - constexpr size_t N = vector_u32::ELEMENTS; - - while (input + N < end) { - auto in = vector_u32(input); - if (!match_system(endianness::BIG)) { - in.swap_bytes(); - } - - currentmax = max(currentmax, in); - currentoffsetmax = max(currentoffsetmax, in + offset); - input += N; - } - - const auto too_large = currentmax > standardmax; - if (too_large.any()) { - return false; - } - - const auto surrogate = currentoffsetmax > standardoffsetmax; - if (surrogate.any()) { - return false; - } - - return scalar::utf32::validate(input, end - input); -} - -simdutf_really_inline result validate_with_errors(const char32_t *input, - size_t size) { - if (simdutf_unlikely(size == 0)) { - // empty input is valid UTF-32. protect the implementation from - // handling nullptr - return result(error_code::SUCCESS, 0); - } - - const char32_t *start = input; - const char32_t *end = input + size; - - using vector_u32 = simd32; - - const auto standardmax = vector_u32::splat(0x10ffff + 1); - const auto surrogate_mask = vector_u32::splat(0xfffff800); - const auto surrogate_byte = vector_u32::splat(0x0000d800); - - constexpr size_t N = vector_u32::ELEMENTS; - - while (input + N < end) { - auto in = vector_u32(input); - if (!match_system(endianness::BIG)) { - in.swap_bytes(); - } - - const auto too_large = in >= standardmax; - const auto surrogate = (in & surrogate_mask) == surrogate_byte; - - const auto combined = too_large | surrogate; - if (simdutf_unlikely(combined.any())) { - const size_t consumed = input - start; - auto sr = scalar::utf32::validate_with_errors(input, end - input); - sr.count += consumed; - - return sr; - } - - input += N; - } - - const size_t consumed = input - start; - auto sr = scalar::utf32::validate_with_errors(input, end - input); - sr.count += consumed; - - return sr; -} - -} // namespace utf32 -} // unnamed namespace -} // namespace haswell -} // namespace simdutf -/* end file src/generic/validate_utf32.h */ -#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_BASE64 -/* begin file src/generic/base64.h */ -/** - * References and further reading: - * - * Wojciech Muła, Daniel Lemire, Base64 encoding and decoding at almost the - * speed of a memory copy, Software: Practice and Experience 50 (2), 2020. - * https://arxiv.org/abs/1910.05109 - * - * Wojciech Muła, Daniel Lemire, Faster Base64 Encoding and Decoding using AVX2 - * Instructions, ACM Transactions on the Web 12 (3), 2018. - * https://arxiv.org/abs/1704.00605 - * - * Simon Josefsson. 2006. The Base16, Base32, and Base64 Data Encodings. - * https://tools.ietf.org/html/rfc4648. (2006). Internet Engineering Task Force, - * Request for Comments: 4648. - * - * Alfred Klomp. 2014a. Fast Base64 encoding/decoding with SSE vectorization. - * http://www.alfredklomp.com/programming/sse-base64/. (2014). - * - * Alfred Klomp. 2014b. Fast Base64 stream encoder/decoder in C99, with SIMD - * acceleration. https://github.com/aklomp/base64. (2014). - * - * Hanson Char. 2014. A Fast and Correct Base 64 Codec. (2014). - * https://aws.amazon.com/blogs/developer/a-fast-and-correct-base-64-codec/ - * - * Nick Kopp. 2013. Base64 Encoding on a GPU. - * https://www.codeproject.com/Articles/276993/Base-Encoding-on-a-GPU. (2013). - */ -namespace simdutf { -namespace haswell { -namespace { -namespace base64 { - -/* - The following template function implements API for Base64 decoding. - - An implementation is responsible for providing the `block64` type and - associated methods that perform actual conversion. Please refer - to any vectorized implementation to learn the API of these procedures. -*/ -template -full_result -compress_decode_base64(char *dst, const chartype *src, size_t srclen, - base64_options options, - last_chunk_handling_options last_chunk_options) { - const uint8_t *to_base64 = base64_url ? tables::base64::to_base64_url_value - : tables::base64::to_base64_value; - size_t equallocation = - srclen; // location of the first padding character if any - // skip trailing spaces - while (!ignore_garbage && srclen > 0 && - scalar::base64::is_eight_byte(src[srclen - 1]) && - to_base64[uint8_t(src[srclen - 1])] == 64) { - srclen--; - } - size_t equalsigns = 0; - if (!ignore_garbage && srclen > 0 && src[srclen - 1] == '=') { - equallocation = srclen - 1; - srclen--; - equalsigns = 1; - // skip trailing spaces - while (srclen > 0 && scalar::base64::is_eight_byte(src[srclen - 1]) && - to_base64[uint8_t(src[srclen - 1])] == 64) { - srclen--; - } - if (srclen > 0 && src[srclen - 1] == '=') { - equallocation = srclen - 1; - srclen--; - equalsigns = 2; - } - } - if (srclen == 0) { - if (!ignore_garbage && equalsigns > 0) { - if (last_chunk_options == last_chunk_handling_options::strict) { - return {BASE64_INPUT_REMAINDER, 0, 0}; - } else if (last_chunk_options == - last_chunk_handling_options::stop_before_partial) { - return {SUCCESS, 0, 0}; - } - return {INVALID_BASE64_CHARACTER, equallocation, 0}; - } - return {SUCCESS, 0, 0}; - } - char *end_of_safe_64byte_zone = - (srclen + 3) / 4 * 3 >= 63 ? dst + (srclen + 3) / 4 * 3 - 63 : dst; - - const chartype *const srcinit = src; - const char *const dstinit = dst; - const chartype *const srcend = src + srclen; - - constexpr size_t block_size = 6; - static_assert(block_size >= 2, "block_size must be at least two"); - char buffer[block_size * 64]; - char *bufferptr = buffer; - if (srclen >= 64) { - const chartype *const srcend64 = src + srclen - 64; - while (src <= srcend64) { - block64 b(src); - src += 64; - uint64_t error = 0; - const uint64_t badcharmask = - b.to_base64_mask(&error); - if (!ignore_garbage && error) { - src -= 64; - const size_t error_offset = trailing_zeroes(error); - return {error_code::INVALID_BASE64_CHARACTER, - size_t(src - srcinit + error_offset), size_t(dst - dstinit)}; - } - if (badcharmask != 0) { - bufferptr += b.compress_block(badcharmask, bufferptr); - } else if (bufferptr != buffer) { - b.copy_block(bufferptr); - bufferptr += 64; - } else { - if (dst >= end_of_safe_64byte_zone) { - b.base64_decode_block_safe(dst); - } else { - b.base64_decode_block(dst); - } - dst += 48; - } - if (bufferptr >= (block_size - 1) * 64 + buffer) { - for (size_t i = 0; i < (block_size - 2); i++) { - base64_decode_block(dst, buffer + i * 64); - dst += 48; - } - if (dst >= end_of_safe_64byte_zone) { - base64_decode_block_safe(dst, buffer + (block_size - 2) * 64); - } else { - base64_decode_block(dst, buffer + (block_size - 2) * 64); - } - dst += 48; - std::memcpy(buffer, buffer + (block_size - 1) * 64, - 64); // 64 might be too much - bufferptr -= (block_size - 1) * 64; - } - } - } - - char *buffer_start = buffer; - // Optimization note: if this is almost full, then it is worth our - // time, otherwise, we should just decode directly. - int last_block = (int)((bufferptr - buffer_start) % 64); - if (last_block != 0 && srcend - src + last_block >= 64) { - - while ((bufferptr - buffer_start) % 64 != 0 && src < srcend) { - uint8_t val = to_base64[uint8_t(*src)]; - *bufferptr = char(val); - if (!ignore_garbage && - (!scalar::base64::is_eight_byte(*src) || val > 64)) { - return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), - size_t(dst - dstinit)}; - } - bufferptr += (val <= 63); - src++; - } - } - - for (; buffer_start + 64 <= bufferptr; buffer_start += 64) { - if (dst >= end_of_safe_64byte_zone) { - base64_decode_block_safe(dst, buffer_start); - } else { - base64_decode_block(dst, buffer_start); - } - dst += 48; - } - if ((bufferptr - buffer_start) % 64 != 0) { - while (buffer_start + 4 < bufferptr) { - uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + - (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + - (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + - (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) - << 8; -#if !SIMDUTF_IS_BIG_ENDIAN - triple = scalar::u32_swap_bytes(triple); -#endif - std::memcpy(dst, &triple, 3); - - dst += 3; - buffer_start += 4; - } - if (buffer_start + 4 <= bufferptr) { - uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + - (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + - (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + - (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) - << 8; -#if !SIMDUTF_IS_BIG_ENDIAN - triple = scalar::u32_swap_bytes(triple); -#endif - std::memcpy(dst, &triple, 3); - - dst += 3; - buffer_start += 4; - } - // we may have 1, 2 or 3 bytes left and we need to decode them so let us - // backtrack - int leftover = int(bufferptr - buffer_start); - while (leftover > 0) { - if (!ignore_garbage) { - while (to_base64[uint8_t(*(src - 1))] == 64) { - src--; - } - } else { - while (to_base64[uint8_t(*(src - 1))] >= 64) { - src--; - } - } - src--; - leftover--; - } - } - if (src < srcend + equalsigns) { - full_result r = scalar::base64::base64_tail_decode( - dst, src, srcend - src, equalsigns, options, last_chunk_options); - r.input_count += size_t(src - srcinit); - if (r.error == error_code::INVALID_BASE64_CHARACTER || - r.error == error_code::BASE64_EXTRA_BITS) { - return r; - } else { - r.output_count += size_t(dst - dstinit); - } - if (!ignore_garbage && last_chunk_options != stop_before_partial && - r.error == error_code::SUCCESS && equalsigns > 0) { - // additional checks - if ((r.output_count % 3 == 0) || - ((r.output_count % 3) + 1 + equalsigns != 4)) { - r.error = error_code::INVALID_BASE64_CHARACTER; - r.input_count = equallocation; - } - } - return r; - } - if (!ignore_garbage && equalsigns > 0) { - if ((size_t(dst - dstinit) % 3 == 0) || - ((size_t(dst - dstinit) % 3) + 1 + equalsigns != 4)) { - return {INVALID_BASE64_CHARACTER, equallocation, size_t(dst - dstinit)}; - } - } - return {SUCCESS, srclen, size_t(dst - dstinit)}; -} - -} // namespace base64 -} // unnamed namespace -} // namespace haswell -} // namespace simdutf -/* end file src/generic/base64.h */ -#endif // SIMDUTF_FEATURE_BASE64 - -namespace simdutf { -namespace haswell { - -#if SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_warn_unused int -implementation::detect_encodings(const char *input, - size_t length) const noexcept { - // If there is a BOM, then we trust it. - auto bom_encoding = simdutf::BOM::check_bom(input, length); - if (bom_encoding != encoding_type::unspecified) { - return bom_encoding; - } - - int out = 0; - uint32_t utf16_err = (length % 2); - uint32_t utf32_err = (length % 4); - uint32_t ends_with_high = 0; - const auto v_d8 = simd8::splat(0xd8); - const auto v_f8 = simd8::splat(0xf8); - const auto v_fc = simd8::splat(0xfc); - const auto v_dc = simd8::splat(0xdc); - const __m256i standardmax = _mm256_set1_epi32(0x10ffff); - const __m256i offset = _mm256_set1_epi32(0xffff2000); - const __m256i standardoffsetmax = _mm256_set1_epi32(0xfffff7ff); - __m256i currentmax = _mm256_setzero_si256(); - __m256i currentoffsetmax = _mm256_setzero_si256(); - - utf8_checker c{}; - buf_block_reader<64> reader(reinterpret_cast(input), length); - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - // utf8 checks - c.check_next_input(in); - - // utf16le checks - auto in0 = simd16(in.chunks[0]); - auto in1 = simd16(in.chunks[1]); - const auto t0 = in0.shr<8>(); - const auto t1 = in1.shr<8>(); - const auto in2 = simd16::pack(t0, t1); - const auto surrogates_wordmask = (in2 & v_f8) == v_d8; - const uint32_t surrogates_bitmask = surrogates_wordmask.to_bitmask(); - const auto vL = (in2 & v_fc) == v_dc; - const uint32_t L = vL.to_bitmask(); - const uint32_t H = L ^ surrogates_bitmask; - utf16_err |= (((H << 1) | ends_with_high) != L); - ends_with_high = (H & 0x80000000) != 0; - - // utf32le checks - currentmax = _mm256_max_epu32(in.chunks[0], currentmax); - currentoffsetmax = _mm256_max_epu32(_mm256_add_epi32(in.chunks[0], offset), - currentoffsetmax); - currentmax = _mm256_max_epu32(in.chunks[1], currentmax); - currentoffsetmax = _mm256_max_epu32(_mm256_add_epi32(in.chunks[1], offset), - currentoffsetmax); - - reader.advance(); - } - - uint8_t block[64]{}; - size_t idx = reader.block_index(); - std::memcpy(block, &input[idx], length - idx); - simd::simd8x64 in(block); - c.check_next_input(in); - - // utf16le last block check - auto in0 = simd16(in.chunks[0]); - auto in1 = simd16(in.chunks[1]); - const auto t0 = in0.shr<8>(); - const auto t1 = in1.shr<8>(); - const auto in2 = simd16::pack(t0, t1); - const auto surrogates_wordmask = (in2 & v_f8) == v_d8; - const uint32_t surrogates_bitmask = surrogates_wordmask.to_bitmask(); - const auto vL = (in2 & v_fc) == v_dc; - const uint32_t L = vL.to_bitmask(); - const uint32_t H = L ^ surrogates_bitmask; - utf16_err |= (((H << 1) | ends_with_high) != L); - // this is required to check for last byte ending in high and end of input - // is reached - ends_with_high = (H & 0x80000000) != 0; - utf16_err |= ends_with_high; - - // utf32le last block check - currentmax = _mm256_max_epu32(in.chunks[0], currentmax); - currentoffsetmax = _mm256_max_epu32(_mm256_add_epi32(in.chunks[0], offset), - currentoffsetmax); - currentmax = _mm256_max_epu32(in.chunks[1], currentmax); - currentoffsetmax = _mm256_max_epu32(_mm256_add_epi32(in.chunks[1], offset), - currentoffsetmax); - - reader.advance(); - - c.check_eof(); - bool is_valid_utf8 = !c.errors(); - __m256i is_zero = - _mm256_xor_si256(_mm256_max_epu32(currentmax, standardmax), standardmax); - utf32_err |= (_mm256_testz_si256(is_zero, is_zero) == 0); - - is_zero = _mm256_xor_si256( - _mm256_max_epu32(currentoffsetmax, standardoffsetmax), standardoffsetmax); - utf32_err |= (_mm256_testz_si256(is_zero, is_zero) == 0); - if (is_valid_utf8) { - out |= encoding_type::UTF8; - } - if (utf16_err == 0) { - out |= encoding_type::UTF16_LE; - } - if (utf32_err == 0) { - out |= encoding_type::UTF32_LE; - } - return out; -} -#endif // SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_warn_unused bool -implementation::validate_utf8(const char *buf, size_t len) const noexcept { - return haswell::utf8_validation::generic_validate_utf8(buf, len); -} -#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 -simdutf_warn_unused result implementation::validate_utf8_with_errors( - const char *buf, size_t len) const noexcept { - return haswell::utf8_validation::generic_validate_utf8_with_errors(buf, len); -} -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_ASCII -simdutf_warn_unused bool -implementation::validate_ascii(const char *buf, size_t len) const noexcept { - return haswell::ascii_validation::generic_validate_ascii(buf, len); -} - -simdutf_warn_unused result implementation::validate_ascii_with_errors( - const char *buf, size_t len) const noexcept { - return haswell::ascii_validation::generic_validate_ascii_with_errors(buf, - len); -} -#endif // SIMDUTF_FEATURE_ASCII - -#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_warn_unused bool -implementation::validate_utf16le(const char16_t *buf, - size_t len) const noexcept { - if (simdutf_unlikely(len == 0)) { - // empty input is valid UTF-16. protect the implementation from - // handling nullptr - return true; - } - const auto res = - haswell::utf16::validate_utf16_with_errors(buf, len); - if (res.is_err()) { - return false; - } - - if (res.count == len) { - return true; - } - - return scalar::utf16::validate(buf + res.count, - len - res.count); -} -#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused bool -implementation::validate_utf16be(const char16_t *buf, - size_t len) const noexcept { - if (simdutf_unlikely(len == 0)) { - // empty input is valid UTF-16. protect the implementation from - // handling nullptr - return true; - } - const auto res = - haswell::utf16::validate_utf16_with_errors(buf, len); - if (res.is_err()) { - return false; - } - - if (res.count == len) { - return true; - } - - return scalar::utf16::validate(buf + res.count, - len - res.count); -} - -simdutf_warn_unused result implementation::validate_utf16le_with_errors( - const char16_t *buf, size_t len) const noexcept { - - const result res = - haswell::utf16::validate_utf16_with_errors(buf, len); - if (res.count != len) { - const result scalar_res = - scalar::utf16::validate_with_errors( - buf + res.count, len - res.count); - return result(scalar_res.error, res.count + scalar_res.count); - } else { - return res; - } -} - -simdutf_warn_unused result implementation::validate_utf16be_with_errors( - const char16_t *buf, size_t len) const noexcept { - const result res = - haswell::utf16::validate_utf16_with_errors(buf, len); - if (res.count != len) { - const result scalar_res = - scalar::utf16::validate_with_errors(buf + res.count, - len - res.count); - return result(scalar_res.error, res.count + scalar_res.count); - } else { - return res; - } -} - -void implementation::to_well_formed_utf16le(const char16_t *input, size_t len, - char16_t *output) const noexcept { - return utf16fix_avx(input, len, output); -} - -void implementation::to_well_formed_utf16be(const char16_t *input, size_t len, - char16_t *output) const noexcept { - return utf16fix_avx(input, len, output); -} -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_warn_unused bool -implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept { - return utf32::validate(buf, len); -} -#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused result implementation::validate_utf32_with_errors( - const char32_t *buf, size_t len) const noexcept { - return utf32::validate_with_errors(buf, len); -} -#endif // SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( - const char *buf, size_t len, char *utf8_output) const noexcept { - std::pair ret = - avx2_convert_latin1_to_utf8(buf, len, utf8_output); - size_t converted_chars = ret.second - utf8_output; - - if (ret.first != buf + len) { - const size_t scalar_converted_chars = scalar::latin1_to_utf8::convert( - ret.first, len - (ret.first - buf), ret.second); - converted_chars += scalar_converted_chars; - } - - return converted_chars; -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - std::pair ret = - avx2_convert_latin1_to_utf16(buf, len, utf16_output); - if (ret.first == nullptr) { - return 0; - } - size_t converted_chars = ret.second - utf16_output; - if (ret.first != buf + len) { - const size_t scalar_converted_chars = - scalar::latin1_to_utf16::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_converted_chars == 0) { - return 0; - } - converted_chars += scalar_converted_chars; - } - return converted_chars; -} - -simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - std::pair ret = - avx2_convert_latin1_to_utf16(buf, len, utf16_output); - if (ret.first == nullptr) { - return 0; - } - size_t converted_chars = ret.second - utf16_output; - if (ret.first != buf + len) { - const size_t scalar_converted_chars = - scalar::latin1_to_utf16::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_converted_chars == 0) { - return 0; - } - converted_chars += scalar_converted_chars; - } - return converted_chars; -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept { - std::pair ret = - avx2_convert_latin1_to_utf32(buf, len, utf32_output); - if (ret.first == nullptr) { - return 0; - } - size_t converted_chars = ret.second - utf32_output; - if (ret.first != buf + len) { - const size_t scalar_converted_chars = scalar::latin1_to_utf32::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_converted_chars == 0) { - return 0; - } - converted_chars += scalar_converted_chars; - } - return converted_chars; -} -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept { - utf8_to_latin1::validating_transcoder converter; - return converter.convert(buf, len, latin1_output); -} - -simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors( - const char *buf, size_t len, char *latin1_output) const noexcept { - utf8_to_latin1::validating_transcoder converter; - return converter.convert_with_errors(buf, len, latin1_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1( - const char *input, size_t size, char *latin1_output) const noexcept { - return utf8_to_latin1::convert_valid(input, size, latin1_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - utf8_to_utf16::validating_transcoder converter; - return converter.convert(buf, len, utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - utf8_to_utf16::validating_transcoder converter; - return converter.convert(buf, len, utf16_output); -} - -simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - utf8_to_utf16::validating_transcoder converter; - return converter.convert_with_errors(buf, len, - utf16_output); -} - -simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - utf8_to_utf16::validating_transcoder converter; - return converter.convert_with_errors(buf, len, utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le( - const char *input, size_t size, char16_t *utf16_output) const noexcept { - return utf8_to_utf16::convert_valid(input, size, - utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be( - const char *input, size_t size, char16_t *utf16_output) const noexcept { - return utf8_to_utf16::convert_valid(input, size, - utf16_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::convert_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept { - utf8_to_utf32::validating_transcoder converter; - return converter.convert(buf, len, utf32_output); -} - -simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors( - const char *buf, size_t len, char32_t *utf32_output) const noexcept { - utf8_to_utf32::validating_transcoder converter; - return converter.convert_with_errors(buf, len, utf32_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( - const char *input, size_t size, char32_t *utf32_output) const noexcept { - return utf8_to_utf32::convert_valid(input, size, utf32_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - std::pair ret = - haswell::avx2_convert_utf16_to_latin1(buf, len, - latin1_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - latin1_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_latin1::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - std::pair ret = - haswell::avx2_convert_utf16_to_latin1(buf, len, - latin1_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - latin1_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_latin1::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused result -implementation::convert_utf16le_to_latin1_with_errors( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - std::pair ret = - avx2_convert_utf16_to_latin1_with_errors( - buf, len, latin1_output); - if (ret.first.error) { - return ret.first; - } // Can return directly since scalar fallback already found correct - // ret.first.count - if (ret.first.count != len) { // All good so far, but not finished - result scalar_res = - scalar::utf16_to_latin1::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - latin1_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused result -implementation::convert_utf16be_to_latin1_with_errors( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - std::pair ret = - avx2_convert_utf16_to_latin1_with_errors(buf, len, - latin1_output); - if (ret.first.error) { - return ret.first; - } // Can return directly since scalar fallback already found correct - // ret.first.count - if (ret.first.count != len) { // All good so far, but not finished - result scalar_res = - scalar::utf16_to_latin1::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - latin1_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - // optimization opportunity: implement a custom function - return convert_utf16be_to_latin1(buf, len, latin1_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - // optimization opportunity: implement a custom function - return convert_utf16le_to_latin1(buf, len, latin1_output); -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - std::pair ret = - haswell::avx2_convert_utf16_to_utf8(buf, len, - utf8_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf8_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_utf8::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - std::pair ret = - haswell::avx2_convert_utf16_to_utf8(buf, len, - utf8_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf8_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_utf8::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - haswell::avx2_convert_utf16_to_utf8_with_errors( - buf, len, utf8_output); - if (ret.first.error) { - return ret.first; - } // Can return directly since scalar fallback already found correct - // ret.first.count - if (ret.first.count != len) { // All good so far, but not finished - result scalar_res = - scalar::utf16_to_utf8::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - utf8_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - haswell::avx2_convert_utf16_to_utf8_with_errors( - buf, len, utf8_output); - if (ret.first.error) { - return ret.first; - } // Can return directly since scalar fallback already found correct - // ret.first.count - if (ret.first.count != len) { // All good so far, but not finished - result scalar_res = - scalar::utf16_to_utf8::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - utf8_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return convert_utf16le_to_utf8(buf, len, utf8_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return convert_utf16be_to_utf8(buf, len, utf8_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_output) const noexcept { - std::pair ret = - avx2_convert_utf32_to_utf8(buf, len, utf8_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf8_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = scalar::utf32_to_utf8::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( - const char32_t *buf, size_t len, char *latin1_output) const noexcept { - std::pair ret = - avx2_convert_utf32_to_latin1(buf, len, latin1_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - latin1_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = scalar::utf32_to_latin1::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors( - const char32_t *buf, size_t len, char *latin1_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - avx2_convert_utf32_to_latin1_with_errors(buf, len, latin1_output); - if (ret.first.count != len) { - result scalar_res = scalar::utf32_to_latin1::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - latin1_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1( - const char32_t *buf, size_t len, char *latin1_output) const noexcept { - return convert_utf32_to_latin1(buf, len, latin1_output); -} -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( - const char32_t *buf, size_t len, char *utf8_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - haswell::avx2_convert_utf32_to_utf8_with_errors(buf, len, utf8_output); - if (ret.first.count != len) { - result scalar_res = scalar::utf32_to_utf8::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - utf8_output; // Set count to the number of 8-bit code units written - return ret.first; -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - std::pair ret = - haswell::avx2_convert_utf16_to_utf32(buf, len, - utf32_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf32_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_utf32::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - std::pair ret = - haswell::avx2_convert_utf16_to_utf32(buf, len, - utf32_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf32_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_utf32::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - haswell::avx2_convert_utf16_to_utf32_with_errors( - buf, len, utf32_output); - if (ret.first.error) { - return ret.first; - } // Can return directly since scalar fallback already found correct - // ret.first.count - if (ret.first.count != len) { // All good so far, but not finished - result scalar_res = - scalar::utf16_to_utf32::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - utf32_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - haswell::avx2_convert_utf16_to_utf32_with_errors( - buf, len, utf32_output); - if (ret.first.error) { - return ret.first; - } // Can return directly since scalar fallback already found correct - // ret.first.count - if (ret.first.count != len) { // All good so far, but not finished - result scalar_res = - scalar::utf16_to_utf32::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - utf32_output; // Set count to the number of 8-bit code units written - return ret.first; -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_output) const noexcept { - return convert_utf32_to_utf8(buf, len, utf8_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - std::pair ret = - avx2_convert_utf32_to_utf16(buf, len, utf16_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf16_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf32_to_utf16::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - std::pair ret = - avx2_convert_utf32_to_utf16(buf, len, utf16_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf16_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf32_to_utf16::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - haswell::avx2_convert_utf32_to_utf16_with_errors( - buf, len, utf16_output); - if (ret.first.count != len) { - result scalar_res = - scalar::utf32_to_utf16::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - utf16_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - haswell::avx2_convert_utf32_to_utf16_with_errors( - buf, len, utf16_output); - if (ret.first.count != len) { - result scalar_res = - scalar::utf32_to_utf16::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - utf16_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - return convert_utf32_to_utf16le(buf, len, utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - return convert_utf32_to_utf16be(buf, len, utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return convert_utf16le_to_utf32(buf, len, utf32_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return convert_utf16be_to_utf32(buf, len, utf32_output); -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 -void implementation::change_endianness_utf16(const char16_t *input, - size_t length, - char16_t *output) const noexcept { - utf16::change_endianness_utf16(input, length, output); -} - -simdutf_warn_unused size_t implementation::count_utf16le( - const char16_t *input, size_t length) const noexcept { - return utf16::count_code_points(input, length); -} - -simdutf_warn_unused size_t implementation::count_utf16be( - const char16_t *input, size_t length) const noexcept { - return utf16::count_code_points(input, length); -} -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 -simdutf_warn_unused size_t -implementation::count_utf8(const char *in, size_t size) const noexcept { - return utf8::count_code_points_bytemask(in, size); -} -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::latin1_length_from_utf8( - const char *buf, size_t len) const noexcept { - return count_utf8(buf, len); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t implementation::utf8_length_from_utf16le( - const char16_t *input, size_t length) const noexcept { - return utf16::utf8_length_from_utf16_bytemask(input, - length); -} - -simdutf_warn_unused size_t implementation::utf8_length_from_utf16be( - const char16_t *input, size_t length) const noexcept { - return utf16::utf8_length_from_utf16_bytemask(input, length); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::utf32_length_from_utf16le( - const char16_t *input, size_t length) const noexcept { - return utf16::utf32_length_from_utf16(input, length); -} - -simdutf_warn_unused size_t implementation::utf32_length_from_utf16be( - const char16_t *input, size_t length) const noexcept { - return utf16::utf32_length_from_utf16(input, length); -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t implementation::utf16_length_from_utf8( - const char *input, size_t length) const noexcept { - return utf8::utf16_length_from_utf8_bytemask(input, length); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::utf8_length_from_latin1( - const char *input, size_t len) const noexcept { - const uint8_t *data = reinterpret_cast(input); - size_t answer = len / sizeof(__m256i) * sizeof(__m256i); - size_t i = 0; - if (answer >= 2048) { // long strings optimization - __m256i four_64bits = _mm256_setzero_si256(); - while (i + sizeof(__m256i) <= len) { - __m256i runner = _mm256_setzero_si256(); - // We can do up to 255 loops without overflow. - size_t iterations = (len - i) / sizeof(__m256i); - if (iterations > 255) { - iterations = 255; - } - size_t max_i = i + iterations * sizeof(__m256i) - sizeof(__m256i); - for (; i + 4 * sizeof(__m256i) <= max_i; i += 4 * sizeof(__m256i)) { - __m256i input1 = _mm256_loadu_si256((const __m256i *)(data + i)); - __m256i input2 = - _mm256_loadu_si256((const __m256i *)(data + i + sizeof(__m256i))); - __m256i input3 = _mm256_loadu_si256( - (const __m256i *)(data + i + 2 * sizeof(__m256i))); - __m256i input4 = _mm256_loadu_si256( - (const __m256i *)(data + i + 3 * sizeof(__m256i))); - __m256i input12 = - _mm256_add_epi8(_mm256_cmpgt_epi8(_mm256_setzero_si256(), input1), - _mm256_cmpgt_epi8(_mm256_setzero_si256(), input2)); - __m256i input23 = - _mm256_add_epi8(_mm256_cmpgt_epi8(_mm256_setzero_si256(), input3), - _mm256_cmpgt_epi8(_mm256_setzero_si256(), input4)); - __m256i input1234 = _mm256_add_epi8(input12, input23); - runner = _mm256_sub_epi8(runner, input1234); - } - for (; i <= max_i; i += sizeof(__m256i)) { - __m256i input_256_chunk = - _mm256_loadu_si256((const __m256i *)(data + i)); - runner = _mm256_sub_epi8( - runner, _mm256_cmpgt_epi8(_mm256_setzero_si256(), input_256_chunk)); - } - four_64bits = _mm256_add_epi64( - four_64bits, _mm256_sad_epu8(runner, _mm256_setzero_si256())); - } - answer += _mm256_extract_epi64(four_64bits, 0) + - _mm256_extract_epi64(four_64bits, 1) + - _mm256_extract_epi64(four_64bits, 2) + - _mm256_extract_epi64(four_64bits, 3); - } else if (answer > 0) { - for (; i + sizeof(__m256i) <= len; i += sizeof(__m256i)) { - __m256i latin = _mm256_loadu_si256((const __m256i *)(data + i)); - uint32_t non_ascii = _mm256_movemask_epi8(latin); - answer += count_ones(non_ascii); - } - } - return answer + scalar::latin1::utf8_length_from_latin1( - reinterpret_cast(data + i), len - i); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::utf8_length_from_utf32( - const char32_t *input, size_t length) const noexcept { - return utf32::utf8_length_from_utf32(input, length); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::utf16_length_from_utf32( - const char32_t *input, size_t length) const noexcept { - const __m256i v_00000000 = _mm256_setzero_si256(); - const __m256i v_ffff0000 = _mm256_set1_epi32((uint32_t)0xffff0000); - size_t pos = 0; - size_t count = 0; - for (; pos + 8 <= length; pos += 8) { - __m256i in = _mm256_loadu_si256((__m256i *)(input + pos)); - const __m256i surrogate_bytemask = - _mm256_cmpeq_epi32(_mm256_and_si256(in, v_ffff0000), v_00000000); - const uint32_t surrogate_bitmask = - static_cast(_mm256_movemask_epi8(surrogate_bytemask)); - size_t surrogate_count = (32 - count_ones(surrogate_bitmask)) / 4; - count += 8 + surrogate_count; - } - return count + - scalar::utf32::utf16_length_from_utf32(input + pos, length - pos); -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::utf32_length_from_utf8( - const char *input, size_t length) const noexcept { - return utf8::count_code_points(input, length); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_BASE64 -simdutf_warn_unused result implementation::base64_to_binary( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - if (options & base64_url) { - if (options == base64_options::base64_url_accept_garbage) { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } else { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } - } else { - if (options == base64_options::base64_default_accept_garbage) { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } else { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } - } -} - -simdutf_warn_unused full_result implementation::base64_to_binary_details( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - if (options & base64_url) { - if (options == base64_options::base64_url_accept_garbage) { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } else { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } - } else { - if (options == base64_options::base64_default_accept_garbage) { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } else { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } - } -} - -simdutf_warn_unused result implementation::base64_to_binary( - const char16_t *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - if (options & base64_url) { - if (options == base64_options::base64_url_accept_garbage) { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } else { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } - } else { - if (options == base64_options::base64_default_accept_garbage) { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } else { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } - } -} - -simdutf_warn_unused full_result implementation::base64_to_binary_details( - const char16_t *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - if (options & base64_url) { - if (options == base64_options::base64_url_accept_garbage) { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } else { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } - } else { - if (options == base64_options::base64_default_accept_garbage) { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } else { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } - } -} - -size_t implementation::binary_to_base64(const char *input, size_t length, - char *output, - base64_options options) const noexcept { - if (options & base64_url) { - return encode_base64(output, input, length, options); - } else { - return encode_base64(output, input, length, options); - } -} -#endif // SIMDUTF_FEATURE_BASE64 -} // namespace haswell -} // namespace simdutf - -/* begin file src/simdutf/haswell/end.h */ -#if SIMDUTF_CAN_ALWAYS_RUN_HASWELL -// nothing needed. -#else -SIMDUTF_UNTARGET_REGION -#endif - -#undef SIMDUTF_SIMD_HAS_BYTEMASK - -#if SIMDUTF_GCC11ORMORE // workaround for - // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593 -SIMDUTF_POP_DISABLE_WARNINGS -#endif // end of workaround -/* end file src/simdutf/haswell/end.h */ -/* end file src/haswell/implementation.cpp */ -#endif -#if SIMDUTF_IMPLEMENTATION_PPC64 -/* begin file src/ppc64/implementation.cpp */ -/* begin file src/simdutf/ppc64/begin.h */ -// redefining SIMDUTF_IMPLEMENTATION to "ppc64" -// #define SIMDUTF_IMPLEMENTATION ppc64 -/* end file src/simdutf/ppc64/begin.h */ - -/* begin file src/ppc64/ppc64_utf16_to_utf8_tables.h */ -// Code generated automatically; DO NOT EDIT -// file generated by scripts/ppc64_convert_utf16_to_utf8.py -#ifndef PPC64_SIMDUTF_UTF16_TO_UTF8_TABLES_H -#define PPC64_SIMDUTF_UTF16_TO_UTF8_TABLES_H - -namespace simdutf { -namespace { -namespace tables { -namespace ppc64_utf16_to_utf8 { - -#if SIMDUTF_IS_BIG_ENDIAN -// 1 byte for length, 16 bytes for mask -const uint8_t pack_1_2_3_utf8_bytes[256][17] = { - {12, 1, 0, 16, 3, 2, 18, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80}, - {9, 3, 2, 18, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 0, 16, 3, 2, 18, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 17, 3, 2, 18, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {9, 1, 0, 16, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {6, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 0, 16, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 17, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {11, 1, 0, 16, 2, 18, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80}, - {8, 2, 18, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {10, 0, 16, 2, 18, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {9, 17, 2, 18, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {10, 1, 0, 16, 19, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {7, 19, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {9, 0, 16, 19, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 17, 19, 5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {9, 1, 0, 16, 3, 2, 18, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {6, 3, 2, 18, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 0, 16, 3, 2, 18, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 17, 3, 2, 18, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 1, 0, 16, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 0, 16, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 17, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {8, 1, 0, 16, 2, 18, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 2, 18, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 0, 16, 2, 18, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 17, 2, 18, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 1, 0, 16, 19, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 19, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 0, 16, 19, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 17, 19, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {11, 1, 0, 16, 3, 2, 18, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80}, - {8, 3, 2, 18, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {10, 0, 16, 3, 2, 18, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {9, 17, 3, 2, 18, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 1, 0, 16, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 0, 16, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 17, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {10, 1, 0, 16, 2, 18, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {7, 2, 18, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {9, 0, 16, 2, 18, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 17, 2, 18, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {9, 1, 0, 16, 19, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 19, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 0, 16, 19, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 17, 19, 4, 20, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {10, 1, 0, 16, 3, 2, 18, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {7, 3, 2, 18, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {9, 0, 16, 3, 2, 18, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 17, 3, 2, 18, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 1, 0, 16, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 0, 16, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 17, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {9, 1, 0, 16, 2, 18, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 2, 18, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 0, 16, 2, 18, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 17, 2, 18, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 1, 0, 16, 19, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 19, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 0, 16, 19, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 17, 19, 21, 7, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {9, 1, 0, 16, 3, 2, 18, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {6, 3, 2, 18, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 0, 16, 3, 2, 18, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 17, 3, 2, 18, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 1, 0, 16, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 0, 16, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 17, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {8, 1, 0, 16, 2, 18, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 2, 18, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 0, 16, 2, 18, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 17, 2, 18, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 1, 0, 16, 19, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 19, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 0, 16, 19, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 17, 19, 5, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {6, 1, 0, 16, 3, 2, 18, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 3, 2, 18, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 0, 16, 3, 2, 18, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 17, 3, 2, 18, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {3, 1, 0, 16, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80}, - {2, 0, 16, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {1, 17, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80}, - {5, 1, 0, 16, 2, 18, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {2, 2, 18, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {4, 0, 16, 2, 18, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {3, 17, 2, 18, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {4, 1, 0, 16, 19, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {1, 19, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80}, - {3, 0, 16, 19, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {2, 17, 19, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80}, - {8, 1, 0, 16, 3, 2, 18, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 3, 2, 18, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 0, 16, 3, 2, 18, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 17, 3, 2, 18, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 1, 0, 16, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {2, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {4, 0, 16, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {3, 17, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {7, 1, 0, 16, 2, 18, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 2, 18, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 0, 16, 2, 18, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 17, 2, 18, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {6, 1, 0, 16, 19, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 19, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 0, 16, 19, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 17, 19, 4, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {7, 1, 0, 16, 3, 2, 18, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 3, 2, 18, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 0, 16, 3, 2, 18, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 17, 3, 2, 18, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 1, 0, 16, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {1, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80}, - {3, 0, 16, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {2, 17, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80}, - {6, 1, 0, 16, 2, 18, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 2, 18, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 0, 16, 2, 18, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 17, 2, 18, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 1, 0, 16, 19, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {2, 19, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80}, - {4, 0, 16, 19, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {3, 17, 19, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {11, 1, 0, 16, 3, 2, 18, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80}, - {8, 3, 2, 18, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {10, 0, 16, 3, 2, 18, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {9, 17, 3, 2, 18, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 1, 0, 16, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 0, 16, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 17, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {10, 1, 0, 16, 2, 18, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {7, 2, 18, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {9, 0, 16, 2, 18, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 17, 2, 18, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {9, 1, 0, 16, 19, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 19, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 0, 16, 19, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 17, 19, 5, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 1, 0, 16, 3, 2, 18, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 3, 2, 18, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 0, 16, 3, 2, 18, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 17, 3, 2, 18, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 1, 0, 16, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {2, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {4, 0, 16, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {3, 17, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {7, 1, 0, 16, 2, 18, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 2, 18, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 0, 16, 2, 18, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 17, 2, 18, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {6, 1, 0, 16, 19, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 19, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 0, 16, 19, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 17, 19, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {10, 1, 0, 16, 3, 2, 18, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {7, 3, 2, 18, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {9, 0, 16, 3, 2, 18, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 17, 3, 2, 18, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 1, 0, 16, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 0, 16, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 17, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {9, 1, 0, 16, 2, 18, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 2, 18, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 0, 16, 2, 18, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 17, 2, 18, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 1, 0, 16, 19, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 19, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 0, 16, 19, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 17, 19, 4, 20, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {9, 1, 0, 16, 3, 2, 18, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 3, 2, 18, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 0, 16, 3, 2, 18, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 17, 3, 2, 18, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 1, 0, 16, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 0, 16, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 17, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {8, 1, 0, 16, 2, 18, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 2, 18, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 0, 16, 2, 18, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 17, 2, 18, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 1, 0, 16, 19, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 19, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 0, 16, 19, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 17, 19, 21, 6, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {10, 1, 0, 16, 3, 2, 18, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {7, 3, 2, 18, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {9, 0, 16, 3, 2, 18, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 17, 3, 2, 18, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 1, 0, 16, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 0, 16, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 17, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {9, 1, 0, 16, 2, 18, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 2, 18, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 0, 16, 2, 18, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 17, 2, 18, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 1, 0, 16, 19, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 19, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 0, 16, 19, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 17, 19, 5, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 1, 0, 16, 3, 2, 18, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 3, 2, 18, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 0, 16, 3, 2, 18, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 17, 3, 2, 18, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 1, 0, 16, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {1, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80}, - {3, 0, 16, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {2, 17, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80}, - {6, 1, 0, 16, 2, 18, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 2, 18, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 0, 16, 2, 18, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 17, 2, 18, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 1, 0, 16, 19, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {2, 19, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80}, - {4, 0, 16, 19, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {3, 17, 19, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {9, 1, 0, 16, 3, 2, 18, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 3, 2, 18, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 0, 16, 3, 2, 18, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 17, 3, 2, 18, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 1, 0, 16, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 0, 16, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 17, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {8, 1, 0, 16, 2, 18, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 2, 18, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 0, 16, 2, 18, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 17, 2, 18, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 1, 0, 16, 19, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 19, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 0, 16, 19, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 17, 19, 4, 20, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 1, 0, 16, 3, 2, 18, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 3, 2, 18, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 0, 16, 3, 2, 18, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 17, 3, 2, 18, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 1, 0, 16, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {2, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80}, - {4, 0, 16, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {3, 17, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {7, 1, 0, 16, 2, 18, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 2, 18, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 0, 16, 2, 18, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 17, 2, 18, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {6, 1, 0, 16, 19, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 19, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 0, 16, 19, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 17, 19, 21, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, -}; -#else -// 1 byte for length, 16 bytes for mask -const uint8_t pack_1_2_3_utf8_bytes[256][17] = { - {12, 0, 1, 17, 2, 3, 19, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80}, - {9, 2, 3, 19, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {11, 1, 17, 2, 3, 19, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80}, - {10, 16, 2, 3, 19, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {9, 0, 1, 17, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {6, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 1, 17, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 16, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {11, 0, 1, 17, 3, 19, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80}, - {8, 3, 19, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {10, 1, 17, 3, 19, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {9, 16, 3, 19, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {10, 0, 1, 17, 18, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {7, 18, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {9, 1, 17, 18, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 16, 18, 4, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {9, 0, 1, 17, 2, 3, 19, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {6, 2, 3, 19, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 1, 17, 2, 3, 19, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 16, 2, 3, 19, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 0, 1, 17, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 1, 17, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 16, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {8, 0, 1, 17, 3, 19, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 3, 19, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 1, 17, 3, 19, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 16, 3, 19, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 0, 1, 17, 18, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 18, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 1, 17, 18, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 16, 18, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {11, 0, 1, 17, 2, 3, 19, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80}, - {8, 2, 3, 19, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {10, 1, 17, 2, 3, 19, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {9, 16, 2, 3, 19, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 0, 1, 17, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 1, 17, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 16, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {10, 0, 1, 17, 3, 19, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {7, 3, 19, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {9, 1, 17, 3, 19, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 16, 3, 19, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {9, 0, 1, 17, 18, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 18, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 1, 17, 18, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 16, 18, 5, 21, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {10, 0, 1, 17, 2, 3, 19, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {7, 2, 3, 19, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {9, 1, 17, 2, 3, 19, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 16, 2, 3, 19, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 0, 1, 17, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 1, 17, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 16, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {9, 0, 1, 17, 3, 19, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 3, 19, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 1, 17, 3, 19, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 16, 3, 19, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 0, 1, 17, 18, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 18, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 1, 17, 18, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 16, 18, 20, 6, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {9, 0, 1, 17, 2, 3, 19, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {6, 2, 3, 19, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 1, 17, 2, 3, 19, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 16, 2, 3, 19, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 0, 1, 17, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 1, 17, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 16, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {8, 0, 1, 17, 3, 19, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 3, 19, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 1, 17, 3, 19, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 16, 3, 19, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 0, 1, 17, 18, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 18, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 1, 17, 18, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 16, 18, 4, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {6, 0, 1, 17, 2, 3, 19, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 2, 3, 19, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 1, 17, 2, 3, 19, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 16, 2, 3, 19, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {3, 0, 1, 17, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80}, - {2, 1, 17, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {1, 16, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80}, - {5, 0, 1, 17, 3, 19, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {2, 3, 19, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {4, 1, 17, 3, 19, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {3, 16, 3, 19, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {4, 0, 1, 17, 18, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {1, 18, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80}, - {3, 1, 17, 18, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {2, 16, 18, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80}, - {8, 0, 1, 17, 2, 3, 19, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 2, 3, 19, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 1, 17, 2, 3, 19, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 16, 2, 3, 19, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 0, 1, 17, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {2, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {4, 1, 17, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {3, 16, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {7, 0, 1, 17, 3, 19, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 3, 19, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 1, 17, 3, 19, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 16, 3, 19, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {6, 0, 1, 17, 18, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 18, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 1, 17, 18, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 16, 18, 5, 21, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {7, 0, 1, 17, 2, 3, 19, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 2, 3, 19, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 1, 17, 2, 3, 19, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 16, 2, 3, 19, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 0, 1, 17, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {1, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80}, - {3, 1, 17, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {2, 16, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80}, - {6, 0, 1, 17, 3, 19, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 3, 19, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 1, 17, 3, 19, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 16, 3, 19, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 0, 1, 17, 18, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {2, 18, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80}, - {4, 1, 17, 18, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {3, 16, 18, 20, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {11, 0, 1, 17, 2, 3, 19, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80}, - {8, 2, 3, 19, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {10, 1, 17, 2, 3, 19, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {9, 16, 2, 3, 19, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 0, 1, 17, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 1, 17, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 16, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {10, 0, 1, 17, 3, 19, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {7, 3, 19, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {9, 1, 17, 3, 19, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 16, 3, 19, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {9, 0, 1, 17, 18, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 18, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 1, 17, 18, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 16, 18, 4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 0, 1, 17, 2, 3, 19, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 2, 3, 19, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 1, 17, 2, 3, 19, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 16, 2, 3, 19, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 0, 1, 17, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {2, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {4, 1, 17, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {3, 16, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {7, 0, 1, 17, 3, 19, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 3, 19, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 1, 17, 3, 19, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 16, 3, 19, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {6, 0, 1, 17, 18, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 18, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 1, 17, 18, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 16, 18, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {10, 0, 1, 17, 2, 3, 19, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {7, 2, 3, 19, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {9, 1, 17, 2, 3, 19, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 16, 2, 3, 19, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 0, 1, 17, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 1, 17, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 16, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {9, 0, 1, 17, 3, 19, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 3, 19, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 1, 17, 3, 19, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 16, 3, 19, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 0, 1, 17, 18, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 18, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 1, 17, 18, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 16, 18, 5, 21, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {9, 0, 1, 17, 2, 3, 19, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 2, 3, 19, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 1, 17, 2, 3, 19, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 16, 2, 3, 19, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 0, 1, 17, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 1, 17, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 16, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {8, 0, 1, 17, 3, 19, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 3, 19, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 1, 17, 3, 19, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 16, 3, 19, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 0, 1, 17, 18, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 18, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 1, 17, 18, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 16, 18, 20, 7, 23, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {10, 0, 1, 17, 2, 3, 19, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, - {7, 2, 3, 19, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {9, 1, 17, 2, 3, 19, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 16, 2, 3, 19, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 0, 1, 17, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 1, 17, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 16, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {9, 0, 1, 17, 3, 19, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 3, 19, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 1, 17, 3, 19, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 16, 3, 19, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {8, 0, 1, 17, 18, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 18, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 1, 17, 18, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 16, 18, 4, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 0, 1, 17, 2, 3, 19, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 2, 3, 19, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 1, 17, 2, 3, 19, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 16, 2, 3, 19, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 0, 1, 17, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {1, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80}, - {3, 1, 17, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {2, 16, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80}, - {6, 0, 1, 17, 3, 19, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 3, 19, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 1, 17, 3, 19, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 16, 3, 19, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 0, 1, 17, 18, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {2, 18, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80}, - {4, 1, 17, 18, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {3, 16, 18, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {9, 0, 1, 17, 2, 3, 19, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 2, 3, 19, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 1, 17, 2, 3, 19, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {7, 16, 2, 3, 19, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 0, 1, 17, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 1, 17, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 16, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {8, 0, 1, 17, 3, 19, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 3, 19, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 1, 17, 3, 19, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 16, 3, 19, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 0, 1, 17, 18, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 18, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 1, 17, 18, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 16, 18, 5, 21, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {8, 0, 1, 17, 2, 3, 19, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {5, 2, 3, 19, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {7, 1, 17, 2, 3, 19, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {6, 16, 2, 3, 19, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 0, 1, 17, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {2, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80}, - {4, 1, 17, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {3, 16, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {7, 0, 1, 17, 3, 19, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80}, - {4, 3, 19, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {6, 1, 17, 3, 19, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {5, 16, 3, 19, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {6, 0, 1, 17, 18, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {3, 18, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, - {5, 1, 17, 18, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80}, - {4, 16, 18, 20, 22, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80}, -}; -#endif // SIMDUTF_IS_BIG_ENDIAN -} // namespace ppc64_utf16_to_utf8 -} // namespace tables -} // unnamed namespace -} // namespace simdutf - -#endif // PPC64_SIMDUTF_UTF16_TO_UTF8_TABLES_H -/* end file src/ppc64/ppc64_utf16_to_utf8_tables.h */ - -namespace simdutf { -namespace ppc64 { -namespace { -#ifndef SIMDUTF_PPC64_H - #error "ppc64.h must be included" -#endif -using namespace simd; - -simdutf_really_inline bool is_ascii(const simd8x64 &input) { - // careful: 0x80 is not ascii. - return input.reduce_or().saturating_sub(0b01111111u).bits_not_set_anywhere(); -} - -simdutf_really_inline simd8 -must_be_2_3_continuation(const simd8 prev2, - const simd8 prev3) { - simd8 is_third_byte = - prev2.saturating_sub(0xe0u - 0x80); // Only 111_____ will be >= 0x80 - simd8 is_fourth_byte = - prev3.saturating_sub(0xf0u - 0x80); // Only 1111____ will be >= 0x80 - // Caller requires a bool (all 1's). All values resulting from the subtraction - // will be <= 64, so signed comparison is fine. - return simd8(is_third_byte | is_fourth_byte); -} - -/// ErrorReporting describes behaviour of a vectorized procedure regarding error -/// checking -enum class ErrorReporting { - precise, // the procedure will report *approximate* or *precise* error - // position - at_the_end, // the procedure will only inform about an error after scanning - // the whole input (or its significant portion) - none, // no error checking is done, we assume valid inputs -}; - -#if SIMDUTF_FEATURE_UTF16 -/* begin file src/ppc64/ppc64_validate_utf16.cpp */ -template -simd8 utf16_gather_high_bytes(const simd16 in0, - const simd16 in1) { - if (big_endian) { - const vec_u8_t pack_high = { - 0, 2, 4, 6, 8, 10, 12, 14, // in0 - 16, 18, 20, 22, 24, 26, 28, 30 // in1 - }; - - return vec_perm(vec_u8_t(in0.value), vec_u8_t(in1.value), pack_high); - } else { - const vec_u8_t pack_high = { - 1, 3, 5, 7, 9, 11, 13, 15, // in0 - 17, 19, 21, 23, 25, 27, 29, 31 // in1 - }; - - return vec_perm(vec_u8_t(in0.value), vec_u8_t(in1.value), pack_high); - } -} -/* end file src/ppc64/ppc64_validate_utf16.cpp */ -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_LATIN1 && SIMDUTF_FEATURE_UTF8 -/* begin file src/ppc64/ppc64_convert_latin1_to_utf8.cpp */ -/* - * reads a vector of uint16 values - * bits after 11th are ignored - * first 11 bits are encoded into utf8 - * !important! utf8_output must have at least 16 writable bytes - */ -simdutf_really_inline void -write_v_u16_11bits_to_utf8(const vector_u16 v_u16, char *&utf8_output, - const vector_u8 one_byte_bytemask, - const uint16_t one_byte_bitmask) { - - // 0b1100_0000_1000_0000 - const auto v_c080 = vector_u16(0xc080); - // 0b0011_1111_0000_0000 - const auto v_1f00 = vector_u16(0x1f00); - // 0b0000_0000_0011_1111 - const auto v_003f = vector_u16(0x003f); - - // 1. prepare 2-byte values - // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 - // expected output : [110a|aaaa|10bb|bbbb] x 8 - - // t0 = [0000|0000|00bb|bbbb] - const auto t0 = v_u16 & v_003f; - // t1 = [000a|aaaa|bbbb|bb00] - const auto t1 = v_u16.shl<2>(); - // t2 = [000a|aaaa|00bb|bbbb] - const auto t2 = select(v_1f00, t1, t0); - // t3 = [110a|aaaa|10bb|bbbb] - const auto t3 = t2 | v_c080; - - // 2. merge ASCII and 2-byte codewords - const auto utf8_unpacked1 = - select(one_byte_bytemask, as_vector_u8(v_u16), as_vector_u8(t3)); - -#if SIMDUTF_IS_BIG_ENDIAN - const auto tmp = as_vector_u16(utf8_unpacked1).swap_bytes(); -#else - const auto tmp = as_vector_u16(utf8_unpacked1); -#endif // SIMDUTF_IS_BIG_ENDIAN - const auto utf8_unpacked = as_vector_u8(tmp); - - // 3. prepare bitmask for 8-bit lookup - // one_byte_bitmask = hhggffeeddccbbaa -- the bits are doubled (h - MSB, a - // - LSB) - const uint16_t m0 = one_byte_bitmask & 0x5555; // m0 = 0h0g0f0e0d0c0b0a - const uint16_t m1 = static_cast(m0 >> 7); // m1 = 00000000h0g0f0e0 - const uint8_t m2 = static_cast((m0 | m1) & 0xff); // m2 = hdgcfbea - // 4. pack the bytes - const uint8_t *row = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0]; - const auto shuffle = vector_u8::load(row + 1); - const auto utf8_packed = shuffle.lookup_16(utf8_unpacked); - - // 5. store bytes - utf8_packed.store(utf8_output); - - // 6. adjust pointers - utf8_output += row[0]; -} - -inline void write_v_u16_11bits_to_utf8(const vector_u16 v_u16, - char *&utf8_output, - const vector_u16 v_0000, - const vector_u16 v_ff80) { - // no bits set above 7th bit - const auto one_byte_bytemask = (v_u16 & v_ff80) == v_0000; - const uint16_t one_byte_bitmask = one_byte_bytemask.to_bitmask(); - - write_v_u16_11bits_to_utf8(v_u16, utf8_output, - as_vector_u8(one_byte_bytemask), one_byte_bitmask); -} - -std::pair -ppc64_convert_latin1_to_utf8(const char *latin_input, - const size_t latin_input_length, - char *utf8_output) { - const char *end = latin_input + latin_input_length; - - const auto v_0000 = vector_u16::zero(); - const auto v_00 = vector_u8::zero(); - - // 0b1111_1111_1000_0000 - const auto v_ff80 = vector_u16(0xff80); - -#if SIMDUTF_IS_BIG_ENDIAN - const auto latin_1_half_into_u16_byte_mask = - vector_u8(16, 0, 16, 1, 16, 2, 16, 3, 16, 4, 16, 5, 16, 6, 16, 7); - const auto latin_2_half_into_u16_byte_mask = - vector_u8(16, 8, 16, 9, 16, 10, 16, 11, 16, 12, 16, 13, 16, 14, 16, 15); -#else - const auto latin_1_half_into_u16_byte_mask = - vector_u8(0, 16, 1, 16, 2, 16, 3, 16, 4, 16, 5, 16, 6, 16, 7, 16); - const auto latin_2_half_into_u16_byte_mask = - vector_u8(8, 16, 9, 16, 10, 16, 11, 16, 12, 16, 13, 16, 14, 16, 15, 16); -#endif // SIMDUTF_IS_BIG_ENDIAN - - // each latin1 takes 1-2 utf8 bytes - // slow path writes useful 8-15 bytes twice (eagerly writes 16 bytes and then - // adjust the pointer) so the last write can exceed the utf8_output size by - // 8-1 bytes by reserving 8 extra input bytes, we expect the output to have - // 8-16 bytes free - while (end - latin_input >= 16 + 8) { - // Load 16 Latin1 characters (16 bytes) into a 128-bit register - const auto v_latin = vector_u8::load(latin_input); - - if (v_latin.is_ascii()) { // ASCII fast path!!!! - v_latin.store(utf8_output); - latin_input += 16; - utf8_output += 16; - continue; - } - - // assuming a/b are bytes and A/B are uint16 of the same value - // aaaa_aaaa_bbbb_bbbb -> AAAA_AAAA - const vector_u16 v_u16_latin_1_half = - as_vector_u16(latin_1_half_into_u16_byte_mask.lookup_32(v_latin, v_00)); - - // aaaa_aaaa_bbbb_bbbb -> BBBB_BBBB - const vector_u16 v_u16_latin_2_half = - as_vector_u16(latin_2_half_into_u16_byte_mask.lookup_32(v_latin, v_00)); - - write_v_u16_11bits_to_utf8(v_u16_latin_1_half, utf8_output, v_0000, v_ff80); - write_v_u16_11bits_to_utf8(v_u16_latin_2_half, utf8_output, v_0000, v_ff80); - latin_input += 16; - } - - if (end - latin_input >= 16) { - // Load 16 Latin1 characters (16 bytes) into a 128-bit register - const auto v_latin = vector_u8::load(latin_input); - - if (v_latin.is_ascii()) { // ASCII fast path!!!! - v_latin.store(utf8_output); - latin_input += 16; - utf8_output += 16; - } else { - // assuming a/b are bytes and A/B are uint16 of the same value - // aaaa_aaaa_bbbb_bbbb -> AAAA_AAAA - const auto v_u16_latin_1_half = as_vector_u16( - latin_1_half_into_u16_byte_mask.lookup_32(v_latin, v_00)); - - write_v_u16_11bits_to_utf8(v_u16_latin_1_half, utf8_output, v_0000, - v_ff80); - latin_input += 8; - } - } - - return std::make_pair(latin_input, utf8_output); -} -/* end file src/ppc64/ppc64_convert_latin1_to_utf8.cpp */ -#endif // SIMDUTF_FEATURE_LATIN1 && SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_LATIN1 && SIMDUTF_FEATURE_UTF16 -/* begin file src/ppc64/ppc64_convert_latin1_to_utf16.cpp */ -template -size_t ppc64_convert_latin1_to_utf16(const char *latin1_input, size_t len, - char16_t *utf16_output) { - const size_t rounded_len = align_down(len); - - for (size_t i = 0; i < rounded_len; i += vector_u8::ELEMENTS) { - const auto in = vector_u8::load(&latin1_input[i]); - in.store_bytes_as_utf16(&utf16_output[i]); - } - - return rounded_len; -} -/* end file src/ppc64/ppc64_convert_latin1_to_utf16.cpp */ -#endif // SIMDUTF_FEATURE_LATIN1 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_LATIN1 && SIMDUTF_FEATURE_UTF32 -/* begin file src/ppc64/ppc64_convert_latin1_to_utf32.cpp */ -std::pair -ppc64_convert_latin1_to_utf32(const char *buf, size_t len, - char32_t *utf32_output) { - const size_t rounded_len = align_down(len); - - for (size_t i = 0; i < rounded_len; i += vector_u8::ELEMENTS) { - const auto in = vector_u8::load(&buf[i]); - in.store_bytes_as_utf32(&utf32_output[i]); - } - - return std::make_pair(buf + rounded_len, utf32_output + rounded_len); -} -/* end file src/ppc64/ppc64_convert_latin1_to_utf32.cpp */ -#endif // SIMDUTF_FEATURE_LATIN1 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/ppc64/ppc64_convert_utf8_to_latin1.cpp */ -// depends on "tables/utf8_to_utf16_tables.h" - -// Convert up to 12 bytes from utf8 to latin1 using a mask indicating the -// end of the code points. Only the least significant 12 bits of the mask -// are accessed. -// It returns how many bytes were consumed (up to 12). -size_t convert_masked_utf8_to_latin1(const char *input, - uint64_t utf8_end_of_code_point_mask, - char *&latin1_output) { - // we use an approach where we try to process up to 12 input bytes. - // Why 12 input bytes and not 16? Because we are concerned with the size of - // the lookup tables. Also 12 is nicely divisible by two and three. - // - // - // Optimization note: our main path below is load-latency dependent. Thus it - // is maybe beneficial to have fast paths that depend on branch prediction but - // have less latency. This results in more instructions but, potentially, also - // higher speeds. - // - const auto in = vector_u8::load(input); - const uint16_t input_utf8_end_of_code_point_mask = - utf8_end_of_code_point_mask & - 0xfff; // we are only processing 12 bytes in case it is not all ASCII - if (utf8_end_of_code_point_mask == 0xfff) { - // We process the data in chunks of 12 bytes. - in.store(latin1_output); - latin1_output += 12; // We wrote 12 characters. - return 12; // We consumed 12 bytes. - } - /// We do not have a fast path available, so we fallback. - const uint8_t idx = - tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][0]; - const uint8_t consumed = - tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1]; - // this indicates an invalid input: - if (idx >= 64) { - return consumed; - } - // Here we should have (idx < 64), if not, there is a bug in the validation or - // elsewhere. SIX (6) input code-code units this is a relatively easy scenario - // we process SIX (6) input code-code units. The max length in bytes of six - // code code units spanning between 1 and 2 bytes each is 12 bytes. On - // processors where pdep/pext is fast, we might be able to use a small lookup - // table. - - const auto reshuffle = vector_u8::load(&tables::utf8_to_utf16::shufutf8[idx]); - const auto perm8 = reshuffle.lookup_32(in, vector_u8::zero()); -#if SIMDUTF_IS_BIG_ENDIAN - const auto perm16 = as_vector_u16(perm8).swap_bytes(); -#else - const auto perm16 = as_vector_u16(perm8); -#endif // SIMDUTF_IS_BIG_ENDIAN - const auto ascii = perm16 & uint16_t(0x7f); - const auto highbyte = perm16 & uint16_t(0x1f00); - const auto composed = ascii | highbyte.shr<2>(); - - const auto latin1_packed = vector_u16::pack(composed, composed); -#if defined(__clang__) - __attribute__((aligned(16))) char buf[16]; - latin1_packed.store(buf); - memcpy(latin1_output, buf, 6); -#else - // writing 8 bytes even though we only care about the first 6 bytes. - const auto tmp = vec_u64_t(latin1_packed.value); - memcpy(latin1_output, &tmp[0], 8); -#endif - latin1_output += 6; // We wrote 6 bytes. - return consumed; -} -/* end file src/ppc64/ppc64_convert_utf8_to_latin1.cpp */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -/* begin file src/ppc64/ppc64_convert_utf8_to_utf16.cpp */ -// depends on "tables/utf8_to_utf16_tables.h" - -// Convert up to 12 bytes from utf8 to utf16 using a mask indicating the -// end of the code points. Only the least significant 12 bits of the mask -// are accessed. -// It returns how many bytes were consumed (up to 12). -template -size_t convert_masked_utf8_to_utf16(const char *input, - uint64_t utf8_end_of_code_point_mask, - char16_t *&utf16_output) { - // we use an approach where we try to process up to 12 input bytes. - // Why 12 input bytes and not 16? Because we are concerned with the size of - // the lookup tables. Also 12 is nicely divisible by two and three. - // - // - // Optimization note: our main path below is load-latency dependent. Thus it - // is maybe beneficial to have fast paths that depend on branch prediction but - // have less latency. This results in more instructions but, potentially, also - // higher speeds. - // - // We first try a few fast paths. - const auto in = vector_u8::load(input); - const uint16_t input_utf8_end_of_code_point_mask = - utf8_end_of_code_point_mask & 0xfff; - if (utf8_end_of_code_point_mask == 0xfff) { - // We process the data in chunks of 12 bytes. - // Note: using 16 bytes is unsafe, see issue_ossfuzz_71218 - in.store_bytes_as_utf16(utf16_output); - utf16_output += 12; // We wrote 12 16-bit characters. - return 12; // We consumed 12 bytes. - } - if (((utf8_end_of_code_point_mask & 0xFFFF) == 0xaaaa)) { - // We want to take 8 2-byte UTF-8 code units and turn them into 8 2-byte - // UTF-16 code units. -#if SIMDUTF_IS_BIG_ENDIAN - const auto in16 = as_vector_u16(in); -#else - const auto in16 = as_vector_u16(in).swap_bytes(); -#endif // SIMDUTF_IS_BIG_ENDIAN - const auto lo = in16 & uint16_t(0x007f); - const auto hi = in16.shr<2>(); - - auto composed = select(uint16_t(0x1f00 >> 2), hi, lo); - if (!match_system(big_endian)) { - composed = composed.swap_bytes(); - } - - composed.store(utf16_output); - utf16_output += 8; // We wrote 16 bytes, 8 code points. - return 16; - } - if (input_utf8_end_of_code_point_mask == 0x924) { - // We want to take 4 3-byte UTF-8 code units and turn them into 4 2-byte - // UTF-16 code units. There is probably a more efficient sequence, but the - // following might do. - - // AltiVec: it might be done better, for now SSE translation - - const auto sh = - vector_u8(2, 1, 0, 16, 5, 4, 3, 16, 8, 7, 6, 16, 11, 10, 9, 16); -#if SIMDUTF_IS_BIG_ENDIAN - const auto perm = - as_vector_u32(sh.lookup_32(in, vector_u8::zero())).swap_bytes(); -#else - const auto perm = as_vector_u32(sh.lookup_32(in, vector_u8::zero())); -#endif // SIMDUTF_IS_BIG_ENDIAN - const auto b0 = perm & uint32_t(0x0000007f); - const auto b1 = select(uint32_t(0x00003f00 >> 2), perm.shr<2>(), b0); - const auto b2 = select(uint32_t(0x000f0000 >> 4), perm.shr<4>(), b1); - const auto composed = b2; - auto packed = vector_u32::pack(composed, composed); - - if (!match_system(big_endian)) { - packed = packed.swap_bytes(); - } - - packed.store(utf16_output); - utf16_output += 4; - return 12; - } - /// We do not have a fast path available, so we fallback. - - const uint8_t idx = - tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][0]; - const uint8_t consumed = - tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1]; - - if (idx < 64) { - // SIX (6) input code-code units - // this is a relatively easy scenario - // we process SIX (6) input code-code units. The max length in bytes of six - // code code units spanning between 1 and 2 bytes each is 12 bytes. On - // processors where pdep/pext is fast, we might be able to use a small - // lookup table. - const auto sh = vector_u8::load(&tables::utf8_to_utf16::shufutf8[idx]); -#if SIMDUTF_IS_BIG_ENDIAN - const auto perm = - as_vector_u16(sh.lookup_32(in, vector_u8::zero())).swap_bytes(); -#else - const auto perm = as_vector_u16(sh.lookup_32(in, vector_u8::zero())); -#endif // SIMDUTF_IS_BIG_ENDIAN - const auto b0 = perm & uint16_t(0x007f); - const auto b1 = perm & uint16_t(0x1f00); - - auto composed = b0 | b1.shr<2>(); - - if (!match_system(big_endian)) { - composed = composed.swap_bytes(); - } - - composed.store(utf16_output); - utf16_output += 6; // We wrote 12 bytes, 6 code points. - } else if (idx < 145) { - // FOUR (4) input code-code units - const auto sh = vector_u8::load(&tables::utf8_to_utf16::shufutf8[idx]); -#if SIMDUTF_IS_BIG_ENDIAN - const auto perm = - as_vector_u32(sh.lookup_32(in, vector_u8::zero())).swap_bytes(); -#else - const auto perm = as_vector_u32(sh.lookup_32(in, vector_u8::zero())); -#endif // SIMDUTF_IS_BIG_ENDIAN - const auto b0 = perm & uint32_t(0x0000007f); - const auto b1 = perm & uint32_t(0x00003f00); - const auto b2 = perm & uint32_t(0x000f0000); - - const auto composed = b0 | b1.shr<2>() | b2.shr<4>(); - - auto packed = vector_u32::pack(composed, composed); - - if (!match_system(big_endian)) { - packed = packed.swap_bytes(); - } - - packed.store(utf16_output); - utf16_output += 4; - } else if (idx < 209) { - // TWO (2) input code-code units - ////////////// - // There might be garbage inputs where a leading byte mascarades as a - // four-byte leading byte (by being followed by 3 continuation byte), but is - // not greater than 0xf0. This could trigger a buffer overflow if we only - // counted leading bytes of the form 0xf0 as generating surrogate pairs, - // without further UTF-8 validation. Thus we must be careful to ensure that - // only leading bytes at least as large as 0xf0 generate surrogate pairs. We - // do as at the cost of an extra mask. - ///////////// - const auto sh = vector_u8::load(&tables::utf8_to_utf16::shufutf8[idx]); -#if SIMDUTF_IS_BIG_ENDIAN - const auto perm = - as_vector_u32(sh.lookup_32(in, vector_u8::zero())).swap_bytes(); -#else - const auto perm = as_vector_u32(sh.lookup_32(in, vector_u8::zero())); -#endif // SIMDUTF_IS_BIG_ENDIAN - const auto ascii = perm & uint32_t(0x00000007f); - const auto middlebyte = perm & uint32_t(0x00003f00); - const auto middlebyte_shifted = middlebyte.shr<2>(); - - auto middlehighbyte = perm & uint32_t(0x003f0000); - // correct for spurious high bit - - const auto correct = (perm & uint32_t(0x00400000)).shr<1>(); - middlehighbyte = correct ^ middlehighbyte; - const auto middlehighbyte_shifted = middlehighbyte.shr<4>(); - // We deliberately carry the leading four bits in highbyte if they are - // present, we remove them later when computing hightenbits. - const auto highbyte = perm & uint32_t(0xff000000); - const auto highbyte_shifted = highbyte.shr<6>(); - // When we need to generate a surrogate pair (leading byte > 0xF0), then - // the corresponding 32-bit value in 'composed' will be greater than - // > (0xff00000>>6) or > 0x3c00000. This can be used later to identify the - // location of the surrogate pairs. - const auto composed = - ascii | middlebyte_shifted | highbyte_shifted | middlehighbyte_shifted; - - const auto composedminus = composed - uint32_t(0x10000); - const auto lowtenbits = composedminus & uint32_t(0x3ff); - // Notice the 0x3ff mask: - const auto hightenbits = composedminus.shr<10>() & uint32_t(0x3ff); - const auto lowtenbitsadd = lowtenbits + uint32_t(0xDC00); - const auto hightenbitsadd = hightenbits + uint32_t(0xD800); - const auto lowtenbitsaddshifted = lowtenbitsadd.shl<16>(); - auto surrogates = hightenbitsadd | lowtenbitsaddshifted; - - uint32_t basic_buffer[4]; - composed.store(basic_buffer); - uint32_t surrogate_buffer[4]; - surrogates.swap_bytes().store(surrogate_buffer); - - for (size_t i = 0; i < 3; i++) { - if (basic_buffer[i] > 0x3c00000) { - const auto ch0 = uint16_t(surrogate_buffer[i] & 0xffff); - const auto ch1 = uint16_t(surrogate_buffer[i] >> 16); - if (match_system(big_endian)) { - utf16_output[1] = scalar::u16_swap_bytes(ch0); - utf16_output[0] = scalar::u16_swap_bytes(ch1); - } else { - utf16_output[1] = ch0; - utf16_output[0] = ch1; - } - utf16_output += 2; - } else { - const auto chr = uint16_t(basic_buffer[i]); - if (match_system(big_endian)) { - utf16_output[0] = chr; - } else { - utf16_output[0] = scalar::u16_swap_bytes(chr); - } - - utf16_output++; - } - } - } else { - // here we know that there is an error but we do not handle errors - } - return consumed; -} -/* end file src/ppc64/ppc64_convert_utf8_to_utf16.cpp */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -/* begin file src/ppc64/ppc64_convert_utf8_to_utf32.cpp */ -// depends on "tables/utf8_to_utf16_tables.h" - -// Convert up to 12 bytes from utf8 to utf32 using a mask indicating the -// end of the code points. Only the least significant 12 bits of the mask -// are accessed. -// It returns how many bytes were consumed (up to 12). -size_t convert_masked_utf8_to_utf32(const char *input, - uint64_t utf8_end_of_code_point_mask, - char32_t *&utf32_output) { - // we use an approach where we try to process up to 12 input bytes. - // Why 12 input bytes and not 16? Because we are concerned with the size of - // the lookup tables. Also 12 is nicely divisible by two and three. - // - // - // Optimization note: our main path below is load-latency dependent. Thus it - // is maybe beneficial to have fast paths that depend on branch prediction but - // have less latency. This results in more instructions but, potentially, also - // higher speeds. - // - // We first try a few fast paths. - const auto in = vector_u8::load(input); - const uint16_t input_utf8_end_of_code_point_mask = - utf8_end_of_code_point_mask & 0xfff; - if (utf8_end_of_code_point_mask == 0xfff) { - // We process the data in chunks of 12 bytes. - in.store_bytes_as_utf32(utf32_output); - utf32_output += 12; // We wrote 12 32-bit characters. - return 12; // We consumed 12 bytes. - } - if (((utf8_end_of_code_point_mask & 0xffff) == 0xaaaa)) { - // We want to take 8 2-byte UTF-8 code units and turn them into 8 4-byte - // UTF-32 code units. -#if SIMDUTF_IS_BIG_ENDIAN - const auto perm = as_vector_u16(in); -#else - const auto perm = as_vector_u16(in).swap_bytes(); -#endif // SIMDUTF_IS_BIG_ENDIAN - // in = [110aaaaa|10bbbbbb] - // t0 = [00000000|00bbbbbb] - const auto t0 = perm & uint16_t(0x007f); - - // t1 = [00110aaa|aabbbbbb] - const auto t1 = perm.shr<2>(); - const auto composed = select(uint16_t(0x1f00 >> 2), t1, t0); - - const auto composed8 = as_vector_u8(composed); - composed8.store_words_as_utf32(utf32_output); - - utf32_output += 8; // We wrote 32 bytes, 8 code points. - return 16; - } - if (input_utf8_end_of_code_point_mask == 0x924) { - // We want to take 4 3-byte UTF-8 code units and turn them into 4 4-byte - // UTF-32 code units. -#if SIMDUTF_IS_BIG_ENDIAN - const auto sh = - vector_u8(-1, 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11); -#else - const auto sh = - vector_u8(2, 1, 0, -1, 5, 4, 3, -1, 8, 7, 6, -1, 11, 10, 9, -1); -#endif // SIMDUTF_IS_BIG_ENDIAN - const auto perm = as_vector_u32(sh.lookup_32(in, vector_u8::zero())); - - // in = [1110aaaa|10bbbbbb|10cccccc] - - // t0 = [00000000|00000000|00cccccc] - const auto t0 = perm & uint32_t(0x0000007f); - - // t2 = [00000000|0000bbbb|bbcccccc] - const auto t1 = perm.shr<2>(); - const auto t2 = select(uint32_t(0x00003f00 >> 2), t1, t0); - - // t4 = [00000000|aaaabbbb|bbcccccc] - const auto t3 = perm.shr<4>(); - const auto t4 = select(uint32_t(0x0f0000 >> 4), t3, t2); - - t4.store(utf32_output); - utf32_output += 4; - return 12; - } - /// We do not have a fast path available, so we fallback. - - const uint8_t idx = - tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][0]; - const uint8_t consumed = - tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1]; - if (idx < 64) { - // SIX (6) input code-code units - // this is a relatively easy scenario - // we process SIX (6) input code-code units. The max length in bytes of six - // code code units spanning between 1 and 2 bytes each is 12 bytes. On - // processors where pdep/pext is fast, we might be able to use a small - // lookup table. - const auto sh = vector_u8::load(&tables::utf8_to_utf16::shufutf8[idx]); -#if SIMDUTF_IS_BIG_ENDIAN - const auto perm = - as_vector_u16(sh.lookup_32(in, vector_u8::zero())).swap_bytes(); -#else - const auto perm = as_vector_u16(sh.lookup_32(in, vector_u8::zero())); -#endif // SIMDUTF_IS_BIG_ENDIAN - const auto ascii = perm & uint16_t(0x7f); - const auto highbyte = perm & uint16_t(0x1f00); - const auto composed = ascii | highbyte.shr<2>(); - - as_vector_u8(composed).store_words_as_utf32(utf32_output); - utf32_output += 6; // We wrote 12 bytes, 6 code points. - } else if (idx < 145) { - // FOUR (4) input code-code units - const auto sh = vector_u8::load(&tables::utf8_to_utf16::shufutf8[idx]); -#if SIMDUTF_IS_BIG_ENDIAN - const auto perm = - as_vector_u32(sh.lookup_32(in, vector_u8::zero())).swap_bytes(); -#else - const auto perm = as_vector_u32(sh.lookup_32(in, vector_u8::zero())); -#endif // SIMDUTF_IS_BIG_ENDIAN - const auto ascii = perm & uint32_t(0x7f); - const auto middlebyte = perm & uint32_t(0x3f00); - const auto middlebyte_shifted = middlebyte.shr<2>(); - const auto highbyte = perm & uint32_t(0x0f0000); - const auto highbyte_shifted = highbyte.shr<4>(); - const auto composed = ascii | middlebyte_shifted | highbyte_shifted; - - composed.store(utf32_output); - utf32_output += 4; - } else if (idx < 209) { - // TWO (2) input code-code units - const auto sh = vector_u8::load(&tables::utf8_to_utf16::shufutf8[idx]); -#if SIMDUTF_IS_BIG_ENDIAN - const auto perm = - as_vector_u32(sh.lookup_32(in, vector_u8::zero())).swap_bytes(); -#else - const auto perm = as_vector_u32(sh.lookup_32(in, vector_u8::zero())); -#endif // SIMDUTF_IS_BIG_ENDIAN - const auto ascii = perm & uint32_t(0x0000007f); - const auto middlebyte = perm & uint32_t(0x3f00); - const auto middlebyte_shifted = middlebyte.shr<2>(); - auto middlehighbyte = perm & uint32_t(0x003f0000); - // correct for spurious high bit - const auto correct0 = perm & uint32_t(0x00400000); - const auto correct = correct0.shr<1>(); - middlehighbyte = correct ^ middlehighbyte; - const auto middlehighbyte_shifted = middlehighbyte.shr<4>(); - const auto highbyte = perm & uint32_t(0x07000000); - const auto highbyte_shifted = highbyte.shr<6>(); - const auto composed = - ascii | middlebyte_shifted | highbyte_shifted | middlehighbyte_shifted; - composed.store(utf32_output); - utf32_output += 3; - } else { - // here we know that there is an error but we do not handle errors - } - return consumed; -} -/* end file src/ppc64/ppc64_convert_utf8_to_utf32.cpp */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/ppc64/ppc64_convert_utf16_to_latin1.cpp */ -struct utf16_to_latin1_t { - error_code err; - const char16_t *input; - char *output; -}; - -template -utf16_to_latin1_t ppc64_convert_utf16_to_latin1(const char16_t *buf, size_t len, - char *latin1_output) { - const char16_t *end = buf + len; - while (end - buf >= 8) { - // Load 8 x UTF-16 characters - auto in = vector_u8::load(buf); - - // Move low bytes of UTF-16 chars to lower half of `in` - // and upper bytes to upper half of `in`. - if (!match_system(big_endian)) { - const auto perm = - vector_u8(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15); - in = perm.lookup_16(in); - } else { - const auto perm = - vector_u8(1, 3, 5, 7, 9, 11, 13, 15, 0, 2, 4, 6, 8, 10, 12, 14); - in = perm.lookup_16(in); - } - - // AltiVec-specific -#if defined(__clang__) - __attribute__((aligned(16))) uint64_t tmp[8]; - in.store(tmp); - #if SIMDUTF_IS_BIG_ENDIAN - memcpy(latin1_output, &tmp[0], 8); - const uint64_t upper = tmp[1]; - #else - memcpy(latin1_output, &tmp[1], 8); - const uint64_t upper = tmp[0]; - #endif // SIMDUTF_IS_BIG_ENDIAN -#else - const auto tmp = vec_u64_t(in.value); - #if SIMDUTF_IS_BIG_ENDIAN - memcpy(latin1_output, &tmp[0], 8); - const uint64_t upper = tmp[1]; - #else - memcpy(latin1_output, &tmp[1], 8); - const uint64_t upper = tmp[0]; - #endif // SIMDUTF_IS_BIG_ENDIAN -#endif // defined(__clang__) - // AltiVec - - if (simdutf_unlikely(upper)) { - uint8_t bytes[8]; - memcpy(bytes, &upper, 8); - for (size_t k = 0; k < 8; k++) { - if (bytes[k] != 0) { - return utf16_to_latin1_t{error_code::TOO_LARGE, buf + k, - latin1_output}; - } - } - } else { - // Adjust pointers for next iteration - buf += 8; - latin1_output += 8; - } - } // while - - return utf16_to_latin1_t{error_code::SUCCESS, buf, latin1_output}; -} -/* end file src/ppc64/ppc64_convert_utf16_to_latin1.cpp */ -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF8 -/* begin file src/ppc64/ppc64_convert_utf16_to_utf8.cpp */ -/* - The vectorized algorithm works on single SSE register i.e., it - loads eight 16-bit code units. - - We consider three cases: - 1. an input register contains no surrogates and each value - is in range 0x0000 .. 0x07ff. - 2. an input register contains no surrogates and values are - is in range 0x0000 .. 0xffff. - 3. an input register contains surrogates --- i.e. codepoints - can have 16 or 32 bits. - - Ad 1. - - When values are less than 0x0800, it means that a 16-bit code unit - can be converted into: 1) single UTF8 byte (when it is an ASCII - char) or 2) two UTF8 bytes. - - For this case we do only some shuffle to obtain these 2-byte - codes and finally compress the whole SSE register with a single - shuffle. - - We need 256-entry lookup table to get a compression pattern - and the number of output bytes in the compressed vector register. - Each entry occupies 17 bytes. - - Ad 2. - - When values fit in 16-bit code units, but are above 0x07ff, then - a single word may produce one, two or three UTF8 bytes. - - We prepare data for all these three cases in two registers. - The first register contains lower two UTF8 bytes (used in all - cases), while the second one contains just the third byte for - the three-UTF8-bytes case. - - Finally these two registers are interleaved forming eight-element - array of 32-bit values. The array spans two SSE registers. - The bytes from the registers are compressed using two shuffles. - - We need 256-entry lookup table to get a compression pattern - and the number of output bytes in the compressed vector register. - Each entry occupies 17 bytes. - - - To summarize: - - We need two 256-entry tables that have 8704 bytes in total. -*/ - -// Auxiliary procedure used by UTF-16 and UTF-32 into UTF-8. -// Note the pointer is passed by reference, it is updated by the procedure. -template -simdutf_really_inline void ppc64_convert_utf16_to_1_2_3_bytes_of_utf8( - const vector_u16 in, uint16_t one_byte_bitmask, - const T one_or_two_bytes_bytemask, uint16_t one_or_two_bytes_bitmask, - char *&utf8_output) { - // case: code units from register produce either 1, 2 or 3 UTF-8 bytes -#if SIMDUTF_IS_BIG_ENDIAN - const auto dup_lsb = - vector_u8(1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15); -#else - const auto dup_lsb = - vector_u8(0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14); -#endif // SIMDUTF_IS_BIG_ENDIAN - - /* In this branch we handle three cases: - 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - - single UFT-8 byte - 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two - UTF-8 bytes - 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - - three UTF-8 bytes - - We expand the input word (16-bit) into two code units (32-bit), thus - we have room for four bytes. However, we need five distinct bit - layouts. Note that the last byte in cases #2 and #3 is the same. - - We precompute byte 1 for case #1 and the common byte for cases #2 & #3 - in register t2. - - We precompute byte 1 for case #3 and -- **conditionally** -- precompute - either byte 1 for case #2 or byte 2 for case #3. Note that they - differ by exactly one bit. - - Finally from these two code units we build proper UTF-8 sequence, taking - into account the case (i.e, the number of bytes to write). - */ - /** - * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: - * t2 => [0ccc|cccc] [10cc|cccc] - * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) - */ - // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - const auto t0 = as_vector_u16(dup_lsb.lookup_16(as_vector_u8(in))); - - // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] - const auto t1 = t0 & uint16_t(0b0011111101111111); - // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - const auto t2 = t1 | uint16_t(0b1000000000000000); - - // in = [aaaa|bbbb|bbcc|cccc] - // a0 = [0000|0000|0000|aaaa] - const auto a0 = in.shr<12>(); - // b0 = [aabb|bbbb|cccc|cc00] - const auto b0 = in.shl<2>(); - // s0 = [00bb|bbbb|00cc|cccc] - const auto s0 = select(uint16_t(0x3f00), b0, a0); - - // s3 = [11bb|bbbb|1110|aaaa] - const auto s3 = s0 | uint16_t(0b1100000011100000); - - const auto m0 = - ~as_vector_u16(one_or_two_bytes_bytemask) & uint16_t(0b0100000000000000); - const auto s4 = s3 ^ m0; - - // 4. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - const uint16_t mask = - (one_byte_bitmask & 0x5555) | (one_or_two_bytes_bitmask & 0xaaaa); - if (mask == 0) { - // We only have three-byte code units. Use fast path. -#if SIMDUTF_IS_BIG_ENDIAN - // Lookups produced by scripts/ppc64_convert_utf16_to_utf8.py - const auto shuffle0 = - vector_u8(1, 0, 16, 3, 2, 18, 5, 4, 20, 7, 6, 22, 9, 8, 24, 11); - const auto shuffle1 = vector_u8(10, 26, 13, 12, 28, 15, 14, 30, -1, -1, -1, - -1, -1, -1, -1, -1); -#else - const auto shuffle0 = - vector_u8(0, 1, 17, 2, 3, 19, 4, 5, 21, 6, 7, 23, 8, 9, 25, 10); - const auto shuffle1 = vector_u8(11, 27, 12, 13, 29, 14, 15, 31, -1, -1, -1, - -1, -1, -1, -1, -1); -#endif // SIMDUTF_IS_BIG_ENDIAN - const auto utf8_0 = shuffle0.lookup_32(as_vector_u8(s4), as_vector_u8(t2)); - const auto utf8_1 = shuffle1.lookup_32(as_vector_u8(s4), as_vector_u8(t2)); - - utf8_0.store(utf8_output); - utf8_output += 16; - utf8_1.store(utf8_output); - utf8_output += 8; - return; - } - - const uint8_t mask0 = uint8_t(mask); - - const uint8_t *row0 = - &simdutf::tables::ppc64_utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; - const auto shuffle0 = vector_u8::load(row0 + 1); - - const auto utf8_0 = shuffle0.lookup_32(as_vector_u8(s4), as_vector_u8(t2)); - const uint8_t mask1 = static_cast(mask >> 8); - - const uint8_t *row1 = - &simdutf::tables::ppc64_utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; - const auto shuffle1 = vector_u8::load(row1 + 1) + uint8_t(8); - const auto utf8_1 = shuffle1.lookup_32(as_vector_u8(s4), as_vector_u8(t2)); - - utf8_0.store(utf8_output); - utf8_output += row0[0]; - utf8_1.store(utf8_output); - utf8_output += row1[0]; -} - -struct utf16_to_utf8_t { - error_code err; - const char16_t *input; - char *output; -}; - -/* - Returns utf16_to_utf8_t value - A scalar routine should carry on the conversion of the tail, - iff there was no error. -*/ -template -utf16_to_utf8_t ppc64_convert_utf16_to_utf8(const char16_t *buf, size_t len, - char *utf8_output) { - - const char16_t *end = buf + len; - - const auto v_f800 = vector_u16(0xf800); - const auto v_d800 = vector_u16(0xd800); - const size_t safety_margin = - 12; // to avoid overruns, see issue - // https://github.com/simdutf/simdutf/issues/92 - - while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { - auto in = vector_u16::load(buf); - if (not match_system(big_endian)) { - in = in.swap_bytes(); - } - // a single 16-bit UTF-16 word can yield 1, 2 or 3 UTF-8 bytes - if (in.is_ascii()) { - auto nextin = vector_u16::load(buf + vector_u16::ELEMENTS); - if (not match_system(big_endian)) { - nextin = nextin.swap_bytes(); - } - - if (nextin.is_ascii()) { - // 1. pack the bytes - const auto utf8_packed = vector_u16::pack(in, nextin); - // 2. store (16 bytes) - utf8_packed.store(utf8_output); - // 3. adjust pointers - buf += 16; - utf8_output += 16; - continue; // we are done for this round! - } - - // next block is not ASCII - const auto utf8_packed = vector_u16::pack(in, in); - // 2. store (16 bytes) - utf8_packed.store(utf8_output); - // 3. adjust pointers - buf += 8; - utf8_output += 8; - in = nextin; - // fallback - } - - // no bits set above 7th bit - const auto one_byte_bytemask = in < uint16_t(1 << 7); - const uint16_t one_byte_bitmask = one_byte_bytemask.to_bitmask(); - - // no bits set above 11th bit - const auto one_or_two_bytes_bytemask = in < uint16_t(1 << 11); - const uint16_t one_or_two_bytes_bitmask = - one_or_two_bytes_bytemask.to_bitmask(); - - if (one_or_two_bytes_bitmask == 0xffff) { - write_v_u16_11bits_to_utf8( - in, utf8_output, as_vector_u8(one_byte_bytemask), one_byte_bitmask); - buf += 8; - continue; - } - - // 1. Check if there are any surrogate word in the input chunk. - // We have also to deal with situation when there is a surrogate word - // at the end of a chunk. - const auto surrogates_bytemask = (in & v_f800) == v_d800; - - // bitmask = 0x0000 if there are no surrogates - // = 0xc000 if the last word is a surrogate - const uint16_t surrogates_bitmask = surrogates_bytemask.to_bitmask(); - // It might seem like checking for surrogates_bitmask == 0xc000 could help. - // However, it is likely an uncommon occurrence. - if (surrogates_bitmask == 0x0000) { - ppc64_convert_utf16_to_1_2_3_bytes_of_utf8( - in, one_byte_bitmask, one_or_two_bytes_bytemask, - one_or_two_bytes_bitmask, utf8_output); - - buf += 8; - // surrogate pair(s) in a register - } else { - // Let us do a scalar fallback. - // It may seem wasteful to use scalar code, but being efficient with SIMD - // in the presence of surrogate pairs may require non-trivial tables. - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint16_t word = not match_system(big_endian) - ? scalar::u16_swap_bytes(buf[k]) - : buf[k]; - if ((word & 0xFF80) == 0) { - *utf8_output++ = uint8_t(word); - } else if ((word & 0xF800) == 0) { - *utf8_output++ = uint8_t((word >> 6) | 0b11000000); - *utf8_output++ = uint8_t((word & 0b111111) | 0b10000000); - } else if ((word & 0xF800) != 0xD800) { - *utf8_output++ = uint8_t((word >> 12) | 0b11100000); - *utf8_output++ = uint8_t(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = uint8_t((word & 0b111111) | 0b10000000); - } else { - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - uint16_t next_word = not match_system(big_endian) - ? scalar::u16_swap_bytes(buf[k + 1]) - : buf[k + 1]; - k++; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if ((diff | diff2) > 0x3FF) { - return utf16_to_utf8_t{error_code::SURROGATE, buf + k - 1, - utf8_output}; - } - uint32_t value = (diff << 10) + diff2 + 0x10000; - *utf8_output++ = uint8_t((value >> 18) | 0b11110000); - *utf8_output++ = uint8_t(((value >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = uint8_t(((value >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = uint8_t((value & 0b111111) | 0b10000000); - } - } - buf += k; - } - } // while - - return utf16_to_utf8_t{error_code::SUCCESS, buf, utf8_output}; -} -/* end file src/ppc64/ppc64_convert_utf16_to_utf8.cpp */ -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -/* begin file src/ppc64/ppc64_convert_utf16_to_utf32.cpp */ -struct utf16_to_utf32_t { - error_code err; // error code - const char16_t *input; // last position in input buffer - char32_t *output; // last position in output buffer -}; - -template -utf16_to_utf32_t ppc64_convert_utf16_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_output) { - const char16_t *end = buf + len; - - const auto v_f800 = vector_u16::splat(0xf800); - const auto v_d800 = vector_u16::splat(0xd800); - const auto zero = vector_u8::zero(); - - while (end - buf >= vector_u16::ELEMENTS) { - auto in = vector_u16::load(buf); - if (not match_system(big_endian)) { - in = in.swap_bytes(); - } - - // 1. Check if there are any surrogate word in the input chunk. - // We have also deal with situation when there is a surrogate word - // at the end of a chunk. - const auto surrogates_bytemask = (in & v_f800) == v_d800; - - // bitmask = 0x0000 if there are no surrogates - const uint16_t surrogates_bitmask = surrogates_bytemask.to_bitmask(); - - // It might seem like checking for surrogates_bitmask == 0xc000 could help. - // However, it is likely an uncommon occurrence. - if (surrogates_bitmask == 0x0000) { - // case: no surrogate pairs, extend 16-bit code units to 32-bit code units -#if SIMDUTF_IS_BIG_ENDIAN - const auto lo = - vector_u8(16, 16, 0, 1, 16, 16, 2, 3, 16, 16, 4, 5, 16, 16, 6, 7); - const auto hi = vector_u8(16, 16, 8 + 0, 8 + 1, 16, 16, 8 + 2, 8 + 3, 16, - 16, 8 + 4, 8 + 5, 16, 16, 8 + 6, 8 + 7); -#else - const auto lo = - vector_u8(0, 1, 16, 16, 2, 3, 16, 16, 4, 5, 16, 16, 6, 7, 16, 16); - const auto hi = vector_u8(8 + 0, 8 + 1, 16, 16, 8 + 2, 8 + 3, 16, 16, - 8 + 4, 8 + 5, 16, 16, 8 + 6, 8 + 7, 16, 16); -#endif // SIMDUTF_IS_BIG_ENDIAN - - const auto utf32_0 = lo.lookup_32(as_vector_u8(in), zero); - const auto utf32_1 = hi.lookup_32(as_vector_u8(in), zero); - - utf32_0.store(utf32_output); - utf32_1.store(utf32_output + 4); - utf32_output += 8; - buf += 8; - // surrogate pair(s) in a register - } else { - // Let us do a scalar fallback. - // It may seem wasteful to use scalar code, but being efficient with SIMD - // in the presence of surrogate pairs may require non-trivial tables. - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - const uint16_t word = not match_system(big_endian) - ? scalar::u16_swap_bytes(buf[k]) - : buf[k]; - if ((word & 0xF800) != 0xD800) { - *utf32_output++ = char32_t(word); - } else { - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - uint16_t next_word = not match_system(big_endian) - ? scalar::u16_swap_bytes(buf[k + 1]) - : buf[k + 1]; - k++; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if ((diff | diff2) > 0x3FF) { - return utf16_to_utf32_t{error_code::SURROGATE, buf + k - 1, - utf32_output}; - } - uint32_t value = (diff << 10) + diff2 + 0x10000; - *utf32_output++ = char32_t(value); - } - } - buf += k; - } - } // while - - return utf16_to_utf32_t{error_code::SUCCESS, buf, utf32_output}; -} -/* end file src/ppc64/ppc64_convert_utf16_to_utf32.cpp */ -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/ppc64/ppc64_convert_utf32_to_latin1.cpp */ -enum class ErrorChecking { disabled, enabled }; - -struct utf32_to_latin1_t { - error_code err; - const char32_t *input; - char *output; -}; - -template -utf32_to_latin1_t simdutf_really_inline ppc64_convert_utf32_to_latin1( - const char32_t *buf, size_t len, char *latin1_output) { - constexpr size_t N = vector_u32::ELEMENTS; - const size_t rounded_len = align_down<4 * N>(len); - - const auto high_bytes_mask = vector_u32::splat(0xFFFFFF00); - - for (size_t i = 0; i < rounded_len; i += 4 * N) { - const auto in1 = vector_u32::load(buf + 0 * N); - const auto in2 = vector_u32::load(buf + 1 * N); - const auto in3 = vector_u32::load(buf + 2 * N); - const auto in4 = vector_u32::load(buf + 3 * N); - - if (ec == ErrorChecking::enabled) { - const auto combined = in1 | in2 | in3 | in4; - const auto too_big = (combined & high_bytes_mask) != uint32_t(0); - - if (simdutf_unlikely(too_big.any())) { - // Scalar code will carry on from the beginning of the current block - // and report the exact error position. - return utf32_to_latin1_t{error_code::OTHER, buf, latin1_output}; - } - } - - // Note: element #1 contains 0, and is used to mask-out elements -#if SIMDUTF_IS_BIG_ENDIAN - const auto shlo = vector_u8(0 + 3, 4 + 3, 8 + 3, 12 + 3, 16 + 3, 20 + 3, - 24 + 3, 28 + 3, 1, 1, 1, 1, 1, 1, 1, 1); - const auto shhi = vector_u8(1, 1, 1, 1, 1, 1, 1, 1, 0 + 3, 4 + 3, 8 + 3, - 12 + 3, 16 + 3, 20 + 3, 24 + 3, 28 + 3); -#else - const auto shlo = - vector_u8(0, 4, 8, 12, 16, 20, 24, 28, 1, 1, 1, 1, 1, 1, 1, 1); - const auto shhi = - vector_u8(1, 1, 1, 1, 1, 1, 1, 1, 0, 4, 8, 12, 16, 20, 24, 28); -#endif // SIMDUTF_IS_BIG_ENDIAN - const auto lo = shlo.lookup_32(as_vector_u8(in1), as_vector_u8(in2)); - const auto hi = shhi.lookup_32(as_vector_u8(in3), as_vector_u8(in4)); - - const auto merged = lo | hi; - - merged.store(latin1_output); - latin1_output += 4 * N; - buf += 4 * N; - } - - return utf32_to_latin1_t{error_code::SUCCESS, buf, latin1_output}; -} -/* end file src/ppc64/ppc64_convert_utf32_to_latin1.cpp */ -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_UTF16 -/* begin file src/ppc64/ppc64_convert_utf32_to_utf16.cpp */ -struct utf32_to_utf16_t { - error_code err; - const char32_t *input; - char16_t *output; -}; - -template -utf32_to_utf16_t ppc64_convert_utf32_to_utf16(const char32_t *buf, size_t len, - char16_t *utf16_output) { - - const char32_t *end = buf + len; - - const auto zero = vector_u32::zero(); - const auto v_ffff0000 = vector_u32::splat(0xffff0000); - - auto forbidden_global = simd16(); - - while (end - buf >= 8) { - const auto in0 = vector_u32::load(buf); - const auto in1 = vector_u32::load(buf + vector_u32::ELEMENTS); - - const auto any_surrogate = ((in0 | in1) & v_ffff0000) != zero; - - // Check if no bits set above 15th - if (any_surrogate.is_zero()) { - // Pack UTF-32 to UTF-16 -#if SIMDUTF_IS_BIG_ENDIAN - const auto sh = big_endian ? vector_u8(2, 3, 6, 7, 10, 11, 14, 15, 18, 19, - 22, 23, 26, 27, 30, 31) - : vector_u8(3, 2, 7, 6, 11, 10, 15, 14, 19, 18, - 23, 22, 27, 26, 31, 30); -#else - const auto sh = big_endian ? vector_u8(1, 0, 5, 4, 9, 8, 13, 12, 17, 16, - 21, 20, 25, 24, 29, 28) - : vector_u8(0, 1, 4, 5, 8, 9, 12, 13, 16, 17, - 20, 21, 24, 25, 28, 29); -#endif // SIMDUTF_IS_BIG_ENDIAN - const auto packed0 = sh.lookup_32(as_vector_u8(in0), as_vector_u8(in1)); - const auto packed = as_vector_u16(packed0); - -#if SIMDUTF_IS_BIG_ENDIAN - const auto v_f800 = - big_endian ? vector_u16::splat(0xf800) : vector_u16::splat(0x00f8); - const auto v_d800 = - big_endian ? vector_u16::splat(0xd800) : vector_u16::splat(0x00d8); -#else - const auto v_f800 = - big_endian ? vector_u16::splat(0x00f8) : vector_u16::splat(0xf800); - const auto v_d800 = - big_endian ? vector_u16::splat(0x00d8) : vector_u16::splat(0xd800); -#endif // SIMDUTF_IS_BIG_ENDIAN - const auto forbidden = (packed & v_f800) == v_d800; - - switch (er) { - case ErrorReporting::precise: - if (not forbidden.is_zero()) { - // scalar procedure will rescan the portion of buffer we've just - // analysed - return utf32_to_utf16_t{error_code::OTHER, buf, utf16_output}; - } - break; - case ErrorReporting::at_the_end: - forbidden_global |= forbidden; - break; - case ErrorReporting::none: - break; - } - - packed.store(utf16_output); - utf16_output += 8; - buf += 8; - } else { - size_t forward = 7; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint32_t word = buf[k]; - if ((word & 0xFFFF0000) == 0) { - // will not generate a surrogate pair - if (word >= 0xD800 && word <= 0xDFFF) { - return utf32_to_utf16_t{error_code::SURROGATE, buf + k, - utf16_output}; - } - *utf16_output++ = not match_system(big_endian) - ? scalar::u16_swap_bytes(uint16_t(word)) - : uint16_t(word); - } else { - // will generate a surrogate pair - if (word > 0x10FFFF) { - return utf32_to_utf16_t{error_code::TOO_LARGE, buf + k, - utf16_output}; - } - word -= 0x10000; - uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); - uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); - if (not match_system(big_endian)) { - high_surrogate = scalar::u16_swap_bytes(high_surrogate); - low_surrogate = scalar::u16_swap_bytes(low_surrogate); - } - *utf16_output++ = char16_t(high_surrogate); - *utf16_output++ = char16_t(low_surrogate); - } - } - buf += k; - } - } - - if (er == ErrorReporting::at_the_end) { - // check for invalid input - if (not forbidden_global.is_zero()) { - return utf32_to_utf16_t{error_code::SURROGATE, buf, utf16_output}; - } - } - - return utf32_to_utf16_t{error_code::SUCCESS, buf, utf16_output}; -} -/* end file src/ppc64/ppc64_convert_utf32_to_utf16.cpp */ -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_UTF32 -/* begin file src/ppc64/ppc64_convert_utf32_to_utf8.cpp */ -struct utf32_to_utf8_t { - error_code err; - const char32_t *input; - char *output; -}; - -template -utf32_to_utf8_t ppc64_convert_utf32_to_utf8(const char32_t *buf, size_t len, - char *utf8_output) { - const char32_t *end = buf + len; - - const auto v_f800 = vector_u16::splat(0xf800); - const auto v_d800 = vector_u16::splat(0xd800); - - const auto v_ffff0000 = vector_u32::splat(0xffff0000); - const auto v_00000000 = vector_u32::zero(); - auto forbidden_bytemask = simd16(); - const size_t safety_margin = - 12; // to avoid overruns, see issue - // https://github.com/simdutf/simdutf/issues/92 - - while (end - buf >= - std::ptrdiff_t( - 16 + safety_margin)) { // buf is a char32_t pointer, each char32_t - // has 4 bytes or 32 bits, thus buf + 16 * - // char_32t = 512 bits = 64 bytes - // We load two 16 bytes registers for a total of 32 bytes or 16 characters. - // These two values can hold only 8 UTF32 chars - auto in0 = vector_u32::load(buf); - auto in1 = vector_u32::load(buf + vector_u32::ELEMENTS); - - // Pack 32-bit UTF-32 code units to 16-bit UTF-16 code units with unsigned - // saturation - auto in = vector_u32::pack(in0, in1); - - // Try to apply UTF-16 => UTF-8 from ./ppc64_convert_utf16_to_utf8.cpp - - // Check for ASCII fast path - - // ASCII fast path!!!! - // We eagerly load another 32 bytes, hoping that they will be ASCII too. - // The intuition is that we try to collect 16 ASCII characters which - // requires a total of 64 bytes of input. If we fail, we just pass thirdin - // and fourthin as our new inputs. - if (in.is_ascii()) { // if the first two blocks are ASCII - const auto in2 = vector_u32::load(buf + 2 * vector_u32::ELEMENTS); - const auto in3 = vector_u32::load(buf + 3 * vector_u32::ELEMENTS); - - const auto next = vector_u32::pack(in2, in3); - if (next.is_ascii()) { - // 1. pack the bytes - const auto utf8_packed = vector_u16::pack(in, next); - // 2. store (16 bytes) - utf8_packed.store(utf8_output); - // 3. adjust pointers - buf += 16; - utf8_output += 16; - continue; // we are done for this round! - } - - // `next` is not ASCII, write `in` and carry on with next - - // 1. pack the bytes - const auto utf8_packed = vector_u16::pack(in, in); - utf8_packed.store(utf8_output); - // 3. adjust pointers - buf += 8; - utf8_output += 8; - - // Proceed with next input - in = next; - in0 = in2; - in1 = in3; - } - - // no bits set above 7th bit - const auto one_byte_bytemask = in < uint16_t(1 << 7); - const uint16_t one_byte_bitmask = one_byte_bytemask.to_bitmask(); - - // no bits set above 11th bit - const auto one_or_two_bytes_bytemask = in < uint16_t(1 << 11); - const uint16_t one_or_two_bytes_bitmask = - one_or_two_bytes_bytemask.to_bitmask(); - - if (one_or_two_bytes_bitmask == 0xffff) { - write_v_u16_11bits_to_utf8( - in, utf8_output, as_vector_u8(one_byte_bytemask), one_byte_bitmask); - buf += 8; - continue; - } - - // Check for overflow in packing - const auto saturation_bytemask = ((in0 | in1) & v_ffff0000) == v_00000000; - const uint16_t saturation_bitmask = saturation_bytemask.to_bitmask(); - if (saturation_bitmask == 0xffff) { - switch (er) { - case ErrorReporting::precise: { - const auto forbidden = (in & v_f800) == v_d800; - if (forbidden.any()) { - // We return no error code, instead we force the scalar procedure - // to rescan the portion of input where we've just found an error. - return utf32_to_utf8_t{error_code::SUCCESS, buf, utf8_output}; - } - } break; - case ErrorReporting::at_the_end: - forbidden_bytemask |= (in & v_f800) == v_d800; - break; - case ErrorReporting::none: - break; - } - - ppc64_convert_utf16_to_1_2_3_bytes_of_utf8( - in, one_byte_bitmask, one_or_two_bytes_bytemask, - one_or_two_bytes_bitmask, utf8_output); - buf += 8; - } else { - // case: at least one 32-bit word produce a surrogate pair in UTF-16 <=> - // will produce four UTF-8 bytes Let us do a scalar fallback. It may seem - // wasteful to use scalar code, but being efficient with SIMD in the - // presence of surrogate pairs may require non-trivial tables. - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint32_t word = buf[k]; - if ((word & 0xFFFFFF80) == 0) { - *utf8_output++ = char(word); - } else if ((word & 0xFFFFF800) == 0) { - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else if ((word & 0xFFFF0000) == 0) { - if (er != ErrorReporting::none and - (word >= 0xD800 && word <= 0xDFFF)) { - return utf32_to_utf8_t{error_code::SURROGATE, buf + k, utf8_output}; - } - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else { - if (er != ErrorReporting::none and (word > 0x10FFFF)) { - return utf32_to_utf8_t{error_code::TOO_LARGE, buf + k, utf8_output}; - } - *utf8_output++ = char((word >> 18) | 0b11110000); - *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } - } - buf += k; - } - } // while - - if (er == ErrorReporting::at_the_end) { - if (forbidden_bytemask.any()) { - return utf32_to_utf8_t{error_code::SURROGATE, buf, utf8_output}; - } - } - - return utf32_to_utf8_t{ - error_code::SUCCESS, - buf, - utf8_output, - }; -} -/* end file src/ppc64/ppc64_convert_utf32_to_utf8.cpp */ -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/ppc64/ppc64_utf8_length_from_latin1.cpp */ -template T min(T a, T b) { return a <= b ? a : b; } - -std::pair ppc64_utf8_length_from_latin1(const char *input, - size_t length) { - constexpr size_t N = vector_u8::ELEMENTS; - length = (length / N); - - size_t count = length * N; - while (length != 0) { - vector_u32 partial = vector_u32::zero(); - - // partial accumulator has 32 bits => this yields (2^31 / 16) - size_t chunk = min(length, size_t(0xffffffff / N)); - length -= chunk; - while (chunk != 0) { - auto local = vector_u8::zero(); - // local accumulator has 8 bits => this yields 255 max (we increment by 1 - // in each iteration) - const size_t n = min(chunk, size_t(255)); - chunk -= n; - for (size_t i = 0; i < n; i++) { - const auto in = vector_i8::load(input); - input += N; - - local -= as_vector_u8(in < vector_i8::splat(0)); - } - - partial = sum4bytes(local, partial); - } - - for (int i = 0; i < vector_u32::ELEMENTS; i++) { - count += size_t(partial.value[i]); - } - } - - return std::make_pair(input, count); -} -/* end file src/ppc64/ppc64_utf8_length_from_latin1.cpp */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_BASE64 -/* begin file src/ppc64/ppc64_base64.cpp */ -/* - * References and further reading: - * - * Wojciech Muła, Daniel Lemire, Base64 encoding and decoding at almost the - * speed of a memory copy, Software: Practice and Experience 50 (2), 2020. - * https://arxiv.org/abs/1910.05109 - * - * Wojciech Muła, Daniel Lemire, Faster Base64 Encoding and Decoding using AVX2 - * Instructions, ACM Transactions on the Web 12 (3), 2018. - * https://arxiv.org/abs/1704.00605 - * - * Simon Josefsson. 2006. The Base16, Base32, and Base64 Data Encodings. - * https://tools.ietf.org/html/rfc4648. (2006). Internet Engineering Task Force, - * Request for Comments: 4648. - * - * Alfred Klomp. 2014a. Fast Base64 encoding/decoding with SSE vectorization. - * http://www.alfredklomp.com/programming/sse-base64/. (2014). - * - * Alfred Klomp. 2014b. Fast Base64 stream encoder/decoder in C99, with SIMD - * acceleration. https://github.com/aklomp/base64. (2014). - * - * Hanson Char. 2014. A Fast and Correct Base 64 Codec. (2014). - * https://aws.amazon.com/blogs/developer/a-fast-and-correct-base-64-codec/ - * - * Nick Kopp. 2013. Base64 Encoding on a GPU. - * https://www.codeproject.com/Articles/276993/Base-Encoding-on-a-GPU. (2013). - * - * AMD XOP specific: http://0x80.pl/notesen/2016-01-12-sse-base64-encoding.html - * Altivec has capabilites of AMD XOP (or vice versa): shuffle using 2 vectors - * and variable shifts, thus this implementation shares some code solution - * (modulo intrisic function names). - */ - -constexpr bool with_base64_std = false; -constexpr bool with_base64_url = true; -constexpr bool with_ignore_errors = true; -constexpr bool with_ignore_garbage = true; -constexpr bool with_strict_checking = false; - -// --- encoding ----------------------------------------------- - -/* - Procedure translates vector of bytes having 6-bit values - into ASCII counterparts. -*/ -template -vector_u8 encoding_translate_6bit_values(const vector_u8 input) { - // credit: Wojciech Muła - // reduce 0..51 -> 0 - // 52..61 -> 1 .. 10 - // 62 -> 11 - // 63 -> 12 - auto result = input.saturating_sub(vector_u8::splat(51)); - - // distinguish between ranges 0..25 and 26..51: - // 0 .. 25 -> remains 13 - // 26 .. 51 -> becomes 0 - const auto lt = input < vector_u8::splat(26); - result = select(as_vector_u8(lt), vector_u8::splat(13), result); - - const auto shift_LUT = - base64_url ? vector_u8('a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, - '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, - '0' - 52, '-' - 62, '_' - 63, 'A', 0, 0) - : vector_u8('a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, - '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, - '0' - 52, '+' - 62, '/' - 63, 'A', 0, 0); - // read shift - result = result.lookup_16(shift_LUT); - - return input + result; -} - -/* - Procedure expands 12 bytes (4*3 bytes) into 16 bytes, - each byte stores 6 bits of data -*/ -template -simdutf_really_inline vector_u8 encoding_expand_6bit_fields(vector_u8 input) { -#if SIMDUTF_IS_BIG_ENDIAN - #define indices4(dx) (dx + 0), (dx + 1), (dx + 1), (dx + 2) - const auto expand_3_to_4 = vector_u8(indices4(0 * 3), indices4(1 * 3), - indices4(2 * 3), indices4(3 * 3)); - #undef indices4 - - // input = [........|ccdddddd|bbbbcccc|aaaaaabb] as uint8_t - // 3 2 1 0 - // - // in' = [aaaaaabb|bbbbcccc|bbbbcccc|ccdddddd] as uint32_t - // 0 1 1 2 - const auto in = as_vector_u32(expand_3_to_4.lookup_16(input)); - - // t0 = [00000000|00000000|00000000|00dddddd] - const auto t0 = in & uint32_t(0x0000003f); - - // t1 = [00000000|00000000|00cccccc|00dddddd] - const auto t1 = select(uint32_t(0x00003f00), in.shl<2>(), t0); - - // t2 = [00000000|00bbbbbb|00cccccc|00dddddd] - const auto t2 = select(uint32_t(0x003f0000), in.shr<4>(), t1); - - // t3 = [00aaaaaa|00bbbbbb|00cccccc|00dddddd] - const auto t3 = select(uint32_t(0x3f000000), in.shr<2>(), t2); - - return as_vector_u8(t3); -#else - #define indices4(dx) (dx + 1), (dx + 0), (dx + 2), (dx + 1) - const auto expand_3_to_4 = vector_u8(indices4(0 * 3), indices4(1 * 3), - indices4(2 * 3), indices4(3 * 3)); - #undef indices4 - - // input = [........|ccdddddd|bbbbcccc|aaaaaabb] as uint8_t - // 3 2 1 0 - // - // in' = [bbbbcccc|ccdddddd|aaaaaabb|bbbbcccc] as uint32_t - // 1 2 0 1 - const auto in = as_vector_u32(expand_3_to_4.lookup_16(input)); - - // t0 = [00dddddd|00000000|00000000|00000000] - const auto t0 = in.shl<8>() & uint32_t(0x3f000000); - - // t1 = [00dddddd|00cccccc|00000000|00000000] - const auto t1 = select(uint32_t(0x003f0000), in.shr<6>(), t0); - - // t2 = [00dddddd|00cccccc|00bbbbbb|00000000] - const auto t2 = select(uint32_t(0x00003f00), in.shl<4>(), t1); - - // t3 = [00dddddd|00cccccc|00bbbbbb|00aaaaaa] - const auto t3 = select(uint32_t(0x0000003f), in.shr<10>(), t2); - - return as_vector_u8(t3); -#endif // SIMDUTF_IS_BIG_ENDIAN -} - -template -size_t encode_base64(char *dst, const char *src, size_t srclen, - base64_options options) { - - const uint8_t *input = (const uint8_t *)src; - - uint8_t *out = (uint8_t *)dst; - - size_t i = 0; - for (; i + 52 <= srclen; i += 48) { - const auto in0 = vector_u8::load(input + i + 12 * 0); - const auto in1 = vector_u8::load(input + i + 12 * 1); - const auto in2 = vector_u8::load(input + i + 12 * 2); - const auto in3 = vector_u8::load(input + i + 12 * 3); - - const auto expanded0 = encoding_expand_6bit_fields(in0); - const auto expanded1 = encoding_expand_6bit_fields(in1); - const auto expanded2 = encoding_expand_6bit_fields(in2); - const auto expanded3 = encoding_expand_6bit_fields(in3); - - const auto base64_0 = - encoding_translate_6bit_values(expanded0); - const auto base64_1 = - encoding_translate_6bit_values(expanded1); - const auto base64_2 = - encoding_translate_6bit_values(expanded2); - const auto base64_3 = - encoding_translate_6bit_values(expanded3); - - base64_0.store(out); - out += 16; - - base64_1.store(out); - out += 16; - - base64_2.store(out); - out += 16; - - base64_3.store(out); - out += 16; - } - for (; i + 16 <= srclen; i += 12) { - const auto in = vector_u8::load(input + i); - const auto expanded = encoding_expand_6bit_fields(in); - const auto base64 = encoding_translate_6bit_values(expanded); - - base64.store(out); - out += 16; - } - - return i / 3 * 4 + scalar::base64::tail_encode_base64((char *)out, src + i, - srclen - i, options); -} - -// --- decoding ----------------------------------------------- - -static simdutf_really_inline void compress(const vector_u8 data, uint16_t mask, - char *output) { - if (mask == 0) { - data.store(output); - return; - } - - // this particular implementation was inspired by work done by @animetosho - // we do it in two steps, first 8 bytes and then second 8 bytes - uint8_t mask1 = uint8_t(mask); // least significant 8 bits - uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits - // next line just loads the 64-bit values thintable_epi8[mask1] and - // thintable_epi8[mask2] into a 128-bit register, using only - // two instructions on most compilers. - -#if SIMDUTF_IS_BIG_ENDIAN - vec_u64_t tmp = { - tables::base64::thintable_epi8[mask2], - tables::base64::thintable_epi8[mask1], - }; - - auto shufmask = vector_u8(vec_reve(vec_u8_t(tmp))); - - // we increment by 0x08 the second half of the mask - shufmask = - shufmask + vector_u8(0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8); -#else - vec_u64_t tmp = { - tables::base64::thintable_epi8[mask1], - tables::base64::thintable_epi8[mask2], - }; - - auto shufmask = vector_u8(vec_u8_t(tmp)); - - // we increment by 0x08 the second half of the mask - shufmask = - shufmask + vector_u8(0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8); -#endif // SIMDUTF_IS_BIG_ENDIAN - - // this is the version "nearly pruned" - const auto pruned = shufmask.lookup_16(data); - // we still need to put the two halves together. - // we compute the popcount of the first half: - const int pop1 = tables::base64::BitsSetTable256mul2[mask1]; - // then load the corresponding mask, what it does is to write - // only the first pop1 bytes from the first 8 bytes, and then - // it fills in with the bytes from the second 8 bytes + some filling - // at the end. - const auto compactmask = - vector_u8::load(tables::base64::pshufb_combine_table + pop1 * 8); - - const auto answer = compactmask.lookup_16(pruned); - - answer.store(output); -} - -static simdutf_really_inline vector_u8 decoding_pack(vector_u8 input) { -#if SIMDUTF_IS_BIG_ENDIAN - // in = [00aaaaaa|00bbbbbb|00cccccc|00dddddd] - // want = [00000000|aaaaaabb|bbbbcccc|ccdddddd] - - auto in = as_vector_u16(input); - // t0 = [00??aaaa|aabbbbbb|00??cccc|ccdddddd] - const auto t0 = in.shr<2>(); - const auto t1 = select(uint16_t(0x0fc0), t0, in); - - // t0 = [00??????|aaaaaabb|bbbbcccc|ccdddddd] - const auto t2 = as_vector_u32(t1); - const auto t3 = t2.shr<4>(); - const auto t4 = select(uint32_t(0x00fff000), t3, t2); - - const auto tmp = as_vector_u8(t4); - - const auto shuffle = - vector_u8(1, 2, 3, 5, 6, 7, 9, 10, 11, 13, 14, 15, 0, 0, 0, 0); - - const auto t = shuffle.lookup_16(tmp); - - return t; -#else - // in = [00dddddd|00cccccc|00bbbbbb|00aaaaaa] - // want = [00000000|aaaaaabb|bbbbcccc|ccdddddd] - - auto u = as_vector_u32(input).swap_bytes(); - - auto in = vector_u16((vec_u16_t)u.value); - // t0 = [00??aaaa|aabbbbbb|00??cccc|ccdddddd] - const auto t0 = in.shr<2>(); - const auto t1 = select(uint16_t(0x0fc0), t0, in); - - // t0 = [00??????|aaaaaabb|bbbbcccc|ccdddddd] - const auto t2 = as_vector_u32(t1); - const auto t3 = t2.shr<4>(); - const auto t4 = select(uint32_t(0x00fff000), t3, t2); - - const auto tmp = as_vector_u8(t4); - - const auto shuffle = - vector_u8(2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, 0, 0, 0, 0); - - const auto t = shuffle.lookup_16(tmp); - - return t; -#endif // SIMDUTF_IS_BIG_ENDIAN -} -static simdutf_really_inline void base64_decode(char *out, vector_u8 input) { - const auto expanded = decoding_pack(input); - expanded.store(out); -} - -static simdutf_really_inline void base64_decode_block(char *out, - const char *src) { - base64_decode(out + 12 * 0, vector_u8::load(src + 0 * 16)); - base64_decode(out + 12 * 1, vector_u8::load(src + 1 * 16)); - base64_decode(out + 12 * 2, vector_u8::load(src + 2 * 16)); - base64_decode(out + 12 * 3, vector_u8::load(src + 3 * 16)); -} - -static simdutf_really_inline void base64_decode_block_safe(char *out, - const char *src) { - base64_decode(out + 12 * 0, vector_u8::load(src + 0 * 16)); - base64_decode(out + 12 * 1, vector_u8::load(src + 1 * 16)); - base64_decode(out + 12 * 2, vector_u8::load(src + 2 * 16)); - - char buffer[16]; - base64_decode(buffer, vector_u8::load(src + 3 * 16)); - std::memcpy(out + 36, buffer, 12); -} - -// ---base64 decoding::block64 class -------------------------- - -class block64 { - simd8x64 b; - -public: - simdutf_really_inline block64(const char *src) : b(load_block(src)) {} - simdutf_really_inline block64(const char16_t *src) : b(load_block(src)) {} - -private: - // The caller of this function is responsible to ensure that there are 64 - // bytes available from reading at src. The data is read into a block64 - // structure. - static simdutf_really_inline simd8x64 load_block(const char *src) { - const auto v0 = vector_u8::load(src + 16 * 0); - const auto v1 = vector_u8::load(src + 16 * 1); - const auto v2 = vector_u8::load(src + 16 * 2); - const auto v3 = vector_u8::load(src + 16 * 3); - - return simd8x64(v0, v1, v2, v3); - } - - // The caller of this function is responsible to ensure that there are 128 - // bytes available from reading at src. The data is read into a block64 - // structure. - static simdutf_really_inline simd8x64 - load_block(const char16_t *src) { - const auto m1 = vector_u16::load(src + 8 * 0); - const auto m2 = vector_u16::load(src + 8 * 1); - const auto m3 = vector_u16::load(src + 8 * 2); - const auto m4 = vector_u16::load(src + 8 * 3); - const auto m5 = vector_u16::load(src + 8 * 4); - const auto m6 = vector_u16::load(src + 8 * 5); - const auto m7 = vector_u16::load(src + 8 * 6); - const auto m8 = vector_u16::load(src + 8 * 7); - - return simd8x64(vector_u16::pack(m1, m2), vector_u16::pack(m3, m4), - vector_u16::pack(m5, m6), - vector_u16::pack(m7, m8)); - } - -public: - template - static inline uint16_t to_base64_mask(vector_u8 &src, uint16_t &error) { - const auto ascii_space_tbl = - vector_u8(0x20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x9, 0xa, 0x0, - 0xc, 0xd, 0x0, 0x0); - - // credit: aqrit - const auto delta_asso = - base64_url ? vector_u8(0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x0, 0x0, - 0x0, 0x0, 0x0, 0xF, 0x0, 0xF) - : vector_u8(0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x0F); - - const auto delta_values = - base64_url ? vector_u8(0x0, 0x0, 0x0, 0x13, 0x4, 0xBF, 0xBF, 0xB9, 0xB9, - 0x0, 0x11, 0xC3, 0xBF, 0xE0, 0xB9, 0xB9) - : vector_u8(0x00, 0x00, 0x00, 0x13, 0x04, 0xBF, 0xBF, 0xB9, - 0xB9, 0x00, 0x10, 0xC3, 0xBF, 0xBF, 0xB9, 0xB9); - - const auto check_asso = - base64_url ? vector_u8(0xD, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, - 0x3, 0x7, 0xB, 0xE, 0xB, 0x6) - : vector_u8(0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x01, 0x01, 0x03, 0x07, 0x0B, 0x0B, 0x0B, 0x0F); - - const auto check_values = - base64_url ? vector_u8(0x80, 0x80, 0x80, 0x80, 0xCF, 0xBF, 0xB6, 0xA6, - 0xB5, 0xA1, 0x0, 0x80, 0x0, 0x80, 0x0, 0x80) - : vector_u8(0x80, 0x80, 0x80, 0x80, 0xCF, 0xBF, 0xD5, 0xA6, - 0xB5, 0x86, 0xD1, 0x80, 0xB1, 0x80, 0x91, 0x80); - - const auto shifted = src.shr<3>(); - - const auto delta_hash = avg(src.lookup_16(delta_asso), shifted); - const auto check_hash = avg(src.lookup_16(check_asso), shifted); - - const auto out = as_vector_i8(delta_hash.lookup_16(delta_values)) - .saturating_add(as_vector_i8(src)); - const auto chk = as_vector_i8(check_hash.lookup_16(check_values)) - .saturating_add(as_vector_i8(src)); - - const uint16_t mask = chk.to_bitmask(); - if (!ignore_garbage && mask) { - const auto ascii = src.lookup_16(ascii_space_tbl); - const auto ascii_space = (ascii == src); - error = (mask ^ ascii_space.to_bitmask()); - } - src = out; - - return mask; - } - - template - simdutf_really_inline uint64_t to_base64_mask(uint64_t *error) { - uint16_t err0 = 0; - uint16_t err1 = 0; - uint16_t err2 = 0; - uint16_t err3 = 0; - uint64_t m0 = to_base64_mask(b.chunks[0], err0); - uint64_t m1 = to_base64_mask(b.chunks[1], err1); - uint64_t m2 = to_base64_mask(b.chunks[2], err2); - uint64_t m3 = to_base64_mask(b.chunks[3], err3); - - if (!ignore_garbage) { - *error = (err0) | ((uint64_t)err1 << 16) | ((uint64_t)err2 << 32) | - ((uint64_t)err3 << 48); - } - return m0 | (m1 << 16) | (m2 << 32) | (m3 << 48); - } - - simdutf_really_inline void copy_block(char *output) { - b.store(reinterpret_cast(output)); - } - - simdutf_really_inline uint64_t compress_block(uint64_t mask, char *output) { - uint64_t nmask = ~mask; - compress(b.chunks[0], uint16_t(mask), output); - compress(b.chunks[1], uint16_t(mask >> 16), - output + count_ones(nmask & 0xFFFF)); - compress(b.chunks[2], uint16_t(mask >> 32), - output + count_ones(nmask & 0xFFFFFFFF)); - compress(b.chunks[3], uint16_t(mask >> 48), - output + count_ones(nmask & 0xFFFFFFFFFFFFULL)); - return count_ones(nmask); - } - - simdutf_really_inline void base64_decode_block(char *out) { - base64_decode(out + 12 * 0, b.chunks[0]); - base64_decode(out + 12 * 1, b.chunks[1]); - base64_decode(out + 12 * 2, b.chunks[2]); - base64_decode(out + 12 * 3, b.chunks[3]); - } - - simdutf_really_inline void base64_decode_block_safe(char *out) { - base64_decode(out + 12 * 0, b.chunks[0]); - base64_decode(out + 12 * 1, b.chunks[1]); - base64_decode(out + 12 * 2, b.chunks[2]); - char buffer[16]; - base64_decode(buffer, b.chunks[3]); - std::memcpy(out + 12 * 3, buffer, 12); - } -}; -/* end file src/ppc64/ppc64_base64.cpp */ -#endif // SIMDUTF_FEATURE_BASE64 - -} // unnamed namespace -} // namespace ppc64 -} // namespace simdutf - -#if SIMDUTF_FEATURE_UTF8 -/* begin file src/generic/buf_block_reader.h */ -namespace simdutf { -namespace ppc64 { -namespace { - -// Walks through a buffer in block-sized increments, loading the last part with -// spaces -template struct buf_block_reader { -public: - simdutf_really_inline buf_block_reader(const uint8_t *_buf, size_t _len); - simdutf_really_inline size_t block_index(); - simdutf_really_inline bool has_full_block() const; - simdutf_really_inline const uint8_t *full_block() const; - /** - * Get the last block, padded with spaces. - * - * There will always be a last block, with at least 1 byte, unless len == 0 - * (in which case this function fills the buffer with spaces and returns 0. In - * particular, if len == STEP_SIZE there will be 0 full_blocks and 1 remainder - * block with STEP_SIZE bytes and no spaces for padding. - * - * @return the number of effective characters in the last block. - */ - simdutf_really_inline size_t get_remainder(uint8_t *dst) const; - simdutf_really_inline void advance(); - -private: - const uint8_t *buf; - const size_t len; - const size_t lenminusstep; - size_t idx; -}; - -// Routines to print masks and text for debugging bitmask operations -simdutf_unused static char *format_input_text_64(const uint8_t *text) { - static char *buf = - reinterpret_cast(malloc(sizeof(simd8x64) + 1)); - for (size_t i = 0; i < sizeof(simd8x64); i++) { - buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); - } - buf[sizeof(simd8x64)] = '\0'; - return buf; -} - -// Routines to print masks and text for debugging bitmask operations -simdutf_unused static char *format_input_text(const simd8x64 &in) { - static char *buf = - reinterpret_cast(malloc(sizeof(simd8x64) + 1)); - in.store(reinterpret_cast(buf)); - for (size_t i = 0; i < sizeof(simd8x64); i++) { - if (buf[i] < ' ') { - buf[i] = '_'; - } - } - buf[sizeof(simd8x64)] = '\0'; - return buf; -} - -simdutf_unused static char *format_mask(uint64_t mask) { - static char *buf = reinterpret_cast(malloc(64 + 1)); - for (size_t i = 0; i < 64; i++) { - buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; - } - buf[64] = '\0'; - return buf; -} - -template -simdutf_really_inline -buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) - : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, - idx{0} {} - -template -simdutf_really_inline size_t buf_block_reader::block_index() { - return idx; -} - -template -simdutf_really_inline bool buf_block_reader::has_full_block() const { - return idx < lenminusstep; -} - -template -simdutf_really_inline const uint8_t * -buf_block_reader::full_block() const { - return &buf[idx]; -} - -template -simdutf_really_inline size_t -buf_block_reader::get_remainder(uint8_t *dst) const { - if (len == idx) { - return 0; - } // memcpy(dst, null, 0) will trigger an error with some sanitizers - std::memset(dst, 0x20, - STEP_SIZE); // std::memset STEP_SIZE because it is more efficient - // to write out 8 or 16 bytes at once. - std::memcpy(dst, buf + idx, len - idx); - return len - idx; -} - -template -simdutf_really_inline void buf_block_reader::advance() { - idx += STEP_SIZE; -} - -} // unnamed namespace -} // namespace ppc64 -} // namespace simdutf -/* end file src/generic/buf_block_reader.h */ -/* begin file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ -namespace simdutf { -namespace ppc64 { -namespace { -namespace utf8_validation { - -using namespace simd; - -simdutf_really_inline simd8 -check_special_cases(const simd8 input, const simd8 prev1) { - // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) - // Bit 1 = Too Long (ASCII followed by continuation) - // Bit 2 = Overlong 3-byte - // Bit 4 = Surrogate - // Bit 5 = Overlong 2-byte - // Bit 7 = Two Continuations - constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ - // 11______ 11______ - constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ - constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ - constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ - constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ - constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ - constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ - constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; - // 11110101 1000____ - // 1111011_ 1000____ - // 11111___ 1000____ - constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ - - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // 0_______ ________ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - TOO_LONG, - // 10______ ________ - TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, - // 1100____ ________ - TOO_SHORT | OVERLONG_2, - // 1101____ ________ - TOO_SHORT, - // 1110____ ________ - TOO_SHORT | OVERLONG_3 | SURROGATE, - // 1111____ ________ - TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); - constexpr const uint8_t CARRY = - TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . - const simd8 byte_1_low = - (prev1 & 0x0F) - .lookup_16( - // ____0000 ________ - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - // ____0001 ________ - CARRY | OVERLONG_2, - // ____001_ ________ - CARRY, CARRY, - - // ____0100 ________ - CARRY | TOO_LARGE, - // ____0101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____011_ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - - // ____1___ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____1101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000); - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ________ 0_______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - TOO_SHORT, TOO_SHORT, - - // ________ 1000____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | - OVERLONG_4, - // ________ 1001____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - // ________ 101_____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - - // ________ 11______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); - return (byte_1_high & byte_1_low & byte_2_high); -} -simdutf_really_inline simd8 -check_multibyte_lengths(const simd8 input, - const simd8 prev_input, - const simd8 sc) { - simd8 prev2 = input.prev<2>(prev_input); - simd8 prev3 = input.prev<3>(prev_input); - simd8 must23 = - simd8(must_be_2_3_continuation(prev2, prev3)); - simd8 must23_80 = must23 & uint8_t(0x80); - return must23_80 ^ sc; -} - -// -// Return nonzero if there are incomplete multibyte characters at the end of the -// block: e.g. if there is a 4-byte character, but it is 3 bytes from the end. -// -simdutf_really_inline simd8 is_incomplete(const simd8 input) { - // If the previous input's last 3 bytes match this, they're too short (they - // ended at EOF): - // ... 1111____ 111_____ 11______ - static const uint8_t max_array[32] = {255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 0b11110000u - 1, - 0b11100000u - 1, - 0b11000000u - 1}; - const simd8 max_value( - &max_array[sizeof(max_array) - sizeof(simd8)]); - return input.gt_bits(max_value); -} - -struct utf8_checker { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; - // The last input we received - simd8 prev_input_block; - // Whether the last input we received was incomplete (used for ASCII fast - // path) - simd8 prev_incomplete; - - // - // Check whether the current bytes are valid UTF-8. - // - simdutf_really_inline void check_utf8_bytes(const simd8 input, - const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ - // lead bytes (2, 3, 4-byte leads become large positive numbers instead of - // small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - simd8 sc = check_special_cases(input, prev1); - this->error |= check_multibyte_lengths(input, prev_input, sc); - } - - // The only problem that can happen at EOF is that a multibyte character is - // too short or a byte value too large in the last bytes: check_special_cases - // only checks for bytes too large in the first of two bytes. - simdutf_really_inline void check_eof() { - // If the previous block had incomplete UTF-8 characters at the end, an - // ASCII block can't possibly finish them. - this->error |= this->prev_incomplete; - } - - simdutf_really_inline void check_next_input(const simd8x64 &input) { - if (simdutf_likely(is_ascii(input))) { - this->error |= this->prev_incomplete; - } else { - // you might think that a for-loop would work, but under Visual Studio, it - // is not good enough. - static_assert((simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - this->prev_incomplete = - is_incomplete(input.chunks[simd8x64::NUM_CHUNKS - 1]); - this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS - 1]; - } - } - - // do not forget to call check_eof! - simdutf_really_inline bool errors() const { - return this->error.any_bits_set_anywhere(); - } - -}; // struct utf8_checker -} // namespace utf8_validation - -using utf8_validation::utf8_checker; - -} // unnamed namespace -} // namespace ppc64 -} // namespace simdutf -/* end file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ -/* begin file src/generic/utf8_validation/utf8_validator.h */ -namespace simdutf { -namespace ppc64 { -namespace { -namespace utf8_validation { - -/** - * Validates that the string is actual UTF-8. - */ -template -bool generic_validate_utf8(const uint8_t *input, size_t length) { - checker c{}; - buf_block_reader<64> reader(input, length); - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - c.check_next_input(in); - reader.advance(); - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - c.check_next_input(in); - reader.advance(); - c.check_eof(); - return !c.errors(); -} - -bool generic_validate_utf8(const char *input, size_t length) { - return generic_validate_utf8( - reinterpret_cast(input), length); -} - -/** - * Validates that the string is actual UTF-8 and stops on errors. - */ -template -result generic_validate_utf8_with_errors(const uint8_t *input, size_t length) { - checker c{}; - buf_block_reader<64> reader(input, length); - size_t count{0}; - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - c.check_next_input(in); - if (c.errors()) { - if (count != 0) { - count--; - } // Sometimes the error is only detected in the next chunk - result res = scalar::utf8::rewind_and_validate_with_errors( - reinterpret_cast(input), - reinterpret_cast(input + count), length - count); - res.count += count; - return res; - } - reader.advance(); - count += 64; - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - c.check_next_input(in); - reader.advance(); - c.check_eof(); - if (c.errors()) { - if (count != 0) { - count--; - } // Sometimes the error is only detected in the next chunk - result res = scalar::utf8::rewind_and_validate_with_errors( - reinterpret_cast(input), - reinterpret_cast(input) + count, length - count); - res.count += count; - return res; - } else { - return result(error_code::SUCCESS, length); - } -} - -result generic_validate_utf8_with_errors(const char *input, size_t length) { - return generic_validate_utf8_with_errors( - reinterpret_cast(input), length); -} - -} // namespace utf8_validation -} // unnamed namespace -} // namespace ppc64 -} // namespace simdutf -/* end file src/generic/utf8_validation/utf8_validator.h */ -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -/* begin file src/generic/utf8_to_utf16/utf8_to_utf16.h */ -namespace simdutf { -namespace ppc64 { -namespace { -namespace utf8_to_utf16 { -using namespace simd; - -simdutf_really_inline simd8 -check_special_cases(const simd8 input, const simd8 prev1) { - // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) - // Bit 1 = Too Long (ASCII followed by continuation) - // Bit 2 = Overlong 3-byte - // Bit 4 = Surrogate - // Bit 5 = Overlong 2-byte - // Bit 7 = Two Continuations - constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ - // 11______ 11______ - constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ - constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ - constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ - constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ - constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ - constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ - constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; - // 11110101 1000____ - // 1111011_ 1000____ - // 11111___ 1000____ - constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ - - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // 0_______ ________ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - TOO_LONG, - // 10______ ________ - TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, - // 1100____ ________ - TOO_SHORT | OVERLONG_2, - // 1101____ ________ - TOO_SHORT, - // 1110____ ________ - TOO_SHORT | OVERLONG_3 | SURROGATE, - // 1111____ ________ - TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); - constexpr const uint8_t CARRY = - TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . - const simd8 byte_1_low = - (prev1 & 0x0F) - .lookup_16( - // ____0000 ________ - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - // ____0001 ________ - CARRY | OVERLONG_2, - // ____001_ ________ - CARRY, CARRY, - - // ____0100 ________ - CARRY | TOO_LARGE, - // ____0101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____011_ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - - // ____1___ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____1101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000); - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ________ 0_______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - TOO_SHORT, TOO_SHORT, - - // ________ 1000____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | - OVERLONG_4, - // ________ 1001____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - // ________ 101_____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - - // ________ 11______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); - return (byte_1_high & byte_1_low & byte_2_high); -} -simdutf_really_inline simd8 -check_multibyte_lengths(const simd8 input, - const simd8 prev_input, - const simd8 sc) { - simd8 prev2 = input.prev<2>(prev_input); - simd8 prev3 = input.prev<3>(prev_input); - simd8 must23 = - simd8(must_be_2_3_continuation(prev2, prev3)); - simd8 must23_80 = must23 & uint8_t(0x80); - return must23_80 ^ sc; -} - -struct validating_transcoder { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; - - validating_transcoder() : error(uint8_t(0)) {} - // - // Check whether the current bytes are valid UTF-8. - // - simdutf_really_inline void check_utf8_bytes(const simd8 input, - const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ - // lead bytes (2, 3, 4-byte leads become large positive numbers instead of - // small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - simd8 sc = check_special_cases(input, prev1); - this->error |= check_multibyte_lengths(input, prev_input, sc); - } - - template - simdutf_really_inline size_t convert(const char *in, size_t size, - char16_t *utf16_output) { - size_t pos = 0; - char16_t *start{utf16_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 8 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > -65); - } - // If the input is long enough, then we have that margin-1 is the eight last - // leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store_ascii_as_utf16(utf16_output); - utf16_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - if (utf8_continuation_mask & 1) { - return 0; // error - } - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_utf16( - in + pos, utf8_end_of_code_point_mask, utf16_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - return 0; - } - if (pos < size) { - size_t howmany = scalar::utf8_to_utf16::convert( - in + pos, size - pos, utf16_output); - if (howmany == 0) { - return 0; - } - utf16_output += howmany; - } - return utf16_output - start; - } - - template - simdutf_really_inline result convert_with_errors(const char *in, size_t size, - char16_t *utf16_output) { - size_t pos = 0; - char16_t *start{utf16_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 8 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > -65); - } - // If the input is long enough, then we have that margin-1 is the eight last - // leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store_ascii_as_utf16(utf16_output); - utf16_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - if (errors() || (utf8_continuation_mask & 1)) { - // rewind_and_convert_with_errors will seek a potential error from - // in+pos onward, with the ability to go back up to pos bytes, and - // read size-pos bytes forward. - result res = - scalar::utf8_to_utf16::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf16_output); - res.count += pos; - return res; - } - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_utf16( - in + pos, utf8_end_of_code_point_mask, utf16_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - // rewind_and_convert_with_errors will seek a potential error from in+pos - // onward, with the ability to go back up to pos bytes, and read size-pos - // bytes forward. - result res = - scalar::utf8_to_utf16::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf16_output); - res.count += pos; - return res; - } - if (pos < size) { - // rewind_and_convert_with_errors will seek a potential error from in+pos - // onward, with the ability to go back up to pos bytes, and read size-pos - // bytes forward. - result res = - scalar::utf8_to_utf16::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf16_output); - if (res.error) { // In case of error, we want the error position - res.count += pos; - return res; - } else { // In case of success, we want the number of word written - utf16_output += res.count; - } - } - return result(error_code::SUCCESS, utf16_output - start); - } - - simdutf_really_inline bool errors() const { - return this->error.any_bits_set_anywhere(); - } - -}; // struct utf8_checker -} // namespace utf8_to_utf16 -} // unnamed namespace -} // namespace ppc64 -} // namespace simdutf -/* end file src/generic/utf8_to_utf16/utf8_to_utf16.h */ -/* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ -namespace simdutf { -namespace ppc64 { -namespace { -namespace utf8_to_utf16 { - -using namespace simd; - -template -simdutf_warn_unused size_t convert_valid(const char *input, size_t size, - char16_t *utf16_output) noexcept { - // The implementation is not specific to haswell and should be moved to the - // generic directory. - size_t pos = 0; - char16_t *start{utf16_output}; - const size_t safety_margin = 16; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - // this loop could be unrolled further. For example, we could process the - // mask far more than 64 bytes. - simd8x64 in(reinterpret_cast(input + pos)); - if (in.is_ascii()) { - in.store_ascii_as_utf16(utf16_output); - utf16_output += 64; - pos += 64; - } else { - // Slow path. We hope that the compiler will recognize that this is a slow - // path. Anything that is not a continuation mask is a 'leading byte', - // that is, the start of a new code point. - uint64_t utf8_continuation_mask = in.lt(-65 + 1); - // -65 is 0b10111111 in two-complement's, so largest possible continuation - // byte - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - // The *start* of code points is not so useful, rather, we want the *end* - // of code points. - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times when using solely - // the slow/regular path, and at least four times if there are fast paths. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - // - // Thus we may allow convert_masked_utf8_to_utf16 to process - // more bytes at a time under a fast-path mode where 16 bytes - // are consumed at once (e.g., when encountering ASCII). - size_t consumed = convert_masked_utf8_to_utf16( - input + pos, utf8_end_of_code_point_mask, utf16_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - utf16_output += scalar::utf8_to_utf16::convert_valid( - input + pos, size - pos, utf16_output); - return utf16_output - start; -} - -} // namespace utf8_to_utf16 -} // unnamed namespace -} // namespace ppc64 -} // namespace simdutf -/* end file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -/* begin file src/generic/utf8_to_utf32/utf8_to_utf32.h */ -namespace simdutf { -namespace ppc64 { -namespace { -namespace utf8_to_utf32 { -using namespace simd; - -simdutf_really_inline simd8 -check_special_cases(const simd8 input, const simd8 prev1) { - // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) - // Bit 1 = Too Long (ASCII followed by continuation) - // Bit 2 = Overlong 3-byte - // Bit 4 = Surrogate - // Bit 5 = Overlong 2-byte - // Bit 7 = Two Continuations - constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ - // 11______ 11______ - constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ - constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ - constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ - constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ - constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ - constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ - constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; - // 11110101 1000____ - // 1111011_ 1000____ - // 11111___ 1000____ - constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ - - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // 0_______ ________ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - TOO_LONG, - // 10______ ________ - TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, - // 1100____ ________ - TOO_SHORT | OVERLONG_2, - // 1101____ ________ - TOO_SHORT, - // 1110____ ________ - TOO_SHORT | OVERLONG_3 | SURROGATE, - // 1111____ ________ - TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); - constexpr const uint8_t CARRY = - TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . - const simd8 byte_1_low = - (prev1 & 0x0F) - .lookup_16( - // ____0000 ________ - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - // ____0001 ________ - CARRY | OVERLONG_2, - // ____001_ ________ - CARRY, CARRY, - - // ____0100 ________ - CARRY | TOO_LARGE, - // ____0101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____011_ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - - // ____1___ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____1101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000); - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ________ 0_______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - TOO_SHORT, TOO_SHORT, - - // ________ 1000____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | - OVERLONG_4, - // ________ 1001____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - // ________ 101_____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - - // ________ 11______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); - return (byte_1_high & byte_1_low & byte_2_high); -} -simdutf_really_inline simd8 -check_multibyte_lengths(const simd8 input, - const simd8 prev_input, - const simd8 sc) { - simd8 prev2 = input.prev<2>(prev_input); - simd8 prev3 = input.prev<3>(prev_input); - simd8 must23 = - simd8(must_be_2_3_continuation(prev2, prev3)); - simd8 must23_80 = must23 & uint8_t(0x80); - return must23_80 ^ sc; -} - -struct validating_transcoder { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; - - validating_transcoder() : error(uint8_t(0)) {} - // - // Check whether the current bytes are valid UTF-8. - // - simdutf_really_inline void check_utf8_bytes(const simd8 input, - const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ - // lead bytes (2, 3, 4-byte leads become large positive numbers instead of - // small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - simd8 sc = check_special_cases(input, prev1); - this->error |= check_multibyte_lengths(input, prev_input, sc); - } - - simdutf_really_inline size_t convert(const char *in, size_t size, - char32_t *utf32_output) { - size_t pos = 0; - char32_t *start{utf32_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 words when calling convert_masked_utf8_to_utf32. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 16 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > -65); - } - // If the input is long enough, then we have that margin-1 is the fourth - // last leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store_ascii_as_utf32(utf32_output); - utf32_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - if (utf8_continuation_mask & 1) { - return 0; // we have an error - } - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_utf32( - in + pos, utf8_end_of_code_point_mask, utf32_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - return 0; - } - if (pos < size) { - size_t howmany = - scalar::utf8_to_utf32::convert(in + pos, size - pos, utf32_output); - if (howmany == 0) { - return 0; - } - utf32_output += howmany; - } - return utf32_output - start; - } - - simdutf_really_inline result convert_with_errors(const char *in, size_t size, - char32_t *utf32_output) { - size_t pos = 0; - char32_t *start{utf32_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 8 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > -65); - } - // If the input is long enough, then we have that margin-1 is the fourth - // last leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store_ascii_as_utf32(utf32_output); - utf32_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - if (errors() || (utf8_continuation_mask & 1)) { - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf32_output); - res.count += pos; - return res; - } - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_utf32( - in + pos, utf8_end_of_code_point_mask, utf32_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf32_output); - res.count += pos; - return res; - } - if (pos < size) { - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf32_output); - if (res.error) { // In case of error, we want the error position - res.count += pos; - return res; - } else { // In case of success, we want the number of word written - utf32_output += res.count; - } - } - return result(error_code::SUCCESS, utf32_output - start); - } - - simdutf_really_inline bool errors() const { - return this->error.any_bits_set_anywhere(); - } - -}; // struct utf8_checker -} // namespace utf8_to_utf32 -} // unnamed namespace -} // namespace ppc64 -} // namespace simdutf -/* end file src/generic/utf8_to_utf32/utf8_to_utf32.h */ -/* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ -namespace simdutf { -namespace ppc64 { -namespace { -namespace utf8_to_utf32 { - -using namespace simd; - -simdutf_warn_unused size_t convert_valid(const char *input, size_t size, - char32_t *utf32_output) noexcept { - size_t pos = 0; - char32_t *start{utf32_output}; - const size_t safety_margin = 16; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 in(reinterpret_cast(input + pos)); - if (in.is_ascii()) { - in.store_ascii_as_utf32(utf32_output); - utf32_output += 64; - pos += 64; - } else { - // -65 is 0b10111111 in two-complement's, so largest possible continuation - // byte - uint64_t utf8_continuation_mask = in.lt(-65 + 1); - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - size_t max_starting_point = (pos + 64) - 12; - while (pos < max_starting_point) { - size_t consumed = convert_masked_utf8_to_utf32( - input + pos, utf8_end_of_code_point_mask, utf32_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - } - } - utf32_output += scalar::utf8_to_utf32::convert_valid(input + pos, size - pos, - utf32_output); - return utf32_output - start; -} - -} // namespace utf8_to_utf32 -} // unnamed namespace -} // namespace ppc64 -} // namespace simdutf -/* end file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 -/* begin file src/generic/utf8.h */ -namespace simdutf { -namespace ppc64 { -namespace { -namespace utf8 { - -using namespace simd; - -simdutf_really_inline size_t count_code_points(const char *in, size_t size) { - size_t pos = 0; - size_t count = 0; - for (; pos + 64 <= size; pos += 64) { - simd8x64 input(reinterpret_cast(in + pos)); - uint64_t utf8_continuation_mask = input.gt(-65); - count += count_ones(utf8_continuation_mask); - } - return count + scalar::utf8::count_code_points(in + pos, size - pos); -} - -#ifdef SIMDUTF_SIMD_HAS_BYTEMASK -simdutf_really_inline size_t count_code_points_bytemask(const char *in, - size_t size) { - using vector_i8 = simd8; - using vector_u8 = simd8; - using vector_u64 = simd64; - - constexpr size_t N = vector_i8::SIZE; - constexpr size_t max_iterations = 255 / 4; - - size_t pos = 0; - size_t count = 0; - - auto counters = vector_u64::zero(); - auto local = vector_u8::zero(); - size_t iterations = 0; - for (; pos + 4 * N <= size; pos += 4 * N) { - const auto input0 = - simd8::load(reinterpret_cast(in + pos + 0 * N)); - const auto input1 = - simd8::load(reinterpret_cast(in + pos + 1 * N)); - const auto input2 = - simd8::load(reinterpret_cast(in + pos + 2 * N)); - const auto input3 = - simd8::load(reinterpret_cast(in + pos + 3 * N)); - const auto mask0 = input0 > int8_t(-65); - const auto mask1 = input1 > int8_t(-65); - const auto mask2 = input2 > int8_t(-65); - const auto mask3 = input3 > int8_t(-65); - - local -= vector_u8(mask0); - local -= vector_u8(mask1); - local -= vector_u8(mask2); - local -= vector_u8(mask3); - - iterations += 1; - if (iterations == max_iterations) { - counters += sum_8bytes(local); - local = vector_u8::zero(); - iterations = 0; - } - } - - if (iterations > 0) { - count += local.sum_bytes(); - } - - count += counters.sum(); - - return count + scalar::utf8::count_code_points(in + pos, size - pos); -} -#endif // SIMDUTF_SIMD_HAS_BYTEMASK - -simdutf_really_inline size_t utf16_length_from_utf8(const char *in, - size_t size) { - size_t pos = 0; - size_t count = 0; - // This algorithm could no doubt be improved! - for (; pos + 64 <= size; pos += 64) { - simd8x64 input(reinterpret_cast(in + pos)); - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - // We count one word for anything that is not a continuation (so - // leading bytes). - count += 64 - count_ones(utf8_continuation_mask); - int64_t utf8_4byte = input.gteq_unsigned(240); - count += count_ones(utf8_4byte); - } - return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos); -} - -} // namespace utf8 -} // unnamed namespace -} // namespace ppc64 -} // namespace simdutf -/* end file src/generic/utf8.h */ -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_UTF16 -/* begin file src/generic/utf16.h */ -namespace simdutf { -namespace ppc64 { -namespace { -namespace utf16 { - -template -simdutf_really_inline size_t count_code_points(const char16_t *in, - size_t size) { - size_t pos = 0; - size_t count = 0; - for (; pos < size / 32 * 32; pos += 32) { - simd16x32 input(reinterpret_cast(in + pos)); - if (!match_system(big_endian)) { - input.swap_bytes(); - } - uint64_t not_pair = input.not_in_range(0xDC00, 0xDFFF); - count += count_ones(not_pair) / 2; - } - return count + - scalar::utf16::count_code_points(in + pos, size - pos); -} - -template -simdutf_really_inline size_t utf8_length_from_utf16(const char16_t *in, - size_t size) { - size_t pos = 0; - size_t count = 0; - // This algorithm could no doubt be improved! - for (; pos < size / 32 * 32; pos += 32) { - simd16x32 input(reinterpret_cast(in + pos)); - if (!match_system(big_endian)) { - input.swap_bytes(); - } - uint64_t ascii_mask = input.lteq(0x7F); - uint64_t twobyte_mask = input.lteq(0x7FF); - uint64_t not_pair_mask = input.not_in_range(0xD800, 0xDFFF); - - size_t ascii_count = count_ones(ascii_mask) / 2; - size_t twobyte_count = count_ones(twobyte_mask & ~ascii_mask) / 2; - size_t threebyte_count = count_ones(not_pair_mask & ~twobyte_mask) / 2; - size_t fourbyte_count = 32 - count_ones(not_pair_mask) / 2; - count += 2 * fourbyte_count + 3 * threebyte_count + 2 * twobyte_count + - ascii_count; - } - return count + scalar::utf16::utf8_length_from_utf16(in + pos, - size - pos); -} - -template -simdutf_really_inline size_t utf32_length_from_utf16(const char16_t *in, - size_t size) { - return count_code_points(in, size); -} - -simdutf_really_inline void -change_endianness_utf16(const char16_t *in, size_t size, char16_t *output) { - size_t pos = 0; - - while (pos < size / 32 * 32) { - simd16x32 input(reinterpret_cast(in + pos)); - input.swap_bytes(); - input.store(reinterpret_cast(output)); - pos += 32; - output += 32; - } - - scalar::utf16::change_endianness_utf16(in + pos, size - pos, output); -} - -} // namespace utf16 -} // unnamed namespace -} // namespace ppc64 -} // namespace simdutf -/* end file src/generic/utf16.h */ -/* begin file src/generic/validate_utf16.h */ -namespace simdutf { -namespace ppc64 { -namespace { -namespace utf16 { -/* - UTF-16 validation - -------------------------------------------------- - - In UTF-16 code units in range 0xD800 to 0xDFFF have special meaning. - - In a vectorized algorithm we want to examine the most significant - nibble in order to select a fast path. If none of highest nibbles - are 0xD (13), than we are sure that UTF-16 chunk in a vector - register is valid. - - Let us analyze what we need to check if the nibble is 0xD. The - value of the preceding nibble determines what we have: - - 0xd000 .. 0xd7ff - a valid word - 0xd800 .. 0xdbff - low surrogate - 0xdc00 .. 0xdfff - high surrogate - - Other constraints we have to consider: - - there must not be two consecutive low surrogates (0xd800 .. 0xdbff) - - there must not be two consecutive high surrogates (0xdc00 .. 0xdfff) - - there must not be sole low surrogate nor high surrogate - - We are going to build three bitmasks based on the 3rd nibble: - - V = valid word, - - L = low surrogate (0xd800 .. 0xdbff) - - H = high surrogate (0xdc00 .. 0xdfff) - - 0 1 2 3 4 5 6 7 <--- word index - [ V | L | H | L | H | V | V | L ] - 1 0 0 0 0 1 1 0 - V = valid masks - 0 1 0 1 0 0 0 1 - L = low surrogate - 0 0 1 0 1 0 0 0 - H high surrogate - - - 1 0 0 0 0 1 1 0 V = valid masks - 0 1 0 1 0 0 0 0 a = L & (H >> 1) - 0 0 1 0 1 0 0 0 b = a << 1 - 1 1 1 1 1 1 1 0 c = V | a | b - ^ - the last bit can be zero, we just consume 7 - code units and recheck this word in the next iteration -*/ -template -const result validate_utf16_with_errors(const char16_t *input, size_t size) { - if (simdutf_unlikely(size == 0)) { - return result(error_code::SUCCESS, 0); - } - - const char16_t *start = input; - const char16_t *end = input + size; - - const auto v_d8 = simd8::splat(0xd8); - const auto v_f8 = simd8::splat(0xf8); - const auto v_fc = simd8::splat(0xfc); - const auto v_dc = simd8::splat(0xdc); - - while (input + simd16::SIZE * 2 < end) { - // 0. Load data: since the validation takes into account only higher - // byte of each word, we compress the two vectors into one which - // consists only the higher bytes. - auto in0 = simd16(input); - auto in1 = - simd16(input + simd16::SIZE / sizeof(char16_t)); - - // Function `utf16_gather_high_bytes` consumes two vectors of UTF-16 - // and yields a single vector having only higher bytes of characters. - const auto in = utf16_gather_high_bytes(in0, in1); - - // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy). - const auto surrogates_wordmask = (in & v_f8) == v_d8; - const uint16_t surrogates_bitmask = - static_cast(surrogates_wordmask.to_bitmask()); - if (surrogates_bitmask == 0x0000) { - input += 16; - } else { - // 2. We have some surrogates that have to be distinguished: - // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF) - // - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF) - // - // Fact: high surrogate has 11th bit set (3rd bit in the higher byte) - - // V - non-surrogate code units - // V = not surrogates_wordmask - const uint16_t V = static_cast(~surrogates_bitmask); - - // H - word-mask for high surrogates: the six highest bits are 0b1101'11 - const auto vH = (in & v_fc) == v_dc; - const uint16_t H = static_cast(vH.to_bitmask()); - - // L - word mask for low surrogates - // L = not H and surrogates_wordmask - const uint16_t L = static_cast(~H & surrogates_bitmask); - - const uint16_t a = static_cast( - L & (H >> 1)); // A low surrogate must be followed by high one. - // (A low surrogate placed in the 7th register's word - // is an exception we handle.) - const uint16_t b = static_cast( - a << 1); // Just mark that the opinput - startite fact is hold, - // thanks to that we have only two masks for valid case. - const uint16_t c = static_cast( - V | a | b); // Combine all the masks into the final one. - - if (c == 0xffff) { - // The whole input register contains valid UTF-16, i.e., - // either single code units or proper surrogate pairs. - input += 16; - } else if (c == 0x7fff) { - // The 15 lower code units of the input register contains valid UTF-16. - // The 15th word may be either a low or high surrogate. It the next - // iteration we 1) check if the low surrogate is followed by a high - // one, 2) reject sole high surrogate. - input += 15; - } else { - return result(error_code::SURROGATE, input - start); - } - } - } - - return result(error_code::SUCCESS, input - start); -} - -} // namespace utf16 -} // unnamed namespace -} // namespace ppc64 -} // namespace simdutf -/* end file src/generic/validate_utf16.h */ -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF32 -/* begin file src/generic/utf32.h */ -#include - -namespace simdutf { -namespace ppc64 { -namespace { -namespace utf32 { - -template T min(T a, T b) { return a <= b ? a : b; } - -simdutf_really_inline size_t utf8_length_from_utf32(const char32_t *input, - size_t length) { - using vector_u32 = simd32; - - const char32_t *start = input; - - // we add up to three ones in a single iteration (see the vectorized loop in - // section #2 below) - const size_t max_increment = 3; - - const size_t N = vector_u32::ELEMENTS; - -#if SIMDUTF_SIMD_HAS_UNSIGNED_CMP - const auto v_0000007f = vector_u32::splat(0x0000007f); - const auto v_000007ff = vector_u32::splat(0x000007ff); - const auto v_0000ffff = vector_u32::splat(0x0000ffff); -#else - const auto v_ffffff80 = vector_u32::splat(0xffffff80); - const auto v_fffff800 = vector_u32::splat(0xfffff800); - const auto v_ffff0000 = vector_u32::splat(0xffff0000); - const auto one = vector_u32::splat(1); -#endif // SIMDUTF_SIMD_HAS_UNSIGNED_CMP - - size_t counter = 0; - - // 1. vectorized loop unrolled 4 times - { - // we use vector of uint32 counters, this is why this limit is used - const size_t max_iterations = - std::numeric_limits::max() / (max_increment * 4); - size_t blocks = length / (N * 4); - length -= blocks * (N * 4); - while (blocks != 0) { - const size_t iterations = min(blocks, max_iterations); - blocks -= iterations; - - simd32 acc = vector_u32::zero(); - for (size_t i = 0; i < iterations; i++) { - const auto in0 = vector_u32(input + 0 * N); - const auto in1 = vector_u32(input + 1 * N); - const auto in2 = vector_u32(input + 2 * N); - const auto in3 = vector_u32(input + 3 * N); - -#if SIMDUTF_SIMD_HAS_UNSIGNED_CMP - acc -= as_vector_u32(in0 > v_0000007f); - acc -= as_vector_u32(in1 > v_0000007f); - acc -= as_vector_u32(in2 > v_0000007f); - acc -= as_vector_u32(in3 > v_0000007f); - - acc -= as_vector_u32(in0 > v_000007ff); - acc -= as_vector_u32(in1 > v_000007ff); - acc -= as_vector_u32(in2 > v_000007ff); - acc -= as_vector_u32(in3 > v_000007ff); - - acc -= as_vector_u32(in0 > v_0000ffff); - acc -= as_vector_u32(in1 > v_0000ffff); - acc -= as_vector_u32(in2 > v_0000ffff); - acc -= as_vector_u32(in3 > v_0000ffff); -#else - acc += min(one, in0 & v_ffffff80); - acc += min(one, in1 & v_ffffff80); - acc += min(one, in2 & v_ffffff80); - acc += min(one, in3 & v_ffffff80); - - acc += min(one, in0 & v_fffff800); - acc += min(one, in1 & v_fffff800); - acc += min(one, in2 & v_fffff800); - acc += min(one, in3 & v_fffff800); - - acc += min(one, in0 & v_ffff0000); - acc += min(one, in1 & v_ffff0000); - acc += min(one, in2 & v_ffff0000); - acc += min(one, in3 & v_ffff0000); -#endif // SIMDUTF_SIMD_HAS_UNSIGNED_CMP - - input += 4 * N; - } - - counter += acc.sum(); - } - } - - // 2. vectorized loop for tail - { - const size_t max_iterations = - std::numeric_limits::max() / max_increment; - size_t blocks = length / N; - length -= blocks * N; - while (blocks != 0) { - const size_t iterations = min(blocks, max_iterations); - blocks -= iterations; - - auto acc = vector_u32::zero(); - for (size_t i = 0; i < iterations; i++) { - const auto in = vector_u32(input); - -#if SIMDUTF_SIMD_HAS_UNSIGNED_CMP - acc -= as_vector_u32(in > v_0000007f); - acc -= as_vector_u32(in > v_000007ff); - acc -= as_vector_u32(in > v_0000ffff); -#else - acc += min(one, in & v_ffffff80); - acc += min(one, in & v_fffff800); - acc += min(one, in & v_ffff0000); -#endif // SIMDUTF_SIMD_HAS_UNSIGNED_CMP - - input += N; - } - - counter += acc.sum(); - } - } - - const size_t consumed = input - start; - if (consumed != 0) { - // We don't count 0th bytes in the vectorized loops above, this - // is why we need to count them in the end. - counter += consumed; - } - - return counter + scalar::utf32::utf8_length_from_utf32(input, length); -} - -} // namespace utf32 -} // unnamed namespace -} // namespace ppc64 -} // namespace simdutf -/* end file src/generic/utf32.h */ -/* begin file src/generic/validate_utf32.h */ -namespace simdutf { -namespace ppc64 { -namespace { -namespace utf32 { - -simdutf_really_inline bool validate(const char32_t *input, size_t size) { - if (simdutf_unlikely(size == 0)) { - // empty input is valid UTF-32. protect the implementation from - // handling nullptr - return true; - } - - const char32_t *end = input + size; - - using vector_u32 = simd32; - - const auto standardmax = vector_u32::splat(0x10ffff); - const auto offset = vector_u32::splat(0xffff2000); - const auto standardoffsetmax = vector_u32::splat(0xfffff7ff); - auto currentmax = vector_u32::zero(); - auto currentoffsetmax = vector_u32::zero(); - - constexpr size_t N = vector_u32::ELEMENTS; - - while (input + N < end) { - auto in = vector_u32(input); - if (!match_system(endianness::BIG)) { - in.swap_bytes(); - } - - currentmax = max(currentmax, in); - currentoffsetmax = max(currentoffsetmax, in + offset); - input += N; - } - - const auto too_large = currentmax > standardmax; - if (too_large.any()) { - return false; - } - - const auto surrogate = currentoffsetmax > standardoffsetmax; - if (surrogate.any()) { - return false; - } - - return scalar::utf32::validate(input, end - input); -} - -simdutf_really_inline result validate_with_errors(const char32_t *input, - size_t size) { - if (simdutf_unlikely(size == 0)) { - // empty input is valid UTF-32. protect the implementation from - // handling nullptr - return result(error_code::SUCCESS, 0); - } - - const char32_t *start = input; - const char32_t *end = input + size; - - using vector_u32 = simd32; - - const auto standardmax = vector_u32::splat(0x10ffff + 1); - const auto surrogate_mask = vector_u32::splat(0xfffff800); - const auto surrogate_byte = vector_u32::splat(0x0000d800); - - constexpr size_t N = vector_u32::ELEMENTS; - - while (input + N < end) { - auto in = vector_u32(input); - if (!match_system(endianness::BIG)) { - in.swap_bytes(); - } - - const auto too_large = in >= standardmax; - const auto surrogate = (in & surrogate_mask) == surrogate_byte; - - const auto combined = too_large | surrogate; - if (simdutf_unlikely(combined.any())) { - const size_t consumed = input - start; - auto sr = scalar::utf32::validate_with_errors(input, end - input); - sr.count += consumed; - - return sr; - } - - input += N; - } - - const size_t consumed = input - start; - auto sr = scalar::utf32::validate_with_errors(input, end - input); - sr.count += consumed; - - return sr; -} - -} // namespace utf32 -} // unnamed namespace -} // namespace ppc64 -} // namespace simdutf -/* end file src/generic/validate_utf32.h */ -#endif // SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_ASCII -/* begin file src/generic/ascii_validation.h */ -namespace simdutf { -namespace ppc64 { -namespace { -namespace ascii_validation { - -bool generic_validate_ascii(const char *input, size_t length) { - buf_block_reader<64> reader(reinterpret_cast(input), length); - uint8_t blocks[64]{}; - simd::simd8x64 running_or(blocks); - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - running_or |= in; - reader.advance(); - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - running_or |= in; - return running_or.is_ascii(); -} - -result generic_validate_ascii_with_errors(const char *input, size_t length) { - buf_block_reader<64> reader(reinterpret_cast(input), length); - size_t count{0}; - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - if (!in.is_ascii()) { - result res = scalar::ascii::validate_with_errors( - reinterpret_cast(input + count), length - count); - return result(res.error, count + res.count); - } - reader.advance(); - - count += 64; - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - if (!in.is_ascii()) { - result res = scalar::ascii::validate_with_errors( - reinterpret_cast(input + count), length - count); - return result(res.error, count + res.count); - } else { - return result(error_code::SUCCESS, length); - } -} - -} // namespace ascii_validation -} // unnamed namespace -} // namespace ppc64 -} // namespace simdutf -/* end file src/generic/ascii_validation.h */ -#endif // SIMDUTF_FEATURE_ASCII - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/generic/utf8_to_latin1/utf8_to_latin1.h */ -namespace simdutf { -namespace ppc64 { -namespace { -namespace utf8_to_latin1 { -using namespace simd; - -simdutf_really_inline simd8 -check_special_cases(const simd8 input, const simd8 prev1) { - // For UTF-8 to Latin 1, we can allow any ASCII character, and any - // continuation byte, but the non-ASCII leading bytes must be 0b11000011 or - // 0b11000010 and nothing else. - // - // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) - // Bit 1 = Too Long (ASCII followed by continuation) - // Bit 2 = Overlong 3-byte - // Bit 4 = Surrogate - // Bit 5 = Overlong 2-byte - // Bit 7 = Two Continuations - constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ - // 11______ 11______ - constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ - constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ - constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ - constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ - constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ - constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ - constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; - // 11110101 1000____ - // 1111011_ 1000____ - // 11111___ 1000____ - constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ - constexpr const uint8_t FORBIDDEN = 0xff; - - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // 0_______ ________ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - TOO_LONG, - // 10______ ________ - TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, - // 1100____ ________ - TOO_SHORT | OVERLONG_2, - // 1101____ ________ - FORBIDDEN, - // 1110____ ________ - FORBIDDEN, - // 1111____ ________ - FORBIDDEN); - constexpr const uint8_t CARRY = - TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . - const simd8 byte_1_low = - (prev1 & 0x0F) - .lookup_16( - // ____0000 ________ - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - // ____0001 ________ - CARRY | OVERLONG_2, - // ____001_ ________ - CARRY, CARRY, - - // ____0100 ________ - FORBIDDEN, - // ____0101 ________ - FORBIDDEN, - // ____011_ ________ - FORBIDDEN, FORBIDDEN, - - // ____1___ ________ - FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN, - // ____1101 ________ - FORBIDDEN, FORBIDDEN, FORBIDDEN); - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ________ 0_______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - TOO_SHORT, TOO_SHORT, - - // ________ 1000____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | - OVERLONG_4, - // ________ 1001____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - // ________ 101_____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - - // ________ 11______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); - return (byte_1_high & byte_1_low & byte_2_high); -} - -struct validating_transcoder { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; - - validating_transcoder() : error(uint8_t(0)) {} - // - // Check whether the current bytes are valid UTF-8. - // - simdutf_really_inline void check_utf8_bytes(const simd8 input, - const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ - // lead bytes (2, 3, 4-byte leads become large positive numbers instead of - // small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - this->error |= check_special_cases(input, prev1); - } - - simdutf_really_inline size_t convert(const char *in, size_t size, - char *latin1_output) { - size_t pos = 0; - char *start{latin1_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 16 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 16; margin--) { - leading_byte += (int8_t(in[margin - 1]) > - -65); // twos complement of -65 is 1011 1111 ... - } - // If the input is long enough, then we have that margin-1 is the eight last - // leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store((int8_t *)latin1_output); - latin1_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - uint64_t utf8_continuation_mask = - input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in - // this case, we also have ASCII to account for. - if (utf8_continuation_mask & 1) { - return 0; // error - } - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_latin1( - in + pos, utf8_end_of_code_point_mask, latin1_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - return 0; - } - if (pos < size) { - size_t howmany = - scalar::utf8_to_latin1::convert(in + pos, size - pos, latin1_output); - if (howmany == 0) { - return 0; - } - latin1_output += howmany; - } - return latin1_output - start; - } - - simdutf_really_inline result convert_with_errors(const char *in, size_t size, - char *latin1_output) { - size_t pos = 0; - char *start{latin1_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 8 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > -65); - } - // If the input is long enough, then we have that margin-1 is the eight last - // leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store((int8_t *)latin1_output); - latin1_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - if (errors()) { - // rewind_and_convert_with_errors will seek a potential error from - // in+pos onward, with the ability to go back up to pos bytes, and - // read size-pos bytes forward. - result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( - pos, in + pos, size - pos, latin1_output); - res.count += pos; - return res; - } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_latin1( - in + pos, utf8_end_of_code_point_mask, latin1_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - // rewind_and_convert_with_errors will seek a potential error from in+pos - // onward, with the ability to go back up to pos bytes, and read size-pos - // bytes forward. - result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( - pos, in + pos, size - pos, latin1_output); - res.count += pos; - return res; - } - if (pos < size) { - // rewind_and_convert_with_errors will seek a potential error from in+pos - // onward, with the ability to go back up to pos bytes, and read size-pos - // bytes forward. - result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( - pos, in + pos, size - pos, latin1_output); - if (res.error) { // In case of error, we want the error position - res.count += pos; - return res; - } else { // In case of success, we want the number of word written - latin1_output += res.count; - } - } - return result(error_code::SUCCESS, latin1_output - start); - } - - simdutf_really_inline bool errors() const { - return this->error.any_bits_set_anywhere(); - } - -}; // struct utf8_checker -} // namespace utf8_to_latin1 -} // unnamed namespace -} // namespace ppc64 -} // namespace simdutf -/* end file src/generic/utf8_to_latin1/utf8_to_latin1.h */ -/* begin file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ -namespace simdutf { -namespace ppc64 { -namespace { -namespace utf8_to_latin1 { -using namespace simd; - -simdutf_really_inline size_t convert_valid(const char *in, size_t size, - char *latin1_output) { - size_t pos = 0; - char *start{latin1_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the last - // 16 bytes, and if the data is valid, then it is entirely safe because 16 - // UTF-8 bytes generate much more than 8 bytes. However, you cannot generally - // assume that you have valid UTF-8 input, so we are going to go back from the - // end counting 8 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > - -65); // twos complement of -65 is 1011 1111 ... - } - // If the input is long enough, then we have that margin-1 is the eight last - // leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store((int8_t *)latin1_output); - latin1_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, it - // is not good enough. - uint64_t utf8_continuation_mask = - input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in - // this case, we also have ASCII to account for. - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_latin1( - in + pos, utf8_end_of_code_point_mask, latin1_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (pos < size) { - size_t howmany = scalar::utf8_to_latin1::convert_valid(in + pos, size - pos, - latin1_output); - latin1_output += howmany; - } - return latin1_output - start; -} - -} // namespace utf8_to_latin1 -} // namespace -} // namespace ppc64 -} // namespace simdutf - // namespace simdutf -/* end file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_BASE64 -/* begin file src/generic/base64.h */ -/** - * References and further reading: - * - * Wojciech Muła, Daniel Lemire, Base64 encoding and decoding at almost the - * speed of a memory copy, Software: Practice and Experience 50 (2), 2020. - * https://arxiv.org/abs/1910.05109 - * - * Wojciech Muła, Daniel Lemire, Faster Base64 Encoding and Decoding using AVX2 - * Instructions, ACM Transactions on the Web 12 (3), 2018. - * https://arxiv.org/abs/1704.00605 - * - * Simon Josefsson. 2006. The Base16, Base32, and Base64 Data Encodings. - * https://tools.ietf.org/html/rfc4648. (2006). Internet Engineering Task Force, - * Request for Comments: 4648. - * - * Alfred Klomp. 2014a. Fast Base64 encoding/decoding with SSE vectorization. - * http://www.alfredklomp.com/programming/sse-base64/. (2014). - * - * Alfred Klomp. 2014b. Fast Base64 stream encoder/decoder in C99, with SIMD - * acceleration. https://github.com/aklomp/base64. (2014). - * - * Hanson Char. 2014. A Fast and Correct Base 64 Codec. (2014). - * https://aws.amazon.com/blogs/developer/a-fast-and-correct-base-64-codec/ - * - * Nick Kopp. 2013. Base64 Encoding on a GPU. - * https://www.codeproject.com/Articles/276993/Base-Encoding-on-a-GPU. (2013). - */ -namespace simdutf { -namespace ppc64 { -namespace { -namespace base64 { - -/* - The following template function implements API for Base64 decoding. - - An implementation is responsible for providing the `block64` type and - associated methods that perform actual conversion. Please refer - to any vectorized implementation to learn the API of these procedures. -*/ -template -full_result -compress_decode_base64(char *dst, const chartype *src, size_t srclen, - base64_options options, - last_chunk_handling_options last_chunk_options) { - const uint8_t *to_base64 = base64_url ? tables::base64::to_base64_url_value - : tables::base64::to_base64_value; - size_t equallocation = - srclen; // location of the first padding character if any - // skip trailing spaces - while (!ignore_garbage && srclen > 0 && - scalar::base64::is_eight_byte(src[srclen - 1]) && - to_base64[uint8_t(src[srclen - 1])] == 64) { - srclen--; - } - size_t equalsigns = 0; - if (!ignore_garbage && srclen > 0 && src[srclen - 1] == '=') { - equallocation = srclen - 1; - srclen--; - equalsigns = 1; - // skip trailing spaces - while (srclen > 0 && scalar::base64::is_eight_byte(src[srclen - 1]) && - to_base64[uint8_t(src[srclen - 1])] == 64) { - srclen--; - } - if (srclen > 0 && src[srclen - 1] == '=') { - equallocation = srclen - 1; - srclen--; - equalsigns = 2; - } - } - if (srclen == 0) { - if (!ignore_garbage && equalsigns > 0) { - if (last_chunk_options == last_chunk_handling_options::strict) { - return {BASE64_INPUT_REMAINDER, 0, 0}; - } else if (last_chunk_options == - last_chunk_handling_options::stop_before_partial) { - return {SUCCESS, 0, 0}; - } - return {INVALID_BASE64_CHARACTER, equallocation, 0}; - } - return {SUCCESS, 0, 0}; - } - char *end_of_safe_64byte_zone = - (srclen + 3) / 4 * 3 >= 63 ? dst + (srclen + 3) / 4 * 3 - 63 : dst; - - const chartype *const srcinit = src; - const char *const dstinit = dst; - const chartype *const srcend = src + srclen; - - constexpr size_t block_size = 6; - static_assert(block_size >= 2, "block_size must be at least two"); - char buffer[block_size * 64]; - char *bufferptr = buffer; - if (srclen >= 64) { - const chartype *const srcend64 = src + srclen - 64; - while (src <= srcend64) { - block64 b(src); - src += 64; - uint64_t error = 0; - const uint64_t badcharmask = - b.to_base64_mask(&error); - if (!ignore_garbage && error) { - src -= 64; - const size_t error_offset = trailing_zeroes(error); - return {error_code::INVALID_BASE64_CHARACTER, - size_t(src - srcinit + error_offset), size_t(dst - dstinit)}; - } - if (badcharmask != 0) { - bufferptr += b.compress_block(badcharmask, bufferptr); - } else if (bufferptr != buffer) { - b.copy_block(bufferptr); - bufferptr += 64; - } else { - if (dst >= end_of_safe_64byte_zone) { - b.base64_decode_block_safe(dst); - } else { - b.base64_decode_block(dst); - } - dst += 48; - } - if (bufferptr >= (block_size - 1) * 64 + buffer) { - for (size_t i = 0; i < (block_size - 2); i++) { - base64_decode_block(dst, buffer + i * 64); - dst += 48; - } - if (dst >= end_of_safe_64byte_zone) { - base64_decode_block_safe(dst, buffer + (block_size - 2) * 64); - } else { - base64_decode_block(dst, buffer + (block_size - 2) * 64); - } - dst += 48; - std::memcpy(buffer, buffer + (block_size - 1) * 64, - 64); // 64 might be too much - bufferptr -= (block_size - 1) * 64; - } - } - } - - char *buffer_start = buffer; - // Optimization note: if this is almost full, then it is worth our - // time, otherwise, we should just decode directly. - int last_block = (int)((bufferptr - buffer_start) % 64); - if (last_block != 0 && srcend - src + last_block >= 64) { - - while ((bufferptr - buffer_start) % 64 != 0 && src < srcend) { - uint8_t val = to_base64[uint8_t(*src)]; - *bufferptr = char(val); - if (!ignore_garbage && - (!scalar::base64::is_eight_byte(*src) || val > 64)) { - return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), - size_t(dst - dstinit)}; - } - bufferptr += (val <= 63); - src++; - } - } - - for (; buffer_start + 64 <= bufferptr; buffer_start += 64) { - if (dst >= end_of_safe_64byte_zone) { - base64_decode_block_safe(dst, buffer_start); - } else { - base64_decode_block(dst, buffer_start); - } - dst += 48; - } - if ((bufferptr - buffer_start) % 64 != 0) { - while (buffer_start + 4 < bufferptr) { - uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + - (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + - (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + - (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) - << 8; -#if !SIMDUTF_IS_BIG_ENDIAN - triple = scalar::u32_swap_bytes(triple); -#endif - std::memcpy(dst, &triple, 3); - - dst += 3; - buffer_start += 4; - } - if (buffer_start + 4 <= bufferptr) { - uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + - (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + - (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + - (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) - << 8; -#if !SIMDUTF_IS_BIG_ENDIAN - triple = scalar::u32_swap_bytes(triple); -#endif - std::memcpy(dst, &triple, 3); - - dst += 3; - buffer_start += 4; - } - // we may have 1, 2 or 3 bytes left and we need to decode them so let us - // backtrack - int leftover = int(bufferptr - buffer_start); - while (leftover > 0) { - if (!ignore_garbage) { - while (to_base64[uint8_t(*(src - 1))] == 64) { - src--; - } - } else { - while (to_base64[uint8_t(*(src - 1))] >= 64) { - src--; - } - } - src--; - leftover--; - } - } - if (src < srcend + equalsigns) { - full_result r = scalar::base64::base64_tail_decode( - dst, src, srcend - src, equalsigns, options, last_chunk_options); - r.input_count += size_t(src - srcinit); - if (r.error == error_code::INVALID_BASE64_CHARACTER || - r.error == error_code::BASE64_EXTRA_BITS) { - return r; - } else { - r.output_count += size_t(dst - dstinit); - } - if (!ignore_garbage && last_chunk_options != stop_before_partial && - r.error == error_code::SUCCESS && equalsigns > 0) { - // additional checks - if ((r.output_count % 3 == 0) || - ((r.output_count % 3) + 1 + equalsigns != 4)) { - r.error = error_code::INVALID_BASE64_CHARACTER; - r.input_count = equallocation; - } - } - return r; - } - if (!ignore_garbage && equalsigns > 0) { - if ((size_t(dst - dstinit) % 3 == 0) || - ((size_t(dst - dstinit) % 3) + 1 + equalsigns != 4)) { - return {INVALID_BASE64_CHARACTER, equallocation, size_t(dst - dstinit)}; - } - } - return {SUCCESS, srclen, size_t(dst - dstinit)}; -} - -} // namespace base64 -} // unnamed namespace -} // namespace ppc64 -} // namespace simdutf -/* end file src/generic/base64.h */ -#endif // SIMDUTF_FEATURE_BASE64 - -/* begin file src/ppc64/templates.cpp */ -/* - Template `convert_impl` implements generic conversion routine between - different encodings. Procedure returns the number of written elements, - or zero in the case of error. - - Parameters: - * VectorizedConvert - vectorized procedure that returns structure having - three fields: error_code (err), const Source* (input), Destination* - (output) - * ScalarConvert - scalar procedure that carries on conversion of tail - * Source - type of input char (like char16_t, char) - * Destination - type of input char -*/ -template -size_t convert_impl(VectorizedConvert vectorized_convert, - ScalarConvert scalar_convert, const Source *buf, size_t len, - Destination *output) { - const auto vr = vectorized_convert(buf, len, output); - const size_t consumed = vr.input - buf; - const size_t written = vr.output - output; - if (vr.err != simdutf::error_code::SUCCESS) { - if (vr.err == simdutf::error_code::OTHER) { - // Vectorized procedure detected an error, but does not know - // exact position. The scalar procedure rescan the portion of - // input and figure out where the error is located. - return scalar_convert(vr.input, len - consumed, vr.output); - } - return 0; - } - - if (consumed == len) { - return written; - } - - const auto ret = scalar_convert(vr.input, len - consumed, vr.output); - if (ret == 0) { - return 0; - } - - return written + ret; -} - -/* - Template `convert_with_errors_impl` implements generic conversion routine - between different encodings. Procedure returns a `result` instance --- - please refer to its documentation for details. - - Parameters: - * VectorizedConvert - vectorized procedure that returns structure having - three fields: error_code (err), const Source* (input), Destination* - (output) - * ScalarConvert - scalar procedure that carries on conversion of tail - * Source - type of input char (like char16_t, char) - * Destination - type of input char -*/ -template -simdutf::result convert_with_errors_impl(VectorizedConvert vectorized_convert, - ScalarConvert scalar_convert, - const Source *buf, size_t len, - Destination *output) { - - const auto vr = vectorized_convert(buf, len, output); - const size_t consumed = vr.input - buf; - const size_t written = vr.output - output; - if (vr.err != simdutf::error_code::SUCCESS) { - if (vr.err == simdutf::error_code::OTHER) { - // Vectorized procedure detected an error, but does not know - // exact position. The scalar procedure rescan the portion of - // input and figure out where the error is located. - auto sr = scalar_convert(vr.input, len - consumed, vr.output); - sr.count += consumed; - return sr; - } - return simdutf::result(vr.err, consumed); - } - - if (consumed == len) { - return simdutf::result(simdutf::error_code::SUCCESS, written); - } - - simdutf::result sr = scalar_convert(vr.input, len - consumed, vr.output); - if (sr.is_ok()) { - sr.count += written; - } else { - sr.count += consumed; - } - - return sr; -} -/* end file src/ppc64/templates.cpp */ - -#ifdef SIMDUTF_INTERNAL_TESTS - #if SIMDUTF_FEATURE_BASE64 - #include "ppc64_base64_internal_tests.cpp" - #endif // SIMDUTF_FEATURE_BASE64 -#endif // SIMDUTF_INTERNAL_TESTS -// -// Implementation-specific overrides -// -namespace simdutf { -namespace ppc64 { - -#if SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_warn_unused int -implementation::detect_encodings(const char *input, - size_t length) const noexcept { - // If there is a BOM, then we trust it. - auto bom_encoding = simdutf::BOM::check_bom(input, length); - if (bom_encoding != encoding_type::unspecified) { - return bom_encoding; - } - int out = 0; - // todo: reimplement as a one-pass algorithm. - if (validate_utf8(input, length)) { - out |= encoding_type::UTF8; - } - if ((length % 2) == 0) { - if (validate_utf16le(reinterpret_cast(input), - length / 2)) { - out |= encoding_type::UTF16_LE; - } - } - if ((length % 4) == 0) { - if (validate_utf32(reinterpret_cast(input), length / 4)) { - out |= encoding_type::UTF32_LE; - } - } - return out; -} -#endif // SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_warn_unused bool -implementation::validate_utf8(const char *buf, size_t len) const noexcept { - return ppc64::utf8_validation::generic_validate_utf8(buf, len); -} -#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 -simdutf_warn_unused result implementation::validate_utf8_with_errors( - const char *buf, size_t len) const noexcept { - return ppc64::utf8_validation::generic_validate_utf8_with_errors(buf, len); -} -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_ASCII -simdutf_warn_unused bool -implementation::validate_ascii(const char *buf, size_t len) const noexcept { - return ppc64::ascii_validation::generic_validate_ascii(buf, len); -} - -simdutf_warn_unused result implementation::validate_ascii_with_errors( - const char *buf, size_t len) const noexcept { - return ppc64::ascii_validation::generic_validate_ascii_with_errors(buf, len); -} -#endif // SIMDUTF_FEATURE_ASCII - -#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_warn_unused bool -implementation::validate_utf16le(const char16_t *buf, - size_t len) const noexcept { - const auto res = - ppc64::utf16::validate_utf16_with_errors(buf, len); - if (res.is_err()) { - return false; - } - - if (res.count != len) { - return scalar::utf16::validate(buf + res.count, - len - res.count); - } - - return true; -} -#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused bool -implementation::validate_utf16be(const char16_t *buf, - size_t len) const noexcept { - return validate_utf16be_with_errors(buf, len).is_ok(); -} - -void implementation::to_well_formed_utf16le(const char16_t *input, size_t len, - char16_t *output) const noexcept { - return scalar::utf16::to_well_formed_utf16(input, len, - output); -} - -void implementation::to_well_formed_utf16be(const char16_t *input, size_t len, - char16_t *output) const noexcept { - return scalar::utf16::to_well_formed_utf16(input, len, - output); -} - -simdutf_warn_unused result implementation::validate_utf16le_with_errors( - const char16_t *buf, size_t len) const noexcept { - const auto res = - ppc64::utf16::validate_utf16_with_errors(buf, len); - if (res.count != len) { - auto scalar = scalar::utf16::validate_with_errors( - buf + res.count, len - res.count); - scalar.count += res.count; - return scalar; - } - - return res; -} - -simdutf_warn_unused result implementation::validate_utf16be_with_errors( - const char16_t *buf, size_t len) const noexcept { - const auto res = - ppc64::utf16::validate_utf16_with_errors(buf, len); - if (res.count != len) { - auto scalar = scalar::utf16::validate_with_errors( - buf + res.count, len - res.count); - scalar.count += res.count; - return scalar; - } - - return res; -} -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_warn_unused bool -implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept { - return utf32::validate(buf, len); -} -#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused result implementation::validate_utf32_with_errors( - const char32_t *buf, size_t len) const noexcept { - return utf32::validate_with_errors(buf, len); -} -#endif // SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( - const char *buf, size_t len, char *utf8_output) const noexcept { - const auto ret = ppc64_convert_latin1_to_utf8(buf, len, utf8_output); - size_t converted_chars = ret.second - utf8_output; - - if (ret.first != buf + len) { - const size_t scalar_converted_chars = scalar::latin1_to_utf8::convert( - ret.first, len - (ret.first - buf), ret.second); - converted_chars += scalar_converted_chars; - } - - return converted_chars; -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - size_t n = - ppc64_convert_latin1_to_utf16(buf, len, utf16_output); - if (n < len) { - n += scalar::latin1_to_utf16::convert(buf + n, len - n, - utf16_output + n); - } - - return n; -} - -simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - size_t n = - ppc64_convert_latin1_to_utf16(buf, len, utf16_output); - if (n < len) { - n += scalar::latin1_to_utf16::convert(buf + n, len - n, - utf16_output + n); - } - - return n; -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept { - const auto ret = ppc64_convert_latin1_to_utf32(buf, len, utf32_output); - if (ret.first != buf + len) { - const size_t processed = ret.first - buf; - scalar::latin1_to_utf32::convert(ret.first, len - processed, ret.second); - } - - return len; -} -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept { - utf8_to_latin1::validating_transcoder converter; - return converter.convert(buf, len, latin1_output); -} - -simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors( - const char *buf, size_t len, char *latin1_output) const noexcept { - utf8_to_latin1::validating_transcoder converter; - return converter.convert_with_errors(buf, len, latin1_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept { - return ppc64::utf8_to_latin1::convert_valid(buf, len, latin1_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - utf8_to_utf16::validating_transcoder converter; - return converter.convert(buf, len, utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - utf8_to_utf16::validating_transcoder converter; - return converter.convert(buf, len, utf16_output); -} - -simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - utf8_to_utf16::validating_transcoder converter; - return converter.convert_with_errors(buf, len, - utf16_output); -} - -simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - utf8_to_utf16::validating_transcoder converter; - return converter.convert_with_errors(buf, len, utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - return utf8_to_utf16::convert_valid(buf, len, - utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - return utf8_to_utf16::convert_valid(buf, len, utf16_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::convert_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept { - utf8_to_utf32::validating_transcoder converter; - return converter.convert(buf, len, utf32_output); -} - -simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors( - const char *buf, size_t len, char32_t *utf32_output) const noexcept { - utf8_to_utf32::validating_transcoder converter; - return converter.convert_with_errors(buf, len, utf32_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( - const char *input, size_t size, char32_t *utf32_output) const noexcept { - return utf8_to_utf32::convert_valid(input, size, utf32_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - - return convert_impl(ppc64_convert_utf16_to_latin1, - scalar::utf16_to_latin1::convert, buf, - len, latin1_output); -} - -simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - - return convert_impl(ppc64_convert_utf16_to_latin1, - scalar::utf16_to_latin1::convert, buf, - len, latin1_output); -} - -simdutf_warn_unused result -implementation::convert_utf16le_to_latin1_with_errors( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - - return convert_with_errors_impl( - ppc64_convert_utf16_to_latin1, - scalar::utf16_to_latin1::convert_with_errors, buf, - len, latin1_output); -} - -simdutf_warn_unused result -implementation::convert_utf16be_to_latin1_with_errors( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - - return convert_with_errors_impl( - ppc64_convert_utf16_to_latin1, - scalar::utf16_to_latin1::convert_with_errors, buf, len, - latin1_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - // optimization opportunity: we could provide an optimized function. - return convert_utf16be_to_latin1(buf, len, latin1_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - // optimization opportunity: we could provide an optimized function. - return convert_utf16le_to_latin1(buf, len, latin1_output); -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - - return convert_impl(ppc64_convert_utf16_to_utf8, - scalar::utf16_to_utf8::convert, buf, - len, utf8_output); -} - -simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - - return convert_impl(ppc64_convert_utf16_to_utf8, - scalar::utf16_to_utf8::convert, buf, len, - utf8_output); -} - -simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - - return convert_with_errors_impl( - ppc64_convert_utf16_to_utf8, - scalar::utf16_to_utf8::convert_with_errors, buf, len, - utf8_output); -} - -simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - - return convert_with_errors_impl( - ppc64_convert_utf16_to_utf8, - scalar::utf16_to_utf8::convert_with_errors, buf, len, - utf8_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return convert_utf16le_to_utf8(buf, len, utf8_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return convert_utf16be_to_utf8(buf, len, utf8_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( - const char32_t *buf, size_t len, char *latin1_output) const noexcept { - return convert_impl(ppc64_convert_utf32_to_latin1, - scalar::utf32_to_latin1::convert, buf, len, - latin1_output); -} - -simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors( - const char32_t *buf, size_t len, char *latin1_output) const noexcept { - return convert_with_errors_impl( - ppc64_convert_utf32_to_latin1, - scalar::utf32_to_latin1::convert_with_errors, buf, len, latin1_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1( - const char32_t *buf, size_t len, char *latin1_output) const noexcept { - return convert_impl(ppc64_convert_utf32_to_latin1, - scalar::utf32_to_latin1::convert, buf, len, - latin1_output); -} -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_output) const noexcept { - return convert_impl(ppc64_convert_utf32_to_utf8, - scalar::utf32_to_utf8::convert, buf, len, utf8_output); -} - -simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( - const char32_t *buf, size_t len, char *utf8_output) const noexcept { - return convert_with_errors_impl( - ppc64_convert_utf32_to_utf8, - scalar::utf32_to_utf8::convert_with_errors, buf, len, utf8_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_output) const noexcept { - return convert_impl(ppc64_convert_utf32_to_utf8, - scalar::utf32_to_utf8::convert, buf, len, utf8_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - - return convert_impl(ppc64_convert_utf32_to_utf16, - scalar::utf32_to_utf16::convert, buf, - len, utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - - return convert_impl( - ppc64_convert_utf32_to_utf16, - scalar::utf32_to_utf16::convert, buf, len, utf16_output); -} - -simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - - return convert_with_errors_impl( - ppc64_convert_utf32_to_utf16, - scalar::utf32_to_utf16::convert_with_errors, buf, len, - utf16_output); -} - -simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - - return convert_with_errors_impl( - ppc64_convert_utf32_to_utf16, - scalar::utf32_to_utf16::convert_with_errors, buf, len, - utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - - return convert_impl( - ppc64_convert_utf32_to_utf16, - scalar::utf32_to_utf16::convert, buf, len, - utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - - return convert_impl( - ppc64_convert_utf32_to_utf16, - scalar::utf32_to_utf16::convert, buf, len, utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return convert_impl(ppc64_convert_utf16_to_utf32, - scalar::utf16_to_utf32::convert, buf, - len, utf32_output); -} - -simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return convert_impl(ppc64_convert_utf16_to_utf32, - scalar::utf16_to_utf32::convert, buf, - len, utf32_output); -} - -simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return convert_with_errors_impl( - ppc64_convert_utf16_to_utf32, - scalar::utf16_to_utf32::convert_with_errors, buf, len, - utf32_output); -} - -simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return convert_with_errors_impl( - ppc64_convert_utf16_to_utf32, - scalar::utf16_to_utf32::convert_with_errors, buf, len, - utf32_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return convert_utf16le_to_utf32(buf, len, utf32_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return convert_utf16be_to_utf32(buf, len, utf32_output); -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 -void implementation::change_endianness_utf16(const char16_t *input, - size_t length, - char16_t *output) const noexcept { - utf16::change_endianness_utf16(input, length, output); -} - -simdutf_warn_unused size_t implementation::count_utf16le( - const char16_t *input, size_t length) const noexcept { - return utf16::count_code_points(input, length); -} - -simdutf_warn_unused size_t implementation::count_utf16be( - const char16_t *input, size_t length) const noexcept { - return utf16::count_code_points(input, length); -} -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 -simdutf_warn_unused size_t -implementation::count_utf8(const char *input, size_t length) const noexcept { - return utf8::count_code_points(input, length); -} -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::latin1_length_from_utf8( - const char *buf, size_t len) const noexcept { - return count_utf8(buf, len); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::utf8_length_from_latin1( - const char *input, size_t length) const noexcept { - const auto ret = ppc64_utf8_length_from_latin1(input, length); - const size_t consumed = ret.first - input; - - if (consumed == length) { - return ret.second; - } - - const auto scalar = - scalar::latin1::utf8_length_from_latin1(ret.first, length - consumed); - return scalar + ret.second; -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t implementation::utf8_length_from_utf16le( - const char16_t *input, size_t length) const noexcept { - return utf16::utf8_length_from_utf16(input, length); -} - -simdutf_warn_unused size_t implementation::utf8_length_from_utf16be( - const char16_t *input, size_t length) const noexcept { - return utf16::utf8_length_from_utf16(input, length); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::utf32_length_from_utf16le( - const char16_t *input, size_t length) const noexcept { - return utf16::utf32_length_from_utf16(input, length); -} - -simdutf_warn_unused size_t implementation::utf32_length_from_utf16be( - const char16_t *input, size_t length) const noexcept { - return utf16::utf32_length_from_utf16(input, length); -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t implementation::utf16_length_from_utf8( - const char *input, size_t length) const noexcept { - return utf8::utf16_length_from_utf8(input, length); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::utf8_length_from_utf32( - const char32_t *input, size_t length) const noexcept { - return utf32::utf8_length_from_utf32(input, length); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::utf16_length_from_utf32( - const char32_t *input, size_t length) const noexcept { - return scalar::utf32::utf16_length_from_utf32(input, length); -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::utf32_length_from_utf8( - const char *input, size_t length) const noexcept { - return utf8::count_code_points(input, length); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_BASE64 -simdutf_warn_unused size_t implementation::maximal_binary_length_from_base64( - const char *input, size_t length) const noexcept { - return scalar::base64::maximal_binary_length_from_base64(input, length); -} - -simdutf_warn_unused result implementation::base64_to_binary( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - if (options & base64_url) { - if (options == base64_options::base64_url_accept_garbage) { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } else { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } - } else { - if (options == base64_options::base64_default_accept_garbage) { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } else { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } - } -} - -simdutf_warn_unused full_result implementation::base64_to_binary_details( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - if (options & base64_url) { - if (options == base64_options::base64_url_accept_garbage) { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } else { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } - } else { - if (options == base64_options::base64_default_accept_garbage) { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } else { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } - } -} - -simdutf_warn_unused result implementation::base64_to_binary( - const char16_t *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - if (options & base64_url) { - if (options == base64_options::base64_url_accept_garbage) { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } else { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } - } else { - if (options == base64_options::base64_default_accept_garbage) { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } else { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } - } -} - -simdutf_warn_unused full_result implementation::base64_to_binary_details( - const char16_t *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - if (options & base64_url) { - if (options == base64_options::base64_url_accept_garbage) { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } else { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } - } else { - if (options == base64_options::base64_default_accept_garbage) { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } else { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } - } -} - -size_t implementation::binary_to_base64(const char *input, size_t length, - char *output, - base64_options options) const noexcept { - if (options & base64_url) { - return encode_base64(output, input, length, options); - } else { - return encode_base64(output, input, length, options); - } -} -#endif // SIMDUTF_FEATURE_BASE64 - -#ifdef SIMDUTF_INTERNAL_TESTS -std::vector -implementation::internal_tests() const { - #define entry(proc) \ - TestProcedure { #proc, proc } - return {entry(base64_encoding_translate_6bit_values), - entry(base64_encoding_expand_6bit_fields), - entry(base64_decoding_valid), - entry(base64_decoding_invalid_ignore_errors), - entry(base64url_decoding_invalid_ignore_errors), - entry(base64_decoding_invalid_strict_errors), - entry(base64url_decoding_invalid_strict_errors), - entry(base64_decoding_pack), - entry(base64_compress)}; - #undef entry -} -#endif - -} // namespace ppc64 -} // namespace simdutf - -/* begin file src/simdutf/ppc64/end.h */ -/* end file src/simdutf/ppc64/end.h */ -/* end file src/ppc64/implementation.cpp */ -#endif -#if SIMDUTF_IMPLEMENTATION_RVV -/* begin file src/rvv/implementation.cpp */ -/* begin file src/simdutf/rvv/begin.h */ -// redefining SIMDUTF_IMPLEMENTATION to "rvv" -// #define SIMDUTF_IMPLEMENTATION rvv - -#if SIMDUTF_CAN_ALWAYS_RUN_RVV -// nothing needed. -#else -SIMDUTF_TARGET_RVV -#endif -/* end file src/simdutf/rvv/begin.h */ -namespace simdutf { -namespace rvv { -namespace { -#ifndef SIMDUTF_RVV_H - #error "rvv.h must be included" -#endif - -} // unnamed namespace -} // namespace rvv -} // namespace simdutf - -// -// Implementation-specific overrides -// -namespace simdutf { -namespace rvv { -/* begin file src/rvv/rvv_helpers.inl.cpp */ -template -simdutf_really_inline static size_t -rvv_utf32_store_utf16_m4(uint16_t *dst, vuint32m4_t utf32, size_t vl, - vbool4_t m4even) { - /* convert [000000000000aaaa|aaaaaabbbbbbbbbb] - * to [110111bbbbbbbbbb|110110aaaaaaaaaa] */ - vuint32m4_t sur = __riscv_vsub_vx_u32m4(utf32, 0x10000, vl); - sur = __riscv_vor_vv_u32m4(__riscv_vsll_vx_u32m4(sur, 16, vl), - __riscv_vsrl_vx_u32m4(sur, 10, vl), vl); - sur = __riscv_vand_vx_u32m4(sur, 0x3FF03FF, vl); - sur = __riscv_vor_vx_u32m4(sur, 0xDC00D800, vl); - /* merge 1 byte utf32 and 2 byte sur */ - vbool8_t m4 = __riscv_vmsgtu_vx_u32m4_b8(utf32, 0xFFFF, vl); - vuint16m4_t utf32_16 = __riscv_vreinterpret_v_u32m4_u16m4( - __riscv_vmerge_vvm_u32m4(utf32, sur, m4, vl)); - /* compress and store */ - vbool4_t mOut = __riscv_vmor_mm_b4( - __riscv_vmsne_vx_u16m4_b4(utf32_16, 0, vl * 2), m4even, vl * 2); - vuint16m4_t vout = __riscv_vcompress_vm_u16m4(utf32_16, mOut, vl * 2); - vl = __riscv_vcpop_m_b4(mOut, vl * 2); - __riscv_vse16_v_u16m4(dst, simdutf_byteflip(vout, vl), vl); - return vl; -}; -/* end file src/rvv/rvv_helpers.inl.cpp */ - -/* begin file src/rvv/rvv_length_from.inl.cpp */ -#if SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t -implementation::count_utf16le(const char16_t *src, size_t len) const noexcept { - return utf32_length_from_utf16le(src, len); -} - -simdutf_warn_unused size_t -implementation::count_utf16be(const char16_t *src, size_t len) const noexcept { - return utf32_length_from_utf16be(src, len); -} -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 -simdutf_warn_unused size_t -implementation::count_utf8(const char *src, size_t len) const noexcept { - return utf32_length_from_utf8(src, len); -} -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::latin1_length_from_utf8( - const char *src, size_t len) const noexcept { - return utf32_length_from_utf8(src, len); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::utf32_length_from_utf8( - const char *src, size_t len) const noexcept { - size_t count = 0; - for (size_t vl; len > 0; len -= vl, src += vl) { - vl = __riscv_vsetvl_e8m8(len); - vint8m8_t v = __riscv_vle8_v_i8m8((int8_t *)src, vl); - vbool1_t mask = __riscv_vmsgt_vx_i8m8_b1(v, -65, vl); - count += __riscv_vcpop_m_b1(mask, vl); - } - return count; -} -#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32 -template -simdutf_really_inline static size_t -rvv_utf32_length_from_utf16(const char16_t *src, size_t len) { - size_t count = 0; - for (size_t vl; len > 0; len -= vl, src += vl) { - vl = __riscv_vsetvl_e16m8(len); - vuint16m8_t v = __riscv_vle16_v_u16m8((uint16_t *)src, vl); - v = simdutf_byteflip(v, vl); - vbool2_t notHigh = - __riscv_vmor_mm_b2(__riscv_vmsgtu_vx_u16m8_b2(v, 0xDFFF, vl), - __riscv_vmsltu_vx_u16m8_b2(v, 0xDC00, vl), vl); - count += __riscv_vcpop_m_b2(notHigh, vl); - } - return count; -} - -simdutf_warn_unused size_t implementation::utf32_length_from_utf16le( - const char16_t *src, size_t len) const noexcept { - return rvv_utf32_length_from_utf16(src, len); -} - -simdutf_warn_unused size_t implementation::utf32_length_from_utf16be( - const char16_t *src, size_t len) const noexcept { - if (supports_zvbb()) - return rvv_utf32_length_from_utf16(src, len); - else - return rvv_utf32_length_from_utf16(src, len); -} -#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::utf8_length_from_latin1( - const char *src, size_t len) const noexcept { - size_t count = len; - for (size_t vl; len > 0; len -= vl, src += vl) { - vl = __riscv_vsetvl_e8m8(len); - vint8m8_t v = __riscv_vle8_v_i8m8((int8_t *)src, vl); - count += __riscv_vcpop_m_b1(__riscv_vmslt_vx_i8m8_b1(v, 0, vl), vl); - } - return count; -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -template -simdutf_really_inline static size_t -rvv_utf8_length_from_utf16(const char16_t *src, size_t len) { - size_t count = 0; - for (size_t vl; len > 0; len -= vl, src += vl) { - vl = __riscv_vsetvl_e16m8(len); - vuint16m8_t v = __riscv_vle16_v_u16m8((uint16_t *)src, vl); - v = simdutf_byteflip(v, vl); - vbool2_t m234 = __riscv_vmsgtu_vx_u16m8_b2(v, 0x7F, vl); - vbool2_t m34 = __riscv_vmsgtu_vx_u16m8_b2(v, 0x7FF, vl); - vbool2_t notSur = - __riscv_vmor_mm_b2(__riscv_vmsltu_vx_u16m8_b2(v, 0xD800, vl), - __riscv_vmsgtu_vx_u16m8_b2(v, 0xDFFF, vl), vl); - vbool2_t m3 = __riscv_vmand_mm_b2(m34, notSur, vl); - count += vl + __riscv_vcpop_m_b2(m234, vl) + __riscv_vcpop_m_b2(m3, vl); - } - return count; -} - -simdutf_warn_unused size_t implementation::utf8_length_from_utf16le( - const char16_t *src, size_t len) const noexcept { - return rvv_utf8_length_from_utf16(src, len); -} - -simdutf_warn_unused size_t implementation::utf8_length_from_utf16be( - const char16_t *src, size_t len) const noexcept { - if (supports_zvbb()) - return rvv_utf8_length_from_utf16(src, len); - else - return rvv_utf8_length_from_utf16(src, len); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::utf8_length_from_utf32( - const char32_t *src, size_t len) const noexcept { - size_t count = 0; - for (size_t vl; len > 0; len -= vl, src += vl) { - vl = __riscv_vsetvl_e32m8(len); - vuint32m8_t v = __riscv_vle32_v_u32m8((uint32_t *)src, vl); - vbool4_t m234 = __riscv_vmsgtu_vx_u32m8_b4(v, 0x7F, vl); - vbool4_t m34 = __riscv_vmsgtu_vx_u32m8_b4(v, 0x7FF, vl); - vbool4_t m4 = __riscv_vmsgtu_vx_u32m8_b4(v, 0xFFFF, vl); - count += vl + __riscv_vcpop_m_b4(m234, vl) + __riscv_vcpop_m_b4(m34, vl) + - __riscv_vcpop_m_b4(m4, vl); - } - return count; -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t implementation::utf16_length_from_utf8( - const char *src, size_t len) const noexcept { - size_t count = 0; - for (size_t vl; len > 0; len -= vl, src += vl) { - vl = __riscv_vsetvl_e8m8(len); - vint8m8_t v = __riscv_vle8_v_i8m8((int8_t *)src, vl); - vbool1_t m1234 = __riscv_vmsgt_vx_i8m8_b1(v, -65, vl); - vbool1_t m4 = __riscv_vmsgtu_vx_u8m8_b1(__riscv_vreinterpret_u8m8(v), - (uint8_t)0b11101111, vl); - count += __riscv_vcpop_m_b1(m1234, vl) + __riscv_vcpop_m_b1(m4, vl); - } - return count; -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::utf16_length_from_utf32( - const char32_t *src, size_t len) const noexcept { - size_t count = 0; - for (size_t vl; len > 0; len -= vl, src += vl) { - vl = __riscv_vsetvl_e32m8(len); - vuint32m8_t v = __riscv_vle32_v_u32m8((uint32_t *)src, vl); - vbool4_t m4 = __riscv_vmsgtu_vx_u32m8_b4(v, 0xFFFF, vl); - count += vl + __riscv_vcpop_m_b4(m4, vl); - } - return count; -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -/* end file src/rvv/rvv_length_from.inl.cpp */ -/* begin file src/rvv/rvv_validate.inl.cpp */ -#if SIMDUTF_FEATURE_ASCII -simdutf_warn_unused bool -implementation::validate_ascii(const char *src, size_t len) const noexcept { - size_t vlmax = __riscv_vsetvlmax_e8m8(); - vint8m8_t mask = __riscv_vmv_v_x_i8m8(0, vlmax); - for (size_t vl; len > 0; len -= vl, src += vl) { - vl = __riscv_vsetvl_e8m8(len); - vint8m8_t v = __riscv_vle8_v_i8m8((int8_t *)src, vl); - mask = __riscv_vor_vv_i8m8_tu(mask, mask, v, vl); - } - return __riscv_vfirst_m_b1(__riscv_vmslt_vx_i8m8_b1(mask, 0, vlmax), vlmax) < - 0; -} - -simdutf_warn_unused result implementation::validate_ascii_with_errors( - const char *src, size_t len) const noexcept { - const char *beg = src; - for (size_t vl; len > 0; len -= vl, src += vl) { - vl = __riscv_vsetvl_e8m8(len); - vint8m8_t v = __riscv_vle8_v_i8m8((int8_t *)src, vl); - long idx = __riscv_vfirst_m_b1(__riscv_vmslt_vx_i8m8_b1(v, 0, vl), vl); - if (idx >= 0) - return result(error_code::TOO_LARGE, src - beg + idx); - } - return result(error_code::SUCCESS, src - beg); -} -#endif // SIMDUTF_FEATURE_ASCII - -#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING -/* Returns a close estimation of the number of valid UTF-8 bytes up to the - * first invalid one, but never overestimating. */ -simdutf_really_inline static size_t rvv_count_valid_utf8(const char *src, - size_t len) { - const char *beg = src; - if (len < 32) - return 0; - - /* validate first three bytes */ - { - size_t idx = 3; - while (idx < len && (uint8_t(src[idx]) >> 6) == 0b10) - ++idx; - if (idx > 3 + 3 || !scalar::utf8::validate(src, idx)) - return 0; - } - - static const uint64_t err1m[] = {0x0202020202020202, 0x4915012180808080}; - static const uint64_t err2m[] = {0xCBCBCB8B8383A3E7, 0xCBCBDBCBCBCBCBCB}; - static const uint64_t err3m[] = {0x0101010101010101, 0X01010101BABAAEE6}; - - const vuint8m1_t err1tbl = - __riscv_vreinterpret_v_u64m1_u8m1(__riscv_vle64_v_u64m1(err1m, 2)); - const vuint8m1_t err2tbl = - __riscv_vreinterpret_v_u64m1_u8m1(__riscv_vle64_v_u64m1(err2m, 2)); - const vuint8m1_t err3tbl = - __riscv_vreinterpret_v_u64m1_u8m1(__riscv_vle64_v_u64m1(err3m, 2)); - - size_t tail = 3; - size_t n = len - tail; - - for (size_t vl; n > 0; n -= vl, src += vl) { - vl = __riscv_vsetvl_e8m4(n); - vuint8m4_t v0 = __riscv_vle8_v_u8m4((uint8_t const *)src, vl); - - uint8_t next0 = src[vl + 0]; - uint8_t next1 = src[vl + 1]; - uint8_t next2 = src[vl + 2]; - - /* fast path: ASCII */ - if (__riscv_vfirst_m_b2(__riscv_vmsgtu_vx_u8m4_b2(v0, 0b01111111, vl), vl) < - 0 && - (next0 | next1 | next2) < 0b10000000) - continue; - - /* see "Validating UTF-8 In Less Than One Instruction Per Byte" - * https://arxiv.org/abs/2010.03090 */ - vuint8m4_t v1 = __riscv_vslide1down_vx_u8m4(v0, next0, vl); - vuint8m4_t v2 = __riscv_vslide1down_vx_u8m4(v1, next1, vl); - vuint8m4_t v3 = __riscv_vslide1down_vx_u8m4(v2, next2, vl); - - vuint8m4_t idx2 = __riscv_vand_vx_u8m4(v2, 0xF, vl); - vuint8m4_t idx1 = __riscv_vsrl_vx_u8m4(v2, 4, vl); - vuint8m4_t idx3 = __riscv_vsrl_vx_u8m4(v3, 4, vl); - - vuint8m4_t err1 = simdutf_vrgather_u8m1x4(err1tbl, idx1); - vuint8m4_t err2 = simdutf_vrgather_u8m1x4(err2tbl, idx2); - vuint8m4_t err3 = simdutf_vrgather_u8m1x4(err3tbl, idx3); - vint8m4_t errs = __riscv_vreinterpret_v_u8m4_i8m4( - __riscv_vand_vv_u8m4(__riscv_vand_vv_u8m4(err1, err2, vl), err3, vl)); - - vbool2_t is_3 = __riscv_vmsgtu_vx_u8m4_b2(v1, 0b11100000 - 1, vl); - vbool2_t is_4 = __riscv_vmsgtu_vx_u8m4_b2(v0, 0b11110000 - 1, vl); - vbool2_t is_34 = __riscv_vmor_mm_b2(is_3, is_4, vl); - vbool2_t err34 = - __riscv_vmxor_mm_b2(is_34, __riscv_vmslt_vx_i8m4_b2(errs, 0, vl), vl); - vbool2_t errm = - __riscv_vmor_mm_b2(__riscv_vmsgt_vx_i8m4_b2(errs, 0, vl), err34, vl); - if (__riscv_vfirst_m_b2(errm, vl) >= 0) - break; - } - - /* we need to validate the last character */ - while (tail < len && (uint8_t(src[0]) >> 6) == 0b10) - --src, ++tail; - return src - beg; -} - -simdutf_warn_unused bool -implementation::validate_utf8(const char *src, size_t len) const noexcept { - size_t count = rvv_count_valid_utf8(src, len); - return scalar::utf8::validate(src + count, len - count); -} -#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 -simdutf_warn_unused result implementation::validate_utf8_with_errors( - const char *src, size_t len) const noexcept { - size_t count = rvv_count_valid_utf8(src, len); - result res = scalar::utf8::validate_with_errors(src + count, len - count); - return result(res.error, count + res.count); -} -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING -template -simdutf_really_inline static result -rvv_validate_utf16_with_errors(const char16_t *src, size_t len) { - const char16_t *beg = src; - uint16_t last = 0; - for (size_t vl; len > 0; - len -= vl, src += vl, last = simdutf_byteflip(src[-1])) { - vl = __riscv_vsetvl_e16m8(len); - vuint16m8_t v1 = __riscv_vle16_v_u16m8((const uint16_t *)src, vl); - v1 = simdutf_byteflip(v1, vl); - vuint16m8_t v0 = __riscv_vslide1up_vx_u16m8(v1, last, vl); - - vbool2_t surhi = __riscv_vmseq_vx_u16m8_b2( - __riscv_vand_vx_u16m8(v0, 0xFC00, vl), 0xD800, vl); - vbool2_t surlo = __riscv_vmseq_vx_u16m8_b2( - __riscv_vand_vx_u16m8(v1, 0xFC00, vl), 0xDC00, vl); - - long idx = __riscv_vfirst_m_b2(__riscv_vmxor_mm_b2(surhi, surlo, vl), vl); - if (idx >= 0) { - last = idx > 0 ? simdutf_byteflip(src[idx - 1]) : last; - return result(error_code::SURROGATE, - src - beg + idx - (last - 0xD800u < 0x400u)); - break; - } - } - if (last - 0xD800u < 0x400u) { - return result(error_code::SURROGATE, - src - beg - 1); /* end on high surrogate */ - } else { - return result(error_code::SUCCESS, src - beg); - } -} -#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_warn_unused bool -implementation::validate_utf16le(const char16_t *src, - size_t len) const noexcept { - return rvv_validate_utf16_with_errors(src, len) - .error == error_code::SUCCESS; -} -#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused bool -implementation::validate_utf16be(const char16_t *src, - size_t len) const noexcept { - return validate_utf16be_with_errors(src, len).error == error_code::SUCCESS; -} -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused result implementation::validate_utf16le_with_errors( - const char16_t *src, size_t len) const noexcept { - return rvv_validate_utf16_with_errors(src, len); -} - -simdutf_warn_unused result implementation::validate_utf16be_with_errors( - const char16_t *src, size_t len) const noexcept { - if (supports_zvbb()) - return rvv_validate_utf16_with_errors(src, len); - else - return rvv_validate_utf16_with_errors(src, len); -} -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_warn_unused bool -implementation::validate_utf32(const char32_t *src, size_t len) const noexcept { - size_t vlmax = __riscv_vsetvlmax_e32m8(); - vuint32m8_t max = __riscv_vmv_v_x_u32m8(0x10FFFF, vlmax); - vuint32m8_t maxOff = __riscv_vmv_v_x_u32m8(0xFFFFF7FF, vlmax); - for (size_t vl; len > 0; len -= vl, src += vl) { - vl = __riscv_vsetvl_e32m8(len); - vuint32m8_t v = __riscv_vle32_v_u32m8((uint32_t *)src, vl); - vuint32m8_t off = __riscv_vadd_vx_u32m8(v, 0xFFFF2000, vl); - max = __riscv_vmaxu_vv_u32m8_tu(max, max, v, vl); - maxOff = __riscv_vmaxu_vv_u32m8_tu(maxOff, maxOff, off, vl); - } - return __riscv_vfirst_m_b4( - __riscv_vmor_mm_b4( - __riscv_vmsne_vx_u32m8_b4(max, 0x10FFFF, vlmax), - __riscv_vmsne_vx_u32m8_b4(maxOff, 0xFFFFF7FF, vlmax), vlmax), - vlmax) < 0; -} -#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused result implementation::validate_utf32_with_errors( - const char32_t *src, size_t len) const noexcept { - const char32_t *beg = src; - for (size_t vl; len > 0; len -= vl, src += vl) { - vl = __riscv_vsetvl_e32m8(len); - vuint32m8_t v = __riscv_vle32_v_u32m8((uint32_t *)src, vl); - vuint32m8_t off = __riscv_vadd_vx_u32m8(v, 0xFFFF2000, vl); - long idx1 = - __riscv_vfirst_m_b4(__riscv_vmsgtu_vx_u32m8_b4(v, 0x10FFFF, vl), vl); - long idx2 = __riscv_vfirst_m_b4( - __riscv_vmsgtu_vx_u32m8_b4(off, 0xFFFFF7FF, vl), vl); - if (idx1 >= 0 && idx2 >= 0) { - if (idx1 <= idx2) { - return result(error_code::TOO_LARGE, src - beg + idx1); - } else { - return result(error_code::SURROGATE, src - beg + idx2); - } - } - if (idx1 >= 0) { - return result(error_code::TOO_LARGE, src - beg + idx1); - } - if (idx2 >= 0) { - return result(error_code::SURROGATE, src - beg + idx2); - } - } - return result(error_code::SUCCESS, src - beg); -} -#endif // SIMDUTF_FEATURE_UTF32 -/* end file src/rvv/rvv_validate.inl.cpp */ - -/* begin file src/rvv/rvv_latin1_to.inl.cpp */ -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( - const char *src, size_t len, char *dst) const noexcept { - char *beg = dst; - for (size_t vl, vlOut; len > 0; len -= vl, src += vl, dst += vlOut) { - vl = __riscv_vsetvl_e8m2(len); - vuint8m2_t v1 = __riscv_vle8_v_u8m2((uint8_t *)src, vl); - vbool4_t nascii = - __riscv_vmslt_vx_i8m2_b4(__riscv_vreinterpret_v_u8m2_i8m2(v1), 0, vl); - size_t cnt = __riscv_vcpop_m_b4(nascii, vl); - vlOut = vl + cnt; - if (cnt == 0) { - __riscv_vse8_v_u8m2((uint8_t *)dst, v1, vlOut); - continue; - } - - vuint8m2_t v0 = - __riscv_vor_vx_u8m2(__riscv_vsrl_vx_u8m2(v1, 6, vl), 0b11000000, vl); - v1 = __riscv_vand_vx_u8m2_mu(nascii, v1, v1, 0b10111111, vl); - - vuint8m4_t wide = - __riscv_vreinterpret_v_u16m4_u8m4(__riscv_vwmaccu_vx_u16m4( - __riscv_vwaddu_vv_u16m4(v0, v1, vl), 0xFF, v1, vl)); - vbool2_t mask = __riscv_vmsgtu_vx_u8m4_b2( - __riscv_vsub_vx_u8m4(wide, 0b11000000, vl * 2), 1, vl * 2); - vuint8m4_t comp = __riscv_vcompress_vm_u8m4(wide, mask, vl * 2); - - __riscv_vse8_v_u8m4((uint8_t *)dst, comp, vlOut); - } - return dst - beg; -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( - const char *src, size_t len, char16_t *dst) const noexcept { - char16_t *beg = dst; - for (size_t vl; len > 0; len -= vl, src += vl, dst += vl) { - vl = __riscv_vsetvl_e8m4(len); - vuint8m4_t v = __riscv_vle8_v_u8m4((uint8_t *)src, vl); - __riscv_vse16_v_u16m8((uint16_t *)dst, __riscv_vzext_vf2_u16m8(v, vl), vl); - } - return dst - beg; -} - -simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be( - const char *src, size_t len, char16_t *dst) const noexcept { - char16_t *beg = dst; - for (size_t vl; len > 0; len -= vl, src += vl, dst += vl) { - vl = __riscv_vsetvl_e8m4(len); - vuint8m4_t v = __riscv_vle8_v_u8m4((uint8_t *)src, vl); - __riscv_vse16_v_u16m8( - (uint16_t *)dst, - __riscv_vsll_vx_u16m8(__riscv_vzext_vf2_u16m8(v, vl), 8, vl), vl); - } - return dst - beg; -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( - const char *src, size_t len, char32_t *dst) const noexcept { - char32_t *beg = dst; - for (size_t vl; len > 0; len -= vl, src += vl, dst += vl) { - vl = __riscv_vsetvl_e8m2(len); - vuint8m2_t v = __riscv_vle8_v_u8m2((uint8_t *)src, vl); - __riscv_vse32_v_u32m8((uint32_t *)dst, __riscv_vzext_vf4_u32m8(v, vl), vl); - } - return dst - beg; -} -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -/* end file src/rvv/rvv_latin1_to.inl.cpp */ -/* begin file src/rvv/rvv_utf16_to.inl.cpp */ -#if SIMDUTF_FEATURE_UTF16 -template -simdutf_really_inline static result -rvv_utf16_to_latin1_with_errors(const char16_t *src, size_t len, char *dst) { - const char16_t *const beg = src; - for (size_t vl; len > 0; len -= vl, src += vl, dst += vl) { - vl = __riscv_vsetvl_e16m8(len); - vuint16m8_t v = __riscv_vle16_v_u16m8((uint16_t *)src, vl); - v = simdutf_byteflip(v, vl); - long idx = __riscv_vfirst_m_b2(__riscv_vmsgtu_vx_u16m8_b2(v, 255, vl), vl); - if (idx >= 0) - return result(error_code::TOO_LARGE, src - beg + idx); - __riscv_vse8_v_u8m4((uint8_t *)dst, __riscv_vncvt_x_x_w_u8m4(v, vl), vl); - } - return result(error_code::SUCCESS, src - beg); -} -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( - const char16_t *src, size_t len, char *dst) const noexcept { - result res = convert_utf16le_to_latin1_with_errors(src, len, dst); - return res.error == error_code::SUCCESS ? res.count : 0; -} - -simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1( - const char16_t *src, size_t len, char *dst) const noexcept { - result res = convert_utf16be_to_latin1_with_errors(src, len, dst); - return res.error == error_code::SUCCESS ? res.count : 0; -} - -simdutf_warn_unused result -implementation::convert_utf16le_to_latin1_with_errors( - const char16_t *src, size_t len, char *dst) const noexcept { - return rvv_utf16_to_latin1_with_errors(src, len, dst); -} - -simdutf_warn_unused result -implementation::convert_utf16be_to_latin1_with_errors( - const char16_t *src, size_t len, char *dst) const noexcept { - if (supports_zvbb()) - return rvv_utf16_to_latin1_with_errors(src, len, - dst); - else - return rvv_utf16_to_latin1_with_errors(src, len, dst); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1( - const char16_t *src, size_t len, char *dst) const noexcept { - const char16_t *const beg = src; - for (size_t vl; len > 0; len -= vl, src += vl, dst += vl) { - vl = __riscv_vsetvl_e16m8(len); - vuint16m8_t v = __riscv_vle16_v_u16m8((uint16_t *)src, vl); - __riscv_vse8_v_u8m4((uint8_t *)dst, __riscv_vncvt_x_x_w_u8m4(v, vl), vl); - } - return src - beg; -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1( - const char16_t *src, size_t len, char *dst) const noexcept { - const char16_t *const beg = src; - for (size_t vl; len > 0; len -= vl, src += vl, dst += vl) { - vl = __riscv_vsetvl_e16m8(len); - vuint16m8_t v = __riscv_vle16_v_u16m8((uint16_t *)src, vl); - __riscv_vse8_v_u8m4((uint8_t *)dst, __riscv_vnsrl_wx_u8m4(v, 8, vl), vl); - } - return src - beg; -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -template -simdutf_really_inline static result -rvv_utf16_to_utf8_with_errors(const char16_t *src, size_t len, char *dst) { - size_t n = len; - const char16_t *srcBeg = src; - const char *dstBeg = dst; - size_t vl8m4 = __riscv_vsetvlmax_e8m4(); - vbool2_t m4mulp2 = __riscv_vmseq_vx_u8m4_b2( - __riscv_vand_vx_u8m4(__riscv_vid_v_u8m4(vl8m4), 3, vl8m4), 2, vl8m4); - - for (size_t vl, vlOut; n > 0;) { - vl = __riscv_vsetvl_e16m2(n); - - vuint16m2_t v = __riscv_vle16_v_u16m2((uint16_t const *)src, vl); - v = simdutf_byteflip(v, vl); - vbool8_t m234 = __riscv_vmsgtu_vx_u16m2_b8(v, 0x80 - 1, vl); - - if (__riscv_vfirst_m_b8(m234, vl) < 0) { /* 1 byte utf8 */ - vlOut = vl; - __riscv_vse8_v_u8m1((uint8_t *)dst, __riscv_vncvt_x_x_w_u8m1(v, vlOut), - vlOut); - n -= vl, src += vl, dst += vlOut; - continue; - } - - vbool8_t m34 = __riscv_vmsgtu_vx_u16m2_b8(v, 0x800 - 1, vl); - - if (__riscv_vfirst_m_b8(m34, vl) < 0) { /* 1/2 byte utf8 */ - /* 0: [ aaa|aabbbbbb] - * 1: [aabbbbbb| ] vsll 8 - * 2: [ | aaaaa] vsrl 6 - * 3: [00111111|00011111] - * 4: [ bbbbbb|000aaaaa] (1|2)&3 - * 5: [11000000|11000000] - * 6: [10bbbbbb|110aaaaa] 4|5 */ - vuint16m2_t twoByte = __riscv_vand_vx_u16m2( - __riscv_vor_vv_u16m2(__riscv_vsll_vx_u16m2(v, 8, vl), - __riscv_vsrl_vx_u16m2(v, 6, vl), vl), - 0b0011111100011111, vl); - vuint16m2_t vout16 = - __riscv_vor_vx_u16m2_mu(m234, v, twoByte, 0b1000000011000000, vl); - vuint8m2_t vout = __riscv_vreinterpret_v_u16m2_u8m2(vout16); - - /* Every high byte that is zero should be compressed - * low bytes should never be compressed, so we set them - * to all ones, and then create a non-zero bytes mask */ - vbool4_t mcomp = - __riscv_vmsne_vx_u8m2_b4(__riscv_vreinterpret_v_u16m2_u8m2( - __riscv_vor_vx_u16m2(vout16, 0xFF, vl)), - 0, vl * 2); - vlOut = __riscv_vcpop_m_b4(mcomp, vl * 2); - - vout = __riscv_vcompress_vm_u8m2(vout, mcomp, vl * 2); - __riscv_vse8_v_u8m2((uint8_t *)dst, vout, vlOut); - - n -= vl, src += vl, dst += vlOut; - continue; - } - - vbool8_t sur = __riscv_vmseq_vx_u16m2_b8( - __riscv_vand_vx_u16m2(v, 0xF800, vl), 0xD800, vl); - long first = __riscv_vfirst_m_b8(sur, vl); - size_t tail = vl - first; - vl = first < 0 ? vl : first; - - if (vl > 0) { /* 1/2/3 byte utf8 */ - /* in: [aaaabbbb|bbcccccc] - * v1: [0bcccccc| ] vsll 8 - * v1: [10cccccc| ] vsll 8 & 0b00111111 | 0b10000000 - * v2: [ |110bbbbb] vsrl 6 & 0b00111111 | 0b11000000 - * v2: [ |10bbbbbb] vsrl 6 & 0b00111111 | 0b10000000 - * v3: [ |1110aaaa] vsrl 12 | 0b11100000 - * 1: [00000000|0bcccccc|00000000|00000000] => [0bcccccc] - * 2: [00000000|10cccccc|110bbbbb|00000000] => [110bbbbb] [10cccccc] - * 3: [00000000|10cccccc|10bbbbbb|1110aaaa] => [1110aaaa] [10bbbbbb] - * [10cccccc] - */ - vuint16m2_t v1, v2, v3, v12; - v1 = __riscv_vor_vx_u16m2_mu( - m234, v, __riscv_vand_vx_u16m2(v, 0b00111111, vl), 0b10000000, vl); - v1 = __riscv_vsll_vx_u16m2(v1, 8, vl); - - v2 = __riscv_vor_vx_u16m2( - __riscv_vand_vx_u16m2(__riscv_vsrl_vx_u16m2(v, 6, vl), 0b00111111, - vl), - 0b10000000, vl); - v2 = __riscv_vor_vx_u16m2_mu(__riscv_vmnot_m_b8(m34, vl), v2, v2, - 0b01000000, vl); - v3 = __riscv_vor_vx_u16m2(__riscv_vsrl_vx_u16m2(v, 12, vl), 0b11100000, - vl); - v12 = __riscv_vor_vv_u16m2_mu(m234, v1, v1, v2, vl); - - vuint32m4_t w12 = __riscv_vwmulu_vx_u32m4(v12, 1 << 8, vl); - vuint32m4_t w123 = __riscv_vwaddu_wv_u32m4_mu(m34, w12, w12, v3, vl); - vuint8m4_t vout = __riscv_vreinterpret_v_u32m4_u8m4(w123); - - vbool2_t mcomp = __riscv_vmor_mm_b2( - m4mulp2, __riscv_vmsne_vx_u8m4_b2(vout, 0, vl * 4), vl * 4); - vlOut = __riscv_vcpop_m_b2(mcomp, vl * 4); - - vout = __riscv_vcompress_vm_u8m4(vout, mcomp, vl * 4); - __riscv_vse8_v_u8m4((uint8_t *)dst, vout, vlOut); - - n -= vl, src += vl, dst += vlOut; - } - - if (tail) - while (n) { - uint16_t word = simdutf_byteflip(src[0]); - if ((word & 0xFF80) == 0) { - break; - } else if ((word & 0xF800) == 0) { - break; - } else if ((word & 0xF800) != 0xD800) { - break; - } else { - // must be a surrogate pair - if (n <= 1) - return result(error_code::SURROGATE, src - srcBeg); - uint16_t diff = word - 0xD800; - if (diff > 0x3FF) - return result(error_code::SURROGATE, src - srcBeg); - uint16_t diff2 = simdutf_byteflip(src[1]) - 0xDC00; - if (diff2 > 0x3FF) - return result(error_code::SURROGATE, src - srcBeg); - - uint32_t value = ((diff + 0x40) << 10) + diff2; - - // will generate four UTF-8 bytes - // we have 0b11110XXX 0b10XXXXXX 0b10XXXXXX 0b10XXXXXX - *dst++ = (char)((value >> 18) | 0b11110000); - *dst++ = (char)(((value >> 12) & 0b111111) | 0b10000000); - *dst++ = (char)(((value >> 6) & 0b111111) | 0b10000000); - *dst++ = (char)((value & 0b111111) | 0b10000000); - src += 2; - n -= 2; - } - } - } - - return result(error_code::SUCCESS, dst - dstBeg); -} - -simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( - const char16_t *src, size_t len, char *dst) const noexcept { - result res = convert_utf16le_to_utf8_with_errors(src, len, dst); - return res.error == error_code::SUCCESS ? res.count : 0; -} - -simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8( - const char16_t *src, size_t len, char *dst) const noexcept { - result res = convert_utf16be_to_utf8_with_errors(src, len, dst); - return res.error == error_code::SUCCESS ? res.count : 0; -} - -simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors( - const char16_t *src, size_t len, char *dst) const noexcept { - return rvv_utf16_to_utf8_with_errors(src, len, dst); -} - -simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors( - const char16_t *src, size_t len, char *dst) const noexcept { - if (supports_zvbb()) - return rvv_utf16_to_utf8_with_errors(src, len, dst); - else - return rvv_utf16_to_utf8_with_errors(src, len, dst); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8( - const char16_t *src, size_t len, char *dst) const noexcept { - return convert_utf16le_to_utf8(src, len, dst); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8( - const char16_t *src, size_t len, char *dst) const noexcept { - return convert_utf16be_to_utf8(src, len, dst); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -template -simdutf_really_inline static result -rvv_utf16_to_utf32_with_errors(const char16_t *src, size_t len, char32_t *dst) { - const char16_t *const srcBeg = src; - char32_t *const dstBeg = dst; - - constexpr const uint16_t ANY_SURROGATE_MASK = 0xf800; - constexpr const uint16_t ANY_SURROGATE_VALUE = 0xd800; - constexpr const uint16_t LO_SURROGATE_MASK = 0xfc00; - constexpr const uint16_t LO_SURROGATE_VALUE = 0xdc00; - constexpr const uint16_t HI_SURROGATE_MASK = 0xfc00; - constexpr const uint16_t HI_SURROGATE_VALUE = 0xd800; - - uint16_t last = 0; - while (len > 0) { - size_t vl = __riscv_vsetvl_e16m2(len); - vuint16m2_t v0 = __riscv_vle16_v_u16m2((uint16_t const *)src, vl); - v0 = simdutf_byteflip(v0, vl); - - { // check fast-path - const vuint16m2_t v = __riscv_vand_vx_u16m2(v0, ANY_SURROGATE_MASK, vl); - const vbool8_t any_surrogate = - __riscv_vmseq_vx_u16m2_b8(v, ANY_SURROGATE_VALUE, vl); - if (__riscv_vfirst_m_b8(any_surrogate, vl) < 0) { - /* no surrogates */ - __riscv_vse32_v_u32m4((uint32_t *)dst, __riscv_vzext_vf2_u32m4(v0, vl), - vl); - len -= vl; - src += vl; - dst += vl; - continue; - } - } - - if ((simdutf_byteflip(src[0]) & LO_SURROGATE_MASK) == - LO_SURROGATE_VALUE) { - return result(error_code::SURROGATE, src - srcBeg); - } - - // decode surrogates - vuint16m2_t v1 = __riscv_vslide1down_vx_u16m2(v0, 0, vl); - vl = __riscv_vsetvl_e16m2(vl - 1); - if (vl == 0) { - return result(error_code::SURROGATE, src - srcBeg); - } - - const vbool8_t surhi = __riscv_vmseq_vx_u16m2_b8( - __riscv_vand_vx_u16m2(v0, HI_SURROGATE_MASK, vl), HI_SURROGATE_VALUE, - vl); - const vbool8_t surlo = __riscv_vmseq_vx_u16m2_b8( - __riscv_vand_vx_u16m2(v1, LO_SURROGATE_MASK, vl), LO_SURROGATE_VALUE, - vl); - - // compress everything but lo surrogates - const vbool8_t compress = __riscv_vmsne_vx_u16m2_b8( - __riscv_vand_vx_u16m2(v0, LO_SURROGATE_MASK, vl), LO_SURROGATE_VALUE, - vl); - - { - const vbool8_t diff = __riscv_vmxor_mm_b8(surhi, surlo, vl); - const long idx = __riscv_vfirst_m_b8(diff, vl); - if (idx >= 0) { - uint16_t word = simdutf_byteflip(src[idx]); - if (word < 0xD800 || word > 0xDBFF) { - return result(error_code::SURROGATE, src - srcBeg + idx + 1); - } - return result(error_code::SURROGATE, src - srcBeg + idx); - } - } - - last = simdutf_byteflip(src[vl]); - vuint32m4_t utf32 = __riscv_vzext_vf2_u32m4(v0, vl); - - // v0 = 110110yyyyyyyyyy (0xd800 + yyyyyyyyyy) --- hi surrogate - // v1 = 110111xxxxxxxxxx (0xdc00 + xxxxxxxxxx) --- lo surrogate - - // t0 = u16( 0000_00yy_yyyy_yyyy) - const vuint32m4_t t0 = - __riscv_vzext_vf2_u32m4(__riscv_vand_vx_u16m2(v0, 0x03ff, vl), vl); - // t1 = u32(0000_0000_0000_yyyy_yyyy_yy00_0000_0000) - const vuint32m4_t t1 = __riscv_vsll_vx_u32m4(t0, 10, vl); - - // t2 = u32(0000_0000_0000_0000_0000_00xx_xxxx_xxxx) - const vuint32m4_t t2 = - __riscv_vzext_vf2_u32m4(__riscv_vand_vx_u16m2(v1, 0x03ff, vl), vl); - - // t3 = u32(0000_0000_0000_yyyy_yyyy_yyxx_xxxx_xxxx) - const vuint32m4_t t3 = __riscv_vor_vv_u32m4(t1, t2, vl); - - // t4 = utf32 from surrogate pairs - const vuint32m4_t t4 = __riscv_vadd_vx_u32m4(t3, 0x10000, vl); - - const vuint32m4_t result = __riscv_vmerge_vvm_u32m4(utf32, t4, surhi, vl); - - const vuint32m4_t comp = __riscv_vcompress_vm_u32m4(result, compress, vl); - const size_t vlOut = __riscv_vcpop_m_b8(compress, vl); - __riscv_vse32_v_u32m4((uint32_t *)dst, comp, vlOut); - - len -= vl; - src += vl; - dst += vlOut; - - if ((last & LO_SURROGATE_MASK) == LO_SURROGATE_VALUE) { - // last item is lo surrogate and got already consumed - len -= 1; - src += 1; - } - } - - return result(error_code::SUCCESS, dst - dstBeg); -} - -simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( - const char16_t *src, size_t len, char32_t *dst) const noexcept { - result res = convert_utf16le_to_utf32_with_errors(src, len, dst); - return res.error == error_code::SUCCESS ? res.count : 0; -} - -simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32( - const char16_t *src, size_t len, char32_t *dst) const noexcept { - result res = convert_utf16be_to_utf32_with_errors(src, len, dst); - return res.error == error_code::SUCCESS ? res.count : 0; -} - -simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors( - const char16_t *src, size_t len, char32_t *dst) const noexcept { - return rvv_utf16_to_utf32_with_errors(src, len, dst); -} - -simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( - const char16_t *src, size_t len, char32_t *dst) const noexcept { - if (supports_zvbb()) - return rvv_utf16_to_utf32_with_errors(src, len, - dst); - else - return rvv_utf16_to_utf32_with_errors(src, len, dst); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32( - const char16_t *src, size_t len, char32_t *dst) const noexcept { - return convert_utf16le_to_utf32(src, len, dst); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32( - const char16_t *src, size_t len, char32_t *dst) const noexcept { - return convert_utf16be_to_utf32(src, len, dst); -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -/* end file src/rvv/rvv_utf16_to.inl.cpp */ - -/* begin file src/rvv/rvv_utf32_to.inl.cpp */ -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( - const char32_t *src, size_t len, char *dst) const noexcept { - result res = convert_utf32_to_latin1_with_errors(src, len, dst); - return res.error == error_code::SUCCESS ? res.count : 0; -} - -simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors( - const char32_t *src, size_t len, char *dst) const noexcept { - const char32_t *const beg = src; - for (size_t vl; len > 0; len -= vl, src += vl, dst += vl) { - vl = __riscv_vsetvl_e32m8(len); - vuint32m8_t v = __riscv_vle32_v_u32m8((uint32_t *)src, vl); - long idx = __riscv_vfirst_m_b4(__riscv_vmsgtu_vx_u32m8_b4(v, 255, vl), vl); - if (idx >= 0) - return result(error_code::TOO_LARGE, src - beg + idx); - /* We don't use vcompress here, because its performance varies widely on - * current platforms. This might be worth reconsidering once there is more - * hardware available. */ - __riscv_vse8_v_u8m2( - (uint8_t *)dst, - __riscv_vncvt_x_x_w_u8m2(__riscv_vncvt_x_x_w_u16m4(v, vl), vl), vl); - } - return result(error_code::SUCCESS, src - beg); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1( - const char32_t *src, size_t len, char *dst) const noexcept { - return convert_utf32_to_latin1(src, len, dst); -} -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( - const char32_t *src, size_t len, char *dst) const noexcept { - size_t n = len; - const char32_t *srcBeg = src; - const char *dstBeg = dst; - size_t vl8m4 = __riscv_vsetvlmax_e8m4(); - vbool2_t m4mulp2 = __riscv_vmseq_vx_u8m4_b2( - __riscv_vand_vx_u8m4(__riscv_vid_v_u8m4(vl8m4), 3, vl8m4), 2, vl8m4); - - for (size_t vl, vlOut; n > 0;) { - vl = __riscv_vsetvl_e32m4(n); - - vuint32m4_t v = __riscv_vle32_v_u32m4((uint32_t const *)src, vl); - vbool8_t m234 = __riscv_vmsgtu_vx_u32m4_b8(v, 0x80 - 1, vl); - vuint16m2_t vn = __riscv_vncvt_x_x_w_u16m2(v, vl); - - if (__riscv_vfirst_m_b8(m234, vl) < 0) { /* 1 byte utf8 */ - vlOut = vl; - __riscv_vse8_v_u8m1((uint8_t *)dst, __riscv_vncvt_x_x_w_u8m1(vn, vlOut), - vlOut); - n -= vl, src += vl, dst += vlOut; - continue; - } - - vbool8_t m34 = __riscv_vmsgtu_vx_u32m4_b8(v, 0x800 - 1, vl); - - if (__riscv_vfirst_m_b8(m34, vl) < 0) { /* 1/2 byte utf8 */ - /* 0: [ aaa|aabbbbbb] - * 1: [aabbbbbb| ] vsll 8 - * 2: [ | aaaaa] vsrl 6 - * 3: [00111111|00111111] - * 4: [ bbbbbb|000aaaaa] (1|2)&3 - * 5: [10000000|11000000] - * 6: [10bbbbbb|110aaaaa] 4|5 */ - vuint16m2_t twoByte = __riscv_vand_vx_u16m2( - __riscv_vor_vv_u16m2(__riscv_vsll_vx_u16m2(vn, 8, vl), - __riscv_vsrl_vx_u16m2(vn, 6, vl), vl), - 0b0011111100111111, vl); - vuint16m2_t vout16 = - __riscv_vor_vx_u16m2_mu(m234, vn, twoByte, 0b1000000011000000, vl); - vuint8m2_t vout = __riscv_vreinterpret_v_u16m2_u8m2(vout16); - - /* Every high byte that is zero should be compressed - * low bytes should never be compressed, so we set them - * to all ones, and then create a non-zero bytes mask */ - vbool4_t mcomp = - __riscv_vmsne_vx_u8m2_b4(__riscv_vreinterpret_v_u16m2_u8m2( - __riscv_vor_vx_u16m2(vout16, 0xFF, vl)), - 0, vl * 2); - vlOut = __riscv_vcpop_m_b4(mcomp, vl * 2); - - vout = __riscv_vcompress_vm_u8m2(vout, mcomp, vl * 2); - __riscv_vse8_v_u8m2((uint8_t *)dst, vout, vlOut); - - n -= vl, src += vl, dst += vlOut; - continue; - } - const long idx1 = - __riscv_vfirst_m_b8(__riscv_vmsgtu_vx_u32m4_b8(v, 0x10FFFF, vl), vl); - vbool8_t sur = __riscv_vmseq_vx_u32m4_b8( - __riscv_vand_vx_u32m4(v, 0xFFFFF800, vl), 0xD800, vl); - const long idx2 = __riscv_vfirst_m_b8(sur, vl); - if (idx1 >= 0 || idx2 >= 0) { - if (static_cast(idx1) <= - static_cast(idx2)) { - return result(error_code::TOO_LARGE, src - srcBeg + idx1); - } else { - return result(error_code::SURROGATE, src - srcBeg + idx2); - } - } - - vbool8_t m4 = __riscv_vmsgtu_vx_u32m4_b8(v, 0x10000 - 1, vl); - long first = __riscv_vfirst_m_b8(m4, vl); - size_t tail = vl - first; - vl = first < 0 ? vl : first; - - if (vl > 0) { /* 1/2/3 byte utf8 */ - /* vn: [aaaabbbb|bbcccccc] - * v1: [0bcccccc| ] vsll 8 - * v1: [10cccccc| ] vsll 8 & 0b00111111 | 0b10000000 - * v2: [ |110bbbbb] vsrl 6 & 0b00111111 | 0b11000000 - * v2: [ |10bbbbbb] vsrl 6 & 0b00111111 | 0b10000000 - * v3: [ |1110aaaa] vsrl 12 | 0b11100000 - * 1: [00000000|0bcccccc|00000000|00000000] => [0bcccccc] - * 2: [00000000|10cccccc|110bbbbb|00000000] => [110bbbbb] [10cccccc] - * 3: [00000000|10cccccc|10bbbbbb|1110aaaa] => [1110aaaa] [10bbbbbb] - * [10cccccc] - */ - vuint16m2_t v1, v2, v3, v12; - v1 = __riscv_vor_vx_u16m2_mu( - m234, vn, __riscv_vand_vx_u16m2(vn, 0b00111111, vl), 0b10000000, vl); - v1 = __riscv_vsll_vx_u16m2(v1, 8, vl); - - v2 = __riscv_vor_vx_u16m2( - __riscv_vand_vx_u16m2(__riscv_vsrl_vx_u16m2(vn, 6, vl), 0b00111111, - vl), - 0b10000000, vl); - v2 = __riscv_vor_vx_u16m2_mu(__riscv_vmnot_m_b8(m34, vl), v2, v2, - 0b01000000, vl); - v3 = __riscv_vor_vx_u16m2(__riscv_vsrl_vx_u16m2(vn, 12, vl), 0b11100000, - vl); - v12 = __riscv_vor_vv_u16m2_mu(m234, v1, v1, v2, vl); - - vuint32m4_t w12 = __riscv_vwmulu_vx_u32m4(v12, 1 << 8, vl); - vuint32m4_t w123 = __riscv_vwaddu_wv_u32m4_mu(m34, w12, w12, v3, vl); - vuint8m4_t vout = __riscv_vreinterpret_v_u32m4_u8m4(w123); - - vbool2_t mcomp = __riscv_vmor_mm_b2( - m4mulp2, __riscv_vmsne_vx_u8m4_b2(vout, 0, vl * 4), vl * 4); - vlOut = __riscv_vcpop_m_b2(mcomp, vl * 4); - - vout = __riscv_vcompress_vm_u8m4(vout, mcomp, vl * 4); - __riscv_vse8_v_u8m4((uint8_t *)dst, vout, vlOut); - - n -= vl, src += vl, dst += vlOut; - } - - if (tail) - while (n) { - uint32_t word = src[0]; - if (word < 0x10000) - break; - if (word > 0x10FFFF) - return result(error_code::TOO_LARGE, src - srcBeg); - *dst++ = (uint8_t)((word >> 18) | 0b11110000); - *dst++ = (uint8_t)(((word >> 12) & 0b111111) | 0b10000000); - *dst++ = (uint8_t)(((word >> 6) & 0b111111) | 0b10000000); - *dst++ = (uint8_t)((word & 0b111111) | 0b10000000); - ++src; - --n; - } - } - - return result(error_code::SUCCESS, dst - dstBeg); -} - -simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( - const char32_t *src, size_t len, char *dst) const noexcept { - result res = convert_utf32_to_utf8_with_errors(src, len, dst); - return res.error == error_code::SUCCESS ? res.count : 0; -} - -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( - const char32_t *src, size_t len, char *dst) const noexcept { - return convert_utf32_to_utf8(src, len, dst); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -template -simdutf_really_inline static result -rvv_convert_utf32_to_utf16_with_errors(const char32_t *src, size_t len, - char16_t *dst) { - size_t vl8m2 = __riscv_vsetvlmax_e8m2(); - vbool4_t m4even = __riscv_vmseq_vx_u8m2_b4( - __riscv_vand_vx_u8m2(__riscv_vid_v_u8m2(vl8m2), 1, vl8m2), 0, vl8m2); - const char16_t *dstBeg = dst; - const char32_t *srcBeg = src; - for (size_t vl, vlOut; len > 0; len -= vl, src += vl, dst += vlOut) { - vl = __riscv_vsetvl_e32m4(len); - vuint32m4_t v = __riscv_vle32_v_u32m4((uint32_t *)src, vl); - vuint32m4_t off = __riscv_vadd_vx_u32m4(v, 0xFFFF2000, vl); - const long idx1 = - __riscv_vfirst_m_b8(__riscv_vmsgtu_vx_u32m4_b8(v, 0x10FFFF, vl), vl); - const long idx2 = __riscv_vfirst_m_b8( - __riscv_vmsgtu_vx_u32m4_b8(off, 0xFFFFF7FF, vl), vl); - if (idx1 >= 0 || idx2 >= 0) { - if (static_cast(idx1) <= - static_cast(idx2)) { - return result(error_code::TOO_LARGE, src - srcBeg + idx1); - } else { - return result(error_code::SURROGATE, src - srcBeg + idx2); - } - } - const long idx = - __riscv_vfirst_m_b8(__riscv_vmsgtu_vx_u32m4_b8(v, 0xFFFF, vl), vl); - if (idx < 0) { - vlOut = vl; - vuint16m2_t n = - simdutf_byteflip(__riscv_vncvt_x_x_w_u16m2(v, vlOut), vlOut); - __riscv_vse16_v_u16m2((uint16_t *)dst, n, vlOut); - continue; - } - vlOut = rvv_utf32_store_utf16_m4((uint16_t *)dst, v, vl, m4even); - } - return result(error_code::SUCCESS, dst - dstBeg); -} - -simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( - const char32_t *src, size_t len, char16_t *dst) const noexcept { - result res = convert_utf32_to_utf16le_with_errors(src, len, dst); - return res.error == error_code::SUCCESS ? res.count : 0; -} - -simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be( - const char32_t *src, size_t len, char16_t *dst) const noexcept { - result res = convert_utf32_to_utf16be_with_errors(src, len, dst); - return res.error == error_code::SUCCESS ? res.count : 0; -} - -simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors( - const char32_t *src, size_t len, char16_t *dst) const noexcept { - return rvv_convert_utf32_to_utf16_with_errors( - src, len, dst); -} - -simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors( - const char32_t *src, size_t len, char16_t *dst) const noexcept { - if (supports_zvbb()) - return rvv_convert_utf32_to_utf16_with_errors( - src, len, dst); - else - return rvv_convert_utf32_to_utf16_with_errors(src, len, - dst); -} - -template -simdutf_really_inline static size_t -rvv_convert_valid_utf32_to_utf16(const char32_t *src, size_t len, - char16_t *dst) { - size_t vl8m2 = __riscv_vsetvlmax_e8m2(); - vbool4_t m4even = __riscv_vmseq_vx_u8m2_b4( - __riscv_vand_vx_u8m2(__riscv_vid_v_u8m2(vl8m2), 1, vl8m2), 0, vl8m2); - char16_t *dstBeg = dst; - for (size_t vl, vlOut; len > 0; len -= vl, src += vl, dst += vlOut) { - vl = __riscv_vsetvl_e32m4(len); - vuint32m4_t v = __riscv_vle32_v_u32m4((uint32_t *)src, vl); - if (__riscv_vfirst_m_b8(__riscv_vmsgtu_vx_u32m4_b8(v, 0xFFFF, vl), vl) < - 0) { - vlOut = vl; - vuint16m2_t n = - simdutf_byteflip(__riscv_vncvt_x_x_w_u16m2(v, vlOut), vlOut); - __riscv_vse16_v_u16m2((uint16_t *)dst, n, vlOut); - continue; - } - vlOut = rvv_utf32_store_utf16_m4((uint16_t *)dst, v, vl, m4even); - } - return dst - dstBeg; -} - -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le( - const char32_t *src, size_t len, char16_t *dst) const noexcept { - return rvv_convert_valid_utf32_to_utf16(src, len, - dst); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be( - const char32_t *src, size_t len, char16_t *dst) const noexcept { - if (supports_zvbb()) - return rvv_convert_valid_utf32_to_utf16(src, len, - dst); - else - return rvv_convert_valid_utf32_to_utf16(src, len, dst); -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -/* end file src/rvv/rvv_utf32_to.inl.cpp */ -/* begin file src/rvv/rvv_utf8_to.inl.cpp */ -#if SIMDUTF_FEATURE_UTF8 && (SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32) -template -simdutf_really_inline static size_t rvv_utf8_to_common(char const *src, - size_t len, Tdst *dst) { - static_assert(std::is_same() || - std::is_same(), - "invalid type"); - constexpr bool is16 = std::is_same(); - constexpr endianness endian = - bflip == simdutf_ByteFlip::NONE ? endianness::LITTLE : endianness::BIG; - const auto scalar = [](char const *in, size_t count, Tdst *out) { - return is16 ? scalar::utf8_to_utf16::convert(in, count, - (char16_t *)out) - : scalar::utf8_to_utf32::convert(in, count, (char32_t *)out); - }; - - if (len < 32) - return scalar(src, len, dst); - - /* validate first three bytes */ - if (validate) { - size_t idx = 3; - while (idx < len && (uint8_t(src[idx]) >> 6) == 0b10) - ++idx; - if (idx > 3 + 3 || !scalar::utf8::validate(src, idx)) - return 0; - } - - size_t tail = 3; - size_t n = len - tail; - Tdst *beg = dst; - - static const uint64_t err1m[] = {0x0202020202020202, 0x4915012180808080}; - static const uint64_t err2m[] = {0xCBCBCB8B8383A3E7, 0xCBCBDBCBCBCBCBCB}; - static const uint64_t err3m[] = {0x0101010101010101, 0X01010101BABAAEE6}; - - const vuint8m1_t err1tbl = - __riscv_vreinterpret_v_u64m1_u8m1(__riscv_vle64_v_u64m1(err1m, 2)); - const vuint8m1_t err2tbl = - __riscv_vreinterpret_v_u64m1_u8m1(__riscv_vle64_v_u64m1(err2m, 2)); - const vuint8m1_t err3tbl = - __riscv_vreinterpret_v_u64m1_u8m1(__riscv_vle64_v_u64m1(err3m, 2)); - - size_t vl8m1 = __riscv_vsetvlmax_e8m1(); - size_t vl8m2 = __riscv_vsetvlmax_e8m2(); - vbool4_t m4even = __riscv_vmseq_vx_u8m2_b4( - __riscv_vand_vx_u8m2(__riscv_vid_v_u8m2(vl8m2), 1, vl8m2), 0, vl8m2); - - for (size_t vl, vlOut; n > 0; n -= vl, src += vl, dst += vlOut) { - vl = __riscv_vsetvl_e8m2(n); - - vuint8m2_t v0 = __riscv_vle8_v_u8m2((uint8_t const *)src, vl); - uint64_t max = __riscv_vmv_x_s_u8m1_u8( - __riscv_vredmaxu_vs_u8m2_u8m1(v0, __riscv_vmv_s_x_u8m1(0, vl), vl)); - - uint8_t next0 = src[vl + 0]; - uint8_t next1 = src[vl + 1]; - uint8_t next2 = src[vl + 2]; - - /* fast path: ASCII */ - if ((max | next0 | next1 | next2) < 0b10000000) { - vlOut = vl; - if (is16) - __riscv_vse16_v_u16m4( - (uint16_t *)dst, - simdutf_byteflip(__riscv_vzext_vf2_u16m4(v0, vlOut), vlOut), - vlOut); - else - __riscv_vse32_v_u32m8((uint32_t *)dst, - __riscv_vzext_vf4_u32m8(v0, vlOut), vlOut); - continue; - } - - /* see "Validating UTF-8 In Less Than One Instruction Per Byte" - * https://arxiv.org/abs/2010.03090 */ - vuint8m2_t v1 = __riscv_vslide1down_vx_u8m2(v0, next0, vl); - vuint8m2_t v2 = __riscv_vslide1down_vx_u8m2(v1, next1, vl); - vuint8m2_t v3 = __riscv_vslide1down_vx_u8m2(v2, next2, vl); - - if (validate) { - vuint8m2_t idx2 = __riscv_vand_vx_u8m2(v2, 0xF, vl); - vuint8m2_t idx1 = __riscv_vsrl_vx_u8m2(v2, 4, vl); - vuint8m2_t idx3 = __riscv_vsrl_vx_u8m2(v3, 4, vl); - - vuint8m2_t err1 = simdutf_vrgather_u8m1x2(err1tbl, idx1); - vuint8m2_t err2 = simdutf_vrgather_u8m1x2(err2tbl, idx2); - vuint8m2_t err3 = simdutf_vrgather_u8m1x2(err3tbl, idx3); - vint8m2_t errs = __riscv_vreinterpret_v_u8m2_i8m2( - __riscv_vand_vv_u8m2(__riscv_vand_vv_u8m2(err1, err2, vl), err3, vl)); - - vbool4_t is_3 = __riscv_vmsgtu_vx_u8m2_b4(v1, 0b11100000 - 1, vl); - vbool4_t is_4 = __riscv_vmsgtu_vx_u8m2_b4(v0, 0b11110000 - 1, vl); - vbool4_t is_34 = __riscv_vmor_mm_b4(is_3, is_4, vl); - vbool4_t err34 = - __riscv_vmxor_mm_b4(is_34, __riscv_vmslt_vx_i8m2_b4(errs, 0, vl), vl); - vbool4_t errm = - __riscv_vmor_mm_b4(__riscv_vmsgt_vx_i8m2_b4(errs, 0, vl), err34, vl); - if (__riscv_vfirst_m_b4(errm, vl) >= 0) - return 0; - } - - /* decoding */ - - /* mask of non continuation bytes */ - vbool4_t m = - __riscv_vmsgt_vx_i8m2_b4(__riscv_vreinterpret_v_u8m2_i8m2(v0), -65, vl); - vlOut = __riscv_vcpop_m_b4(m, vl); - - /* extract first and second bytes */ - vuint8m2_t b1 = __riscv_vcompress_vm_u8m2(v0, m, vl); - vuint8m2_t b2 = __riscv_vcompress_vm_u8m2(v1, m, vl); - - /* fast path: one and two byte */ - if (max < 0b11100000) { - b2 = __riscv_vand_vx_u8m2(b2, 0b00111111, vlOut); - - vbool4_t m1 = __riscv_vmsgtu_vx_u8m2_b4(b1, 0b10111111, vlOut); - b1 = __riscv_vand_vx_u8m2_mu(m1, b1, b1, 63, vlOut); - - vuint16m4_t b12 = __riscv_vwmulu_vv_u16m4( - b1, - __riscv_vmerge_vxm_u8m2(__riscv_vmv_v_x_u8m2(1, vlOut), 1 << 6, m1, - vlOut), - vlOut); - b12 = __riscv_vwaddu_wv_u16m4_mu(m1, b12, b12, b2, vlOut); - if (is16) - __riscv_vse16_v_u16m4((uint16_t *)dst, - simdutf_byteflip(b12, vlOut), vlOut); - else - __riscv_vse32_v_u32m8((uint32_t *)dst, - __riscv_vzext_vf2_u32m8(b12, vlOut), vlOut); - continue; - } - - /* fast path: one, two and three byte */ - if (max < 0b11110000) { - vuint8m2_t b3 = __riscv_vcompress_vm_u8m2(v2, m, vl); - - b2 = __riscv_vand_vx_u8m2(b2, 0b00111111, vlOut); - b3 = __riscv_vand_vx_u8m2(b3, 0b00111111, vlOut); - - vbool4_t m1 = __riscv_vmsgtu_vx_u8m2_b4(b1, 0b10111111, vlOut); - vbool4_t m3 = __riscv_vmsgtu_vx_u8m2_b4(b1, 0b11011111, vlOut); - - vuint8m2_t t1 = __riscv_vand_vx_u8m2_mu(m1, b1, b1, 63, vlOut); - b1 = __riscv_vand_vx_u8m2_mu(m3, t1, b1, 15, vlOut); - - vuint16m4_t b12 = __riscv_vwmulu_vv_u16m4( - b1, - __riscv_vmerge_vxm_u8m2(__riscv_vmv_v_x_u8m2(1, vlOut), 1 << 6, m1, - vlOut), - vlOut); - b12 = __riscv_vwaddu_wv_u16m4_mu(m1, b12, b12, b2, vlOut); - vuint16m4_t b123 = __riscv_vwaddu_wv_u16m4_mu( - m3, b12, __riscv_vsll_vx_u16m4_mu(m3, b12, b12, 6, vlOut), b3, vlOut); - if (is16) - __riscv_vse16_v_u16m4((uint16_t *)dst, - simdutf_byteflip(b123, vlOut), vlOut); - else - __riscv_vse32_v_u32m8((uint32_t *)dst, - __riscv_vzext_vf2_u32m8(b123, vlOut), vlOut); - continue; - } - - /* extract third and fourth bytes */ - vuint8m2_t b3 = __riscv_vcompress_vm_u8m2(v2, m, vl); - vuint8m2_t b4 = __riscv_vcompress_vm_u8m2(v3, m, vl); - - /* remove prefix from leading bytes - * - * We could also use vrgather here, but it increases register pressure, - * and its performance varies widely on current platforms. It might be - * worth reconsidering, though, once there is more hardware available. - * Same goes for the __riscv_vsrl_vv_u32m4 correction step. - * - * We shift left and then right by the number of bytes in the prefix, - * which can be calculated as follows: - * x max(x-10, 0) - * 0xxx -> 0000-0111 -> sift by 0 or 1 -> 0 - * 10xx -> 1000-1011 -> don't care - * 110x -> 1100,1101 -> sift by 3 -> 2,3 - * 1110 -> 1110 -> sift by 4 -> 4 - * 1111 -> 1111 -> sift by 5 -> 5 - * - * vssubu.vx v, 10, (max(x-10, 0)) almost gives us what we want, we - * just need to manually detect and handle the one special case: - */ - #define SIMDUTF_RVV_UTF8_TO_COMMON_M1(idx) \ - vuint8m1_t c1 = __riscv_vget_v_u8m2_u8m1(b1, idx); \ - vuint8m1_t c2 = __riscv_vget_v_u8m2_u8m1(b2, idx); \ - vuint8m1_t c3 = __riscv_vget_v_u8m2_u8m1(b3, idx); \ - vuint8m1_t c4 = __riscv_vget_v_u8m2_u8m1(b4, idx); \ - /* remove prefix from trailing bytes */ \ - c2 = __riscv_vand_vx_u8m1(c2, 0b00111111, vlOut); \ - c3 = __riscv_vand_vx_u8m1(c3, 0b00111111, vlOut); \ - c4 = __riscv_vand_vx_u8m1(c4, 0b00111111, vlOut); \ - vuint8m1_t shift = __riscv_vsrl_vx_u8m1(c1, 4, vlOut); \ - shift = __riscv_vmerge_vxm_u8m1( \ - __riscv_vssubu_vx_u8m1(shift, 10, vlOut), 3, \ - __riscv_vmseq_vx_u8m1_b8(shift, 12, vlOut), vlOut); \ - c1 = __riscv_vsll_vv_u8m1(c1, shift, vlOut); \ - c1 = __riscv_vsrl_vv_u8m1(c1, shift, vlOut); \ - /* unconditionally widen and combine to c1234 */ \ - vuint16m2_t c34 = __riscv_vwaddu_wv_u16m2( \ - __riscv_vwmulu_vx_u16m2(c3, 1 << 6, vlOut), c4, vlOut); \ - vuint16m2_t c12 = __riscv_vwaddu_wv_u16m2( \ - __riscv_vwmulu_vx_u16m2(c1, 1 << 6, vlOut), c2, vlOut); \ - vuint32m4_t c1234 = __riscv_vwaddu_wv_u32m4( \ - __riscv_vwmulu_vx_u32m4(c12, 1 << 12, vlOut), c34, vlOut); \ - /* derive required right-shift amount from `shift` to reduce \ - * c1234 to the required number of bytes */ \ - c1234 = __riscv_vsrl_vv_u32m4( \ - c1234, \ - __riscv_vzext_vf4_u32m4( \ - __riscv_vmul_vx_u8m1( \ - __riscv_vrsub_vx_u8m1(__riscv_vssubu_vx_u8m1(shift, 2, vlOut), \ - 3, vlOut), \ - 6, vlOut), \ - vlOut), \ - vlOut); \ - /* store result in desired format */ \ - if (is16) \ - vlDst = rvv_utf32_store_utf16_m4((uint16_t *)dst, c1234, vlOut, \ - m4even); \ - else \ - vlDst = vlOut, __riscv_vse32_v_u32m4((uint32_t *)dst, c1234, vlOut); - - /* Unrolling this manually reduces register pressure and allows - * us to terminate early. */ - { - size_t vlOutm2 = vlOut, vlDst; - vlOut = __riscv_vsetvl_e8m1(vlOut < vl8m1 ? vlOut : vl8m1); - SIMDUTF_RVV_UTF8_TO_COMMON_M1(0) - if (vlOutm2 == vlOut) { - vlOut = vlDst; - continue; - } - - dst += vlDst; - vlOut = vlOutm2 - vlOut; - } - { - size_t vlDst; - SIMDUTF_RVV_UTF8_TO_COMMON_M1(1) - vlOut = vlDst; - } - - #undef SIMDUTF_RVV_UTF8_TO_COMMON_M1 - } - - /* validate the last character and reparse it + tail */ - if (len > tail) { - if ((uint8_t(src[0]) >> 6) == 0b10) - --dst; - while ((uint8_t(src[0]) >> 6) == 0b10 && tail < len) - --src, ++tail; - if (is16) { - /* go back one more, when on high surrogate */ - if (simdutf_byteflip((uint16_t)dst[-1]) >= 0xD800 && - simdutf_byteflip((uint16_t)dst[-1]) <= 0xDBFF) - --dst; - } - } - size_t ret = scalar(src, tail, dst); - if (ret == 0) - return 0; - return (size_t)(dst - beg) + ret; -} -#endif // SIMDUTF_FEATURE_UTF8 && (SIMDUTF_FEATURE_UTF16 || - // SIMDUTF_FEATURE_UTF32) - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_utf8_to_latin1( - const char *src, size_t len, char *dst) const noexcept { - const char *beg = dst; - uint8_t last = 0; - for (size_t vl, vlOut; len > 0; - len -= vl, src += vl, dst += vlOut, last = src[-1]) { - vl = __riscv_vsetvl_e8m2(len); - vuint8m2_t v1 = __riscv_vle8_v_u8m2((uint8_t *)src, vl); - // check which bytes are ASCII - vbool4_t ascii = __riscv_vmsltu_vx_u8m2_b4(v1, 0b10000000, vl); - // count ASCII bytes - vlOut = __riscv_vcpop_m_b4(ascii, vl); - // The original code would only enter the next block after this check: - // vbool4_t m = __riscv_vmsltu_vx_u8m2_b4(v1, 0b11000000, vl); - // vlOut = __riscv_vcpop_m_b4(m, vl); - // if (vlOut != vl || last > 0b01111111) {...}q - // So that everything is ASCII or continuation bytes, we just proceeded - // without any processing, going straight to __riscv_vse8_v_u8m2. - // But you need the __riscv_vslide1up_vx_u8m2 whenever there is a non-ASCII - // byte. - if (vlOut != vl) { // If not pure ASCII - // Non-ASCII characters - // We now want to mark the ascii and continuation bytes - vbool4_t m = __riscv_vmsltu_vx_u8m2_b4(v1, 0b11000000, vl); - // We count them, that's our new vlOut (output vector length) - vlOut = __riscv_vcpop_m_b4(m, vl); - - vuint8m2_t v0 = __riscv_vslide1up_vx_u8m2(v1, last, vl); - - vbool4_t leading0 = __riscv_vmsgtu_vx_u8m2_b4(v0, 0b10111111, vl); - vbool4_t trailing1 = __riscv_vmslt_vx_i8m2_b4( - __riscv_vreinterpret_v_u8m2_i8m2(v1), (uint8_t)0b11000000, vl); - // -62 i 0b11000010, so we check whether any of v0 is too big - vbool4_t tobig = __riscv_vmand_mm_b4( - leading0, - __riscv_vmsgtu_vx_u8m2_b4(__riscv_vxor_vx_u8m2(v0, (uint8_t)-62, vl), - 1, vl), - vl); - if (__riscv_vfirst_m_b4( - __riscv_vmor_mm_b4( - tobig, __riscv_vmxor_mm_b4(leading0, trailing1, vl), vl), - vl) >= 0) - return 0; - - v1 = __riscv_vor_vx_u8m2_mu(__riscv_vmseq_vx_u8m2_b4(v0, 0b11000011, vl), - v1, v1, 0b01000000, vl); - v1 = __riscv_vcompress_vm_u8m2(v1, m, vl); - } else if (last >= 0b11000000) { // If last byte is a leading byte and we - // got only ASCII, error! - return 0; - } - __riscv_vse8_v_u8m2((uint8_t *)dst, v1, vlOut); - } - if (last > 0b10111111) - return 0; - return dst - beg; -} - -simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors( - const char *src, size_t len, char *dst) const noexcept { - size_t res = convert_utf8_to_latin1(src, len, dst); - if (res) - return result(error_code::SUCCESS, res); - return scalar::utf8_to_latin1::convert_with_errors(src, len, dst); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1( - const char *src, size_t len, char *dst) const noexcept { - const char *beg = dst; - uint8_t last = 0; - for (size_t vl, vlOut; len > 0; - len -= vl, src += vl, dst += vlOut, last = src[-1]) { - vl = __riscv_vsetvl_e8m2(len); - vuint8m2_t v1 = __riscv_vle8_v_u8m2((uint8_t *)src, vl); - vbool4_t ascii = __riscv_vmsltu_vx_u8m2_b4(v1, 0b10000000, vl); - vlOut = __riscv_vcpop_m_b4(ascii, vl); - if (vlOut != vl) { // If not pure ASCII - vbool4_t m = __riscv_vmsltu_vx_u8m2_b4(v1, 0b11000000, vl); - vlOut = __riscv_vcpop_m_b4(m, vl); - vuint8m2_t v0 = __riscv_vslide1up_vx_u8m2(v1, last, vl); - v1 = __riscv_vor_vx_u8m2_mu(__riscv_vmseq_vx_u8m2_b4(v0, 0b11000011, vl), - v1, v1, 0b01000000, vl); - v1 = __riscv_vcompress_vm_u8m2(v1, m, vl); - } - __riscv_vse8_v_u8m2((uint8_t *)dst, v1, vlOut); - } - return dst - beg; -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le( - const char *src, size_t len, char16_t *dst) const noexcept { - return rvv_utf8_to_common(src, len, - (uint16_t *)dst); -} - -simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be( - const char *src, size_t len, char16_t *dst) const noexcept { - if (supports_zvbb()) - return rvv_utf8_to_common( - src, len, (uint16_t *)dst); - else - return rvv_utf8_to_common(src, len, - (uint16_t *)dst); -} - -simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors( - const char *src, size_t len, char16_t *dst) const noexcept { - size_t res = convert_utf8_to_utf16le(src, len, dst); - if (res) - return result(error_code::SUCCESS, res); - return scalar::utf8_to_utf16::convert_with_errors( - src, len, dst); -} - -simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors( - const char *src, size_t len, char16_t *dst) const noexcept { - size_t res = convert_utf8_to_utf16be(src, len, dst); - if (res) - return result(error_code::SUCCESS, res); - return scalar::utf8_to_utf16::convert_with_errors(src, len, - dst); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le( - const char *src, size_t len, char16_t *dst) const noexcept { - return rvv_utf8_to_common( - src, len, (uint16_t *)dst); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be( - const char *src, size_t len, char16_t *dst) const noexcept { - if (supports_zvbb()) - return rvv_utf8_to_common( - src, len, (uint16_t *)dst); - else - return rvv_utf8_to_common( - src, len, (uint16_t *)dst); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::convert_utf8_to_utf32( - const char *src, size_t len, char32_t *dst) const noexcept { - return rvv_utf8_to_common(src, len, - (uint32_t *)dst); -} - -simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors( - const char *src, size_t len, char32_t *dst) const noexcept { - size_t res = convert_utf8_to_utf32(src, len, dst); - if (res) - return result(error_code::SUCCESS, res); - return scalar::utf8_to_utf32::convert_with_errors(src, len, dst); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( - const char *src, size_t len, char32_t *dst) const noexcept { - return rvv_utf8_to_common( - src, len, (uint32_t *)dst); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -/* end file src/rvv/rvv_utf8_to.inl.cpp */ - -#if SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_warn_unused int -implementation::detect_encodings(const char *input, - size_t length) const noexcept { - // If there is a BOM, then we trust it. - auto bom_encoding = simdutf::BOM::check_bom(input, length); - if (bom_encoding != encoding_type::unspecified) - return bom_encoding; - // todo: reimplement as a one-pass algorithm. - int out = 0; - if (validate_utf8(input, length)) - out |= encoding_type::UTF8; - if (length % 2 == 0) { - if (validate_utf16le(reinterpret_cast(input), length / 2)) - out |= encoding_type::UTF16_LE; - } - if (length % 4 == 0) { - if (validate_utf32(reinterpret_cast(input), length / 4)) - out |= encoding_type::UTF32_LE; - } - - return out; -} -#endif // SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF16 - -void implementation::to_well_formed_utf16le(const char16_t *input, size_t len, - char16_t *output) const noexcept { - return scalar::utf16::to_well_formed_utf16(input, len, - output); -} - -void implementation::to_well_formed_utf16be(const char16_t *input, size_t len, - char16_t *output) const noexcept { - return scalar::utf16::to_well_formed_utf16(input, len, - output); -} - -template -simdutf_really_inline static void -rvv_change_endianness_utf16(const char16_t *src, size_t len, char16_t *dst) { - for (size_t vl; len > 0; len -= vl, src += vl, dst += vl) { - vl = __riscv_vsetvl_e16m8(len); - vuint16m8_t v = __riscv_vle16_v_u16m8((uint16_t *)src, vl); - __riscv_vse16_v_u16m8((uint16_t *)dst, simdutf_byteflip(v, vl), vl); - } -} - -void implementation::change_endianness_utf16(const char16_t *src, size_t len, - char16_t *dst) const noexcept { - if (supports_zvbb()) - return rvv_change_endianness_utf16(src, len, dst); - else - return rvv_change_endianness_utf16(src, len, dst); -} -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_BASE64 -simdutf_warn_unused result implementation::base64_to_binary( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - const bool ignore_garbage = - (options == base64_options::base64_url_accept_garbage) || - (options == base64_options::base64_default_accept_garbage); - while (length > 0 && - scalar::base64::is_ascii_white_space(input[length - 1])) { - length--; - } - size_t equallocation = - length; // location of the first padding character if any - size_t equalsigns = 0; - if (length > 0 && input[length - 1] == '=') { - equallocation = length - 1; - length -= 1; - equalsigns++; - while (length > 0 && - scalar::base64::is_ascii_white_space(input[length - 1])) { - length--; - } - if (length > 0 && input[length - 1] == '=') { - equallocation = length - 1; - equalsigns++; - length -= 1; - } - } - if (length == 0) { - if (!ignore_garbage && equalsigns > 0) { - if (last_chunk_options == last_chunk_handling_options::strict) { - return {BASE64_INPUT_REMAINDER, 0}; - } else if (last_chunk_options == - last_chunk_handling_options::stop_before_partial) { - return {SUCCESS, 0}; - } - return {INVALID_BASE64_CHARACTER, equallocation}; - } - return {SUCCESS, 0}; - } - result r = scalar::base64::base64_tail_decode( - output, input, length, equalsigns, options, last_chunk_options); - if (last_chunk_options != stop_before_partial && - r.error == error_code::SUCCESS && equalsigns > 0 && !ignore_garbage) { - // additional checks - if ((r.count % 3 == 0) || ((r.count % 3) + 1 + equalsigns != 4)) { - return {INVALID_BASE64_CHARACTER, equallocation}; - } - } - return r; -} - -simdutf_warn_unused full_result implementation::base64_to_binary_details( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - const bool ignore_garbage = - (options == base64_options::base64_url_accept_garbage) || - (options == base64_options::base64_default_accept_garbage); - while (length > 0 && - scalar::base64::is_ascii_white_space(input[length - 1])) { - length--; - } - size_t equallocation = - length; // location of the first padding character if any - size_t equalsigns = 0; - if (length > 0 && input[length - 1] == '=') { - equallocation = length - 1; - length -= 1; - equalsigns++; - while (length > 0 && - scalar::base64::is_ascii_white_space(input[length - 1])) { - length--; - } - if (length > 0 && input[length - 1] == '=') { - equallocation = length - 1; - equalsigns++; - length -= 1; - } - } - if (length == 0) { - if (!ignore_garbage && equalsigns > 0) { - if (last_chunk_options == last_chunk_handling_options::strict) { - return {BASE64_INPUT_REMAINDER, 0, 0}; - } else if (last_chunk_options == - last_chunk_handling_options::stop_before_partial) { - return {SUCCESS, 0, 0}; - } - return {INVALID_BASE64_CHARACTER, equallocation, 0}; - } - return {SUCCESS, 0, 0}; - } - full_result r = scalar::base64::base64_tail_decode( - output, input, length, equalsigns, options, last_chunk_options); - if (last_chunk_options != stop_before_partial && - r.error == error_code::SUCCESS && equalsigns > 0 && !ignore_garbage) { - // additional checks - if ((r.output_count % 3 == 0) || - ((r.output_count % 3) + 1 + equalsigns != 4)) { - return {INVALID_BASE64_CHARACTER, equallocation, r.output_count}; - } - } - return r; -} - -simdutf_warn_unused result implementation::base64_to_binary( - const char16_t *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - const bool ignore_garbage = - (options == base64_options::base64_url_accept_garbage) || - (options == base64_options::base64_default_accept_garbage); - while (length > 0 && - scalar::base64::is_ascii_white_space(input[length - 1])) { - length--; - } - size_t equallocation = - length; // location of the first padding character if any - auto equalsigns = 0; - if (length > 0 && input[length - 1] == '=') { - equallocation = length - 1; - length -= 1; - equalsigns++; - while (length > 0 && - scalar::base64::is_ascii_white_space(input[length - 1])) { - length--; - } - if (length > 0 && input[length - 1] == '=') { - equallocation = length - 1; - equalsigns++; - length -= 1; - } - } - if (length == 0) { - if (!ignore_garbage && equalsigns > 0) { - if (last_chunk_options == last_chunk_handling_options::strict) { - return {BASE64_INPUT_REMAINDER, 0}; - } else if (last_chunk_options == - last_chunk_handling_options::stop_before_partial) { - return {SUCCESS, 0}; - } - return {INVALID_BASE64_CHARACTER, equallocation}; - } - return {SUCCESS, 0}; - } - result r = scalar::base64::base64_tail_decode( - output, input, length, equalsigns, options, last_chunk_options); - if (last_chunk_options != stop_before_partial && - r.error == error_code::SUCCESS && equalsigns > 0 && !ignore_garbage) { - // additional checks - if ((r.count % 3 == 0) || ((r.count % 3) + 1 + equalsigns != 4)) { - return {INVALID_BASE64_CHARACTER, equallocation}; - } - } - return r; -} - -simdutf_warn_unused full_result implementation::base64_to_binary_details( - const char16_t *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - const bool ignore_garbage = - (options == base64_options::base64_url_accept_garbage) || - (options == base64_options::base64_default_accept_garbage); - while (length > 0 && - scalar::base64::is_ascii_white_space(input[length - 1])) { - length--; - } - size_t equallocation = - length; // location of the first padding character if any - size_t equalsigns = 0; - if (length > 0 && input[length - 1] == '=') { - equallocation = length - 1; - length -= 1; - equalsigns++; - while (length > 0 && - scalar::base64::is_ascii_white_space(input[length - 1])) { - length--; - } - if (length > 0 && input[length - 1] == '=') { - equallocation = length - 1; - equalsigns++; - length -= 1; - } - } - if (length == 0) { - if (!ignore_garbage && equalsigns > 0) { - if (last_chunk_options == last_chunk_handling_options::strict) { - return {BASE64_INPUT_REMAINDER, 0, 0}; - } else if (last_chunk_options == - last_chunk_handling_options::stop_before_partial) { - return {SUCCESS, 0, 0}; - } - return {INVALID_BASE64_CHARACTER, equallocation, 0}; - } - return {SUCCESS, 0, 0}; - } - full_result r = scalar::base64::base64_tail_decode( - output, input, length, equalsigns, options, last_chunk_options); - if (last_chunk_options != stop_before_partial && - r.error == error_code::SUCCESS && equalsigns > 0 && !ignore_garbage) { - // additional checks - if ((r.output_count % 3 == 0) || - ((r.output_count % 3) + 1 + equalsigns != 4)) { - return {INVALID_BASE64_CHARACTER, equallocation, r.output_count}; - } - } - return r; -} - -size_t implementation::binary_to_base64(const char *input, size_t length, - char *output, - base64_options options) const noexcept { - return scalar::base64::tail_encode_base64(output, input, length, options); -} -#endif // SIMDUTF_FEATURE_BASE64 - -} // namespace rvv -} // namespace simdutf - -/* begin file src/simdutf/rvv/end.h */ -#if SIMDUTF_CAN_ALWAYS_RUN_RVV -// nothing needed. -#else -SIMDUTF_UNTARGET_REGION -#endif - -/* end file src/simdutf/rvv/end.h */ -/* end file src/rvv/implementation.cpp */ -#endif -#if SIMDUTF_IMPLEMENTATION_WESTMERE -/* begin file src/westmere/implementation.cpp */ -/* begin file src/simdutf/westmere/begin.h */ -// redefining SIMDUTF_IMPLEMENTATION to "westmere" -// #define SIMDUTF_IMPLEMENTATION westmere -#define SIMDUTF_SIMD_HAS_BYTEMASK 1 - -#if SIMDUTF_CAN_ALWAYS_RUN_WESTMERE -// nothing needed. -#else -SIMDUTF_TARGET_WESTMERE -#endif -/* end file src/simdutf/westmere/begin.h */ - -namespace simdutf { -namespace westmere { -namespace { -#ifndef SIMDUTF_WESTMERE_H - #error "westmere.h must be included" -#endif -using namespace simd; - -#if SIMDUTF_FEATURE_ASCII || SIMDUTF_FEATURE_DETECT_ENCODING || \ - SIMDUTF_FEATURE_UTF8 -simdutf_really_inline bool is_ascii(const simd8x64 &input) { - return input.reduce_or().is_ascii(); -} -#endif // SIMDUTF_FEATURE_ASCII || SIMDUTF_FEATURE_DETECT_ENCODING || - // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_really_inline simd8 -must_be_2_3_continuation(const simd8 prev2, - const simd8 prev3) { - simd8 is_third_byte = - prev2.saturating_sub(0xe0u - 0x80); // Only 111_____ will be >= 0x80 - simd8 is_fourth_byte = - prev3.saturating_sub(0xf0u - 0x80); // Only 1111____ will be >= 0x80 - return simd8(is_third_byte | is_fourth_byte); -} -#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 -/* begin file src/westmere/internal/loader.cpp */ -namespace internal { -namespace westmere { - -/* begin file src/westmere/internal/write_v_u16_11bits_to_utf8.cpp */ -/* - * reads a vector of uint16 values - * bits after 11th are ignored - * first 11 bits are encoded into utf8 - * !important! utf8_output must have at least 16 writable bytes - */ - -inline void write_v_u16_11bits_to_utf8(const __m128i v_u16, char *&utf8_output, - const __m128i one_byte_bytemask, - const uint16_t one_byte_bitmask) { - // 0b1100_0000_1000_0000 - const __m128i v_c080 = _mm_set1_epi16((int16_t)0xc080); - // 0b0001_1111_0000_0000 - const __m128i v_1f00 = _mm_set1_epi16((int16_t)0x1f00); - // 0b0000_0000_0011_1111 - const __m128i v_003f = _mm_set1_epi16((int16_t)0x003f); - - // 1. prepare 2-byte values - // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 - // expected output : [110a|aaaa|10bb|bbbb] x 8 - - // t0 = [000a|aaaa|bbbb|bb00] - const __m128i t0 = _mm_slli_epi16(v_u16, 2); - // t1 = [000a|aaaa|0000|0000] - const __m128i t1 = _mm_and_si128(t0, v_1f00); - // t2 = [0000|0000|00bb|bbbb] - const __m128i t2 = _mm_and_si128(v_u16, v_003f); - // t3 = [000a|aaaa|00bb|bbbb] - const __m128i t3 = _mm_or_si128(t1, t2); - // t4 = [110a|aaaa|10bb|bbbb] - const __m128i t4 = _mm_or_si128(t3, v_c080); - - // 2. merge ASCII and 2-byte codewords - const __m128i utf8_unpacked = _mm_blendv_epi8(t4, v_u16, one_byte_bytemask); - - // 3. prepare bitmask for 8-bit lookup - // one_byte_bitmask = hhggffeeddccbbaa -- the bits are doubled (h - MSB, a - // - LSB) - const uint16_t m0 = one_byte_bitmask & 0x5555; // m0 = 0h0g0f0e0d0c0b0a - const uint16_t m1 = static_cast(m0 >> 7); // m1 = 00000000h0g0f0e0 - const uint8_t m2 = static_cast((m0 | m1) & 0xff); // m2 = hdgcfbea - // 4. pack the bytes - const uint8_t *row = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0]; - const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1)); - const __m128i utf8_packed = _mm_shuffle_epi8(utf8_unpacked, shuffle); - - // 5. store bytes - _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); - - // 6. adjust pointers - utf8_output += row[0]; -} - -inline void write_v_u16_11bits_to_utf8(const __m128i v_u16, char *&utf8_output, - const __m128i v_0000, - const __m128i v_ff80) { - // no bits set above 7th bit - const __m128i one_byte_bytemask = - _mm_cmpeq_epi16(_mm_and_si128(v_u16, v_ff80), v_0000); - const uint16_t one_byte_bitmask = - static_cast(_mm_movemask_epi8(one_byte_bytemask)); - - write_v_u16_11bits_to_utf8(v_u16, utf8_output, one_byte_bytemask, - one_byte_bitmask); -} -/* end file src/westmere/internal/write_v_u16_11bits_to_utf8.cpp */ - -} // namespace westmere -} // namespace internal -/* end file src/westmere/internal/loader.cpp */ -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_UTF16 -/* begin file src/westmere/sse_utf16fix.cpp */ -/* - * Process one block of 8 characters. If in_place is false, - * copy the block from in to out. If there is a sequencing - * error in the block, overwrite the illsequenced characters - * with the replacement character. This function reads one - * character before the beginning of the buffer as a lookback. - * If that character is illsequenced, it too is overwritten. - */ -template -simdutf_really_inline void utf16fix_block_sse(char16_t *out, - const char16_t *in) { - const char16_t replacement = scalar::utf16::replacement(); - auto swap_if_needed = [](uint16_t c) -> uint16_t { - return !simdutf::match_system(big_endian) ? scalar::u16_swap_bytes(c) : c; - }; - - __m128i lookback, block, lb_masked, block_masked, lb_is_high, block_is_low; - __m128i illseq, lb_illseq, block_illseq; - - lookback = _mm_loadu_si128((const __m128i *)(in - 1)); - block = _mm_loadu_si128((const __m128i *)in); - lb_masked = _mm_and_si128(lookback, _mm_set1_epi16(swap_if_needed(0xfc00U))); - block_masked = _mm_and_si128(block, _mm_set1_epi16(swap_if_needed(0xfc00U))); - lb_is_high = - _mm_cmpeq_epi16(lb_masked, _mm_set1_epi16(swap_if_needed(0xd800U))); - block_is_low = - _mm_cmpeq_epi16(block_masked, _mm_set1_epi16(swap_if_needed(0xdc00U))); - - illseq = _mm_xor_si128(lb_is_high, block_is_low); - if (_mm_movemask_epi8(illseq) != 0) { - int lb; - - /* compute the cause of the illegal sequencing */ - lb_illseq = _mm_andnot_si128(block_is_low, lb_is_high); - block_illseq = _mm_or_si128(_mm_andnot_si128(lb_is_high, block_is_low), - _mm_bsrli_si128(lb_illseq, 2)); - - /* fix illegal sequencing in the lookback */ - lb = _mm_cvtsi128_si32(lb_illseq); - lb = (lb & replacement) | (~lb & out[-1]); - out[-1] = char16_t(lb); - /* fix illegal sequencing in the main block */ - block = - _mm_or_si128(_mm_andnot_si128(block_illseq, block), - _mm_and_si128(block_illseq, _mm_set1_epi16(replacement))); - _mm_storeu_si128((__m128i *)out, block); - } else if (!in_place) { - _mm_storeu_si128((__m128i *)out, block); - } -} - -template -void utf16fix_sse(const char16_t *in, size_t n, char16_t *out) { - const char16_t replacement = scalar::utf16::replacement(); - size_t i; - if (n < 9) { - scalar::utf16::to_well_formed_utf16(in, n, out); - return; - } - - out[0] = - scalar::utf16::is_low_surrogate(in[0]) ? replacement : in[0]; - - /* duplicate code to have the compiler specialise utf16fix_block() */ - if (in == out) { - for (i = 1; i + 8 < n; i += 8) { - utf16fix_block_sse(out + i, in + i); - } - - utf16fix_block_sse(out + n - 8, in + n - 8); - } else { - for (i = 1; i + 8 < n; i += 8) { - utf16fix_block_sse(out + i, in + i); - } - - utf16fix_block_sse(out + n - 8, in + n - 8); - } - - out[n - 1] = scalar::utf16::is_high_surrogate(out[n - 1]) - ? replacement - : out[n - 1]; -} -/* end file src/westmere/sse_utf16fix.cpp */ -#endif // SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING -/* begin file src/westmere/sse_validate_utf16.cpp */ -template -simd8 utf16_gather_high_bytes(const simd16 in0, - const simd16 in1) { - if (big_endian) { - // we want lower bytes - const auto mask = simd16(0x00ff); - const auto t0 = in0 & mask; - const auto t1 = in1 & mask; - - return simd16::pack(t0, t1); - } else { - const auto t0 = in0.shr<8>(); - const auto t1 = in1.shr<8>(); - - return simd16::pack(t0, t1); - } -} -/* end file src/westmere/sse_validate_utf16.cpp */ -#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/westmere/sse_convert_latin1_to_utf8.cpp */ -std::pair -sse_convert_latin1_to_utf8(const char *latin_input, - const size_t latin_input_length, char *utf8_output) { - const char *end = latin_input + latin_input_length; - - const __m128i v_0000 = _mm_setzero_si128(); - // 0b1000_0000 - const __m128i v_80 = _mm_set1_epi8((uint8_t)0x80); - // 0b1111_1111_1000_0000 - const __m128i v_ff80 = _mm_set1_epi16((uint16_t)0xff80); - - const __m128i latin_1_half_into_u16_byte_mask = - _mm_setr_epi8(0, '\x80', 1, '\x80', 2, '\x80', 3, '\x80', 4, '\x80', 5, - '\x80', 6, '\x80', 7, '\x80'); - - const __m128i latin_2_half_into_u16_byte_mask = - _mm_setr_epi8(8, '\x80', 9, '\x80', 10, '\x80', 11, '\x80', 12, '\x80', - 13, '\x80', 14, '\x80', 15, '\x80'); - - // each latin1 takes 1-2 utf8 bytes - // slow path writes useful 8-15 bytes twice (eagerly writes 16 bytes and then - // adjust the pointer) so the last write can exceed the utf8_output size by - // 8-1 bytes by reserving 8 extra input bytes, we expect the output to have - // 8-16 bytes free - while (end - latin_input >= 16 + 8) { - // Load 16 Latin1 characters (16 bytes) into a 128-bit register - __m128i v_latin = _mm_loadu_si128((__m128i *)latin_input); - - if (_mm_testz_si128(v_latin, v_80)) { // ASCII fast path!!!! - _mm_storeu_si128((__m128i *)utf8_output, v_latin); - latin_input += 16; - utf8_output += 16; - continue; - } - - // assuming a/b are bytes and A/B are uint16 of the same value - // aaaa_aaaa_bbbb_bbbb -> AAAA_AAAA - __m128i v_u16_latin_1_half = - _mm_shuffle_epi8(v_latin, latin_1_half_into_u16_byte_mask); - // aaaa_aaaa_bbbb_bbbb -> BBBB_BBBB - __m128i v_u16_latin_2_half = - _mm_shuffle_epi8(v_latin, latin_2_half_into_u16_byte_mask); - - internal::westmere::write_v_u16_11bits_to_utf8(v_u16_latin_1_half, - utf8_output, v_0000, v_ff80); - internal::westmere::write_v_u16_11bits_to_utf8(v_u16_latin_2_half, - utf8_output, v_0000, v_ff80); - latin_input += 16; - } - - if (end - latin_input >= 16) { - // Load 16 Latin1 characters (16 bytes) into a 128-bit register - __m128i v_latin = _mm_loadu_si128((__m128i *)latin_input); - - if (_mm_testz_si128(v_latin, v_80)) { // ASCII fast path!!!! - _mm_storeu_si128((__m128i *)utf8_output, v_latin); - latin_input += 16; - utf8_output += 16; - } else { - // assuming a/b are bytes and A/B are uint16 of the same value - // aaaa_aaaa_bbbb_bbbb -> AAAA_AAAA - __m128i v_u16_latin_1_half = - _mm_shuffle_epi8(v_latin, latin_1_half_into_u16_byte_mask); - internal::westmere::write_v_u16_11bits_to_utf8( - v_u16_latin_1_half, utf8_output, v_0000, v_ff80); - latin_input += 8; - } - } - - return std::make_pair(latin_input, utf8_output); -} -/* end file src/westmere/sse_convert_latin1_to_utf8.cpp */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/westmere/sse_convert_latin1_to_utf16.cpp */ -template -std::pair -sse_convert_latin1_to_utf16(const char *latin1_input, size_t len, - char16_t *utf16_output) { - size_t rounded_len = len & ~0xF; // Round down to nearest multiple of 16 - for (size_t i = 0; i < rounded_len; i += 16) { - // Load 16 Latin1 characters into a 128-bit register - __m128i in = - _mm_loadu_si128(reinterpret_cast(&latin1_input[i])); - __m128i out1 = big_endian ? _mm_unpacklo_epi8(_mm_setzero_si128(), in) - : _mm_unpacklo_epi8(in, _mm_setzero_si128()); - __m128i out2 = big_endian ? _mm_unpackhi_epi8(_mm_setzero_si128(), in) - : _mm_unpackhi_epi8(in, _mm_setzero_si128()); - // Zero extend each Latin1 character to 16-bit integers and store the - // results back to memory - _mm_storeu_si128(reinterpret_cast<__m128i *>(&utf16_output[i]), out1); - _mm_storeu_si128(reinterpret_cast<__m128i *>(&utf16_output[i + 8]), out2); - } - // return pointers pointing to where we left off - return std::make_pair(latin1_input + rounded_len, utf16_output + rounded_len); -} -/* end file src/westmere/sse_convert_latin1_to_utf16.cpp */ -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/westmere/sse_convert_latin1_to_utf32.cpp */ -std::pair -sse_convert_latin1_to_utf32(const char *buf, size_t len, - char32_t *utf32_output) { - const char *end = buf + len; - - while (end - buf >= 16) { - // Load 16 Latin1 characters (16 bytes) into a 128-bit register - __m128i in = _mm_loadu_si128((__m128i *)buf); - - // Shift input to process next 4 bytes - __m128i in_shifted1 = _mm_srli_si128(in, 4); - __m128i in_shifted2 = _mm_srli_si128(in, 8); - __m128i in_shifted3 = _mm_srli_si128(in, 12); - - // expand 8-bit to 32-bit unit - __m128i out1 = _mm_cvtepu8_epi32(in); - __m128i out2 = _mm_cvtepu8_epi32(in_shifted1); - __m128i out3 = _mm_cvtepu8_epi32(in_shifted2); - __m128i out4 = _mm_cvtepu8_epi32(in_shifted3); - - _mm_storeu_si128((__m128i *)utf32_output, out1); - _mm_storeu_si128((__m128i *)(utf32_output + 4), out2); - _mm_storeu_si128((__m128i *)(utf32_output + 8), out3); - _mm_storeu_si128((__m128i *)(utf32_output + 12), out4); - - utf32_output += 16; - buf += 16; - } - - return std::make_pair(buf, utf32_output); -} -/* end file src/westmere/sse_convert_latin1_to_utf32.cpp */ -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -/* begin file src/westmere/sse_convert_utf8_to_utf16.cpp */ -// depends on "tables/utf8_to_utf16_tables.h" - -// Convert up to 12 bytes from utf8 to utf16 using a mask indicating the -// end of the code points. Only the least significant 12 bits of the mask -// are accessed. -// It returns how many bytes were consumed (up to 12). -template -size_t convert_masked_utf8_to_utf16(const char *input, - uint64_t utf8_end_of_code_point_mask, - char16_t *&utf16_output) { - // we use an approach where we try to process up to 12 input bytes. - // Why 12 input bytes and not 16? Because we are concerned with the size of - // the lookup tables. Also 12 is nicely divisible by two and three. - // - // - // Optimization note: our main path below is load-latency dependent. Thus it - // is maybe beneficial to have fast paths that depend on branch prediction but - // have less latency. This results in more instructions but, potentially, also - // higher speeds. - // - // We first try a few fast paths. - const __m128i swap = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - const __m128i in = _mm_loadu_si128((__m128i *)input); - const uint16_t input_utf8_end_of_code_point_mask = - utf8_end_of_code_point_mask & 0xfff; - if (utf8_end_of_code_point_mask == 0xfff) { - // We process the data in chunks of 12 bytes. - // Note: using 16 bytes is unsafe, see issue_ossfuzz_71218 - __m128i ascii_first = _mm_cvtepu8_epi16(in); - __m128i ascii_second = _mm_cvtepu8_epi16(_mm_srli_si128(in, 8)); - if (big_endian) { - ascii_first = _mm_shuffle_epi8(ascii_first, swap); - ascii_second = _mm_shuffle_epi8(ascii_second, swap); - } - _mm_storeu_si128(reinterpret_cast<__m128i *>(utf16_output), ascii_first); - _mm_storeu_si128(reinterpret_cast<__m128i *>(utf16_output + 8), - ascii_second); - utf16_output += 12; // We wrote 12 16-bit characters. - return 12; // We consumed 12 bytes. - } - if (((utf8_end_of_code_point_mask & 0xFFFF) == 0xaaaa)) { - // We want to take 8 2-byte UTF-8 code units and turn them into 8 2-byte - // UTF-16 code units. There is probably a more efficient sequence, but the - // following might do. - const __m128i sh = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - const __m128i perm = _mm_shuffle_epi8(in, sh); - const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f)); - const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00)); - __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2)); - if (big_endian) - composed = _mm_shuffle_epi8(composed, swap); - _mm_storeu_si128((__m128i *)utf16_output, composed); - utf16_output += 8; // We wrote 16 bytes, 8 code points. - return 16; - } - if (input_utf8_end_of_code_point_mask == 0x924) { - // We want to take 4 3-byte UTF-8 code units and turn them into 4 2-byte - // UTF-16 code units. There is probably a more efficient sequence, but the - // following might do. - const __m128i sh = - _mm_setr_epi8(2, 1, 0, -1, 5, 4, 3, -1, 8, 7, 6, -1, 11, 10, 9, -1); - const __m128i perm = _mm_shuffle_epi8(in, sh); - const __m128i ascii = - _mm_and_si128(perm, _mm_set1_epi32(0x7f)); // 7 or 6 bits - const __m128i middlebyte = - _mm_and_si128(perm, _mm_set1_epi32(0x3f00)); // 5 or 6 bits - const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2); - const __m128i highbyte = - _mm_and_si128(perm, _mm_set1_epi32(0x0f0000)); // 4 bits - const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 4); - const __m128i composed = - _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), highbyte_shifted); - __m128i composed_repacked = _mm_packus_epi32(composed, composed); - if (big_endian) - composed_repacked = _mm_shuffle_epi8(composed_repacked, swap); - _mm_storeu_si128((__m128i *)utf16_output, composed_repacked); - utf16_output += 4; - return 12; - } - /// We do not have a fast path available, so we fallback. - - const uint8_t idx = - tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][0]; - const uint8_t consumed = - tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1]; - if (idx < 64) { - // SIX (6) input code-code units - // this is a relatively easy scenario - // we process SIX (6) input code-code units. The max length in bytes of six - // code code units spanning between 1 and 2 bytes each is 12 bytes. On - // processors where pdep/pext is fast, we might be able to use a small - // lookup table. - const __m128i sh = - _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]); - const __m128i perm = _mm_shuffle_epi8(in, sh); - const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f)); - const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00)); - __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2)); - if (big_endian) - composed = _mm_shuffle_epi8(composed, swap); - _mm_storeu_si128((__m128i *)utf16_output, composed); - utf16_output += 6; // We wrote 12 bytes, 6 code points. - } else if (idx < 145) { - // FOUR (4) input code-code units - const __m128i sh = - _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]); - const __m128i perm = _mm_shuffle_epi8(in, sh); - const __m128i ascii = - _mm_and_si128(perm, _mm_set1_epi32(0x7f)); // 7 or 6 bits - const __m128i middlebyte = - _mm_and_si128(perm, _mm_set1_epi32(0x3f00)); // 5 or 6 bits - const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2); - const __m128i highbyte = - _mm_and_si128(perm, _mm_set1_epi32(0x0f0000)); // 4 bits - const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 4); - const __m128i composed = - _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), highbyte_shifted); - __m128i composed_repacked = _mm_packus_epi32(composed, composed); - if (big_endian) - composed_repacked = _mm_shuffle_epi8(composed_repacked, swap); - _mm_storeu_si128((__m128i *)utf16_output, composed_repacked); - utf16_output += 4; - } else if (idx < 209) { - // TWO (2) input code-code units - ////////////// - // There might be garbage inputs where a leading byte mascarades as a - // four-byte leading byte (by being followed by 3 continuation byte), but is - // not greater than 0xf0. This could trigger a buffer overflow if we only - // counted leading bytes of the form 0xf0 as generating surrogate pairs, - // without further UTF-8 validation. Thus we must be careful to ensure that - // only leading bytes at least as large as 0xf0 generate surrogate pairs. We - // do as at the cost of an extra mask. - ///////////// - const __m128i sh = - _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]); - const __m128i perm = _mm_shuffle_epi8(in, sh); - const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi32(0x7f)); - const __m128i middlebyte = _mm_and_si128(perm, _mm_set1_epi32(0x3f00)); - const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2); - __m128i middlehighbyte = _mm_and_si128(perm, _mm_set1_epi32(0x3f0000)); - // correct for spurious high bit - const __m128i correct = - _mm_srli_epi32(_mm_and_si128(perm, _mm_set1_epi32(0x400000)), 1); - middlehighbyte = _mm_xor_si128(correct, middlehighbyte); - const __m128i middlehighbyte_shifted = _mm_srli_epi32(middlehighbyte, 4); - // We deliberately carry the leading four bits in highbyte if they are - // present, we remove them later when computing hightenbits. - const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi32(0xff000000)); - const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 6); - // When we need to generate a surrogate pair (leading byte > 0xF0), then - // the corresponding 32-bit value in 'composed' will be greater than - // > (0xff00000>>6) or > 0x3c00000. This can be used later to identify the - // location of the surrogate pairs. - const __m128i composed = - _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), - _mm_or_si128(highbyte_shifted, middlehighbyte_shifted)); - const __m128i composedminus = - _mm_sub_epi32(composed, _mm_set1_epi32(0x10000)); - const __m128i lowtenbits = - _mm_and_si128(composedminus, _mm_set1_epi32(0x3ff)); - // Notice the 0x3ff mask: - const __m128i hightenbits = - _mm_and_si128(_mm_srli_epi32(composedminus, 10), _mm_set1_epi32(0x3ff)); - const __m128i lowtenbitsadd = - _mm_add_epi32(lowtenbits, _mm_set1_epi32(0xDC00)); - const __m128i hightenbitsadd = - _mm_add_epi32(hightenbits, _mm_set1_epi32(0xD800)); - const __m128i lowtenbitsaddshifted = _mm_slli_epi32(lowtenbitsadd, 16); - __m128i surrogates = _mm_or_si128(hightenbitsadd, lowtenbitsaddshifted); - uint32_t basic_buffer[4]; - uint32_t basic_buffer_swap[4]; - if (big_endian) { - _mm_storeu_si128((__m128i *)basic_buffer_swap, - _mm_shuffle_epi8(composed, swap)); - surrogates = _mm_shuffle_epi8(surrogates, swap); - } - _mm_storeu_si128((__m128i *)basic_buffer, composed); - uint32_t surrogate_buffer[4]; - _mm_storeu_si128((__m128i *)surrogate_buffer, surrogates); - for (size_t i = 0; i < 3; i++) { - if (basic_buffer[i] > 0x3c00000) { - utf16_output[0] = uint16_t(surrogate_buffer[i] & 0xffff); - utf16_output[1] = uint16_t(surrogate_buffer[i] >> 16); - utf16_output += 2; - } else { - utf16_output[0] = big_endian ? uint16_t(basic_buffer_swap[i]) - : uint16_t(basic_buffer[i]); - utf16_output++; - } - } - } else { - // here we know that there is an error but we do not handle errors - } - return consumed; -} -/* end file src/westmere/sse_convert_utf8_to_utf16.cpp */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -/* begin file src/westmere/sse_convert_utf8_to_utf32.cpp */ -// depends on "tables/utf8_to_utf16_tables.h" - -// Convert up to 12 bytes from utf8 to utf32 using a mask indicating the -// end of the code points. Only the least significant 12 bits of the mask -// are accessed. -// It returns how many bytes were consumed (up to 12). -size_t convert_masked_utf8_to_utf32(const char *input, - uint64_t utf8_end_of_code_point_mask, - char32_t *&utf32_output) { - // we use an approach where we try to process up to 12 input bytes. - // Why 12 input bytes and not 16? Because we are concerned with the size of - // the lookup tables. Also 12 is nicely divisible by two and three. - // - // - // Optimization note: our main path below is load-latency dependent. Thus it - // is maybe beneficial to have fast paths that depend on branch prediction but - // have less latency. This results in more instructions but, potentially, also - // higher speeds. - // - // We first try a few fast paths. - const __m128i in = _mm_loadu_si128((__m128i *)input); - const uint16_t input_utf8_end_of_code_point_mask = - utf8_end_of_code_point_mask & 0xfff; - if (utf8_end_of_code_point_mask == 0xfff) { - // We process the data in chunks of 12 bytes. - _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output), - _mm_cvtepu8_epi32(in)); - _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output + 4), - _mm_cvtepu8_epi32(_mm_srli_si128(in, 4))); - _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output + 8), - _mm_cvtepu8_epi32(_mm_srli_si128(in, 8))); - _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output + 12), - _mm_cvtepu8_epi32(_mm_srli_si128(in, 12))); - utf32_output += 12; // We wrote 12 32-bit characters. - return 12; // We consumed 12 bytes. - } - if (((utf8_end_of_code_point_mask & 0xffff) == 0xaaaa)) { - // We want to take 8 2-byte UTF-8 code units and turn them into 8 4-byte - // UTF-32 code units. There is probably a more efficient sequence, but the - // following might do. - const __m128i sh = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - const __m128i perm = _mm_shuffle_epi8(in, sh); - const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f)); - const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00)); - const __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2)); - _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output), - _mm_cvtepu16_epi32(composed)); - _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output + 4), - _mm_cvtepu16_epi32(_mm_srli_si128(composed, 8))); - utf32_output += 8; // We wrote 32 bytes, 8 code points. - return 16; - } - if (input_utf8_end_of_code_point_mask == 0x924) { - // We want to take 4 3-byte UTF-8 code units and turn them into 4 4-byte - // UTF-32 code units. There is probably a more efficient sequence, but the - // following might do. - const __m128i sh = - _mm_setr_epi8(2, 1, 0, -1, 5, 4, 3, -1, 8, 7, 6, -1, 11, 10, 9, -1); - const __m128i perm = _mm_shuffle_epi8(in, sh); - const __m128i ascii = - _mm_and_si128(perm, _mm_set1_epi32(0x7f)); // 7 or 6 bits - const __m128i middlebyte = - _mm_and_si128(perm, _mm_set1_epi32(0x3f00)); // 5 or 6 bits - const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2); - const __m128i highbyte = - _mm_and_si128(perm, _mm_set1_epi32(0x0f0000)); // 4 bits - const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 4); - const __m128i composed = - _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), highbyte_shifted); - _mm_storeu_si128((__m128i *)utf32_output, composed); - utf32_output += 4; - return 12; - } - /// We do not have a fast path available, so we fallback. - - const uint8_t idx = - tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][0]; - const uint8_t consumed = - tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1]; - if (idx < 64) { - // SIX (6) input code-code units - // this is a relatively easy scenario - // we process SIX (6) input code-code units. The max length in bytes of six - // code code units spanning between 1 and 2 bytes each is 12 bytes. On - // processors where pdep/pext is fast, we might be able to use a small - // lookup table. - const __m128i sh = - _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]); - const __m128i perm = _mm_shuffle_epi8(in, sh); - const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f)); - const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00)); - const __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2)); - _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output), - _mm_cvtepu16_epi32(composed)); - _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output + 4), - _mm_cvtepu16_epi32(_mm_srli_si128(composed, 8))); - utf32_output += 6; // We wrote 12 bytes, 6 code points. - } else if (idx < 145) { - // FOUR (4) input code-code units - const __m128i sh = - _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]); - const __m128i perm = _mm_shuffle_epi8(in, sh); - const __m128i ascii = - _mm_and_si128(perm, _mm_set1_epi32(0x7f)); // 7 or 6 bits - const __m128i middlebyte = - _mm_and_si128(perm, _mm_set1_epi32(0x3f00)); // 5 or 6 bits - const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2); - const __m128i highbyte = - _mm_and_si128(perm, _mm_set1_epi32(0x0f0000)); // 4 bits - const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 4); - const __m128i composed = - _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), highbyte_shifted); - _mm_storeu_si128((__m128i *)utf32_output, composed); - utf32_output += 4; - } else if (idx < 209) { - // TWO (2) input code-code units - const __m128i sh = - _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]); - const __m128i perm = _mm_shuffle_epi8(in, sh); - const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi32(0x7f)); - const __m128i middlebyte = _mm_and_si128(perm, _mm_set1_epi32(0x3f00)); - const __m128i middlebyte_shifted = _mm_srli_epi32(middlebyte, 2); - __m128i middlehighbyte = _mm_and_si128(perm, _mm_set1_epi32(0x3f0000)); - // correct for spurious high bit - const __m128i correct = - _mm_srli_epi32(_mm_and_si128(perm, _mm_set1_epi32(0x400000)), 1); - middlehighbyte = _mm_xor_si128(correct, middlehighbyte); - const __m128i middlehighbyte_shifted = _mm_srli_epi32(middlehighbyte, 4); - const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi32(0x07000000)); - const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 6); - const __m128i composed = - _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), - _mm_or_si128(highbyte_shifted, middlehighbyte_shifted)); - _mm_storeu_si128((__m128i *)utf32_output, composed); - utf32_output += 3; - } else { - // here we know that there is an error but we do not handle errors - } - return consumed; -} -/* end file src/westmere/sse_convert_utf8_to_utf32.cpp */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/westmere/sse_convert_utf8_to_latin1.cpp */ -// depends on "tables/utf8_to_utf16_tables.h" - -// Convert up to 12 bytes from utf8 to latin1 using a mask indicating the -// end of the code points. Only the least significant 12 bits of the mask -// are accessed. -// It returns how many bytes were consumed (up to 12). -size_t convert_masked_utf8_to_latin1(const char *input, - uint64_t utf8_end_of_code_point_mask, - char *&latin1_output) { - // we use an approach where we try to process up to 12 input bytes. - // Why 12 input bytes and not 16? Because we are concerned with the size of - // the lookup tables. Also 12 is nicely divisible by two and three. - // - // - // Optimization note: our main path below is load-latency dependent. Thus it - // is maybe beneficial to have fast paths that depend on branch prediction but - // have less latency. This results in more instructions but, potentially, also - // higher speeds. - // - const __m128i in = _mm_loadu_si128((__m128i *)input); - const uint16_t input_utf8_end_of_code_point_mask = - utf8_end_of_code_point_mask & - 0xfff; // we are only processing 12 bytes in case it is not all ASCII - if (utf8_end_of_code_point_mask == 0xfff) { - // We process the data in chunks of 12 bytes. - _mm_storeu_si128(reinterpret_cast<__m128i *>(latin1_output), in); - latin1_output += 12; // We wrote 12 characters. - return 12; // We consumed 12 bytes. - } - /// We do not have a fast path available, so we fallback. - const uint8_t idx = - tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][0]; - const uint8_t consumed = - tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1]; - // this indicates an invalid input: - if (idx >= 64) { - return consumed; - } - // Here we should have (idx < 64), if not, there is a bug in the validation or - // elsewhere. SIX (6) input code-code units this is a relatively easy scenario - // we process SIX (6) input code-code units. The max length in bytes of six - // code code units spanning between 1 and 2 bytes each is 12 bytes. On - // processors where pdep/pext is fast, we might be able to use a small lookup - // table. - const __m128i sh = - _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]); - const __m128i perm = _mm_shuffle_epi8(in, sh); - const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f)); - const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00)); - __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2)); - const __m128i latin1_packed = _mm_packus_epi16(composed, composed); - // writing 8 bytes even though we only care about the first 6 bytes. - // performance note: it would be faster to use _mm_storeu_si128, we should - // investigate. - _mm_storel_epi64((__m128i *)latin1_output, latin1_packed); - latin1_output += 6; // We wrote 6 bytes. - return consumed; -} -/* end file src/westmere/sse_convert_utf8_to_latin1.cpp */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/westmere/sse_convert_utf16_to_latin1.cpp */ -template -std::pair -sse_convert_utf16_to_latin1(const char16_t *buf, size_t len, - char *latin1_output) { - const char16_t *end = buf + len; - while (end - buf >= 8) { - // Load 8 UTF-16 characters into 128-bit SSE register - __m128i in = _mm_loadu_si128(reinterpret_cast(buf)); - - if (!match_system(big_endian)) { - const __m128i swap = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - in = _mm_shuffle_epi8(in, swap); - } - - __m128i high_byte_mask = _mm_set1_epi16((int16_t)0xFF00); - if (_mm_testz_si128(in, high_byte_mask)) { - // Pack 16-bit characters into 8-bit and store in latin1_output - __m128i latin1_packed = _mm_packus_epi16(in, in); - _mm_storel_epi64(reinterpret_cast<__m128i *>(latin1_output), - latin1_packed); - // Adjust pointers for next iteration - buf += 8; - latin1_output += 8; - } else { - return std::make_pair(nullptr, reinterpret_cast(latin1_output)); - } - } // while - return std::make_pair(buf, latin1_output); -} - -template -std::pair -sse_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, - char *latin1_output) { - const char16_t *start = buf; - const char16_t *end = buf + len; - while (end - buf >= 8) { - __m128i in = _mm_loadu_si128(reinterpret_cast(buf)); - - if (!match_system(big_endian)) { - const __m128i swap = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - in = _mm_shuffle_epi8(in, swap); - } - - __m128i high_byte_mask = _mm_set1_epi16((int16_t)0xFF00); - if (_mm_testz_si128(in, high_byte_mask)) { - __m128i latin1_packed = _mm_packus_epi16(in, in); - _mm_storel_epi64(reinterpret_cast<__m128i *>(latin1_output), - latin1_packed); - buf += 8; - latin1_output += 8; - } else { - // Fallback to scalar code for handling errors - for (int k = 0; k < 8; k++) { - uint16_t word = - !match_system(big_endian) ? scalar::u16_swap_bytes(buf[k]) : buf[k]; - if (word <= 0xff) { - *latin1_output++ = char(word); - } else { - return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), - latin1_output); - } - } - buf += 8; - } - } // while - return std::make_pair(result(error_code::SUCCESS, buf - start), - latin1_output); -} -/* end file src/westmere/sse_convert_utf16_to_latin1.cpp */ -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -/* begin file src/westmere/sse_convert_utf16_to_utf8.cpp */ -/* - The vectorized algorithm works on single SSE register i.e., it - loads eight 16-bit code units. - - We consider three cases: - 1. an input register contains no surrogates and each value - is in range 0x0000 .. 0x07ff. - 2. an input register contains no surrogates and values are - is in range 0x0000 .. 0xffff. - 3. an input register contains surrogates --- i.e. codepoints - can have 16 or 32 bits. - - Ad 1. - - When values are less than 0x0800, it means that a 16-bit code unit - can be converted into: 1) single UTF8 byte (when it is an ASCII - char) or 2) two UTF8 bytes. - - For this case we do only some shuffle to obtain these 2-byte - codes and finally compress the whole SSE register with a single - shuffle. - - We need 256-entry lookup table to get a compression pattern - and the number of output bytes in the compressed vector register. - Each entry occupies 17 bytes. - - Ad 2. - - When values fit in 16-bit code units, but are above 0x07ff, then - a single word may produce one, two or three UTF8 bytes. - - We prepare data for all these three cases in two registers. - The first register contains lower two UTF8 bytes (used in all - cases), while the second one contains just the third byte for - the three-UTF8-bytes case. - - Finally these two registers are interleaved forming eight-element - array of 32-bit values. The array spans two SSE registers. - The bytes from the registers are compressed using two shuffles. - - We need 256-entry lookup table to get a compression pattern - and the number of output bytes in the compressed vector register. - Each entry occupies 17 bytes. - - - To summarize: - - We need two 256-entry tables that have 8704 bytes in total. -*/ - -/* - Returns a pair: the first unprocessed byte from buf and utf8_output - A scalar routing should carry on the conversion of the tail. -*/ -template -std::pair -sse_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_output) { - - const char16_t *end = buf + len; - - const __m128i v_0000 = _mm_setzero_si128(); - const __m128i v_f800 = _mm_set1_epi16((int16_t)0xf800); - const __m128i v_d800 = _mm_set1_epi16((int16_t)0xd800); - const size_t safety_margin = - 12; // to avoid overruns, see issue - // https://github.com/simdutf/simdutf/issues/92 - - while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { - __m128i in = _mm_loadu_si128((__m128i *)buf); - if (big_endian) { - const __m128i swap = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - in = _mm_shuffle_epi8(in, swap); - } - // a single 16-bit UTF-16 word can yield 1, 2 or 3 UTF-8 bytes - const __m128i v_ff80 = _mm_set1_epi16((int16_t)0xff80); - if (_mm_testz_si128(in, v_ff80)) { // ASCII fast path!!!! - __m128i nextin = _mm_loadu_si128((__m128i *)buf + 1); - if (big_endian) { - const __m128i swap = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - nextin = _mm_shuffle_epi8(nextin, swap); - } - if (!_mm_testz_si128(nextin, v_ff80)) { - // 1. pack the bytes - // obviously suboptimal. - const __m128i utf8_packed = _mm_packus_epi16(in, in); - // 2. store (16 bytes) - _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); - // 3. adjust pointers - buf += 8; - utf8_output += 8; - in = nextin; - } else { - // 1. pack the bytes - // obviously suboptimal. - const __m128i utf8_packed = _mm_packus_epi16(in, nextin); - // 2. store (16 bytes) - _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); - // 3. adjust pointers - buf += 16; - utf8_output += 16; - continue; // we are done for this round! - } - } - - // no bits set above 7th bit - const __m128i one_byte_bytemask = - _mm_cmpeq_epi16(_mm_and_si128(in, v_ff80), v_0000); - const uint16_t one_byte_bitmask = - static_cast(_mm_movemask_epi8(one_byte_bytemask)); - - // no bits set above 11th bit - const __m128i one_or_two_bytes_bytemask = - _mm_cmpeq_epi16(_mm_and_si128(in, v_f800), v_0000); - const uint16_t one_or_two_bytes_bitmask = - static_cast(_mm_movemask_epi8(one_or_two_bytes_bytemask)); - - if (one_or_two_bytes_bitmask == 0xffff) { - internal::westmere::write_v_u16_11bits_to_utf8( - in, utf8_output, one_byte_bytemask, one_byte_bitmask); - buf += 8; - continue; - } - - // 1. Check if there are any surrogate word in the input chunk. - // We have also deal with situation when there is a surrogate word - // at the end of a chunk. - const __m128i surrogates_bytemask = - _mm_cmpeq_epi16(_mm_and_si128(in, v_f800), v_d800); - - // bitmask = 0x0000 if there are no surrogates - // = 0xc000 if the last word is a surrogate - const uint16_t surrogates_bitmask = - static_cast(_mm_movemask_epi8(surrogates_bytemask)); - // It might seem like checking for surrogates_bitmask == 0xc000 could help. - // However, it is likely an uncommon occurrence. - if (surrogates_bitmask == 0x0000) { - // case: code units from register produce either 1, 2 or 3 UTF-8 bytes - const __m128i dup_even = _mm_setr_epi16(0x0000, 0x0202, 0x0404, 0x0606, - 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); - - /* In this branch we handle three cases: - 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - - single UFT-8 byte - 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two - UTF-8 bytes - 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - - three UTF-8 bytes - - We expand the input word (16-bit) into two code units (32-bit), thus - we have room for four bytes. However, we need five distinct bit - layouts. Note that the last byte in cases #2 and #3 is the same. - - We precompute byte 1 for case #1 and the common byte for cases #2 & #3 - in register t2. - - We precompute byte 1 for case #3 and -- **conditionally** -- precompute - either byte 1 for case #2 or byte 2 for case #3. Note that they - differ by exactly one bit. - - Finally from these two code units we build proper UTF-8 sequence, taking - into account the case (i.e, the number of bytes to write). - */ - /** - * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: - * t2 => [0ccc|cccc] [10cc|cccc] - * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) - */ -#define simdutf_vec(x) _mm_set1_epi16(static_cast(x)) - // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - const __m128i t0 = _mm_shuffle_epi8(in, dup_even); - // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] - const __m128i t1 = _mm_and_si128(t0, simdutf_vec(0b0011111101111111)); - // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - const __m128i t2 = _mm_or_si128(t1, simdutf_vec(0b1000000000000000)); - - // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] - const __m128i s0 = _mm_srli_epi16(in, 4); - // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] - const __m128i s1 = _mm_and_si128(s0, simdutf_vec(0b0000111111111100)); - // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] - const __m128i s2 = _mm_maddubs_epi16(s1, simdutf_vec(0x0140)); - // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - const __m128i s3 = _mm_or_si128(s2, simdutf_vec(0b1100000011100000)); - const __m128i m0 = _mm_andnot_si128(one_or_two_bytes_bytemask, - simdutf_vec(0b0100000000000000)); - const __m128i s4 = _mm_xor_si128(s3, m0); -#undef simdutf_vec - - // 4. expand code units 16-bit => 32-bit - const __m128i out0 = _mm_unpacklo_epi16(t2, s4); - const __m128i out1 = _mm_unpackhi_epi16(t2, s4); - - // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - const uint16_t mask = - (one_byte_bitmask & 0x5555) | (one_or_two_bytes_bitmask & 0xaaaa); - if (mask == 0) { - // We only have three-byte code units. Use fast path. - const __m128i shuffle = _mm_setr_epi8(2, 3, 1, 6, 7, 5, 10, 11, 9, 14, - 15, 13, -1, -1, -1, -1); - const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle); - const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle); - _mm_storeu_si128((__m128i *)utf8_output, utf8_0); - utf8_output += 12; - _mm_storeu_si128((__m128i *)utf8_output, utf8_1); - utf8_output += 12; - buf += 8; - continue; - } - const uint8_t mask0 = uint8_t(mask); - - const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; - const __m128i shuffle0 = _mm_loadu_si128((__m128i *)(row0 + 1)); - const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle0); - - const uint8_t mask1 = static_cast(mask >> 8); - - const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; - const __m128i shuffle1 = _mm_loadu_si128((__m128i *)(row1 + 1)); - const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle1); - - _mm_storeu_si128((__m128i *)utf8_output, utf8_0); - utf8_output += row0[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_1); - utf8_output += row1[0]; - - buf += 8; - // surrogate pair(s) in a register - } else { - // Let us do a scalar fallback. - // It may seem wasteful to use scalar code, but being efficient with SIMD - // in the presence of surrogate pairs may require non-trivial tables. - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint16_t word = big_endian ? scalar::u16_swap_bytes(buf[k]) : buf[k]; - if ((word & 0xFF80) == 0) { - *utf8_output++ = char(word); - } else if ((word & 0xF800) == 0) { - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else if ((word & 0xF800) != 0xD800) { - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else { - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - uint16_t next_word = - big_endian ? scalar::u16_swap_bytes(buf[k + 1]) : buf[k + 1]; - k++; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if ((diff | diff2) > 0x3FF) { - return std::make_pair(nullptr, utf8_output); - } - uint32_t value = (diff << 10) + diff2 + 0x10000; - *utf8_output++ = char((value >> 18) | 0b11110000); - *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((value & 0b111111) | 0b10000000); - } - } - buf += k; - } - } // while - - return std::make_pair(buf, utf8_output); -} - -/* - Returns a pair: a result struct and utf8_output. - If there is an error, the count field of the result is the position of the - error. Otherwise, it is the position of the first unprocessed byte in buf - (even if finished). A scalar routing should carry on the conversion of the - tail if needed. -*/ -template -std::pair -sse_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, - char *utf8_output) { - const char16_t *start = buf; - const char16_t *end = buf + len; - - const __m128i v_0000 = _mm_setzero_si128(); - const __m128i v_f800 = _mm_set1_epi16((int16_t)0xf800); - const __m128i v_d800 = _mm_set1_epi16((int16_t)0xd800); - const size_t safety_margin = - 12; // to avoid overruns, see issue - // https://github.com/simdutf/simdutf/issues/92 - - while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { - __m128i in = _mm_loadu_si128((__m128i *)buf); - if (big_endian) { - const __m128i swap = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - in = _mm_shuffle_epi8(in, swap); - } - // a single 16-bit UTF-16 word can yield 1, 2 or 3 UTF-8 bytes - const __m128i v_ff80 = _mm_set1_epi16((int16_t)0xff80); - if (_mm_testz_si128(in, v_ff80)) { // ASCII fast path!!!! - __m128i nextin = _mm_loadu_si128((__m128i *)buf + 1); - if (big_endian) { - const __m128i swap = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - nextin = _mm_shuffle_epi8(nextin, swap); - } - if (!_mm_testz_si128(nextin, v_ff80)) { - // 1. pack the bytes - // obviously suboptimal. - const __m128i utf8_packed = _mm_packus_epi16(in, in); - // 2. store (16 bytes) - _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); - // 3. adjust pointers - buf += 8; - utf8_output += 8; - in = nextin; - } else { - // 1. pack the bytes - // obviously suboptimal. - const __m128i utf8_packed = _mm_packus_epi16(in, nextin); - // 2. store (16 bytes) - _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); - // 3. adjust pointers - buf += 16; - utf8_output += 16; - continue; // we are done for this round! - } - } - - // no bits set above 7th bit - const __m128i one_byte_bytemask = - _mm_cmpeq_epi16(_mm_and_si128(in, v_ff80), v_0000); - const uint16_t one_byte_bitmask = - static_cast(_mm_movemask_epi8(one_byte_bytemask)); - - // no bits set above 11th bit - const __m128i one_or_two_bytes_bytemask = - _mm_cmpeq_epi16(_mm_and_si128(in, v_f800), v_0000); - const uint16_t one_or_two_bytes_bitmask = - static_cast(_mm_movemask_epi8(one_or_two_bytes_bytemask)); - - if (one_or_two_bytes_bitmask == 0xffff) { - internal::westmere::write_v_u16_11bits_to_utf8( - in, utf8_output, one_byte_bytemask, one_byte_bitmask); - buf += 8; - continue; - } - - // 1. Check if there are any surrogate word in the input chunk. - // We have also deal with situation when there is a surrogate word - // at the end of a chunk. - const __m128i surrogates_bytemask = - _mm_cmpeq_epi16(_mm_and_si128(in, v_f800), v_d800); - - // bitmask = 0x0000 if there are no surrogates - // = 0xc000 if the last word is a surrogate - const uint16_t surrogates_bitmask = - static_cast(_mm_movemask_epi8(surrogates_bytemask)); - // It might seem like checking for surrogates_bitmask == 0xc000 could help. - // However, it is likely an uncommon occurrence. - if (surrogates_bitmask == 0x0000) { - // case: code units from register produce either 1, 2 or 3 UTF-8 bytes - const __m128i dup_even = _mm_setr_epi16(0x0000, 0x0202, 0x0404, 0x0606, - 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); - - /* In this branch we handle three cases: - 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - - single UFT-8 byte - 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two - UTF-8 bytes - 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - - three UTF-8 bytes - - We expand the input word (16-bit) into two code units (32-bit), thus - we have room for four bytes. However, we need five distinct bit - layouts. Note that the last byte in cases #2 and #3 is the same. - - We precompute byte 1 for case #1 and the common byte for cases #2 & #3 - in register t2. - - We precompute byte 1 for case #3 and -- **conditionally** -- precompute - either byte 1 for case #2 or byte 2 for case #3. Note that they - differ by exactly one bit. - - Finally from these two code units we build proper UTF-8 sequence, taking - into account the case (i.e, the number of bytes to write). - */ - /** - * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: - * t2 => [0ccc|cccc] [10cc|cccc] - * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) - */ -#define simdutf_vec(x) _mm_set1_epi16(static_cast(x)) - // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - const __m128i t0 = _mm_shuffle_epi8(in, dup_even); - // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] - const __m128i t1 = _mm_and_si128(t0, simdutf_vec(0b0011111101111111)); - // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - const __m128i t2 = _mm_or_si128(t1, simdutf_vec(0b1000000000000000)); - - // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] - const __m128i s0 = _mm_srli_epi16(in, 4); - // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] - const __m128i s1 = _mm_and_si128(s0, simdutf_vec(0b0000111111111100)); - // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] - const __m128i s2 = _mm_maddubs_epi16(s1, simdutf_vec(0x0140)); - // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - const __m128i s3 = _mm_or_si128(s2, simdutf_vec(0b1100000011100000)); - const __m128i m0 = _mm_andnot_si128(one_or_two_bytes_bytemask, - simdutf_vec(0b0100000000000000)); - const __m128i s4 = _mm_xor_si128(s3, m0); -#undef simdutf_vec - - // 4. expand code units 16-bit => 32-bit - const __m128i out0 = _mm_unpacklo_epi16(t2, s4); - const __m128i out1 = _mm_unpackhi_epi16(t2, s4); - - // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - const uint16_t mask = - (one_byte_bitmask & 0x5555) | (one_or_two_bytes_bitmask & 0xaaaa); - if (mask == 0) { - // We only have three-byte code units. Use fast path. - const __m128i shuffle = _mm_setr_epi8(2, 3, 1, 6, 7, 5, 10, 11, 9, 14, - 15, 13, -1, -1, -1, -1); - const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle); - const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle); - _mm_storeu_si128((__m128i *)utf8_output, utf8_0); - utf8_output += 12; - _mm_storeu_si128((__m128i *)utf8_output, utf8_1); - utf8_output += 12; - buf += 8; - continue; - } - const uint8_t mask0 = uint8_t(mask); - - const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; - const __m128i shuffle0 = _mm_loadu_si128((__m128i *)(row0 + 1)); - const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle0); - - const uint8_t mask1 = static_cast(mask >> 8); - - const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; - const __m128i shuffle1 = _mm_loadu_si128((__m128i *)(row1 + 1)); - const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle1); - - _mm_storeu_si128((__m128i *)utf8_output, utf8_0); - utf8_output += row0[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_1); - utf8_output += row1[0]; - - buf += 8; - // surrogate pair(s) in a register - } else { - // Let us do a scalar fallback. - // It may seem wasteful to use scalar code, but being efficient with SIMD - // in the presence of surrogate pairs may require non-trivial tables. - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint16_t word = big_endian ? scalar::u16_swap_bytes(buf[k]) : buf[k]; - if ((word & 0xFF80) == 0) { - *utf8_output++ = char(word); - } else if ((word & 0xF800) == 0) { - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else if ((word & 0xF800) != 0xD800) { - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else { - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - uint16_t next_word = - big_endian ? scalar::u16_swap_bytes(buf[k + 1]) : buf[k + 1]; - k++; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if ((diff | diff2) > 0x3FF) { - return std::make_pair( - result(error_code::SURROGATE, buf - start + k - 1), - utf8_output); - } - uint32_t value = (diff << 10) + diff2 + 0x10000; - *utf8_output++ = char((value >> 18) | 0b11110000); - *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((value & 0b111111) | 0b10000000); - } - } - buf += k; - } - } // while - - return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output); -} -/* end file src/westmere/sse_convert_utf16_to_utf8.cpp */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -/* begin file src/westmere/sse_convert_utf16_to_utf32.cpp */ -/* - The vectorized algorithm works on single SSE register i.e., it - loads eight 16-bit code units. - - We consider three cases: - 1. an input register contains no surrogates and each value - is in range 0x0000 .. 0x07ff. - 2. an input register contains no surrogates and values are - is in range 0x0000 .. 0xffff. - 3. an input register contains surrogates --- i.e. codepoints - can have 16 or 32 bits. - - Ad 1. - - When values are less than 0x0800, it means that a 16-bit code unit - can be converted into: 1) single UTF8 byte (when it's an ASCII - char) or 2) two UTF8 bytes. - - For this case we do only some shuffle to obtain these 2-byte - codes and finally compress the whole SSE register with a single - shuffle. - - We need 256-entry lookup table to get a compression pattern - and the number of output bytes in the compressed vector register. - Each entry occupies 17 bytes. - - Ad 2. - - When values fit in 16-bit code units, but are above 0x07ff, then - a single word may produce one, two or three UTF8 bytes. - - We prepare data for all these three cases in two registers. - The first register contains lower two UTF8 bytes (used in all - cases), while the second one contains just the third byte for - the three-UTF8-bytes case. - - Finally these two registers are interleaved forming eight-element - array of 32-bit values. The array spans two SSE registers. - The bytes from the registers are compressed using two shuffles. - - We need 256-entry lookup table to get a compression pattern - and the number of output bytes in the compressed vector register. - Each entry occupies 17 bytes. - - - To summarize: - - We need two 256-entry tables that have 8704 bytes in total. -*/ - -/* - Returns a pair: the first unprocessed byte from buf and utf8_output - A scalar routine should carry on the conversion of the tail. -*/ -template -std::pair -sse_convert_utf16_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_output) { - const char16_t *end = buf + len; - - const __m128i v_f800 = _mm_set1_epi16((int16_t)0xf800); - const __m128i v_d800 = _mm_set1_epi16((int16_t)0xd800); - - while (end - buf >= 8) { - __m128i in = _mm_loadu_si128((__m128i *)buf); - - if (big_endian) { - const __m128i swap = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - in = _mm_shuffle_epi8(in, swap); - } - - // 1. Check if there are any surrogate word in the input chunk. - // We have also deal with situation when there is a surrogate word - // at the end of a chunk. - const __m128i surrogates_bytemask = - _mm_cmpeq_epi16(_mm_and_si128(in, v_f800), v_d800); - - // bitmask = 0x0000 if there are no surrogates - // = 0xc000 if the last word is a surrogate - const uint16_t surrogates_bitmask = - static_cast(_mm_movemask_epi8(surrogates_bytemask)); - // It might seem like checking for surrogates_bitmask == 0xc000 could help. - // However, it is likely an uncommon occurrence. - if (surrogates_bitmask == 0x0000) { - // case: no surrogate pair, extend 16-bit code units to 32-bit code units - _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output), - _mm_cvtepu16_epi32(in)); - _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output + 4), - _mm_cvtepu16_epi32(_mm_srli_si128(in, 8))); - utf32_output += 8; - buf += 8; - // surrogate pair(s) in a register - } else { - // Let us do a scalar fallback. - // It may seem wasteful to use scalar code, but being efficient with SIMD - // in the presence of surrogate pairs may require non-trivial tables. - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint16_t word = big_endian ? scalar::u16_swap_bytes(buf[k]) : buf[k]; - if ((word & 0xF800) != 0xD800) { - *utf32_output++ = char32_t(word); - } else { - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - uint16_t next_word = - big_endian ? scalar::u16_swap_bytes(buf[k + 1]) : buf[k + 1]; - k++; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if ((diff | diff2) > 0x3FF) { - return std::make_pair(nullptr, utf32_output); - } - uint32_t value = (diff << 10) + diff2 + 0x10000; - *utf32_output++ = char32_t(value); - } - } - buf += k; - } - } // while - return std::make_pair(buf, utf32_output); -} - -/* - Returns a pair: a result struct and utf8_output. - If there is an error, the count field of the result is the position of the - error. Otherwise, it is the position of the first unprocessed byte in buf - (even if finished). A scalar routing should carry on the conversion of the - tail if needed. -*/ -template -std::pair -sse_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, - char32_t *utf32_output) { - const char16_t *start = buf; - const char16_t *end = buf + len; - - const __m128i v_f800 = _mm_set1_epi16((int16_t)0xf800); - const __m128i v_d800 = _mm_set1_epi16((int16_t)0xd800); - - while (end - buf >= 8) { - __m128i in = _mm_loadu_si128((__m128i *)buf); - - if (big_endian) { - const __m128i swap = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - in = _mm_shuffle_epi8(in, swap); - } - - // 1. Check if there are any surrogate word in the input chunk. - // We have also deal with situation when there is a surrogate word - // at the end of a chunk. - const __m128i surrogates_bytemask = - _mm_cmpeq_epi16(_mm_and_si128(in, v_f800), v_d800); - - // bitmask = 0x0000 if there are no surrogates - // = 0xc000 if the last word is a surrogate - const uint16_t surrogates_bitmask = - static_cast(_mm_movemask_epi8(surrogates_bytemask)); - // It might seem like checking for surrogates_bitmask == 0xc000 could help. - // However, it is likely an uncommon occurrence. - if (surrogates_bitmask == 0x0000) { - // case: no surrogate pair, extend 16-bit code units to 32-bit code units - _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output), - _mm_cvtepu16_epi32(in)); - _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output + 4), - _mm_cvtepu16_epi32(_mm_srli_si128(in, 8))); - utf32_output += 8; - buf += 8; - // surrogate pair(s) in a register - } else { - // Let us do a scalar fallback. - // It may seem wasteful to use scalar code, but being efficient with SIMD - // in the presence of surrogate pairs may require non-trivial tables. - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint16_t word = big_endian ? scalar::u16_swap_bytes(buf[k]) : buf[k]; - if ((word & 0xF800) != 0xD800) { - *utf32_output++ = char32_t(word); - } else { - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - uint16_t next_word = - big_endian ? scalar::u16_swap_bytes(buf[k + 1]) : buf[k + 1]; - k++; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if ((diff | diff2) > 0x3FF) { - return std::make_pair( - result(error_code::SURROGATE, buf - start + k - 1), - utf32_output); - } - uint32_t value = (diff << 10) + diff2 + 0x10000; - *utf32_output++ = char32_t(value); - } - } - buf += k; - } - } // while - return std::make_pair(result(error_code::SUCCESS, buf - start), utf32_output); -} -/* end file src/westmere/sse_convert_utf16_to_utf32.cpp */ -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/westmere/sse_convert_utf32_to_latin1.cpp */ -std::pair -sse_convert_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) { - const size_t rounded_len = len & ~0xF; // Round down to nearest multiple of 16 - - __m128i high_bytes_mask = _mm_set1_epi32(0xFFFFFF00); - __m128i shufmask = - _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 8, 4, 0); - - for (size_t i = 0; i < rounded_len; i += 16) { - __m128i in1 = _mm_loadu_si128((__m128i *)buf); - __m128i in2 = _mm_loadu_si128((__m128i *)(buf + 4)); - __m128i in3 = _mm_loadu_si128((__m128i *)(buf + 8)); - __m128i in4 = _mm_loadu_si128((__m128i *)(buf + 12)); - - __m128i check_combined = _mm_or_si128(in1, in2); - check_combined = _mm_or_si128(check_combined, in3); - check_combined = _mm_or_si128(check_combined, in4); - - if (!_mm_testz_si128(check_combined, high_bytes_mask)) { - return std::make_pair(nullptr, latin1_output); - } - __m128i pack1 = _mm_unpacklo_epi32(_mm_shuffle_epi8(in1, shufmask), - _mm_shuffle_epi8(in2, shufmask)); - __m128i pack2 = _mm_unpacklo_epi32(_mm_shuffle_epi8(in3, shufmask), - _mm_shuffle_epi8(in4, shufmask)); - __m128i pack = _mm_unpacklo_epi64(pack1, pack2); - _mm_storeu_si128((__m128i *)latin1_output, pack); - latin1_output += 16; - buf += 16; - } - - return std::make_pair(buf, latin1_output); -} - -std::pair -sse_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, - char *latin1_output) { - const char32_t *start = buf; - const size_t rounded_len = len & ~0xF; // Round down to nearest multiple of 16 - - __m128i high_bytes_mask = _mm_set1_epi32(0xFFFFFF00); - __m128i shufmask = - _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 8, 4, 0); - - for (size_t i = 0; i < rounded_len; i += 16) { - __m128i in1 = _mm_loadu_si128((__m128i *)buf); - __m128i in2 = _mm_loadu_si128((__m128i *)(buf + 4)); - __m128i in3 = _mm_loadu_si128((__m128i *)(buf + 8)); - __m128i in4 = _mm_loadu_si128((__m128i *)(buf + 12)); - - __m128i check_combined = _mm_or_si128(in1, in2); - check_combined = _mm_or_si128(check_combined, in3); - check_combined = _mm_or_si128(check_combined, in4); - - if (!_mm_testz_si128(check_combined, high_bytes_mask)) { - // Fallback to scalar code for handling errors - for (int k = 0; k < 16; k++) { - char32_t codepoint = buf[k]; - if (codepoint <= 0xff) { - *latin1_output++ = char(codepoint); - } else { - return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), - latin1_output); - } - } - buf += 16; - continue; - } - __m128i pack1 = _mm_unpacklo_epi32(_mm_shuffle_epi8(in1, shufmask), - _mm_shuffle_epi8(in2, shufmask)); - __m128i pack2 = _mm_unpacklo_epi32(_mm_shuffle_epi8(in3, shufmask), - _mm_shuffle_epi8(in4, shufmask)); - __m128i pack = _mm_unpacklo_epi64(pack1, pack2); - _mm_storeu_si128((__m128i *)latin1_output, pack); - latin1_output += 16; - buf += 16; - } - - return std::make_pair(result(error_code::SUCCESS, buf - start), - latin1_output); -} -/* end file src/westmere/sse_convert_utf32_to_latin1.cpp */ -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -/* begin file src/westmere/sse_convert_utf32_to_utf8.cpp */ -std::pair -sse_convert_utf32_to_utf8(const char32_t *buf, size_t len, char *utf8_output) { - const char32_t *end = buf + len; - - const __m128i v_0000 = _mm_setzero_si128(); //__m128 = 128 bits - const __m128i v_f800 = _mm_set1_epi16((uint16_t)0xf800); // 1111 1000 0000 - // 0000 - const __m128i v_c080 = _mm_set1_epi16((uint16_t)0xc080); // 1100 0000 1000 - // 0000 - const __m128i v_ff80 = _mm_set1_epi16((uint16_t)0xff80); // 1111 1111 1000 - // 0000 - const __m128i v_ffff0000 = _mm_set1_epi32( - (uint32_t)0xffff0000); // 1111 1111 1111 1111 0000 0000 0000 0000 - const __m128i v_7fffffff = _mm_set1_epi32( - (uint32_t)0x7fffffff); // 0111 1111 1111 1111 1111 1111 1111 1111 - __m128i running_max = _mm_setzero_si128(); - __m128i forbidden_bytemask = _mm_setzero_si128(); - const size_t safety_margin = - 12; // to avoid overruns, see issue - // https://github.com/simdutf/simdutf/issues/92 - - while (end - buf >= - std::ptrdiff_t( - 16 + safety_margin)) { // buf is a char32_t pointer, each char32_t - // has 4 bytes or 32 bits, thus buf + 16 * - // char_32t = 512 bits = 64 bytes - // We load two 16 bytes registers for a total of 32 bytes or 16 characters. - __m128i in = _mm_loadu_si128((__m128i *)buf); - __m128i nextin = _mm_loadu_si128( - (__m128i *)buf + 1); // These two values can hold only 8 UTF32 chars - running_max = _mm_max_epu32( - _mm_max_epu32(in, running_max), // take element-wise max char32_t from - // in and running_max vector - nextin); // and take element-wise max element from nextin and - // running_max vector - - // Pack 32-bit UTF-32 code units to 16-bit UTF-16 code units with unsigned - // saturation - __m128i in_16 = _mm_packus_epi32( - _mm_and_si128(in, v_7fffffff), - _mm_and_si128( - nextin, - v_7fffffff)); // in this context pack the two __m128 into a single - // By ensuring the highest bit is set to 0(&v_7fffffff), we are making sure - // all values are interpreted as non-negative, or specifically, the values - // are within the range of valid Unicode code points. remember : having - // leading byte 0 means a positive number by the two complements system. - // Unicode is well beneath the range where you'll start getting issues so - // that's OK. - - // Try to apply UTF-16 => UTF-8 from ./sse_convert_utf16_to_utf8.cpp - - // Check for ASCII fast path - - // ASCII fast path!!!! - // We eagerly load another 32 bytes, hoping that they will be ASCII too. - // The intuition is that we try to collect 16 ASCII characters which - // requires a total of 64 bytes of input. If we fail, we just pass thirdin - // and fourthin as our new inputs. - if (_mm_testz_si128(in_16, v_ff80)) { // if the first two blocks are ASCII - __m128i thirdin = _mm_loadu_si128((__m128i *)buf + 2); - __m128i fourthin = _mm_loadu_si128((__m128i *)buf + 3); - running_max = _mm_max_epu32( - _mm_max_epu32(thirdin, running_max), - fourthin); // take the running max of all 4 vectors thus far - __m128i nextin_16 = _mm_packus_epi32( - _mm_and_si128(thirdin, v_7fffffff), - _mm_and_si128(fourthin, - v_7fffffff)); // pack into 1 vector, now you have two - if (!_mm_testz_si128( - nextin_16, - v_ff80)) { // checks if the second packed vector is ASCII, if not: - // 1. pack the bytes - // obviously suboptimal. - const __m128i utf8_packed = _mm_packus_epi16( - in_16, in_16); // creates two copy of in_16 in 1 vector - // 2. store (16 bytes) - _mm_storeu_si128((__m128i *)utf8_output, - utf8_packed); // put them into the output - // 3. adjust pointers - buf += 8; // the char32_t buffer pointer goes up 8 char32_t chars* 32 - // bits = 256 bits - utf8_output += - 8; // same with output, e.g. lift the first two blocks alone. - // Proceed with next input - in_16 = nextin_16; - // We need to update in and nextin because they are used later. - in = thirdin; - nextin = fourthin; - } else { - // 1. pack the bytes - const __m128i utf8_packed = _mm_packus_epi16(in_16, nextin_16); - // 2. store (16 bytes) - _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); - // 3. adjust pointers - buf += 16; - utf8_output += 16; - continue; // we are done for this round! - } - } - - // no bits set above 7th bit -- find out all the ASCII characters - const __m128i one_byte_bytemask = - _mm_cmpeq_epi16( // this takes four bytes at a time and compares: - _mm_and_si128(in_16, v_ff80), // the vector that get only the first - // 9 bits of each 16-bit/2-byte units - v_0000 // - ); // they should be all zero if they are ASCII. E.g. ASCII in UTF32 is - // of format 0000 0000 0000 0XXX XXXX - // _mm_cmpeq_epi16 should now return a 1111 1111 1111 1111 for equals, and - // 0000 0000 0000 0000 if not for each 16-bit/2-byte units - const uint16_t one_byte_bitmask = static_cast(_mm_movemask_epi8( - one_byte_bytemask)); // collect the MSB from previous vector and put - // them into uint16_t mas - - // no bits set above 11th bit - const __m128i one_or_two_bytes_bytemask = - _mm_cmpeq_epi16(_mm_and_si128(in_16, v_f800), v_0000); - const uint16_t one_or_two_bytes_bitmask = - static_cast(_mm_movemask_epi8(one_or_two_bytes_bytemask)); - - if (one_or_two_bytes_bitmask == 0xffff) { - // case: all code units either produce 1 or 2 UTF-8 bytes (at least one - // produces 2 bytes) - // 1. prepare 2-byte values - // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 - // expected output : [110a|aaaa|10bb|bbbb] x 8 - const __m128i v_1f00 = - _mm_set1_epi16((int16_t)0x1f00); // 0001 1111 0000 0000 - const __m128i v_003f = - _mm_set1_epi16((int16_t)0x003f); // 0000 0000 0011 1111 - - // t0 = [000a|aaaa|bbbb|bb00] - const __m128i t0 = _mm_slli_epi16(in_16, 2); // shift packed vector by two - // t1 = [000a|aaaa|0000|0000] - const __m128i t1 = - _mm_and_si128(t0, v_1f00); // potentital first utf8 byte - // t2 = [0000|0000|00bb|bbbb] - const __m128i t2 = - _mm_and_si128(in_16, v_003f); // potential second utf8 byte - // t3 = [000a|aaaa|00bb|bbbb] - const __m128i t3 = - _mm_or_si128(t1, t2); // first and second potential utf8 byte together - // t4 = [110a|aaaa|10bb|bbbb] - const __m128i t4 = _mm_or_si128( - t3, - v_c080); // t3 | 1100 0000 1000 0000 = full potential 2-byte utf8 unit - - // 2. merge ASCII and 2-byte codewords - const __m128i utf8_unpacked = - _mm_blendv_epi8(t4, in_16, one_byte_bytemask); - - // 3. prepare bitmask for 8-bit lookup - // one_byte_bitmask = hhggffeeddccbbaa -- the bits are doubled (h - - // MSB, a - LSB) - const uint16_t m0 = one_byte_bitmask & 0x5555; // m0 = 0h0g0f0e0d0c0b0a - const uint16_t m1 = - static_cast(m0 >> 7); // m1 = 00000000h0g0f0e0 - const uint8_t m2 = - static_cast((m0 | m1) & 0xff); // m2 = hdgcfbea - // 4. pack the bytes - const uint8_t *row = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0]; - const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1)); - const __m128i utf8_packed = _mm_shuffle_epi8(utf8_unpacked, shuffle); - - // 5. store bytes - _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); - - // 6. adjust pointers - buf += 8; - utf8_output += row[0]; - continue; - } - - // Check for overflow in packing - - const __m128i saturation_bytemask = _mm_cmpeq_epi32( - _mm_and_si128(_mm_or_si128(in, nextin), v_ffff0000), v_0000); - const uint32_t saturation_bitmask = - static_cast(_mm_movemask_epi8(saturation_bytemask)); - if (saturation_bitmask == 0xffff) { - // case: code units from register produce either 1, 2 or 3 UTF-8 bytes - const __m128i v_d800 = _mm_set1_epi16((uint16_t)0xd800); - forbidden_bytemask = - _mm_or_si128(forbidden_bytemask, - _mm_cmpeq_epi16(_mm_and_si128(in_16, v_f800), v_d800)); - - const __m128i dup_even = _mm_setr_epi16(0x0000, 0x0202, 0x0404, 0x0606, - 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); - - /* In this branch we handle three cases: - 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - - single UFT-8 byte - 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - - two UTF-8 bytes - 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - - three UTF-8 bytes - - We expand the input word (16-bit) into two code units (32-bit), thus - we have room for four bytes. However, we need five distinct bit - layouts. Note that the last byte in cases #2 and #3 is the same. - - We precompute byte 1 for case #1 and the common byte for cases #2 & #3 - in register t2. - - We precompute byte 1 for case #3 and -- **conditionally** -- precompute - either byte 1 for case #2 or byte 2 for case #3. Note that they - differ by exactly one bit. - - Finally from these two code units we build proper UTF-8 sequence, taking - into account the case (i.e, the number of bytes to write). - */ - /** - * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: - * t2 => [0ccc|cccc] [10cc|cccc] - * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) - */ -#define simdutf_vec(x) _mm_set1_epi16(static_cast(x)) - // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - const __m128i t0 = _mm_shuffle_epi8(in_16, dup_even); - // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] - const __m128i t1 = _mm_and_si128(t0, simdutf_vec(0b0011111101111111)); - // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - const __m128i t2 = _mm_or_si128(t1, simdutf_vec(0b1000000000000000)); - - // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] - const __m128i s0 = _mm_srli_epi16(in_16, 4); - // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] - const __m128i s1 = _mm_and_si128(s0, simdutf_vec(0b0000111111111100)); - // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] - const __m128i s2 = _mm_maddubs_epi16(s1, simdutf_vec(0x0140)); - // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - const __m128i s3 = _mm_or_si128(s2, simdutf_vec(0b1100000011100000)); - const __m128i m0 = _mm_andnot_si128(one_or_two_bytes_bytemask, - simdutf_vec(0b0100000000000000)); - const __m128i s4 = _mm_xor_si128(s3, m0); -#undef simdutf_vec - - // 4. expand code units 16-bit => 32-bit - const __m128i out0 = _mm_unpacklo_epi16(t2, s4); - const __m128i out1 = _mm_unpackhi_epi16(t2, s4); - - // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - const uint16_t mask = - (one_byte_bitmask & 0x5555) | (one_or_two_bytes_bitmask & 0xaaaa); - if (mask == 0) { - // We only have three-byte code units. Use fast path. - const __m128i shuffle = _mm_setr_epi8(2, 3, 1, 6, 7, 5, 10, 11, 9, 14, - 15, 13, -1, -1, -1, -1); - const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle); - const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle); - _mm_storeu_si128((__m128i *)utf8_output, utf8_0); - utf8_output += 12; - _mm_storeu_si128((__m128i *)utf8_output, utf8_1); - utf8_output += 12; - buf += 8; - continue; - } - const uint8_t mask0 = uint8_t(mask); - - const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; - const __m128i shuffle0 = _mm_loadu_si128((__m128i *)(row0 + 1)); - const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle0); - - const uint8_t mask1 = static_cast(mask >> 8); - - const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; - const __m128i shuffle1 = _mm_loadu_si128((__m128i *)(row1 + 1)); - const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle1); - - _mm_storeu_si128((__m128i *)utf8_output, utf8_0); - utf8_output += row0[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_1); - utf8_output += row1[0]; - - buf += 8; - } else { - // case: at least one 32-bit word produce a surrogate pair in UTF-16 <=> - // will produce four UTF-8 bytes Let us do a scalar fallback. It may seem - // wasteful to use scalar code, but being efficient with SIMD in the - // presence of surrogate pairs may require non-trivial tables. - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint32_t word = buf[k]; - if ((word & 0xFFFFFF80) == 0) { - *utf8_output++ = char(word); - } else if ((word & 0xFFFFF800) == 0) { - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else if ((word & 0xFFFF0000) == 0) { - if (word >= 0xD800 && word <= 0xDFFF) { - return std::make_pair(nullptr, utf8_output); - } - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else { - if (word > 0x10FFFF) { - return std::make_pair(nullptr, utf8_output); - } - *utf8_output++ = char((word >> 18) | 0b11110000); - *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } - } - buf += k; - } - } // while - - // check for invalid input - const __m128i v_10ffff = _mm_set1_epi32((uint32_t)0x10ffff); - if (static_cast(_mm_movemask_epi8(_mm_cmpeq_epi32( - _mm_max_epu32(running_max, v_10ffff), v_10ffff))) != 0xffff) { - return std::make_pair(nullptr, utf8_output); - } - - if (static_cast(_mm_movemask_epi8(forbidden_bytemask)) != 0) { - return std::make_pair(nullptr, utf8_output); - } - - return std::make_pair(buf, utf8_output); -} - -std::pair -sse_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, - char *utf8_output) { - const char32_t *end = buf + len; - const char32_t *start = buf; - - const __m128i v_0000 = _mm_setzero_si128(); - const __m128i v_f800 = _mm_set1_epi16((uint16_t)0xf800); - const __m128i v_c080 = _mm_set1_epi16((uint16_t)0xc080); - const __m128i v_ff80 = _mm_set1_epi16((uint16_t)0xff80); - const __m128i v_ffff0000 = _mm_set1_epi32((uint32_t)0xffff0000); - const __m128i v_7fffffff = _mm_set1_epi32((uint32_t)0x7fffffff); - const __m128i v_10ffff = _mm_set1_epi32((uint32_t)0x10ffff); - - const size_t safety_margin = - 12; // to avoid overruns, see issue - // https://github.com/simdutf/simdutf/issues/92 - - while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { - // We load two 16 bytes registers for a total of 32 bytes or 8 characters. - __m128i in = _mm_loadu_si128((__m128i *)buf); - __m128i nextin = _mm_loadu_si128((__m128i *)buf + 1); - // Check for too large input - __m128i max_input = _mm_max_epu32(_mm_max_epu32(in, nextin), v_10ffff); - if (static_cast(_mm_movemask_epi8( - _mm_cmpeq_epi32(max_input, v_10ffff))) != 0xffff) { - return std::make_pair(result(error_code::TOO_LARGE, buf - start), - utf8_output); - } - - // Pack 32-bit UTF-32 code units to 16-bit UTF-16 code units with unsigned - // saturation - __m128i in_16 = _mm_packus_epi32(_mm_and_si128(in, v_7fffffff), - _mm_and_si128(nextin, v_7fffffff)); - - // Try to apply UTF-16 => UTF-8 from ./sse_convert_utf16_to_utf8.cpp - - // Check for ASCII fast path - if (_mm_testz_si128(in_16, v_ff80)) { // ASCII fast path!!!! - // 1. pack the bytes - // obviously suboptimal. - const __m128i utf8_packed = _mm_packus_epi16(in_16, in_16); - // 2. store (16 bytes) - _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); - // 3. adjust pointers - buf += 8; - utf8_output += 8; - continue; - } - - // no bits set above 7th bit - const __m128i one_byte_bytemask = - _mm_cmpeq_epi16(_mm_and_si128(in_16, v_ff80), v_0000); - const uint16_t one_byte_bitmask = - static_cast(_mm_movemask_epi8(one_byte_bytemask)); - - // no bits set above 11th bit - const __m128i one_or_two_bytes_bytemask = - _mm_cmpeq_epi16(_mm_and_si128(in_16, v_f800), v_0000); - const uint16_t one_or_two_bytes_bitmask = - static_cast(_mm_movemask_epi8(one_or_two_bytes_bytemask)); - - if (one_or_two_bytes_bitmask == 0xffff) { - // case: all code units either produce 1 or 2 UTF-8 bytes (at least one - // produces 2 bytes) - // 1. prepare 2-byte values - // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 - // expected output : [110a|aaaa|10bb|bbbb] x 8 - const __m128i v_1f00 = _mm_set1_epi16((int16_t)0x1f00); - const __m128i v_003f = _mm_set1_epi16((int16_t)0x003f); - - // t0 = [000a|aaaa|bbbb|bb00] - const __m128i t0 = _mm_slli_epi16(in_16, 2); - // t1 = [000a|aaaa|0000|0000] - const __m128i t1 = _mm_and_si128(t0, v_1f00); - // t2 = [0000|0000|00bb|bbbb] - const __m128i t2 = _mm_and_si128(in_16, v_003f); - // t3 = [000a|aaaa|00bb|bbbb] - const __m128i t3 = _mm_or_si128(t1, t2); - // t4 = [110a|aaaa|10bb|bbbb] - const __m128i t4 = _mm_or_si128(t3, v_c080); - - // 2. merge ASCII and 2-byte codewords - const __m128i utf8_unpacked = - _mm_blendv_epi8(t4, in_16, one_byte_bytemask); - - // 3. prepare bitmask for 8-bit lookup - // one_byte_bitmask = hhggffeeddccbbaa -- the bits are doubled (h - - // MSB, a - LSB) - const uint16_t m0 = one_byte_bitmask & 0x5555; // m0 = 0h0g0f0e0d0c0b0a - const uint16_t m1 = - static_cast(m0 >> 7); // m1 = 00000000h0g0f0e0 - const uint8_t m2 = - static_cast((m0 | m1) & 0xff); // m2 = hdgcfbea - // 4. pack the bytes - const uint8_t *row = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0]; - const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1)); - const __m128i utf8_packed = _mm_shuffle_epi8(utf8_unpacked, shuffle); - - // 5. store bytes - _mm_storeu_si128((__m128i *)utf8_output, utf8_packed); - - // 6. adjust pointers - buf += 8; - utf8_output += row[0]; - continue; - } - - // Check for overflow in packing - const __m128i saturation_bytemask = _mm_cmpeq_epi32( - _mm_and_si128(_mm_or_si128(in, nextin), v_ffff0000), v_0000); - const uint32_t saturation_bitmask = - static_cast(_mm_movemask_epi8(saturation_bytemask)); - - if (saturation_bitmask == 0xffff) { - // case: code units from register produce either 1, 2 or 3 UTF-8 bytes - - // Check for illegal surrogate code units - const __m128i v_d800 = _mm_set1_epi16((uint16_t)0xd800); - const __m128i forbidden_bytemask = - _mm_cmpeq_epi16(_mm_and_si128(in_16, v_f800), v_d800); - if (static_cast(_mm_movemask_epi8(forbidden_bytemask)) != 0) { - return std::make_pair(result(error_code::SURROGATE, buf - start), - utf8_output); - } - - const __m128i dup_even = _mm_setr_epi16(0x0000, 0x0202, 0x0404, 0x0606, - 0x0808, 0x0a0a, 0x0c0c, 0x0e0e); - - /* In this branch we handle three cases: - 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - - single UFT-8 byte - 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - - two UTF-8 bytes - 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - - three UTF-8 bytes - - We expand the input word (16-bit) into two code units (32-bit), thus - we have room for four bytes. However, we need five distinct bit - layouts. Note that the last byte in cases #2 and #3 is the same. - - We precompute byte 1 for case #1 and the common byte for cases #2 & #3 - in register t2. - - We precompute byte 1 for case #3 and -- **conditionally** -- precompute - either byte 1 for case #2 or byte 2 for case #3. Note that they - differ by exactly one bit. - - Finally from these two code units we build proper UTF-8 sequence, taking - into account the case (i.e, the number of bytes to write). - */ - /** - * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: - * t2 => [0ccc|cccc] [10cc|cccc] - * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) - */ -#define simdutf_vec(x) _mm_set1_epi16(static_cast(x)) - // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - const __m128i t0 = _mm_shuffle_epi8(in_16, dup_even); - // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] - const __m128i t1 = _mm_and_si128(t0, simdutf_vec(0b0011111101111111)); - // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - const __m128i t2 = _mm_or_si128(t1, simdutf_vec(0b1000000000000000)); - - // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] - const __m128i s0 = _mm_srli_epi16(in_16, 4); - // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] - const __m128i s1 = _mm_and_si128(s0, simdutf_vec(0b0000111111111100)); - // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] - const __m128i s2 = _mm_maddubs_epi16(s1, simdutf_vec(0x0140)); - // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - const __m128i s3 = _mm_or_si128(s2, simdutf_vec(0b1100000011100000)); - const __m128i m0 = _mm_andnot_si128(one_or_two_bytes_bytemask, - simdutf_vec(0b0100000000000000)); - const __m128i s4 = _mm_xor_si128(s3, m0); -#undef simdutf_vec - - // 4. expand code units 16-bit => 32-bit - const __m128i out0 = _mm_unpacklo_epi16(t2, s4); - const __m128i out1 = _mm_unpackhi_epi16(t2, s4); - - // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - const uint16_t mask = - (one_byte_bitmask & 0x5555) | (one_or_two_bytes_bitmask & 0xaaaa); - if (mask == 0) { - // We only have three-byte code units. Use fast path. - const __m128i shuffle = _mm_setr_epi8(2, 3, 1, 6, 7, 5, 10, 11, 9, 14, - 15, 13, -1, -1, -1, -1); - const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle); - const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle); - _mm_storeu_si128((__m128i *)utf8_output, utf8_0); - utf8_output += 12; - _mm_storeu_si128((__m128i *)utf8_output, utf8_1); - utf8_output += 12; - buf += 8; - continue; - } - const uint8_t mask0 = uint8_t(mask); - - const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; - const __m128i shuffle0 = _mm_loadu_si128((__m128i *)(row0 + 1)); - const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle0); - - const uint8_t mask1 = static_cast(mask >> 8); - - const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; - const __m128i shuffle1 = _mm_loadu_si128((__m128i *)(row1 + 1)); - const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle1); - - _mm_storeu_si128((__m128i *)utf8_output, utf8_0); - utf8_output += row0[0]; - _mm_storeu_si128((__m128i *)utf8_output, utf8_1); - utf8_output += row1[0]; - - buf += 8; - } else { - // case: at least one 32-bit word produce a surrogate pair in UTF-16 <=> - // will produce four UTF-8 bytes Let us do a scalar fallback. It may seem - // wasteful to use scalar code, but being efficient with SIMD in the - // presence of surrogate pairs may require non-trivial tables. - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint32_t word = buf[k]; - if ((word & 0xFFFFFF80) == 0) { - *utf8_output++ = char(word); - } else if ((word & 0xFFFFF800) == 0) { - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else if ((word & 0xFFFF0000) == 0) { - if (word >= 0xD800 && word <= 0xDFFF) { - return std::make_pair( - result(error_code::SURROGATE, buf - start + k), utf8_output); - } - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else { - if (word > 0x10FFFF) { - return std::make_pair( - result(error_code::TOO_LARGE, buf - start + k), utf8_output); - } - *utf8_output++ = char((word >> 18) | 0b11110000); - *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } - } - buf += k; - } - } // while - return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output); -} -/* end file src/westmere/sse_convert_utf32_to_utf8.cpp */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -/* begin file src/westmere/sse_convert_utf32_to_utf16.cpp */ -struct expansion_result_t { - size_t u16count; - __m128i compressed; -}; - -// Function sse_expand_surrogate takes four **valid** UTF-32 characters -// having at least one code-point producing a surrogate pair. -template -expansion_result_t sse_expand_surrogate(const __m128i x) { - using vector_u32 = simd32; - using vector_u8 = simd8; - - const auto in = vector_u32(x); - - const auto non_surrogate_mask = (in & uint32_t(0xffff0000)) == uint32_t(0); - const auto mask = (~non_surrogate_mask.to_4bit_bitmask()) & 0xf; - - const auto t0 = in - uint32_t(0x00010000); - const auto hi = t0.shr<10>() & uint32_t(0x000003ff); - const auto lo = t0.shl<16>() & uint32_t(0x03ff0000); - const auto surrogates = (lo | hi) | uint32_t(0xdc00d800); - - const auto merged = as_vector_u8(select(non_surrogate_mask, in, surrogates)); - - const auto shuffle = vector_u8::load( - (byte_order == endianness::LITTLE) - ? tables::utf32_to_utf16::pack_utf32_to_utf16le[mask] - : tables::utf32_to_utf16::pack_utf32_to_utf16be[mask]); - - const size_t u16count = (4 + count_ones(mask)); - const auto compressed = shuffle.lookup_16(merged); - - return {u16count, compressed}; -} - -// Function `validate_utf32` checks 2 x 4 UTF-32 characters for their validity. -simdutf_really_inline bool validate_utf32(const __m128i a, const __m128i b) { - using vector_u32 = simd32; - - const auto in0 = vector_u32(a); - const auto in1 = vector_u32(b); - - const auto standardmax = vector_u32::splat(0x10ffff); - const auto offset = vector_u32::splat(0xffff2000); - const auto standardoffsetmax = vector_u32::splat(0xfffff7ff); - - const auto too_large = max(in0, in1) > standardmax; - const auto surrogate0 = (in0 + offset) > standardoffsetmax; - const auto surrogate1 = (in1 + offset) > standardoffsetmax; - - const auto combined = too_large | surrogate0 | surrogate1; - return !combined.any(); -} - -template -std::pair -sse_convert_utf32_to_utf16(const char32_t *buf, size_t len, - char16_t *utf16_output) { - - const char32_t *end = buf + len; - - const __m128i v_ffff0000 = _mm_set1_epi32((int32_t)0xffff0000); - __m128i forbidden_bytemask = _mm_setzero_si128(); - - while (end - buf >= 16 + 8) { - const __m128i *ptr = reinterpret_cast(buf); - const __m128i in0 = _mm_loadu_si128(ptr + 0); - const __m128i in1 = _mm_loadu_si128(ptr + 1); - const __m128i in2 = _mm_loadu_si128(ptr + 2); - const __m128i in3 = _mm_loadu_si128(ptr + 3); - - const __m128i combined = - _mm_or_si128(_mm_or_si128(in2, in3), _mm_or_si128(in0, in1)); - if (simdutf_likely(_mm_testz_si128(combined, v_ffff0000))) { - // No bits set above 16th, directly pack UTF-32 to UTF-16 - __m128i utf16_packed0 = _mm_packus_epi32(in0, in1); - __m128i utf16_packed1 = _mm_packus_epi32(in2, in3); - - const __m128i v_f800 = _mm_set1_epi16((uint16_t)0xf800); - const __m128i v_d800 = _mm_set1_epi16((uint16_t)0xd800); - forbidden_bytemask = _mm_or_si128( - forbidden_bytemask, - _mm_or_si128( - _mm_cmpeq_epi16(_mm_and_si128(utf16_packed0, v_f800), v_d800), - _mm_cmpeq_epi16(_mm_and_si128(utf16_packed1, v_f800), v_d800))); - - if (big_endian) { - const __m128i swap = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - utf16_packed0 = _mm_shuffle_epi8(utf16_packed0, swap); - utf16_packed1 = _mm_shuffle_epi8(utf16_packed1, swap); - } - - _mm_storeu_si128((__m128i *)utf16_output + 0, utf16_packed0); - _mm_storeu_si128((__m128i *)utf16_output + 1, utf16_packed1); - utf16_output += 16; - buf += 16; - } else { - if (!validate_utf32(in0, in1) || !validate_utf32(in2, in3)) { - return std::make_pair(nullptr, utf16_output); - } - - const auto ret0 = sse_expand_surrogate(in0); - _mm_storeu_si128((__m128i *)utf16_output, ret0.compressed); - utf16_output += ret0.u16count; - - const auto ret1 = sse_expand_surrogate(in1); - _mm_storeu_si128((__m128i *)utf16_output, ret1.compressed); - utf16_output += ret1.u16count; - - const auto ret2 = sse_expand_surrogate(in2); - _mm_storeu_si128((__m128i *)utf16_output, ret2.compressed); - utf16_output += ret2.u16count; - - const auto ret3 = sse_expand_surrogate(in3); - _mm_storeu_si128((__m128i *)utf16_output, ret3.compressed); - utf16_output += ret3.u16count; - - buf += 16; - } - } - - // check for invalid input - if (static_cast(_mm_movemask_epi8(forbidden_bytemask)) != 0) { - return std::make_pair(nullptr, utf16_output); - } - - return std::make_pair(buf, utf16_output); -} - -template -std::pair -sse_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, - char16_t *utf16_output) { - const char32_t *start = buf; - const char32_t *end = buf + len; - - const __m128i v_ffff0000 = _mm_set1_epi32((int32_t)0xffff0000); - - while (end - buf >= 8) { - const __m128i in = _mm_loadu_si128((__m128i *)buf); - const __m128i nextin = _mm_loadu_si128((__m128i *)buf + 1); - - const __m128i combined = _mm_or_si128(in, nextin); - if (simdutf_likely(_mm_testz_si128(combined, v_ffff0000))) { - // No bits set above 16th, directly pack UTF-32 to UTF-16 - __m128i utf16_packed = _mm_packus_epi32(in, nextin); - - const __m128i v_f800 = _mm_set1_epi16((uint16_t)0xf800); - const __m128i v_d800 = _mm_set1_epi16((uint16_t)0xd800); - const __m128i forbidden_bytemask = - _mm_cmpeq_epi16(_mm_and_si128(utf16_packed, v_f800), v_d800); - if (static_cast(_mm_movemask_epi8(forbidden_bytemask)) != 0) { - return std::make_pair(result(error_code::SURROGATE, buf - start), - utf16_output); - } - - if (big_endian) { - const __m128i swap = - _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); - utf16_packed = _mm_shuffle_epi8(utf16_packed, swap); - } - - _mm_storeu_si128((__m128i *)utf16_output, utf16_packed); - utf16_output += 8; - buf += 8; - } else { - size_t forward = 7; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint32_t word = buf[k]; - if ((word & 0xFFFF0000) == 0) { - // will not generate a surrogate pair - if (word >= 0xD800 && word <= 0xDFFF) { - return std::make_pair( - result(error_code::SURROGATE, buf - start + k), utf16_output); - } - *utf16_output++ = - big_endian - ? char16_t((uint16_t(word) >> 8) | (uint16_t(word) << 8)) - : char16_t(word); - } else { - // will generate a surrogate pair - if (word > 0x10FFFF) { - return std::make_pair( - result(error_code::TOO_LARGE, buf - start + k), utf16_output); - } - word -= 0x10000; - uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); - uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); - if (big_endian) { - high_surrogate = - uint16_t((high_surrogate >> 8) | (high_surrogate << 8)); - low_surrogate = - uint16_t((low_surrogate >> 8) | (low_surrogate << 8)); - } - *utf16_output++ = char16_t(high_surrogate); - *utf16_output++ = char16_t(low_surrogate); - } - } - buf += k; - } - } - - return std::make_pair(result(error_code::SUCCESS, buf - start), utf16_output); -} -/* end file src/westmere/sse_convert_utf32_to_utf16.cpp */ -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_BASE64 -/* begin file src/westmere/sse_base64.cpp */ -/** - * References and further reading: - * - * Wojciech Muła, Daniel Lemire, Base64 encoding and decoding at almost the - * speed of a memory copy, Software: Practice and Experience 50 (2), 2020. - * https://arxiv.org/abs/1910.05109 - * - * Wojciech Muła, Daniel Lemire, Faster Base64 Encoding and Decoding using AVX2 - * Instructions, ACM Transactions on the Web 12 (3), 2018. - * https://arxiv.org/abs/1704.00605 - * - * Simon Josefsson. 2006. The Base16, Base32, and Base64 Data Encodings. - * https://tools.ietf.org/html/rfc4648. (2006). Internet Engineering Task Force, - * Request for Comments: 4648. - * - * Alfred Klomp. 2014a. Fast Base64 encoding/decoding with SSE vectorization. - * http://www.alfredklomp.com/programming/sse-base64/. (2014). - * - * Alfred Klomp. 2014b. Fast Base64 stream encoder/decoder in C99, with SIMD - * acceleration. https://github.com/aklomp/base64. (2014). - * - * Hanson Char. 2014. A Fast and Correct Base 64 Codec. (2014). - * https://aws.amazon.com/blogs/developer/a-fast-and-correct-base-64-codec/ - * - * Nick Kopp. 2013. Base64 Encoding on a GPU. - * https://www.codeproject.com/Articles/276993/Base-Encoding-on-a-GPU. (2013). - */ - -// --- encoding ---------------------------------------------------- - -template __m128i lookup_pshufb_improved(const __m128i input) { - // credit: Wojciech Muła - // reduce 0..51 -> 0 - // 52..61 -> 1 .. 10 - // 62 -> 11 - // 63 -> 12 - __m128i result = _mm_subs_epu8(input, _mm_set1_epi8(51)); - - // distinguish between ranges 0..25 and 26..51: - // 0 .. 25 -> remains 0 - // 26 .. 51 -> becomes 13 - const __m128i less = _mm_cmpgt_epi8(_mm_set1_epi8(26), input); - result = _mm_or_si128(result, _mm_and_si128(less, _mm_set1_epi8(13))); - - __m128i shift_LUT; - if (base64_url) { - shift_LUT = _mm_setr_epi8('a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, - '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, - '0' - 52, '-' - 62, '_' - 63, 'A', 0, 0); - } else { - shift_LUT = _mm_setr_epi8('a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, - '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, - '0' - 52, '+' - 62, '/' - 63, 'A', 0, 0); - } - - // read shift - result = _mm_shuffle_epi8(shift_LUT, result); - - return _mm_add_epi8(result, input); -} - -template -size_t encode_base64(char *dst, const char *src, size_t srclen, - base64_options options) { - // credit: Wojciech Muła - // SSE (lookup: pshufb improved unrolled) - const uint8_t *input = (const uint8_t *)src; - - uint8_t *out = (uint8_t *)dst; - const __m128i shuf = - _mm_set_epi8(10, 11, 9, 10, 7, 8, 6, 7, 4, 5, 3, 4, 1, 2, 0, 1); - - size_t i = 0; - for (; i + 52 <= srclen; i += 48) { - __m128i in0 = _mm_loadu_si128( - reinterpret_cast(input + i + 4 * 3 * 0)); - __m128i in1 = _mm_loadu_si128( - reinterpret_cast(input + i + 4 * 3 * 1)); - __m128i in2 = _mm_loadu_si128( - reinterpret_cast(input + i + 4 * 3 * 2)); - __m128i in3 = _mm_loadu_si128( - reinterpret_cast(input + i + 4 * 3 * 3)); - - in0 = _mm_shuffle_epi8(in0, shuf); - in1 = _mm_shuffle_epi8(in1, shuf); - in2 = _mm_shuffle_epi8(in2, shuf); - in3 = _mm_shuffle_epi8(in3, shuf); - - const __m128i t0_0 = _mm_and_si128(in0, _mm_set1_epi32(0x0fc0fc00)); - const __m128i t0_1 = _mm_and_si128(in1, _mm_set1_epi32(0x0fc0fc00)); - const __m128i t0_2 = _mm_and_si128(in2, _mm_set1_epi32(0x0fc0fc00)); - const __m128i t0_3 = _mm_and_si128(in3, _mm_set1_epi32(0x0fc0fc00)); - - const __m128i t1_0 = _mm_mulhi_epu16(t0_0, _mm_set1_epi32(0x04000040)); - const __m128i t1_1 = _mm_mulhi_epu16(t0_1, _mm_set1_epi32(0x04000040)); - const __m128i t1_2 = _mm_mulhi_epu16(t0_2, _mm_set1_epi32(0x04000040)); - const __m128i t1_3 = _mm_mulhi_epu16(t0_3, _mm_set1_epi32(0x04000040)); - - const __m128i t2_0 = _mm_and_si128(in0, _mm_set1_epi32(0x003f03f0)); - const __m128i t2_1 = _mm_and_si128(in1, _mm_set1_epi32(0x003f03f0)); - const __m128i t2_2 = _mm_and_si128(in2, _mm_set1_epi32(0x003f03f0)); - const __m128i t2_3 = _mm_and_si128(in3, _mm_set1_epi32(0x003f03f0)); - - const __m128i t3_0 = _mm_mullo_epi16(t2_0, _mm_set1_epi32(0x01000010)); - const __m128i t3_1 = _mm_mullo_epi16(t2_1, _mm_set1_epi32(0x01000010)); - const __m128i t3_2 = _mm_mullo_epi16(t2_2, _mm_set1_epi32(0x01000010)); - const __m128i t3_3 = _mm_mullo_epi16(t2_3, _mm_set1_epi32(0x01000010)); - - const __m128i input0 = _mm_or_si128(t1_0, t3_0); - const __m128i input1 = _mm_or_si128(t1_1, t3_1); - const __m128i input2 = _mm_or_si128(t1_2, t3_2); - const __m128i input3 = _mm_or_si128(t1_3, t3_3); - - _mm_storeu_si128(reinterpret_cast<__m128i *>(out), - lookup_pshufb_improved(input0)); - out += 16; - - _mm_storeu_si128(reinterpret_cast<__m128i *>(out), - lookup_pshufb_improved(input1)); - out += 16; - - _mm_storeu_si128(reinterpret_cast<__m128i *>(out), - lookup_pshufb_improved(input2)); - out += 16; - - _mm_storeu_si128(reinterpret_cast<__m128i *>(out), - lookup_pshufb_improved(input3)); - out += 16; - } - for (; i + 16 <= srclen; i += 12) { - - __m128i in = _mm_loadu_si128(reinterpret_cast(input + i)); - - // bytes from groups A, B and C are needed in separate 32-bit lanes - // in = [DDDD|CCCC|BBBB|AAAA] - // - // an input triplet has layout - // [????????|ccdddddd|bbbbcccc|aaaaaabb] - // byte 3 byte 2 byte 1 byte 0 -- byte 3 comes from the next - // triplet - // - // shuffling changes the order of bytes: 1, 0, 2, 1 - // [bbbbcccc|ccdddddd|aaaaaabb|bbbbcccc] - // ^^^^ ^^^^^^^^ ^^^^^^^^ ^^^^ - // processed bits - in = _mm_shuffle_epi8(in, shuf); - - // unpacking - - // t0 = [0000cccc|cc000000|aaaaaa00|00000000] - const __m128i t0 = _mm_and_si128(in, _mm_set1_epi32(0x0fc0fc00)); - // t1 = [00000000|00cccccc|00000000|00aaaaaa] - // (c * (1 << 10), a * (1 << 6)) >> 16 (note: an unsigned - // multiplication) - const __m128i t1 = _mm_mulhi_epu16(t0, _mm_set1_epi32(0x04000040)); - - // t2 = [00000000|00dddddd|000000bb|bbbb0000] - const __m128i t2 = _mm_and_si128(in, _mm_set1_epi32(0x003f03f0)); - // t3 = [00dddddd|00000000|00bbbbbb|00000000]( - // (d * (1 << 8), b * (1 << 4)) - const __m128i t3 = _mm_mullo_epi16(t2, _mm_set1_epi32(0x01000010)); - - // res = [00dddddd|00cccccc|00bbbbbb|00aaaaaa] = t1 | t3 - const __m128i indices = _mm_or_si128(t1, t3); - - _mm_storeu_si128(reinterpret_cast<__m128i *>(out), - lookup_pshufb_improved(indices)); - out += 16; - } - - return i / 3 * 4 + scalar::base64::tail_encode_base64((char *)out, src + i, - srclen - i, options); -} - -// --- decoding ----------------------------------------------- - -static simdutf_really_inline void compress(__m128i data, uint16_t mask, - char *output) { - if (mask == 0) { - _mm_storeu_si128(reinterpret_cast<__m128i *>(output), data); - return; - } - - // this particular implementation was inspired by work done by @animetosho - // we do it in two steps, first 8 bytes and then second 8 bytes - uint8_t mask1 = uint8_t(mask); // least significant 8 bits - uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits - // next line just loads the 64-bit values thintable_epi8[mask1] and - // thintable_epi8[mask2] into a 128-bit register, using only - // two instructions on most compilers. - - __m128i shufmask = _mm_set_epi64x(tables::base64::thintable_epi8[mask2], - tables::base64::thintable_epi8[mask1]); - // we increment by 0x08 the second half of the mask - shufmask = - _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); - // this is the version "nearly pruned" - __m128i pruned = _mm_shuffle_epi8(data, shufmask); - // we still need to put the two halves together. - // we compute the popcount of the first half: - int pop1 = tables::base64::BitsSetTable256mul2[mask1]; - // then load the corresponding mask, what it does is to write - // only the first pop1 bytes from the first 8 bytes, and then - // it fills in with the bytes from the second 8 bytes + some filling - // at the end. - __m128i compactmask = _mm_loadu_si128(reinterpret_cast( - tables::base64::pshufb_combine_table + pop1 * 8)); - __m128i answer = _mm_shuffle_epi8(pruned, compactmask); - _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); -} - -static simdutf_really_inline void base64_decode(char *out, __m128i str) { - // credit: aqrit - - const __m128i pack_shuffle = - _mm_setr_epi8(2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1); - - const __m128i t0 = _mm_maddubs_epi16(str, _mm_set1_epi32(0x01400140)); - const __m128i t1 = _mm_madd_epi16(t0, _mm_set1_epi32(0x00011000)); - const __m128i t2 = _mm_shuffle_epi8(t1, pack_shuffle); - // Store the output: - // this writes 16 bytes, but we only need 12. - _mm_storeu_si128((__m128i *)out, t2); -} - -// decode 64 bytes and output 48 bytes -static inline void base64_decode_block(char *out, const char *src) { - base64_decode(out, _mm_loadu_si128(reinterpret_cast(src))); - base64_decode(out + 12, - _mm_loadu_si128(reinterpret_cast(src + 16))); - base64_decode(out + 24, - _mm_loadu_si128(reinterpret_cast(src + 32))); - base64_decode(out + 36, - _mm_loadu_si128(reinterpret_cast(src + 48))); -} - -static inline void base64_decode_block_safe(char *out, const char *src) { - base64_decode(out, _mm_loadu_si128(reinterpret_cast(src))); - base64_decode(out + 12, - _mm_loadu_si128(reinterpret_cast(src + 16))); - base64_decode(out + 24, - _mm_loadu_si128(reinterpret_cast(src + 32))); - char buffer[16]; - base64_decode(buffer, - _mm_loadu_si128(reinterpret_cast(src + 48))); - std::memcpy(out + 36, buffer, 12); -} - -// --- decoding - base64 class -------------------------------- - -class block64 { - __m128i chunks[4]; - -public: - // The caller of this function is responsible to ensure that there are 64 - // bytes available from reading at src. - simdutf_really_inline block64(const char *src) { - chunks[0] = _mm_loadu_si128(reinterpret_cast(src)); - chunks[1] = _mm_loadu_si128(reinterpret_cast(src + 16)); - chunks[2] = _mm_loadu_si128(reinterpret_cast(src + 32)); - chunks[3] = _mm_loadu_si128(reinterpret_cast(src + 48)); - } - -public: - // The caller of this function is responsible to ensure that there are 128 - // bytes available from reading at src. The data is read into a block64 - // structure. - simdutf_really_inline block64(const char16_t *src) { - const auto m1 = _mm_loadu_si128(reinterpret_cast(src)); - const auto m2 = _mm_loadu_si128(reinterpret_cast(src + 8)); - const auto m3 = - _mm_loadu_si128(reinterpret_cast(src + 16)); - const auto m4 = - _mm_loadu_si128(reinterpret_cast(src + 24)); - const auto m5 = - _mm_loadu_si128(reinterpret_cast(src + 32)); - const auto m6 = - _mm_loadu_si128(reinterpret_cast(src + 40)); - const auto m7 = - _mm_loadu_si128(reinterpret_cast(src + 48)); - const auto m8 = - _mm_loadu_si128(reinterpret_cast(src + 56)); - chunks[0] = _mm_packus_epi16(m1, m2); - chunks[1] = _mm_packus_epi16(m3, m4); - chunks[2] = _mm_packus_epi16(m5, m6); - chunks[3] = _mm_packus_epi16(m7, m8); - } - -public: - simdutf_really_inline void copy_block(char *output) { - _mm_storeu_si128(reinterpret_cast<__m128i *>(output), chunks[0]); - _mm_storeu_si128(reinterpret_cast<__m128i *>(output + 16), chunks[1]); - _mm_storeu_si128(reinterpret_cast<__m128i *>(output + 32), chunks[2]); - _mm_storeu_si128(reinterpret_cast<__m128i *>(output + 48), chunks[3]); - } - -public: - simdutf_really_inline uint64_t compress_block(uint64_t mask, char *output) { - if (is_power_of_two(mask)) { - return compress_block_single(mask, output); - } - - uint64_t nmask = ~mask; - compress(chunks[0], uint16_t(mask), output); - compress(chunks[1], uint16_t(mask >> 16), - output + count_ones(nmask & 0xFFFF)); - compress(chunks[2], uint16_t(mask >> 32), - output + count_ones(nmask & 0xFFFFFFFF)); - compress(chunks[3], uint16_t(mask >> 48), - output + count_ones(nmask & 0xFFFFFFFFFFFFULL)); - return count_ones(nmask); - } - -private: - simdutf_really_inline size_t compress_block_single(uint64_t mask, - char *output) { - const size_t pos64 = trailing_zeroes(mask); - const int8_t pos = pos64 & 0xf; - switch (pos64 >> 4) { - case 0b00: { - const __m128i v0 = _mm_set1_epi8(char(pos - 1)); - const __m128i v1 = - _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - const __m128i v2 = _mm_cmpgt_epi8(v1, v0); - const __m128i sh = _mm_sub_epi8(v1, v2); - const __m128i compressed = _mm_shuffle_epi8(chunks[0], sh); - - _mm_storeu_si128((__m128i *)(output + 0 * 16), compressed); - _mm_storeu_si128((__m128i *)(output + 1 * 16 - 1), chunks[1]); - _mm_storeu_si128((__m128i *)(output + 2 * 16 - 1), chunks[2]); - _mm_storeu_si128((__m128i *)(output + 3 * 16 - 1), chunks[3]); - } break; - case 0b01: { - _mm_storeu_si128((__m128i *)(output + 0 * 16), chunks[0]); - - const __m128i v0 = _mm_set1_epi8(char(pos - 1)); - const __m128i v1 = - _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - const __m128i v2 = _mm_cmpgt_epi8(v1, v0); - const __m128i sh = _mm_sub_epi8(v1, v2); - const __m128i compressed = _mm_shuffle_epi8(chunks[1], sh); - - _mm_storeu_si128((__m128i *)(output + 1 * 16), compressed); - _mm_storeu_si128((__m128i *)(output + 2 * 16 - 1), chunks[2]); - _mm_storeu_si128((__m128i *)(output + 3 * 16 - 1), chunks[3]); - } break; - case 0b10: { - _mm_storeu_si128((__m128i *)(output + 0 * 16), chunks[0]); - _mm_storeu_si128((__m128i *)(output + 1 * 16), chunks[1]); - - const __m128i v0 = _mm_set1_epi8(char(pos - 1)); - const __m128i v1 = - _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - const __m128i v2 = _mm_cmpgt_epi8(v1, v0); - const __m128i sh = _mm_sub_epi8(v1, v2); - const __m128i compressed = _mm_shuffle_epi8(chunks[2], sh); - - _mm_storeu_si128((__m128i *)(output + 2 * 16), compressed); - _mm_storeu_si128((__m128i *)(output + 3 * 16 - 1), chunks[3]); - } break; - case 0b11: { - _mm_storeu_si128((__m128i *)(output + 0 * 16), chunks[0]); - _mm_storeu_si128((__m128i *)(output + 1 * 16), chunks[1]); - _mm_storeu_si128((__m128i *)(output + 2 * 16), chunks[2]); - - const __m128i v0 = _mm_set1_epi8(char(pos - 1)); - const __m128i v1 = - _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - const __m128i v2 = _mm_cmpgt_epi8(v1, v0); - const __m128i sh = _mm_sub_epi8(v1, v2); - const __m128i compressed = _mm_shuffle_epi8(chunks[3], sh); - - _mm_storeu_si128((__m128i *)(output + 3 * 16), compressed); - } break; - } - - return 63; - } - -public: - template - simdutf_really_inline uint64_t to_base64_mask(uint64_t *error) { - uint32_t err0 = 0; - uint32_t err1 = 0; - uint32_t err2 = 0; - uint32_t err3 = 0; - uint64_t m0 = to_base64_mask(&chunks[0], &err0); - uint64_t m1 = to_base64_mask(&chunks[1], &err1); - uint64_t m2 = to_base64_mask(&chunks[2], &err2); - uint64_t m3 = to_base64_mask(&chunks[3], &err3); - if (!ignore_garbage) { - *error = (err0) | ((uint64_t)err1 << 16) | ((uint64_t)err2 << 32) | - ((uint64_t)err3 << 48); - } - return m0 | (m1 << 16) | (m2 << 32) | (m3 << 48); - } - -private: - template - simdutf_really_inline uint16_t to_base64_mask(__m128i *src, uint32_t *error) { - const __m128i ascii_space_tbl = - _mm_setr_epi8(0x20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x9, 0xa, - 0x0, 0xc, 0xd, 0x0, 0x0); - // credit: aqrit - __m128i delta_asso; - if (base64_url) { - delta_asso = _mm_setr_epi8(0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x0, - 0x0, 0x0, 0x0, 0x0, 0xF, 0x0, 0xF); - } else { - - delta_asso = - _mm_setr_epi8(0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x0F); - } - __m128i delta_values; - if (base64_url) { - delta_values = _mm_setr_epi8(0x0, 0x0, 0x0, 0x13, 0x4, uint8_t(0xBF), - uint8_t(0xBF), uint8_t(0xB9), uint8_t(0xB9), - 0x0, 0x11, uint8_t(0xC3), uint8_t(0xBF), - uint8_t(0xE0), uint8_t(0xB9), uint8_t(0xB9)); - } else { - delta_values = - _mm_setr_epi8(int8_t(0x00), int8_t(0x00), int8_t(0x00), int8_t(0x13), - int8_t(0x04), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), - int8_t(0xB9), int8_t(0x00), int8_t(0x10), int8_t(0xC3), - int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), int8_t(0xB9)); - } - __m128i check_asso; - if (base64_url) { - check_asso = _mm_setr_epi8(0xD, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, - 0x1, 0x3, 0x7, 0xB, 0xE, 0xB, 0x6); - } else { - check_asso = - _mm_setr_epi8(0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x01, 0x03, 0x07, 0x0B, 0x0B, 0x0B, 0x0F); - } - __m128i check_values; - if (base64_url) { - check_values = _mm_setr_epi8(uint8_t(0x80), uint8_t(0x80), uint8_t(0x80), - uint8_t(0x80), uint8_t(0xCF), uint8_t(0xBF), - uint8_t(0xB6), uint8_t(0xA6), uint8_t(0xB5), - uint8_t(0xA1), 0x0, uint8_t(0x80), 0x0, - uint8_t(0x80), 0x0, uint8_t(0x80)); - } else { - check_values = - _mm_setr_epi8(int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), - int8_t(0xCF), int8_t(0xBF), int8_t(0xD5), int8_t(0xA6), - int8_t(0xB5), int8_t(0x86), int8_t(0xD1), int8_t(0x80), - int8_t(0xB1), int8_t(0x80), int8_t(0x91), int8_t(0x80)); - } - const __m128i shifted = _mm_srli_epi32(*src, 3); - - const __m128i delta_hash = - _mm_avg_epu8(_mm_shuffle_epi8(delta_asso, *src), shifted); - const __m128i check_hash = - _mm_avg_epu8(_mm_shuffle_epi8(check_asso, *src), shifted); - - const __m128i out = - _mm_adds_epi8(_mm_shuffle_epi8(delta_values, delta_hash), *src); - const __m128i chk = - _mm_adds_epi8(_mm_shuffle_epi8(check_values, check_hash), *src); - const int mask = _mm_movemask_epi8(chk); - if (!ignore_garbage && mask) { - __m128i ascii_space = - _mm_cmpeq_epi8(_mm_shuffle_epi8(ascii_space_tbl, *src), *src); - *error = (mask ^ _mm_movemask_epi8(ascii_space)); - } - *src = out; - return (uint16_t)mask; - } - -public: - simdutf_really_inline void base64_decode_block(char *out) { - base64_decode(out, chunks[0]); - base64_decode(out + 12, chunks[1]); - base64_decode(out + 24, chunks[2]); - base64_decode(out + 36, chunks[3]); - } - -public: - simdutf_really_inline void base64_decode_block_safe(char *out) { - base64_decode(out, chunks[0]); - base64_decode(out + 12, chunks[1]); - base64_decode(out + 24, chunks[2]); - char buffer[16]; - base64_decode(buffer, chunks[3]); - std::memcpy(out + 36, buffer, 12); - } -}; -/* end file src/westmere/sse_base64.cpp */ -#endif // SIMDUTF_FEATURE_BASE64 - -} // unnamed namespace -} // namespace westmere -} // namespace simdutf - -/* begin file src/generic/buf_block_reader.h */ -namespace simdutf { -namespace westmere { -namespace { - -// Walks through a buffer in block-sized increments, loading the last part with -// spaces -template struct buf_block_reader { -public: - simdutf_really_inline buf_block_reader(const uint8_t *_buf, size_t _len); - simdutf_really_inline size_t block_index(); - simdutf_really_inline bool has_full_block() const; - simdutf_really_inline const uint8_t *full_block() const; - /** - * Get the last block, padded with spaces. - * - * There will always be a last block, with at least 1 byte, unless len == 0 - * (in which case this function fills the buffer with spaces and returns 0. In - * particular, if len == STEP_SIZE there will be 0 full_blocks and 1 remainder - * block with STEP_SIZE bytes and no spaces for padding. - * - * @return the number of effective characters in the last block. - */ - simdutf_really_inline size_t get_remainder(uint8_t *dst) const; - simdutf_really_inline void advance(); - -private: - const uint8_t *buf; - const size_t len; - const size_t lenminusstep; - size_t idx; -}; - -// Routines to print masks and text for debugging bitmask operations -simdutf_unused static char *format_input_text_64(const uint8_t *text) { - static char *buf = - reinterpret_cast(malloc(sizeof(simd8x64) + 1)); - for (size_t i = 0; i < sizeof(simd8x64); i++) { - buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); - } - buf[sizeof(simd8x64)] = '\0'; - return buf; -} - -// Routines to print masks and text for debugging bitmask operations -simdutf_unused static char *format_input_text(const simd8x64 &in) { - static char *buf = - reinterpret_cast(malloc(sizeof(simd8x64) + 1)); - in.store(reinterpret_cast(buf)); - for (size_t i = 0; i < sizeof(simd8x64); i++) { - if (buf[i] < ' ') { - buf[i] = '_'; - } - } - buf[sizeof(simd8x64)] = '\0'; - return buf; -} - -simdutf_unused static char *format_mask(uint64_t mask) { - static char *buf = reinterpret_cast(malloc(64 + 1)); - for (size_t i = 0; i < 64; i++) { - buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; - } - buf[64] = '\0'; - return buf; -} - -template -simdutf_really_inline -buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) - : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, - idx{0} {} - -template -simdutf_really_inline size_t buf_block_reader::block_index() { - return idx; -} - -template -simdutf_really_inline bool buf_block_reader::has_full_block() const { - return idx < lenminusstep; -} - -template -simdutf_really_inline const uint8_t * -buf_block_reader::full_block() const { - return &buf[idx]; -} - -template -simdutf_really_inline size_t -buf_block_reader::get_remainder(uint8_t *dst) const { - if (len == idx) { - return 0; - } // memcpy(dst, null, 0) will trigger an error with some sanitizers - std::memset(dst, 0x20, - STEP_SIZE); // std::memset STEP_SIZE because it is more efficient - // to write out 8 or 16 bytes at once. - std::memcpy(dst, buf + idx, len - idx); - return len - idx; -} - -template -simdutf_really_inline void buf_block_reader::advance() { - idx += STEP_SIZE; -} - -} // unnamed namespace -} // namespace westmere -} // namespace simdutf -/* end file src/generic/buf_block_reader.h */ -#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING -/* begin file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ -namespace simdutf { -namespace westmere { -namespace { -namespace utf8_validation { - -using namespace simd; - -simdutf_really_inline simd8 -check_special_cases(const simd8 input, const simd8 prev1) { - // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) - // Bit 1 = Too Long (ASCII followed by continuation) - // Bit 2 = Overlong 3-byte - // Bit 4 = Surrogate - // Bit 5 = Overlong 2-byte - // Bit 7 = Two Continuations - constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ - // 11______ 11______ - constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ - constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ - constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ - constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ - constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ - constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ - constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; - // 11110101 1000____ - // 1111011_ 1000____ - // 11111___ 1000____ - constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ - - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // 0_______ ________ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - TOO_LONG, - // 10______ ________ - TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, - // 1100____ ________ - TOO_SHORT | OVERLONG_2, - // 1101____ ________ - TOO_SHORT, - // 1110____ ________ - TOO_SHORT | OVERLONG_3 | SURROGATE, - // 1111____ ________ - TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); - constexpr const uint8_t CARRY = - TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . - const simd8 byte_1_low = - (prev1 & 0x0F) - .lookup_16( - // ____0000 ________ - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - // ____0001 ________ - CARRY | OVERLONG_2, - // ____001_ ________ - CARRY, CARRY, - - // ____0100 ________ - CARRY | TOO_LARGE, - // ____0101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____011_ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - - // ____1___ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____1101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000); - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ________ 0_______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - TOO_SHORT, TOO_SHORT, - - // ________ 1000____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | - OVERLONG_4, - // ________ 1001____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - // ________ 101_____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - - // ________ 11______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); - return (byte_1_high & byte_1_low & byte_2_high); -} -simdutf_really_inline simd8 -check_multibyte_lengths(const simd8 input, - const simd8 prev_input, - const simd8 sc) { - simd8 prev2 = input.prev<2>(prev_input); - simd8 prev3 = input.prev<3>(prev_input); - simd8 must23 = - simd8(must_be_2_3_continuation(prev2, prev3)); - simd8 must23_80 = must23 & uint8_t(0x80); - return must23_80 ^ sc; -} - -// -// Return nonzero if there are incomplete multibyte characters at the end of the -// block: e.g. if there is a 4-byte character, but it is 3 bytes from the end. -// -simdutf_really_inline simd8 is_incomplete(const simd8 input) { - // If the previous input's last 3 bytes match this, they're too short (they - // ended at EOF): - // ... 1111____ 111_____ 11______ - static const uint8_t max_array[32] = {255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 0b11110000u - 1, - 0b11100000u - 1, - 0b11000000u - 1}; - const simd8 max_value( - &max_array[sizeof(max_array) - sizeof(simd8)]); - return input.gt_bits(max_value); -} - -struct utf8_checker { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; - // The last input we received - simd8 prev_input_block; - // Whether the last input we received was incomplete (used for ASCII fast - // path) - simd8 prev_incomplete; - - // - // Check whether the current bytes are valid UTF-8. - // - simdutf_really_inline void check_utf8_bytes(const simd8 input, - const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ - // lead bytes (2, 3, 4-byte leads become large positive numbers instead of - // small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - simd8 sc = check_special_cases(input, prev1); - this->error |= check_multibyte_lengths(input, prev_input, sc); - } - - // The only problem that can happen at EOF is that a multibyte character is - // too short or a byte value too large in the last bytes: check_special_cases - // only checks for bytes too large in the first of two bytes. - simdutf_really_inline void check_eof() { - // If the previous block had incomplete UTF-8 characters at the end, an - // ASCII block can't possibly finish them. - this->error |= this->prev_incomplete; - } - - simdutf_really_inline void check_next_input(const simd8x64 &input) { - if (simdutf_likely(is_ascii(input))) { - this->error |= this->prev_incomplete; - } else { - // you might think that a for-loop would work, but under Visual Studio, it - // is not good enough. - static_assert((simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - this->prev_incomplete = - is_incomplete(input.chunks[simd8x64::NUM_CHUNKS - 1]); - this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS - 1]; - } - } - - // do not forget to call check_eof! - simdutf_really_inline bool errors() const { - return this->error.any_bits_set_anywhere(); - } - -}; // struct utf8_checker -} // namespace utf8_validation - -using utf8_validation::utf8_checker; - -} // unnamed namespace -} // namespace westmere -} // namespace simdutf -/* end file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ -/* begin file src/generic/utf8_validation/utf8_validator.h */ -namespace simdutf { -namespace westmere { -namespace { -namespace utf8_validation { - -/** - * Validates that the string is actual UTF-8. - */ -template -bool generic_validate_utf8(const uint8_t *input, size_t length) { - checker c{}; - buf_block_reader<64> reader(input, length); - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - c.check_next_input(in); - reader.advance(); - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - c.check_next_input(in); - reader.advance(); - c.check_eof(); - return !c.errors(); -} - -bool generic_validate_utf8(const char *input, size_t length) { - return generic_validate_utf8( - reinterpret_cast(input), length); -} - -/** - * Validates that the string is actual UTF-8 and stops on errors. - */ -template -result generic_validate_utf8_with_errors(const uint8_t *input, size_t length) { - checker c{}; - buf_block_reader<64> reader(input, length); - size_t count{0}; - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - c.check_next_input(in); - if (c.errors()) { - if (count != 0) { - count--; - } // Sometimes the error is only detected in the next chunk - result res = scalar::utf8::rewind_and_validate_with_errors( - reinterpret_cast(input), - reinterpret_cast(input + count), length - count); - res.count += count; - return res; - } - reader.advance(); - count += 64; - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - c.check_next_input(in); - reader.advance(); - c.check_eof(); - if (c.errors()) { - if (count != 0) { - count--; - } // Sometimes the error is only detected in the next chunk - result res = scalar::utf8::rewind_and_validate_with_errors( - reinterpret_cast(input), - reinterpret_cast(input) + count, length - count); - res.count += count; - return res; - } else { - return result(error_code::SUCCESS, length); - } -} - -result generic_validate_utf8_with_errors(const char *input, size_t length) { - return generic_validate_utf8_with_errors( - reinterpret_cast(input), length); -} - -} // namespace utf8_validation -} // unnamed namespace -} // namespace westmere -} // namespace simdutf -/* end file src/generic/utf8_validation/utf8_validator.h */ -#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING -#if SIMDUTF_FEATURE_ASCII -/* begin file src/generic/ascii_validation.h */ -namespace simdutf { -namespace westmere { -namespace { -namespace ascii_validation { - -bool generic_validate_ascii(const char *input, size_t length) { - buf_block_reader<64> reader(reinterpret_cast(input), length); - uint8_t blocks[64]{}; - simd::simd8x64 running_or(blocks); - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - running_or |= in; - reader.advance(); - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - running_or |= in; - return running_or.is_ascii(); -} - -result generic_validate_ascii_with_errors(const char *input, size_t length) { - buf_block_reader<64> reader(reinterpret_cast(input), length); - size_t count{0}; - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - if (!in.is_ascii()) { - result res = scalar::ascii::validate_with_errors( - reinterpret_cast(input + count), length - count); - return result(res.error, count + res.count); - } - reader.advance(); - - count += 64; - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - if (!in.is_ascii()) { - result res = scalar::ascii::validate_with_errors( - reinterpret_cast(input + count), length - count); - return result(res.error, count + res.count); - } else { - return result(error_code::SUCCESS, length); - } -} - -} // namespace ascii_validation -} // unnamed namespace -} // namespace westmere -} // namespace simdutf -/* end file src/generic/ascii_validation.h */ -#endif // SIMDUTF_FEATURE_ASCII - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - // transcoding from UTF-8 to UTF-16 -/* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ -namespace simdutf { -namespace westmere { -namespace { -namespace utf8_to_utf16 { - -using namespace simd; - -template -simdutf_warn_unused size_t convert_valid(const char *input, size_t size, - char16_t *utf16_output) noexcept { - // The implementation is not specific to haswell and should be moved to the - // generic directory. - size_t pos = 0; - char16_t *start{utf16_output}; - const size_t safety_margin = 16; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - // this loop could be unrolled further. For example, we could process the - // mask far more than 64 bytes. - simd8x64 in(reinterpret_cast(input + pos)); - if (in.is_ascii()) { - in.store_ascii_as_utf16(utf16_output); - utf16_output += 64; - pos += 64; - } else { - // Slow path. We hope that the compiler will recognize that this is a slow - // path. Anything that is not a continuation mask is a 'leading byte', - // that is, the start of a new code point. - uint64_t utf8_continuation_mask = in.lt(-65 + 1); - // -65 is 0b10111111 in two-complement's, so largest possible continuation - // byte - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - // The *start* of code points is not so useful, rather, we want the *end* - // of code points. - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times when using solely - // the slow/regular path, and at least four times if there are fast paths. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - // - // Thus we may allow convert_masked_utf8_to_utf16 to process - // more bytes at a time under a fast-path mode where 16 bytes - // are consumed at once (e.g., when encountering ASCII). - size_t consumed = convert_masked_utf8_to_utf16( - input + pos, utf8_end_of_code_point_mask, utf16_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - utf16_output += scalar::utf8_to_utf16::convert_valid( - input + pos, size - pos, utf16_output); - return utf16_output - start; -} - -} // namespace utf8_to_utf16 -} // unnamed namespace -} // namespace westmere -} // namespace simdutf -/* end file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ -/* begin file src/generic/utf8_to_utf16/utf8_to_utf16.h */ -namespace simdutf { -namespace westmere { -namespace { -namespace utf8_to_utf16 { -using namespace simd; - -simdutf_really_inline simd8 -check_special_cases(const simd8 input, const simd8 prev1) { - // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) - // Bit 1 = Too Long (ASCII followed by continuation) - // Bit 2 = Overlong 3-byte - // Bit 4 = Surrogate - // Bit 5 = Overlong 2-byte - // Bit 7 = Two Continuations - constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ - // 11______ 11______ - constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ - constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ - constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ - constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ - constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ - constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ - constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; - // 11110101 1000____ - // 1111011_ 1000____ - // 11111___ 1000____ - constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ - - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // 0_______ ________ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - TOO_LONG, - // 10______ ________ - TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, - // 1100____ ________ - TOO_SHORT | OVERLONG_2, - // 1101____ ________ - TOO_SHORT, - // 1110____ ________ - TOO_SHORT | OVERLONG_3 | SURROGATE, - // 1111____ ________ - TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); - constexpr const uint8_t CARRY = - TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . - const simd8 byte_1_low = - (prev1 & 0x0F) - .lookup_16( - // ____0000 ________ - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - // ____0001 ________ - CARRY | OVERLONG_2, - // ____001_ ________ - CARRY, CARRY, - - // ____0100 ________ - CARRY | TOO_LARGE, - // ____0101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____011_ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - - // ____1___ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____1101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000); - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ________ 0_______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - TOO_SHORT, TOO_SHORT, - - // ________ 1000____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | - OVERLONG_4, - // ________ 1001____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - // ________ 101_____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - - // ________ 11______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); - return (byte_1_high & byte_1_low & byte_2_high); -} -simdutf_really_inline simd8 -check_multibyte_lengths(const simd8 input, - const simd8 prev_input, - const simd8 sc) { - simd8 prev2 = input.prev<2>(prev_input); - simd8 prev3 = input.prev<3>(prev_input); - simd8 must23 = - simd8(must_be_2_3_continuation(prev2, prev3)); - simd8 must23_80 = must23 & uint8_t(0x80); - return must23_80 ^ sc; -} - -struct validating_transcoder { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; - - validating_transcoder() : error(uint8_t(0)) {} - // - // Check whether the current bytes are valid UTF-8. - // - simdutf_really_inline void check_utf8_bytes(const simd8 input, - const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ - // lead bytes (2, 3, 4-byte leads become large positive numbers instead of - // small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - simd8 sc = check_special_cases(input, prev1); - this->error |= check_multibyte_lengths(input, prev_input, sc); - } - - template - simdutf_really_inline size_t convert(const char *in, size_t size, - char16_t *utf16_output) { - size_t pos = 0; - char16_t *start{utf16_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 8 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > -65); - } - // If the input is long enough, then we have that margin-1 is the eight last - // leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store_ascii_as_utf16(utf16_output); - utf16_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - if (utf8_continuation_mask & 1) { - return 0; // error - } - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_utf16( - in + pos, utf8_end_of_code_point_mask, utf16_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - return 0; - } - if (pos < size) { - size_t howmany = scalar::utf8_to_utf16::convert( - in + pos, size - pos, utf16_output); - if (howmany == 0) { - return 0; - } - utf16_output += howmany; - } - return utf16_output - start; - } - - template - simdutf_really_inline result convert_with_errors(const char *in, size_t size, - char16_t *utf16_output) { - size_t pos = 0; - char16_t *start{utf16_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 8 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > -65); - } - // If the input is long enough, then we have that margin-1 is the eight last - // leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store_ascii_as_utf16(utf16_output); - utf16_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - if (errors() || (utf8_continuation_mask & 1)) { - // rewind_and_convert_with_errors will seek a potential error from - // in+pos onward, with the ability to go back up to pos bytes, and - // read size-pos bytes forward. - result res = - scalar::utf8_to_utf16::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf16_output); - res.count += pos; - return res; - } - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_utf16( - in + pos, utf8_end_of_code_point_mask, utf16_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - // rewind_and_convert_with_errors will seek a potential error from in+pos - // onward, with the ability to go back up to pos bytes, and read size-pos - // bytes forward. - result res = - scalar::utf8_to_utf16::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf16_output); - res.count += pos; - return res; - } - if (pos < size) { - // rewind_and_convert_with_errors will seek a potential error from in+pos - // onward, with the ability to go back up to pos bytes, and read size-pos - // bytes forward. - result res = - scalar::utf8_to_utf16::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf16_output); - if (res.error) { // In case of error, we want the error position - res.count += pos; - return res; - } else { // In case of success, we want the number of word written - utf16_output += res.count; - } - } - return result(error_code::SUCCESS, utf16_output - start); - } - - simdutf_really_inline bool errors() const { - return this->error.any_bits_set_anywhere(); - } - -}; // struct utf8_checker -} // namespace utf8_to_utf16 -} // unnamed namespace -} // namespace westmere -} // namespace simdutf -/* end file src/generic/utf8_to_utf16/utf8_to_utf16.h */ -/* begin file src/generic/utf8/utf16_length_from_utf8_bytemask.h */ -namespace simdutf { -namespace westmere { -namespace { -namespace utf8 { - -using namespace simd; - -simdutf_really_inline size_t utf16_length_from_utf8_bytemask(const char *in, - size_t size) { - using vector_i8 = simd8; - using vector_u8 = simd8; - using vector_u64 = simd64; - - constexpr size_t N = vector_i8::SIZE; - constexpr size_t max_iterations = 255 / 2; - - auto counters = vector_u64::zero(); - auto local = vector_u8::zero(); - - size_t iterations = 0; - size_t pos = 0; - size_t count = 0; - for (; pos + N <= size; pos += N) { - const auto input = - vector_i8::load(reinterpret_cast(in + pos)); - - const auto continuation = input > int8_t(-65); - const auto utf_4bytes = vector_u8(input.value) >= uint8_t(240); - - local -= vector_u8(continuation); - local -= vector_u8(utf_4bytes); - - iterations += 1; - if (iterations == max_iterations) { - counters += sum_8bytes(local); - local = vector_u8::zero(); - iterations = 0; - } - } - - if (iterations > 0) { - count += local.sum_bytes(); - } - - count += counters.sum(); - - return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos); -} - -} // namespace utf8 -} // unnamed namespace -} // namespace westmere -} // namespace simdutf -/* end file src/generic/utf8/utf16_length_from_utf8_bytemask.h */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -/* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ -namespace simdutf { -namespace westmere { -namespace { -namespace utf8_to_utf32 { - -using namespace simd; - -simdutf_warn_unused size_t convert_valid(const char *input, size_t size, - char32_t *utf32_output) noexcept { - size_t pos = 0; - char32_t *start{utf32_output}; - const size_t safety_margin = 16; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 in(reinterpret_cast(input + pos)); - if (in.is_ascii()) { - in.store_ascii_as_utf32(utf32_output); - utf32_output += 64; - pos += 64; - } else { - // -65 is 0b10111111 in two-complement's, so largest possible continuation - // byte - uint64_t utf8_continuation_mask = in.lt(-65 + 1); - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - size_t max_starting_point = (pos + 64) - 12; - while (pos < max_starting_point) { - size_t consumed = convert_masked_utf8_to_utf32( - input + pos, utf8_end_of_code_point_mask, utf32_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - } - } - utf32_output += scalar::utf8_to_utf32::convert_valid(input + pos, size - pos, - utf32_output); - return utf32_output - start; -} - -} // namespace utf8_to_utf32 -} // unnamed namespace -} // namespace westmere -} // namespace simdutf -/* end file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ -/* begin file src/generic/utf8_to_utf32/utf8_to_utf32.h */ -namespace simdutf { -namespace westmere { -namespace { -namespace utf8_to_utf32 { -using namespace simd; - -simdutf_really_inline simd8 -check_special_cases(const simd8 input, const simd8 prev1) { - // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) - // Bit 1 = Too Long (ASCII followed by continuation) - // Bit 2 = Overlong 3-byte - // Bit 4 = Surrogate - // Bit 5 = Overlong 2-byte - // Bit 7 = Two Continuations - constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ - // 11______ 11______ - constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ - constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ - constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ - constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ - constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ - constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ - constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; - // 11110101 1000____ - // 1111011_ 1000____ - // 11111___ 1000____ - constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ - - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // 0_______ ________ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - TOO_LONG, - // 10______ ________ - TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, - // 1100____ ________ - TOO_SHORT | OVERLONG_2, - // 1101____ ________ - TOO_SHORT, - // 1110____ ________ - TOO_SHORT | OVERLONG_3 | SURROGATE, - // 1111____ ________ - TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); - constexpr const uint8_t CARRY = - TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . - const simd8 byte_1_low = - (prev1 & 0x0F) - .lookup_16( - // ____0000 ________ - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - // ____0001 ________ - CARRY | OVERLONG_2, - // ____001_ ________ - CARRY, CARRY, - - // ____0100 ________ - CARRY | TOO_LARGE, - // ____0101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____011_ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - - // ____1___ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____1101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000); - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ________ 0_______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - TOO_SHORT, TOO_SHORT, - - // ________ 1000____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | - OVERLONG_4, - // ________ 1001____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - // ________ 101_____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - - // ________ 11______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); - return (byte_1_high & byte_1_low & byte_2_high); -} -simdutf_really_inline simd8 -check_multibyte_lengths(const simd8 input, - const simd8 prev_input, - const simd8 sc) { - simd8 prev2 = input.prev<2>(prev_input); - simd8 prev3 = input.prev<3>(prev_input); - simd8 must23 = - simd8(must_be_2_3_continuation(prev2, prev3)); - simd8 must23_80 = must23 & uint8_t(0x80); - return must23_80 ^ sc; -} - -struct validating_transcoder { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; - - validating_transcoder() : error(uint8_t(0)) {} - // - // Check whether the current bytes are valid UTF-8. - // - simdutf_really_inline void check_utf8_bytes(const simd8 input, - const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ - // lead bytes (2, 3, 4-byte leads become large positive numbers instead of - // small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - simd8 sc = check_special_cases(input, prev1); - this->error |= check_multibyte_lengths(input, prev_input, sc); - } - - simdutf_really_inline size_t convert(const char *in, size_t size, - char32_t *utf32_output) { - size_t pos = 0; - char32_t *start{utf32_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 words when calling convert_masked_utf8_to_utf32. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 16 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > -65); - } - // If the input is long enough, then we have that margin-1 is the fourth - // last leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store_ascii_as_utf32(utf32_output); - utf32_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - if (utf8_continuation_mask & 1) { - return 0; // we have an error - } - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_utf32( - in + pos, utf8_end_of_code_point_mask, utf32_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - return 0; - } - if (pos < size) { - size_t howmany = - scalar::utf8_to_utf32::convert(in + pos, size - pos, utf32_output); - if (howmany == 0) { - return 0; - } - utf32_output += howmany; - } - return utf32_output - start; - } - - simdutf_really_inline result convert_with_errors(const char *in, size_t size, - char32_t *utf32_output) { - size_t pos = 0; - char32_t *start{utf32_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 8 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > -65); - } - // If the input is long enough, then we have that margin-1 is the fourth - // last leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store_ascii_as_utf32(utf32_output); - utf32_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - if (errors() || (utf8_continuation_mask & 1)) { - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf32_output); - res.count += pos; - return res; - } - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_utf32( - in + pos, utf8_end_of_code_point_mask, utf32_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf32_output); - res.count += pos; - return res; - } - if (pos < size) { - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf32_output); - if (res.error) { // In case of error, we want the error position - res.count += pos; - return res; - } else { // In case of success, we want the number of word written - utf32_output += res.count; - } - } - return result(error_code::SUCCESS, utf32_output - start); - } - - simdutf_really_inline bool errors() const { - return this->error.any_bits_set_anywhere(); - } - -}; // struct utf8_checker -} // namespace utf8_to_utf32 -} // unnamed namespace -} // namespace westmere -} // namespace simdutf -/* end file src/generic/utf8_to_utf32/utf8_to_utf32.h */ -/* begin file src/generic/utf32.h */ -#include - -namespace simdutf { -namespace westmere { -namespace { -namespace utf32 { - -template T min(T a, T b) { return a <= b ? a : b; } - -simdutf_really_inline size_t utf8_length_from_utf32(const char32_t *input, - size_t length) { - using vector_u32 = simd32; - - const char32_t *start = input; - - // we add up to three ones in a single iteration (see the vectorized loop in - // section #2 below) - const size_t max_increment = 3; - - const size_t N = vector_u32::ELEMENTS; - -#if SIMDUTF_SIMD_HAS_UNSIGNED_CMP - const auto v_0000007f = vector_u32::splat(0x0000007f); - const auto v_000007ff = vector_u32::splat(0x000007ff); - const auto v_0000ffff = vector_u32::splat(0x0000ffff); -#else - const auto v_ffffff80 = vector_u32::splat(0xffffff80); - const auto v_fffff800 = vector_u32::splat(0xfffff800); - const auto v_ffff0000 = vector_u32::splat(0xffff0000); - const auto one = vector_u32::splat(1); -#endif // SIMDUTF_SIMD_HAS_UNSIGNED_CMP - - size_t counter = 0; - - // 1. vectorized loop unrolled 4 times - { - // we use vector of uint32 counters, this is why this limit is used - const size_t max_iterations = - std::numeric_limits::max() / (max_increment * 4); - size_t blocks = length / (N * 4); - length -= blocks * (N * 4); - while (blocks != 0) { - const size_t iterations = min(blocks, max_iterations); - blocks -= iterations; - - simd32 acc = vector_u32::zero(); - for (size_t i = 0; i < iterations; i++) { - const auto in0 = vector_u32(input + 0 * N); - const auto in1 = vector_u32(input + 1 * N); - const auto in2 = vector_u32(input + 2 * N); - const auto in3 = vector_u32(input + 3 * N); - -#if SIMDUTF_SIMD_HAS_UNSIGNED_CMP - acc -= as_vector_u32(in0 > v_0000007f); - acc -= as_vector_u32(in1 > v_0000007f); - acc -= as_vector_u32(in2 > v_0000007f); - acc -= as_vector_u32(in3 > v_0000007f); - - acc -= as_vector_u32(in0 > v_000007ff); - acc -= as_vector_u32(in1 > v_000007ff); - acc -= as_vector_u32(in2 > v_000007ff); - acc -= as_vector_u32(in3 > v_000007ff); - - acc -= as_vector_u32(in0 > v_0000ffff); - acc -= as_vector_u32(in1 > v_0000ffff); - acc -= as_vector_u32(in2 > v_0000ffff); - acc -= as_vector_u32(in3 > v_0000ffff); -#else - acc += min(one, in0 & v_ffffff80); - acc += min(one, in1 & v_ffffff80); - acc += min(one, in2 & v_ffffff80); - acc += min(one, in3 & v_ffffff80); - - acc += min(one, in0 & v_fffff800); - acc += min(one, in1 & v_fffff800); - acc += min(one, in2 & v_fffff800); - acc += min(one, in3 & v_fffff800); - - acc += min(one, in0 & v_ffff0000); - acc += min(one, in1 & v_ffff0000); - acc += min(one, in2 & v_ffff0000); - acc += min(one, in3 & v_ffff0000); -#endif // SIMDUTF_SIMD_HAS_UNSIGNED_CMP - - input += 4 * N; - } - - counter += acc.sum(); - } - } - - // 2. vectorized loop for tail - { - const size_t max_iterations = - std::numeric_limits::max() / max_increment; - size_t blocks = length / N; - length -= blocks * N; - while (blocks != 0) { - const size_t iterations = min(blocks, max_iterations); - blocks -= iterations; - - auto acc = vector_u32::zero(); - for (size_t i = 0; i < iterations; i++) { - const auto in = vector_u32(input); - -#if SIMDUTF_SIMD_HAS_UNSIGNED_CMP - acc -= as_vector_u32(in > v_0000007f); - acc -= as_vector_u32(in > v_000007ff); - acc -= as_vector_u32(in > v_0000ffff); -#else - acc += min(one, in & v_ffffff80); - acc += min(one, in & v_fffff800); - acc += min(one, in & v_ffff0000); -#endif // SIMDUTF_SIMD_HAS_UNSIGNED_CMP - - input += N; - } - - counter += acc.sum(); - } - } - - const size_t consumed = input - start; - if (consumed != 0) { - // We don't count 0th bytes in the vectorized loops above, this - // is why we need to count them in the end. - counter += consumed; - } - - return counter + scalar::utf32::utf8_length_from_utf32(input, length); -} - -} // namespace utf32 -} // unnamed namespace -} // namespace westmere -} // namespace simdutf -/* end file src/generic/utf32.h */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 -/* begin file src/generic/utf8.h */ -namespace simdutf { -namespace westmere { -namespace { -namespace utf8 { - -using namespace simd; - -simdutf_really_inline size_t count_code_points(const char *in, size_t size) { - size_t pos = 0; - size_t count = 0; - for (; pos + 64 <= size; pos += 64) { - simd8x64 input(reinterpret_cast(in + pos)); - uint64_t utf8_continuation_mask = input.gt(-65); - count += count_ones(utf8_continuation_mask); - } - return count + scalar::utf8::count_code_points(in + pos, size - pos); -} - -#ifdef SIMDUTF_SIMD_HAS_BYTEMASK -simdutf_really_inline size_t count_code_points_bytemask(const char *in, - size_t size) { - using vector_i8 = simd8; - using vector_u8 = simd8; - using vector_u64 = simd64; - - constexpr size_t N = vector_i8::SIZE; - constexpr size_t max_iterations = 255 / 4; - - size_t pos = 0; - size_t count = 0; - - auto counters = vector_u64::zero(); - auto local = vector_u8::zero(); - size_t iterations = 0; - for (; pos + 4 * N <= size; pos += 4 * N) { - const auto input0 = - simd8::load(reinterpret_cast(in + pos + 0 * N)); - const auto input1 = - simd8::load(reinterpret_cast(in + pos + 1 * N)); - const auto input2 = - simd8::load(reinterpret_cast(in + pos + 2 * N)); - const auto input3 = - simd8::load(reinterpret_cast(in + pos + 3 * N)); - const auto mask0 = input0 > int8_t(-65); - const auto mask1 = input1 > int8_t(-65); - const auto mask2 = input2 > int8_t(-65); - const auto mask3 = input3 > int8_t(-65); - - local -= vector_u8(mask0); - local -= vector_u8(mask1); - local -= vector_u8(mask2); - local -= vector_u8(mask3); - - iterations += 1; - if (iterations == max_iterations) { - counters += sum_8bytes(local); - local = vector_u8::zero(); - iterations = 0; - } - } - - if (iterations > 0) { - count += local.sum_bytes(); - } - - count += counters.sum(); - - return count + scalar::utf8::count_code_points(in + pos, size - pos); -} -#endif // SIMDUTF_SIMD_HAS_BYTEMASK - -simdutf_really_inline size_t utf16_length_from_utf8(const char *in, - size_t size) { - size_t pos = 0; - size_t count = 0; - // This algorithm could no doubt be improved! - for (; pos + 64 <= size; pos += 64) { - simd8x64 input(reinterpret_cast(in + pos)); - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - // We count one word for anything that is not a continuation (so - // leading bytes). - count += 64 - count_ones(utf8_continuation_mask); - int64_t utf8_4byte = input.gteq_unsigned(240); - count += count_ones(utf8_4byte); - } - return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos); -} - -} // namespace utf8 -} // unnamed namespace -} // namespace westmere -} // namespace simdutf -/* end file src/generic/utf8.h */ -#endif // SIMDUTF_FEATURE_UTF8 -#if SIMDUTF_FEATURE_UTF16 -/* begin file src/generic/utf16.h */ -namespace simdutf { -namespace westmere { -namespace { -namespace utf16 { - -template -simdutf_really_inline size_t count_code_points(const char16_t *in, - size_t size) { - size_t pos = 0; - size_t count = 0; - for (; pos < size / 32 * 32; pos += 32) { - simd16x32 input(reinterpret_cast(in + pos)); - if (!match_system(big_endian)) { - input.swap_bytes(); - } - uint64_t not_pair = input.not_in_range(0xDC00, 0xDFFF); - count += count_ones(not_pair) / 2; - } - return count + - scalar::utf16::count_code_points(in + pos, size - pos); -} - -template -simdutf_really_inline size_t utf8_length_from_utf16(const char16_t *in, - size_t size) { - size_t pos = 0; - size_t count = 0; - // This algorithm could no doubt be improved! - for (; pos < size / 32 * 32; pos += 32) { - simd16x32 input(reinterpret_cast(in + pos)); - if (!match_system(big_endian)) { - input.swap_bytes(); - } - uint64_t ascii_mask = input.lteq(0x7F); - uint64_t twobyte_mask = input.lteq(0x7FF); - uint64_t not_pair_mask = input.not_in_range(0xD800, 0xDFFF); - - size_t ascii_count = count_ones(ascii_mask) / 2; - size_t twobyte_count = count_ones(twobyte_mask & ~ascii_mask) / 2; - size_t threebyte_count = count_ones(not_pair_mask & ~twobyte_mask) / 2; - size_t fourbyte_count = 32 - count_ones(not_pair_mask) / 2; - count += 2 * fourbyte_count + 3 * threebyte_count + 2 * twobyte_count + - ascii_count; - } - return count + scalar::utf16::utf8_length_from_utf16(in + pos, - size - pos); -} - -template -simdutf_really_inline size_t utf32_length_from_utf16(const char16_t *in, - size_t size) { - return count_code_points(in, size); -} - -simdutf_really_inline void -change_endianness_utf16(const char16_t *in, size_t size, char16_t *output) { - size_t pos = 0; - - while (pos < size / 32 * 32) { - simd16x32 input(reinterpret_cast(in + pos)); - input.swap_bytes(); - input.store(reinterpret_cast(output)); - pos += 32; - output += 32; - } - - scalar::utf16::change_endianness_utf16(in + pos, size - pos, output); -} - -} // namespace utf16 -} // unnamed namespace -} // namespace westmere -} // namespace simdutf -/* end file src/generic/utf16.h */ -/* begin file src/generic/utf16/utf8_length_from_utf16_bytemask.h */ -namespace simdutf { -namespace westmere { -namespace { -namespace utf16 { - -using namespace simd; - -template -simdutf_really_inline size_t utf8_length_from_utf16_bytemask(const char16_t *in, - size_t size) { - size_t pos = 0; - - using vector_u16 = simd16; - constexpr size_t N = vector_u16::ELEMENTS; - - const auto one = vector_u16::splat(1); - - auto v_count = vector_u16::zero(); - - // each char16 yields at least one byte - size_t count = size / N * N; - - // in a single iteration the increment is 0, 1 or 2, despite we have - // three additions - constexpr size_t max_iterations = 65535 / 2; - size_t iteration = max_iterations; - - for (; pos < size / N * N; pos += N) { - auto input = vector_u16::load(reinterpret_cast(in + pos)); - if (!match_system(big_endian)) { - input = input.swap_bytes(); - } - // 0xd800 .. 0xdbff - low surrogate - // 0xdc00 .. 0xdfff - high surrogate - const auto is_surrogate = ((input & uint16_t(0xf800)) == uint16_t(0xd800)); - - // c0 - chars that yield 2- or 3-byte UTF-8 codes - const auto c0 = min(input & uint16_t(0xff80), one); - - // c1 - chars that yield 3-byte UTF-8 codes (including surrogates) - const auto c1 = min(input & uint16_t(0xf800), one); - - /* - Explanation how the counting works. - - In the case of a non-surrogate character we count: - * always 1 -- see how `count` is initialized above; - * c0 = 1 if the current char yields 2 or 3 bytes; - * c1 = 1 if the current char yields 3 bytes. - - Thus, we always have correct count for the current char: - from 1, 2 or 3 bytes. - - A trickier part is how we count surrogate pairs. Whether - we encounter a surrogate (low or high), we count it as - 3 chars and then minus 1 (`is_surrogate` is -1 or 0). - Each surrogate char yields 2. A surrogate pair, that - is a low surrogate followed by a high one, yields - the expected 4 bytes. - - It also correctly handles cases when low surrogate is - processed by the this loop, but high surrogate is counted - by the scalar procedure. The scalar procedure uses exactly - the described approach, thanks to that for valid UTF-16 - strings it always count correctly. - */ - v_count += c0; - v_count += c1; - v_count += vector_u16(is_surrogate); - - iteration -= 1; - if (iteration == 0) { - count += v_count.sum(); - v_count = vector_u16::zero(); - iteration = max_iterations; - } - } - - if (iteration > 0) { - count += v_count.sum(); - } - - return count + scalar::utf16::utf8_length_from_utf16(in + pos, - size - pos); -} - -} // namespace utf16 -} // unnamed namespace -} // namespace westmere -} // namespace simdutf -/* end file src/generic/utf16/utf8_length_from_utf16_bytemask.h */ -#endif // SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING -/* begin file src/generic/validate_utf16.h */ -namespace simdutf { -namespace westmere { -namespace { -namespace utf16 { -/* - UTF-16 validation - -------------------------------------------------- - - In UTF-16 code units in range 0xD800 to 0xDFFF have special meaning. - - In a vectorized algorithm we want to examine the most significant - nibble in order to select a fast path. If none of highest nibbles - are 0xD (13), than we are sure that UTF-16 chunk in a vector - register is valid. - - Let us analyze what we need to check if the nibble is 0xD. The - value of the preceding nibble determines what we have: - - 0xd000 .. 0xd7ff - a valid word - 0xd800 .. 0xdbff - low surrogate - 0xdc00 .. 0xdfff - high surrogate - - Other constraints we have to consider: - - there must not be two consecutive low surrogates (0xd800 .. 0xdbff) - - there must not be two consecutive high surrogates (0xdc00 .. 0xdfff) - - there must not be sole low surrogate nor high surrogate - - We are going to build three bitmasks based on the 3rd nibble: - - V = valid word, - - L = low surrogate (0xd800 .. 0xdbff) - - H = high surrogate (0xdc00 .. 0xdfff) - - 0 1 2 3 4 5 6 7 <--- word index - [ V | L | H | L | H | V | V | L ] - 1 0 0 0 0 1 1 0 - V = valid masks - 0 1 0 1 0 0 0 1 - L = low surrogate - 0 0 1 0 1 0 0 0 - H high surrogate - - - 1 0 0 0 0 1 1 0 V = valid masks - 0 1 0 1 0 0 0 0 a = L & (H >> 1) - 0 0 1 0 1 0 0 0 b = a << 1 - 1 1 1 1 1 1 1 0 c = V | a | b - ^ - the last bit can be zero, we just consume 7 - code units and recheck this word in the next iteration -*/ -template -const result validate_utf16_with_errors(const char16_t *input, size_t size) { - if (simdutf_unlikely(size == 0)) { - return result(error_code::SUCCESS, 0); - } - - const char16_t *start = input; - const char16_t *end = input + size; - - const auto v_d8 = simd8::splat(0xd8); - const auto v_f8 = simd8::splat(0xf8); - const auto v_fc = simd8::splat(0xfc); - const auto v_dc = simd8::splat(0xdc); - - while (input + simd16::SIZE * 2 < end) { - // 0. Load data: since the validation takes into account only higher - // byte of each word, we compress the two vectors into one which - // consists only the higher bytes. - auto in0 = simd16(input); - auto in1 = - simd16(input + simd16::SIZE / sizeof(char16_t)); - - // Function `utf16_gather_high_bytes` consumes two vectors of UTF-16 - // and yields a single vector having only higher bytes of characters. - const auto in = utf16_gather_high_bytes(in0, in1); - - // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy). - const auto surrogates_wordmask = (in & v_f8) == v_d8; - const uint16_t surrogates_bitmask = - static_cast(surrogates_wordmask.to_bitmask()); - if (surrogates_bitmask == 0x0000) { - input += 16; - } else { - // 2. We have some surrogates that have to be distinguished: - // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF) - // - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF) - // - // Fact: high surrogate has 11th bit set (3rd bit in the higher byte) - - // V - non-surrogate code units - // V = not surrogates_wordmask - const uint16_t V = static_cast(~surrogates_bitmask); - - // H - word-mask for high surrogates: the six highest bits are 0b1101'11 - const auto vH = (in & v_fc) == v_dc; - const uint16_t H = static_cast(vH.to_bitmask()); - - // L - word mask for low surrogates - // L = not H and surrogates_wordmask - const uint16_t L = static_cast(~H & surrogates_bitmask); - - const uint16_t a = static_cast( - L & (H >> 1)); // A low surrogate must be followed by high one. - // (A low surrogate placed in the 7th register's word - // is an exception we handle.) - const uint16_t b = static_cast( - a << 1); // Just mark that the opinput - startite fact is hold, - // thanks to that we have only two masks for valid case. - const uint16_t c = static_cast( - V | a | b); // Combine all the masks into the final one. - - if (c == 0xffff) { - // The whole input register contains valid UTF-16, i.e., - // either single code units or proper surrogate pairs. - input += 16; - } else if (c == 0x7fff) { - // The 15 lower code units of the input register contains valid UTF-16. - // The 15th word may be either a low or high surrogate. It the next - // iteration we 1) check if the low surrogate is followed by a high - // one, 2) reject sole high surrogate. - input += 15; - } else { - return result(error_code::SURROGATE, input - start); - } - } - } - - return result(error_code::SUCCESS, input - start); -} - -} // namespace utf16 -} // unnamed namespace -} // namespace westmere -} // namespace simdutf -/* end file src/generic/validate_utf16.h */ -#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/generic/utf8_to_latin1/utf8_to_latin1.h */ -namespace simdutf { -namespace westmere { -namespace { -namespace utf8_to_latin1 { -using namespace simd; - -simdutf_really_inline simd8 -check_special_cases(const simd8 input, const simd8 prev1) { - // For UTF-8 to Latin 1, we can allow any ASCII character, and any - // continuation byte, but the non-ASCII leading bytes must be 0b11000011 or - // 0b11000010 and nothing else. - // - // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) - // Bit 1 = Too Long (ASCII followed by continuation) - // Bit 2 = Overlong 3-byte - // Bit 4 = Surrogate - // Bit 5 = Overlong 2-byte - // Bit 7 = Two Continuations - constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ - // 11______ 11______ - constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ - constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ - constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ - constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ - constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ - constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ - constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; - // 11110101 1000____ - // 1111011_ 1000____ - // 11111___ 1000____ - constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ - constexpr const uint8_t FORBIDDEN = 0xff; - - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // 0_______ ________ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - TOO_LONG, - // 10______ ________ - TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, - // 1100____ ________ - TOO_SHORT | OVERLONG_2, - // 1101____ ________ - FORBIDDEN, - // 1110____ ________ - FORBIDDEN, - // 1111____ ________ - FORBIDDEN); - constexpr const uint8_t CARRY = - TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . - const simd8 byte_1_low = - (prev1 & 0x0F) - .lookup_16( - // ____0000 ________ - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - // ____0001 ________ - CARRY | OVERLONG_2, - // ____001_ ________ - CARRY, CARRY, - - // ____0100 ________ - FORBIDDEN, - // ____0101 ________ - FORBIDDEN, - // ____011_ ________ - FORBIDDEN, FORBIDDEN, - - // ____1___ ________ - FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN, - // ____1101 ________ - FORBIDDEN, FORBIDDEN, FORBIDDEN); - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ________ 0_______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - TOO_SHORT, TOO_SHORT, - - // ________ 1000____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | - OVERLONG_4, - // ________ 1001____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - // ________ 101_____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - - // ________ 11______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); - return (byte_1_high & byte_1_low & byte_2_high); -} - -struct validating_transcoder { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; - - validating_transcoder() : error(uint8_t(0)) {} - // - // Check whether the current bytes are valid UTF-8. - // - simdutf_really_inline void check_utf8_bytes(const simd8 input, - const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ - // lead bytes (2, 3, 4-byte leads become large positive numbers instead of - // small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - this->error |= check_special_cases(input, prev1); - } - - simdutf_really_inline size_t convert(const char *in, size_t size, - char *latin1_output) { - size_t pos = 0; - char *start{latin1_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 16 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 16; margin--) { - leading_byte += (int8_t(in[margin - 1]) > - -65); // twos complement of -65 is 1011 1111 ... - } - // If the input is long enough, then we have that margin-1 is the eight last - // leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store((int8_t *)latin1_output); - latin1_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - uint64_t utf8_continuation_mask = - input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in - // this case, we also have ASCII to account for. - if (utf8_continuation_mask & 1) { - return 0; // error - } - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_latin1( - in + pos, utf8_end_of_code_point_mask, latin1_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - return 0; - } - if (pos < size) { - size_t howmany = - scalar::utf8_to_latin1::convert(in + pos, size - pos, latin1_output); - if (howmany == 0) { - return 0; - } - latin1_output += howmany; - } - return latin1_output - start; - } - - simdutf_really_inline result convert_with_errors(const char *in, size_t size, - char *latin1_output) { - size_t pos = 0; - char *start{latin1_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 8 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > -65); - } - // If the input is long enough, then we have that margin-1 is the eight last - // leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store((int8_t *)latin1_output); - latin1_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - if (errors()) { - // rewind_and_convert_with_errors will seek a potential error from - // in+pos onward, with the ability to go back up to pos bytes, and - // read size-pos bytes forward. - result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( - pos, in + pos, size - pos, latin1_output); - res.count += pos; - return res; - } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_latin1( - in + pos, utf8_end_of_code_point_mask, latin1_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - // rewind_and_convert_with_errors will seek a potential error from in+pos - // onward, with the ability to go back up to pos bytes, and read size-pos - // bytes forward. - result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( - pos, in + pos, size - pos, latin1_output); - res.count += pos; - return res; - } - if (pos < size) { - // rewind_and_convert_with_errors will seek a potential error from in+pos - // onward, with the ability to go back up to pos bytes, and read size-pos - // bytes forward. - result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( - pos, in + pos, size - pos, latin1_output); - if (res.error) { // In case of error, we want the error position - res.count += pos; - return res; - } else { // In case of success, we want the number of word written - latin1_output += res.count; - } - } - return result(error_code::SUCCESS, latin1_output - start); - } - - simdutf_really_inline bool errors() const { - return this->error.any_bits_set_anywhere(); - } - -}; // struct utf8_checker -} // namespace utf8_to_latin1 -} // unnamed namespace -} // namespace westmere -} // namespace simdutf -/* end file src/generic/utf8_to_latin1/utf8_to_latin1.h */ -/* begin file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ -namespace simdutf { -namespace westmere { -namespace { -namespace utf8_to_latin1 { -using namespace simd; - -simdutf_really_inline size_t convert_valid(const char *in, size_t size, - char *latin1_output) { - size_t pos = 0; - char *start{latin1_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the last - // 16 bytes, and if the data is valid, then it is entirely safe because 16 - // UTF-8 bytes generate much more than 8 bytes. However, you cannot generally - // assume that you have valid UTF-8 input, so we are going to go back from the - // end counting 8 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > - -65); // twos complement of -65 is 1011 1111 ... - } - // If the input is long enough, then we have that margin-1 is the eight last - // leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store((int8_t *)latin1_output); - latin1_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, it - // is not good enough. - uint64_t utf8_continuation_mask = - input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in - // this case, we also have ASCII to account for. - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_latin1( - in + pos, utf8_end_of_code_point_mask, latin1_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (pos < size) { - size_t howmany = scalar::utf8_to_latin1::convert_valid(in + pos, size - pos, - latin1_output); - latin1_output += howmany; - } - return latin1_output - start; -} - -} // namespace utf8_to_latin1 -} // namespace -} // namespace westmere -} // namespace simdutf - // namespace simdutf -/* end file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING -/* begin file src/generic/validate_utf32.h */ -namespace simdutf { -namespace westmere { -namespace { -namespace utf32 { - -simdutf_really_inline bool validate(const char32_t *input, size_t size) { - if (simdutf_unlikely(size == 0)) { - // empty input is valid UTF-32. protect the implementation from - // handling nullptr - return true; - } - - const char32_t *end = input + size; - - using vector_u32 = simd32; - - const auto standardmax = vector_u32::splat(0x10ffff); - const auto offset = vector_u32::splat(0xffff2000); - const auto standardoffsetmax = vector_u32::splat(0xfffff7ff); - auto currentmax = vector_u32::zero(); - auto currentoffsetmax = vector_u32::zero(); - - constexpr size_t N = vector_u32::ELEMENTS; - - while (input + N < end) { - auto in = vector_u32(input); - if (!match_system(endianness::BIG)) { - in.swap_bytes(); - } - - currentmax = max(currentmax, in); - currentoffsetmax = max(currentoffsetmax, in + offset); - input += N; - } - - const auto too_large = currentmax > standardmax; - if (too_large.any()) { - return false; - } - - const auto surrogate = currentoffsetmax > standardoffsetmax; - if (surrogate.any()) { - return false; - } - - return scalar::utf32::validate(input, end - input); -} - -simdutf_really_inline result validate_with_errors(const char32_t *input, - size_t size) { - if (simdutf_unlikely(size == 0)) { - // empty input is valid UTF-32. protect the implementation from - // handling nullptr - return result(error_code::SUCCESS, 0); - } - - const char32_t *start = input; - const char32_t *end = input + size; - - using vector_u32 = simd32; - - const auto standardmax = vector_u32::splat(0x10ffff + 1); - const auto surrogate_mask = vector_u32::splat(0xfffff800); - const auto surrogate_byte = vector_u32::splat(0x0000d800); - - constexpr size_t N = vector_u32::ELEMENTS; - - while (input + N < end) { - auto in = vector_u32(input); - if (!match_system(endianness::BIG)) { - in.swap_bytes(); - } - - const auto too_large = in >= standardmax; - const auto surrogate = (in & surrogate_mask) == surrogate_byte; - - const auto combined = too_large | surrogate; - if (simdutf_unlikely(combined.any())) { - const size_t consumed = input - start; - auto sr = scalar::utf32::validate_with_errors(input, end - input); - sr.count += consumed; - - return sr; - } - - input += N; - } - - const size_t consumed = input - start; - auto sr = scalar::utf32::validate_with_errors(input, end - input); - sr.count += consumed; - - return sr; -} - -} // namespace utf32 -} // unnamed namespace -} // namespace westmere -} // namespace simdutf -/* end file src/generic/validate_utf32.h */ -#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_BASE64 -/* begin file src/generic/base64.h */ -/** - * References and further reading: - * - * Wojciech Muła, Daniel Lemire, Base64 encoding and decoding at almost the - * speed of a memory copy, Software: Practice and Experience 50 (2), 2020. - * https://arxiv.org/abs/1910.05109 - * - * Wojciech Muła, Daniel Lemire, Faster Base64 Encoding and Decoding using AVX2 - * Instructions, ACM Transactions on the Web 12 (3), 2018. - * https://arxiv.org/abs/1704.00605 - * - * Simon Josefsson. 2006. The Base16, Base32, and Base64 Data Encodings. - * https://tools.ietf.org/html/rfc4648. (2006). Internet Engineering Task Force, - * Request for Comments: 4648. - * - * Alfred Klomp. 2014a. Fast Base64 encoding/decoding with SSE vectorization. - * http://www.alfredklomp.com/programming/sse-base64/. (2014). - * - * Alfred Klomp. 2014b. Fast Base64 stream encoder/decoder in C99, with SIMD - * acceleration. https://github.com/aklomp/base64. (2014). - * - * Hanson Char. 2014. A Fast and Correct Base 64 Codec. (2014). - * https://aws.amazon.com/blogs/developer/a-fast-and-correct-base-64-codec/ - * - * Nick Kopp. 2013. Base64 Encoding on a GPU. - * https://www.codeproject.com/Articles/276993/Base-Encoding-on-a-GPU. (2013). - */ -namespace simdutf { -namespace westmere { -namespace { -namespace base64 { - -/* - The following template function implements API for Base64 decoding. - - An implementation is responsible for providing the `block64` type and - associated methods that perform actual conversion. Please refer - to any vectorized implementation to learn the API of these procedures. -*/ -template -full_result -compress_decode_base64(char *dst, const chartype *src, size_t srclen, - base64_options options, - last_chunk_handling_options last_chunk_options) { - const uint8_t *to_base64 = base64_url ? tables::base64::to_base64_url_value - : tables::base64::to_base64_value; - size_t equallocation = - srclen; // location of the first padding character if any - // skip trailing spaces - while (!ignore_garbage && srclen > 0 && - scalar::base64::is_eight_byte(src[srclen - 1]) && - to_base64[uint8_t(src[srclen - 1])] == 64) { - srclen--; - } - size_t equalsigns = 0; - if (!ignore_garbage && srclen > 0 && src[srclen - 1] == '=') { - equallocation = srclen - 1; - srclen--; - equalsigns = 1; - // skip trailing spaces - while (srclen > 0 && scalar::base64::is_eight_byte(src[srclen - 1]) && - to_base64[uint8_t(src[srclen - 1])] == 64) { - srclen--; - } - if (srclen > 0 && src[srclen - 1] == '=') { - equallocation = srclen - 1; - srclen--; - equalsigns = 2; - } - } - if (srclen == 0) { - if (!ignore_garbage && equalsigns > 0) { - if (last_chunk_options == last_chunk_handling_options::strict) { - return {BASE64_INPUT_REMAINDER, 0, 0}; - } else if (last_chunk_options == - last_chunk_handling_options::stop_before_partial) { - return {SUCCESS, 0, 0}; - } - return {INVALID_BASE64_CHARACTER, equallocation, 0}; - } - return {SUCCESS, 0, 0}; - } - char *end_of_safe_64byte_zone = - (srclen + 3) / 4 * 3 >= 63 ? dst + (srclen + 3) / 4 * 3 - 63 : dst; - - const chartype *const srcinit = src; - const char *const dstinit = dst; - const chartype *const srcend = src + srclen; - - constexpr size_t block_size = 6; - static_assert(block_size >= 2, "block_size must be at least two"); - char buffer[block_size * 64]; - char *bufferptr = buffer; - if (srclen >= 64) { - const chartype *const srcend64 = src + srclen - 64; - while (src <= srcend64) { - block64 b(src); - src += 64; - uint64_t error = 0; - const uint64_t badcharmask = - b.to_base64_mask(&error); - if (!ignore_garbage && error) { - src -= 64; - const size_t error_offset = trailing_zeroes(error); - return {error_code::INVALID_BASE64_CHARACTER, - size_t(src - srcinit + error_offset), size_t(dst - dstinit)}; - } - if (badcharmask != 0) { - bufferptr += b.compress_block(badcharmask, bufferptr); - } else if (bufferptr != buffer) { - b.copy_block(bufferptr); - bufferptr += 64; - } else { - if (dst >= end_of_safe_64byte_zone) { - b.base64_decode_block_safe(dst); - } else { - b.base64_decode_block(dst); - } - dst += 48; - } - if (bufferptr >= (block_size - 1) * 64 + buffer) { - for (size_t i = 0; i < (block_size - 2); i++) { - base64_decode_block(dst, buffer + i * 64); - dst += 48; - } - if (dst >= end_of_safe_64byte_zone) { - base64_decode_block_safe(dst, buffer + (block_size - 2) * 64); - } else { - base64_decode_block(dst, buffer + (block_size - 2) * 64); - } - dst += 48; - std::memcpy(buffer, buffer + (block_size - 1) * 64, - 64); // 64 might be too much - bufferptr -= (block_size - 1) * 64; - } - } - } - - char *buffer_start = buffer; - // Optimization note: if this is almost full, then it is worth our - // time, otherwise, we should just decode directly. - int last_block = (int)((bufferptr - buffer_start) % 64); - if (last_block != 0 && srcend - src + last_block >= 64) { - - while ((bufferptr - buffer_start) % 64 != 0 && src < srcend) { - uint8_t val = to_base64[uint8_t(*src)]; - *bufferptr = char(val); - if (!ignore_garbage && - (!scalar::base64::is_eight_byte(*src) || val > 64)) { - return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), - size_t(dst - dstinit)}; - } - bufferptr += (val <= 63); - src++; - } - } - - for (; buffer_start + 64 <= bufferptr; buffer_start += 64) { - if (dst >= end_of_safe_64byte_zone) { - base64_decode_block_safe(dst, buffer_start); - } else { - base64_decode_block(dst, buffer_start); - } - dst += 48; - } - if ((bufferptr - buffer_start) % 64 != 0) { - while (buffer_start + 4 < bufferptr) { - uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + - (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + - (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + - (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) - << 8; -#if !SIMDUTF_IS_BIG_ENDIAN - triple = scalar::u32_swap_bytes(triple); -#endif - std::memcpy(dst, &triple, 3); - - dst += 3; - buffer_start += 4; - } - if (buffer_start + 4 <= bufferptr) { - uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + - (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + - (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + - (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) - << 8; -#if !SIMDUTF_IS_BIG_ENDIAN - triple = scalar::u32_swap_bytes(triple); -#endif - std::memcpy(dst, &triple, 3); - - dst += 3; - buffer_start += 4; - } - // we may have 1, 2 or 3 bytes left and we need to decode them so let us - // backtrack - int leftover = int(bufferptr - buffer_start); - while (leftover > 0) { - if (!ignore_garbage) { - while (to_base64[uint8_t(*(src - 1))] == 64) { - src--; - } - } else { - while (to_base64[uint8_t(*(src - 1))] >= 64) { - src--; - } - } - src--; - leftover--; - } - } - if (src < srcend + equalsigns) { - full_result r = scalar::base64::base64_tail_decode( - dst, src, srcend - src, equalsigns, options, last_chunk_options); - r.input_count += size_t(src - srcinit); - if (r.error == error_code::INVALID_BASE64_CHARACTER || - r.error == error_code::BASE64_EXTRA_BITS) { - return r; - } else { - r.output_count += size_t(dst - dstinit); - } - if (!ignore_garbage && last_chunk_options != stop_before_partial && - r.error == error_code::SUCCESS && equalsigns > 0) { - // additional checks - if ((r.output_count % 3 == 0) || - ((r.output_count % 3) + 1 + equalsigns != 4)) { - r.error = error_code::INVALID_BASE64_CHARACTER; - r.input_count = equallocation; - } - } - return r; - } - if (!ignore_garbage && equalsigns > 0) { - if ((size_t(dst - dstinit) % 3 == 0) || - ((size_t(dst - dstinit) % 3) + 1 + equalsigns != 4)) { - return {INVALID_BASE64_CHARACTER, equallocation, size_t(dst - dstinit)}; - } - } - return {SUCCESS, srclen, size_t(dst - dstinit)}; -} - -} // namespace base64 -} // unnamed namespace -} // namespace westmere -} // namespace simdutf -/* end file src/generic/base64.h */ -#endif // SIMDUTF_FEATURE_BASE64 - -// -// Implementation-specific overrides -// - -namespace simdutf { -namespace westmere { - -#if SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_warn_unused int -implementation::detect_encodings(const char *input, - size_t length) const noexcept { - // If there is a BOM, then we trust it. - auto bom_encoding = simdutf::BOM::check_bom(input, length); - if (bom_encoding != encoding_type::unspecified) { - return bom_encoding; - } - - int out = 0; - uint32_t utf16_err = (length % 2); - uint32_t utf32_err = (length % 4); - uint32_t ends_with_high = 0; - const auto v_d8 = simd8::splat(0xd8); - const auto v_f8 = simd8::splat(0xf8); - const auto v_fc = simd8::splat(0xfc); - const auto v_dc = simd8::splat(0xdc); - const __m128i standardmax = _mm_set1_epi32(0x10ffff); - const __m128i offset = _mm_set1_epi32(0xffff2000); - const __m128i standardoffsetmax = _mm_set1_epi32(0xfffff7ff); - __m128i currentmax = _mm_setzero_si128(); - __m128i currentoffsetmax = _mm_setzero_si128(); - - utf8_checker c{}; - buf_block_reader<64> reader(reinterpret_cast(input), length); - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - // utf8 checks - c.check_next_input(in); - - // utf16le checks - auto in0 = simd16(in.chunks[0]); - auto in1 = simd16(in.chunks[1]); - const auto t0 = in0.shr<8>(); - const auto t1 = in1.shr<8>(); - const auto packed1 = simd16::pack(t0, t1); - auto in2 = simd16(in.chunks[2]); - auto in3 = simd16(in.chunks[3]); - const auto t2 = in2.shr<8>(); - const auto t3 = in3.shr<8>(); - const auto packed2 = simd16::pack(t2, t3); - - const auto surrogates_wordmask_lo = (packed1 & v_f8) == v_d8; - const auto surrogates_wordmask_hi = (packed2 & v_f8) == v_d8; - const uint32_t surrogates_bitmask = - (surrogates_wordmask_hi.to_bitmask() << 16) | - surrogates_wordmask_lo.to_bitmask(); - const auto vL_lo = (packed1 & v_fc) == v_dc; - const auto vL_hi = (packed2 & v_fc) == v_dc; - const uint32_t L = (vL_hi.to_bitmask() << 16) | vL_lo.to_bitmask(); - const uint32_t H = L ^ surrogates_bitmask; - utf16_err |= (((H << 1) | ends_with_high) != L); - ends_with_high = (H & 0x80000000) != 0; - - // utf32le checks - currentmax = _mm_max_epu32(in.chunks[0], currentmax); - currentoffsetmax = - _mm_max_epu32(_mm_add_epi32(in.chunks[0], offset), currentoffsetmax); - currentmax = _mm_max_epu32(in.chunks[1], currentmax); - currentoffsetmax = - _mm_max_epu32(_mm_add_epi32(in.chunks[1], offset), currentoffsetmax); - currentmax = _mm_max_epu32(in.chunks[2], currentmax); - currentoffsetmax = - _mm_max_epu32(_mm_add_epi32(in.chunks[2], offset), currentoffsetmax); - currentmax = _mm_max_epu32(in.chunks[3], currentmax); - currentoffsetmax = - _mm_max_epu32(_mm_add_epi32(in.chunks[3], offset), currentoffsetmax); - - reader.advance(); - } - - uint8_t block[64]{}; - size_t idx = reader.block_index(); - std::memcpy(block, &input[idx], length - idx); - simd::simd8x64 in(block); - c.check_next_input(in); - - // utf16le last block check - auto in0 = simd16(in.chunks[0]); - auto in1 = simd16(in.chunks[1]); - const auto t0 = in0.shr<8>(); - const auto t1 = in1.shr<8>(); - const auto packed1 = simd16::pack(t0, t1); - auto in2 = simd16(in.chunks[2]); - auto in3 = simd16(in.chunks[3]); - const auto t2 = in2.shr<8>(); - const auto t3 = in3.shr<8>(); - const auto packed2 = simd16::pack(t2, t3); - - const auto surrogates_wordmask_lo = (packed1 & v_f8) == v_d8; - const auto surrogates_wordmask_hi = (packed2 & v_f8) == v_d8; - const uint32_t surrogates_bitmask = - (surrogates_wordmask_hi.to_bitmask() << 16) | - surrogates_wordmask_lo.to_bitmask(); - const auto vL_lo = (packed1 & v_fc) == v_dc; - const auto vL_hi = (packed2 & v_fc) == v_dc; - const uint32_t L = (vL_hi.to_bitmask() << 16) | vL_lo.to_bitmask(); - const uint32_t H = L ^ surrogates_bitmask; - utf16_err |= (((H << 1) | ends_with_high) != L); - // this is required to check for last byte ending in high and end of input - // is reached - ends_with_high = (H & 0x80000000) != 0; - utf16_err |= ends_with_high; - - // utf32le last block check - currentmax = _mm_max_epu32(in.chunks[0], currentmax); - currentoffsetmax = - _mm_max_epu32(_mm_add_epi32(in.chunks[0], offset), currentoffsetmax); - currentmax = _mm_max_epu32(in.chunks[1], currentmax); - currentoffsetmax = - _mm_max_epu32(_mm_add_epi32(in.chunks[1], offset), currentoffsetmax); - currentmax = _mm_max_epu32(in.chunks[2], currentmax); - currentoffsetmax = - _mm_max_epu32(_mm_add_epi32(in.chunks[2], offset), currentoffsetmax); - currentmax = _mm_max_epu32(in.chunks[3], currentmax); - currentoffsetmax = - _mm_max_epu32(_mm_add_epi32(in.chunks[3], offset), currentoffsetmax); - - reader.advance(); - - c.check_eof(); - bool is_valid_utf8 = !c.errors(); - __m128i is_zero = - _mm_xor_si128(_mm_max_epu32(currentmax, standardmax), standardmax); - utf32_err |= (_mm_test_all_zeros(is_zero, is_zero) == 0); - - is_zero = _mm_xor_si128(_mm_max_epu32(currentoffsetmax, standardoffsetmax), - standardoffsetmax); - utf32_err |= (_mm_test_all_zeros(is_zero, is_zero) == 0); - if (is_valid_utf8) { - out |= encoding_type::UTF8; - } - if (utf16_err == 0) { - out |= encoding_type::UTF16_LE; - } - if (utf32_err == 0) { - out |= encoding_type::UTF32_LE; - } - return out; -} -#endif // SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_warn_unused bool -implementation::validate_utf8(const char *buf, size_t len) const noexcept { - return westmere::utf8_validation::generic_validate_utf8(buf, len); -} -#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 -simdutf_warn_unused result implementation::validate_utf8_with_errors( - const char *buf, size_t len) const noexcept { - return westmere::utf8_validation::generic_validate_utf8_with_errors(buf, len); -} -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_ASCII -simdutf_warn_unused bool -implementation::validate_ascii(const char *buf, size_t len) const noexcept { - return westmere::ascii_validation::generic_validate_ascii(buf, len); -} -#endif // SIMDUTF_FEATURE_ASCII - -#if SIMDUTF_FEATURE_ASCII -simdutf_warn_unused result implementation::validate_ascii_with_errors( - const char *buf, size_t len) const noexcept { - return westmere::ascii_validation::generic_validate_ascii_with_errors(buf, - len); -} -#endif // SIMDUTF_FEATURE_ASCII - -#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_warn_unused bool -implementation::validate_utf16le(const char16_t *buf, - size_t len) const noexcept { - if (simdutf_unlikely(len == 0)) { - // empty input is valid UTF-16. protect the implementation from - // handling nullptr - return true; - } - const auto res = - westmere::utf16::validate_utf16_with_errors(buf, len); - if (res.is_err()) { - return false; - } - - if (res.count == len) - return true; - - return scalar::utf16::validate(buf + res.count, - len - res.count); -} -#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused bool -implementation::validate_utf16be(const char16_t *buf, - size_t len) const noexcept { - if (simdutf_unlikely(len == 0)) { - // empty input is valid UTF-16. protect the implementation from - // handling nullptr - return true; - } - const auto res = - westmere::utf16::validate_utf16_with_errors(buf, len); - if (res.is_err()) { - return false; - } - - if (res.count == len) - return true; - - return scalar::utf16::validate(buf + res.count, - len - res.count); -} - -simdutf_warn_unused result implementation::validate_utf16le_with_errors( - const char16_t *buf, size_t len) const noexcept { - const result res = - westmere::utf16::validate_utf16_with_errors(buf, len); - if (res.count != len) { - const result scalar_res = - scalar::utf16::validate_with_errors( - buf + res.count, len - res.count); - return result(scalar_res.error, res.count + scalar_res.count); - } else { - return res; - } -} - -simdutf_warn_unused result implementation::validate_utf16be_with_errors( - const char16_t *buf, size_t len) const noexcept { - const result res = - westmere::utf16::validate_utf16_with_errors(buf, len); - if (res.count != len) { - result scalar_res = scalar::utf16::validate_with_errors( - buf + res.count, len - res.count); - return result(scalar_res.error, res.count + scalar_res.count); - } else { - return res; - } -} - -void implementation::to_well_formed_utf16le(const char16_t *input, size_t len, - char16_t *output) const noexcept { - return utf16fix_sse(input, len, output); -} - -void implementation::to_well_formed_utf16be(const char16_t *input, size_t len, - char16_t *output) const noexcept { - return utf16fix_sse(input, len, output); -} -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_warn_unused bool -implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept { - return utf32::validate(buf, len); -} -#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused result implementation::validate_utf32_with_errors( - const char32_t *buf, size_t len) const noexcept { - return utf32::validate_with_errors(buf, len); -} -#endif // SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( - const char *buf, size_t len, char *utf8_output) const noexcept { - - std::pair ret = - sse_convert_latin1_to_utf8(buf, len, utf8_output); - size_t converted_chars = ret.second - utf8_output; - - if (ret.first != buf + len) { - const size_t scalar_converted_chars = scalar::latin1_to_utf8::convert( - ret.first, len - (ret.first - buf), ret.second); - converted_chars += scalar_converted_chars; - } - - return converted_chars; -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - std::pair ret = - sse_convert_latin1_to_utf16(buf, len, utf16_output); - if (ret.first == nullptr) { - return 0; - } - size_t converted_chars = ret.second - utf16_output; - if (ret.first != buf + len) { - const size_t scalar_converted_chars = - scalar::latin1_to_utf16::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_converted_chars == 0) { - return 0; - } - converted_chars += scalar_converted_chars; - } - return converted_chars; -} - -simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - std::pair ret = - sse_convert_latin1_to_utf16(buf, len, utf16_output); - if (ret.first == nullptr) { - return 0; - } - size_t converted_chars = ret.second - utf16_output; - if (ret.first != buf + len) { - const size_t scalar_converted_chars = - scalar::latin1_to_utf16::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_converted_chars == 0) { - return 0; - } - converted_chars += scalar_converted_chars; - } - return converted_chars; -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept { - std::pair ret = - sse_convert_latin1_to_utf32(buf, len, utf32_output); - if (ret.first == nullptr) { - return 0; - } - size_t converted_chars = ret.second - utf32_output; - if (ret.first != buf + len) { - const size_t scalar_converted_chars = scalar::latin1_to_utf32::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_converted_chars == 0) { - return 0; - } - converted_chars += scalar_converted_chars; - } - return converted_chars; -} -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept { - utf8_to_latin1::validating_transcoder converter; - return converter.convert(buf, len, latin1_output); -} - -simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors( - const char *buf, size_t len, char *latin1_output) const noexcept { - utf8_to_latin1::validating_transcoder converter; - return converter.convert_with_errors(buf, len, latin1_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept { - return westmere::utf8_to_latin1::convert_valid(buf, len, latin1_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - utf8_to_utf16::validating_transcoder converter; - return converter.convert(buf, len, utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - utf8_to_utf16::validating_transcoder converter; - return converter.convert(buf, len, utf16_output); -} - -simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - utf8_to_utf16::validating_transcoder converter; - return converter.convert_with_errors(buf, len, - utf16_output); -} - -simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - utf8_to_utf16::validating_transcoder converter; - return converter.convert_with_errors(buf, len, utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le( - const char *input, size_t size, char16_t *utf16_output) const noexcept { - return utf8_to_utf16::convert_valid(input, size, - utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be( - const char *input, size_t size, char16_t *utf16_output) const noexcept { - return utf8_to_utf16::convert_valid(input, size, - utf16_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::convert_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept { - utf8_to_utf32::validating_transcoder converter; - return converter.convert(buf, len, utf32_output); -} - -simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors( - const char *buf, size_t len, char32_t *utf32_output) const noexcept { - utf8_to_utf32::validating_transcoder converter; - return converter.convert_with_errors(buf, len, utf32_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( - const char *input, size_t size, char32_t *utf32_output) const noexcept { - return utf8_to_utf32::convert_valid(input, size, utf32_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - std::pair ret = - sse_convert_utf16_to_latin1(buf, len, latin1_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - latin1_output; - - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_latin1::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - std::pair ret = - sse_convert_utf16_to_latin1(buf, len, latin1_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - latin1_output; - - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_latin1::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused result -implementation::convert_utf16le_to_latin1_with_errors( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - std::pair ret = - sse_convert_utf16_to_latin1_with_errors( - buf, len, latin1_output); - if (ret.first.error) { - return ret.first; - } // Can return directly since scalar fallback already found correct - // ret.first.count - if (ret.first.count != len) { // All good so far, but not finished - result scalar_res = - scalar::utf16_to_latin1::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - latin1_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused result -implementation::convert_utf16be_to_latin1_with_errors( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - std::pair ret = - sse_convert_utf16_to_latin1_with_errors(buf, len, - latin1_output); - if (ret.first.error) { - return ret.first; - } // Can return directly since scalar fallback already found correct - // ret.first.count - if (ret.first.count != len) { // All good so far, but not finished - result scalar_res = - scalar::utf16_to_latin1::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - latin1_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - // optimization opportunity: we could provide an optimized function. - return convert_utf16be_to_latin1(buf, len, latin1_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - // optimization opportunity: we could provide an optimized function. - return convert_utf16le_to_latin1(buf, len, latin1_output); -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - std::pair ret = - sse_convert_utf16_to_utf8(buf, len, utf8_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf8_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_utf8::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - std::pair ret = - sse_convert_utf16_to_utf8(buf, len, utf8_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf8_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_utf8::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - westmere::sse_convert_utf16_to_utf8_with_errors( - buf, len, utf8_output); - if (ret.first.error) { - return ret.first; - } // Can return directly since scalar fallback already found correct - // ret.first.count - if (ret.first.count != len) { // All good so far, but not finished - result scalar_res = - scalar::utf16_to_utf8::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - utf8_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - westmere::sse_convert_utf16_to_utf8_with_errors( - buf, len, utf8_output); - if (ret.first.error) { - return ret.first; - } // Can return directly since scalar fallback already found correct - // ret.first.count - if (ret.first.count != len) { // All good so far, but not finished - result scalar_res = - scalar::utf16_to_utf8::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - utf8_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return convert_utf16le_to_utf8(buf, len, utf8_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return convert_utf16be_to_utf8(buf, len, utf8_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( - const char32_t *buf, size_t len, char *latin1_output) const noexcept { - std::pair ret = - sse_convert_utf32_to_latin1(buf, len, latin1_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - latin1_output; - // if (ret.first != buf + len) { - if (ret.first < buf + len) { - const size_t scalar_saved_bytes = scalar::utf32_to_latin1::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors( - const char32_t *buf, size_t len, char *latin1_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - westmere::sse_convert_utf32_to_latin1_with_errors(buf, len, - latin1_output); - if (ret.first.count != len) { - result scalar_res = scalar::utf32_to_latin1::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - latin1_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1( - const char32_t *buf, size_t len, char *latin1_output) const noexcept { - // optimization opportunity: we could provide an optimized function. - return convert_utf32_to_latin1(buf, len, latin1_output); -} -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_output) const noexcept { - std::pair ret = - sse_convert_utf32_to_utf8(buf, len, utf8_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf8_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = scalar::utf32_to_utf8::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( - const char32_t *buf, size_t len, char *utf8_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - westmere::sse_convert_utf32_to_utf8_with_errors(buf, len, utf8_output); - if (ret.first.count != len) { - result scalar_res = scalar::utf32_to_utf8::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - utf8_output; // Set count to the number of 8-bit code units written - return ret.first; -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - std::pair ret = - sse_convert_utf16_to_utf32(buf, len, utf32_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf32_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_utf32::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - std::pair ret = - sse_convert_utf16_to_utf32(buf, len, utf32_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf32_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_utf32::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - westmere::sse_convert_utf16_to_utf32_with_errors( - buf, len, utf32_output); - if (ret.first.error) { - return ret.first; - } // Can return directly since scalar fallback already found correct - // ret.first.count - if (ret.first.count != len) { // All good so far, but not finished - result scalar_res = - scalar::utf16_to_utf32::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - utf32_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - westmere::sse_convert_utf16_to_utf32_with_errors( - buf, len, utf32_output); - if (ret.first.error) { - return ret.first; - } // Can return directly since scalar fallback already found correct - // ret.first.count - if (ret.first.count != len) { // All good so far, but not finished - result scalar_res = - scalar::utf16_to_utf32::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - utf32_output; // Set count to the number of 8-bit code units written - return ret.first; -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_output) const noexcept { - return convert_utf32_to_utf8(buf, len, utf8_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - std::pair ret = - sse_convert_utf32_to_utf16(buf, len, utf16_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf16_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf32_to_utf16::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - std::pair ret = - sse_convert_utf32_to_utf16(buf, len, utf16_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf16_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf32_to_utf16::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - westmere::sse_convert_utf32_to_utf16_with_errors( - buf, len, utf16_output); - if (ret.first.count != len) { - result scalar_res = - scalar::utf32_to_utf16::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - utf16_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - westmere::sse_convert_utf32_to_utf16_with_errors( - buf, len, utf16_output); - if (ret.first.count != len) { - result scalar_res = - scalar::utf32_to_utf16::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - utf16_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - return convert_utf32_to_utf16le(buf, len, utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - return convert_utf32_to_utf16be(buf, len, utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return convert_utf16le_to_utf32(buf, len, utf32_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return convert_utf16be_to_utf32(buf, len, utf32_output); -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 -void implementation::change_endianness_utf16(const char16_t *input, - size_t length, - char16_t *output) const noexcept { - utf16::change_endianness_utf16(input, length, output); -} - -simdutf_warn_unused size_t implementation::count_utf16le( - const char16_t *input, size_t length) const noexcept { - return utf16::count_code_points(input, length); -} - -simdutf_warn_unused size_t implementation::count_utf16be( - const char16_t *input, size_t length) const noexcept { - return utf16::count_code_points(input, length); -} -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 -simdutf_warn_unused size_t -implementation::count_utf8(const char *input, size_t length) const noexcept { - return utf8::count_code_points_bytemask(input, length); -} -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::latin1_length_from_utf8( - const char *buf, size_t len) const noexcept { - return count_utf8(buf, len); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t implementation::utf8_length_from_utf16le( - const char16_t *input, size_t length) const noexcept { - return utf16::utf8_length_from_utf16_bytemask(input, - length); -} - -simdutf_warn_unused size_t implementation::utf8_length_from_utf16be( - const char16_t *input, size_t length) const noexcept { - return utf16::utf8_length_from_utf16_bytemask(input, length); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::utf8_length_from_latin1( - const char *input, size_t len) const noexcept { - const uint8_t *str = reinterpret_cast(input); - size_t answer = len / sizeof(__m128i) * sizeof(__m128i); - size_t i = 0; - if (answer >= 2048) { // long strings optimization - __m128i two_64bits = _mm_setzero_si128(); - while (i + sizeof(__m128i) <= len) { - __m128i runner = _mm_setzero_si128(); - size_t iterations = (len - i) / sizeof(__m128i); - if (iterations > 255) { - iterations = 255; - } - size_t max_i = i + iterations * sizeof(__m128i) - sizeof(__m128i); - for (; i + 4 * sizeof(__m128i) <= max_i; i += 4 * sizeof(__m128i)) { - __m128i input1 = _mm_loadu_si128((const __m128i *)(str + i)); - __m128i input2 = - _mm_loadu_si128((const __m128i *)(str + i + sizeof(__m128i))); - __m128i input3 = - _mm_loadu_si128((const __m128i *)(str + i + 2 * sizeof(__m128i))); - __m128i input4 = - _mm_loadu_si128((const __m128i *)(str + i + 3 * sizeof(__m128i))); - __m128i input12 = - _mm_add_epi8(_mm_cmpgt_epi8(_mm_setzero_si128(), input1), - _mm_cmpgt_epi8(_mm_setzero_si128(), input2)); - __m128i input34 = - _mm_add_epi8(_mm_cmpgt_epi8(_mm_setzero_si128(), input3), - _mm_cmpgt_epi8(_mm_setzero_si128(), input4)); - __m128i input1234 = _mm_add_epi8(input12, input34); - runner = _mm_sub_epi8(runner, input1234); - } - for (; i <= max_i; i += sizeof(__m128i)) { - __m128i more_input = _mm_loadu_si128((const __m128i *)(str + i)); - runner = _mm_sub_epi8(runner, - _mm_cmpgt_epi8(_mm_setzero_si128(), more_input)); - } - two_64bits = - _mm_add_epi64(two_64bits, _mm_sad_epu8(runner, _mm_setzero_si128())); - } - answer += - _mm_extract_epi64(two_64bits, 0) + _mm_extract_epi64(two_64bits, 1); - } else if (answer > 0) { // short string optimization - for (; i + 2 * sizeof(__m128i) <= len; i += 2 * sizeof(__m128i)) { - __m128i latin = _mm_loadu_si128((const __m128i *)(input + i)); - uint16_t non_ascii = (uint16_t)_mm_movemask_epi8(latin); - answer += count_ones(non_ascii); - latin = _mm_loadu_si128((const __m128i *)(input + i) + 1); - non_ascii = (uint16_t)_mm_movemask_epi8(latin); - answer += count_ones(non_ascii); - } - for (; i + sizeof(__m128i) <= len; i += sizeof(__m128i)) { - __m128i latin = _mm_loadu_si128((const __m128i *)(input + i)); - uint16_t non_ascii = (uint16_t)_mm_movemask_epi8(latin); - answer += count_ones(non_ascii); - } - } - return answer + scalar::latin1::utf8_length_from_latin1( - reinterpret_cast(str + i), len - i); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::utf32_length_from_utf16le( - const char16_t *input, size_t length) const noexcept { - return utf16::utf32_length_from_utf16(input, length); -} - -simdutf_warn_unused size_t implementation::utf32_length_from_utf16be( - const char16_t *input, size_t length) const noexcept { - return utf16::utf32_length_from_utf16(input, length); -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t implementation::utf16_length_from_utf8( - const char *input, size_t length) const noexcept { - return utf8::utf16_length_from_utf8_bytemask(input, length); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::utf8_length_from_utf32( - const char32_t *input, size_t length) const noexcept { - return utf32::utf8_length_from_utf32(input, length); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::utf16_length_from_utf32( - const char32_t *input, size_t length) const noexcept { - const __m128i v_00000000 = _mm_setzero_si128(); - const __m128i v_ffff0000 = _mm_set1_epi32((uint32_t)0xffff0000); - size_t pos = 0; - size_t count = 0; - for (; pos + 4 <= length; pos += 4) { - __m128i in = _mm_loadu_si128((__m128i *)(input + pos)); - const __m128i surrogate_bytemask = - _mm_cmpeq_epi32(_mm_and_si128(in, v_ffff0000), v_00000000); - const uint16_t surrogate_bitmask = - static_cast(_mm_movemask_epi8(surrogate_bytemask)); - size_t surrogate_count = (16 - count_ones(surrogate_bitmask)) / 4; - count += 4 + surrogate_count; - } - return count + - scalar::utf32::utf16_length_from_utf32(input + pos, length - pos); -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::utf32_length_from_utf8( - const char *input, size_t length) const noexcept { - return utf8::count_code_points(input, length); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_BASE64 -simdutf_warn_unused result implementation::base64_to_binary( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - if (options & base64_url) { - if (options == base64_options::base64_url_accept_garbage) { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } else { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } - } else { - if (options == base64_options::base64_default_accept_garbage) { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } else { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } - } -} - -simdutf_warn_unused full_result implementation::base64_to_binary_details( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - if (options & base64_url) { - if (options == base64_options::base64_url_accept_garbage) { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } else { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } - } else { - if (options == base64_options::base64_default_accept_garbage) { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } else { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } - } -} - -simdutf_warn_unused result implementation::base64_to_binary( - const char16_t *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - if (options & base64_url) { - if (options == base64_options::base64_url_accept_garbage) { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } else { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } - } else { - if (options == base64_options::base64_default_accept_garbage) { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } else { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } - } -} - -simdutf_warn_unused full_result implementation::base64_to_binary_details( - const char16_t *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - if (options & base64_url) { - if (options == base64_options::base64_url_accept_garbage) { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } else { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } - } else { - if (options == base64_options::base64_default_accept_garbage) { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } else { - return base64::compress_decode_base64( - output, input, length, options, last_chunk_options); - } - } -} - -size_t implementation::binary_to_base64(const char *input, size_t length, - char *output, - base64_options options) const noexcept { - if (options & base64_url) { - return encode_base64(output, input, length, options); - } else { - return encode_base64(output, input, length, options); - } -} -#endif // SIMDUTF_FEATURE_BASE64 - -} // namespace westmere -} // namespace simdutf - -/* begin file src/simdutf/westmere/end.h */ -#if SIMDUTF_CAN_ALWAYS_RUN_WESTMERE -// nothing needed. -#else -SIMDUTF_UNTARGET_REGION -#endif - -#undef SIMDUTF_SIMD_HAS_BYTEMASK -/* end file src/simdutf/westmere/end.h */ -/* end file src/westmere/implementation.cpp */ -#endif -#if SIMDUTF_IMPLEMENTATION_LSX -/* begin file src/lsx/implementation.cpp */ -/* begin file src/simdutf/lsx/begin.h */ -// redefining SIMDUTF_IMPLEMENTATION to "lsx" -// #define SIMDUTF_IMPLEMENTATION lsx -#define SIMDUTF_SIMD_HAS_UNSIGNED_CMP 1 -/* end file src/simdutf/lsx/begin.h */ -namespace simdutf { -namespace lsx { -namespace { -#ifndef SIMDUTF_LSX_H - #error "lsx.h must be included" -#endif -using namespace simd; - -#if SIMDUTF_FEATURE_UTF8 -// convert vmskltz/vmskgez/vmsknz to -// simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes index -const uint8_t lsx_1_2_utf8_bytes_mask[] = { - 0, 1, 4, 5, 16, 17, 20, 21, 64, 65, 68, 69, 80, 81, 84, - 85, 2, 3, 6, 7, 18, 19, 22, 23, 66, 67, 70, 71, 82, 83, - 86, 87, 8, 9, 12, 13, 24, 25, 28, 29, 72, 73, 76, 77, 88, - 89, 92, 93, 10, 11, 14, 15, 26, 27, 30, 31, 74, 75, 78, 79, - 90, 91, 94, 95, 32, 33, 36, 37, 48, 49, 52, 53, 96, 97, 100, - 101, 112, 113, 116, 117, 34, 35, 38, 39, 50, 51, 54, 55, 98, 99, - 102, 103, 114, 115, 118, 119, 40, 41, 44, 45, 56, 57, 60, 61, 104, - 105, 108, 109, 120, 121, 124, 125, 42, 43, 46, 47, 58, 59, 62, 63, - 106, 107, 110, 111, 122, 123, 126, 127, 128, 129, 132, 133, 144, 145, 148, - 149, 192, 193, 196, 197, 208, 209, 212, 213, 130, 131, 134, 135, 146, 147, - 150, 151, 194, 195, 198, 199, 210, 211, 214, 215, 136, 137, 140, 141, 152, - 153, 156, 157, 200, 201, 204, 205, 216, 217, 220, 221, 138, 139, 142, 143, - 154, 155, 158, 159, 202, 203, 206, 207, 218, 219, 222, 223, 160, 161, 164, - 165, 176, 177, 180, 181, 224, 225, 228, 229, 240, 241, 244, 245, 162, 163, - 166, 167, 178, 179, 182, 183, 226, 227, 230, 231, 242, 243, 246, 247, 168, - 169, 172, 173, 184, 185, 188, 189, 232, 233, 236, 237, 248, 249, 252, 253, - 170, 171, 174, 175, 186, 187, 190, 191, 234, 235, 238, 239, 250, 251, 254, - 255}; -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32 -simdutf_really_inline __m128i lsx_swap_bytes(__m128i vec) { - return __lsx_vshuf4i_b(vec, 0b10110001); -} -#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_ASCII || SIMDUTF_FEATURE_DETECT_ENCODING || \ - SIMDUTF_FEATURE_UTF8 -simdutf_really_inline bool is_ascii(const simd8x64 &input) { - return input.is_ascii(); -} -#endif // SIMDUTF_FEATURE_ASCII || SIMDUTF_FEATURE_DETECT_ENCODING || - // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_really_inline simd8 -must_be_2_3_continuation(const simd8 prev2, - const simd8 prev3) { - simd8 is_third_byte = prev2 >= uint8_t(0b11100000u); - simd8 is_fourth_byte = prev3 >= uint8_t(0b11110000u); - return is_third_byte ^ is_fourth_byte; -} -#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 && (SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32) -// common functions for utf8 conversions -simdutf_really_inline __m128i convert_utf8_3_byte_to_utf16(__m128i in) { - // Low half contains 10bbbbbb|10cccccc - // High half contains 1110aaaa|1110aaaa - const v16u8 sh = {2, 1, 5, 4, 8, 7, 11, 10, 0, 0, 3, 3, 6, 6, 9, 9}; - const v8u16 v0fff = {0xfff, 0xfff, 0xfff, 0xfff, 0xfff, 0xfff, 0xfff, 0xfff}; - - __m128i perm = __lsx_vshuf_b(__lsx_vldi(0), in, (__m128i)sh); - // 1110aaaa => aaaa0000 - __m128i perm_high = __lsx_vslli_b(__lsx_vbsrl_v(perm, 8), 4); - // 10bbbbbb 10cccccc => 0010bbbb bbcccccc - __m128i composed = __lsx_vbitsel_v(__lsx_vsrli_h(perm, 2), /* perm >> 2*/ - perm, __lsx_vrepli_h(0x3f) /* 0x003f */); - // 0010bbbb bbcccccc => aaaabbbb bbcccccc - composed = __lsx_vbitsel_v(perm_high, composed, (__m128i)v0fff); - - return composed; -} - -simdutf_really_inline __m128i convert_utf8_2_byte_to_utf16(__m128i in) { - // 10bbbbb 110aaaaa => 00bbbbb 000aaaaa - __m128i composed = __lsx_vand_v(in, __lsx_vldi(0x3f)); - // 00bbbbbb 000aaaaa => 00000aaa aabbbbbb - composed = __lsx_vbitsel_v( - __lsx_vsrli_h(__lsx_vslli_h(composed, 8), 2), /* (aaaaa << 8) >> 2 */ - __lsx_vsrli_h(composed, 8), /* bbbbbb >> 8 */ - __lsx_vrepli_h(0x3f)); /* 0x003f */ - return composed; -} - -simdutf_really_inline __m128i -convert_utf8_1_to_2_byte_to_utf16(__m128i in, size_t shufutf8_idx) { - // Converts 6 1-2 byte UTF-8 characters to 6 UTF-16 characters. - // This is a relatively easy scenario - // we process SIX (6) input code-code units. The max length in bytes of six - // code code units spanning between 1 and 2 bytes each is 12 bytes. - __m128i sh = - __lsx_vld(reinterpret_cast( - simdutf::tables::utf8_to_utf16::shufutf8[shufutf8_idx]), - 0); - // Shuffle - // 1 byte: 00000000 0bbbbbbb - // 2 byte: 110aaaaa 10bbbbbb - __m128i perm = __lsx_vshuf_b(__lsx_vldi(0), in, sh); - // 1 byte: 00000000 0bbbbbbb - // 2 byte: 00000000 00bbbbbb - __m128i ascii = __lsx_vand_v(perm, __lsx_vrepli_h(0x7f)); // 6 or 7 bits - // 1 byte: 00000000 00000000 - // 2 byte: 00000aaa aa000000 - const __m128i v1f00 = lsx_splat_u16(0x1f00); - __m128i composed = __lsx_vsrli_h(__lsx_vand_v(perm, v1f00), 2); // 5 bits - // Combine with a shift right accumulate - // 1 byte: 00000000 0bbbbbbb - // 2 byte: 00000aaa aabbbbbb - composed = __lsx_vadd_h(ascii, composed); - return composed; -} -#endif // SIMDUTF_FEATURE_UTF8 && (SIMDUTF_FEATURE_UTF16 || - // SIMDUTF_FEATURE_UTF32) - -#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING -/* begin file src/lsx/lsx_validate_utf16.cpp */ -template -simd8 utf16_gather_high_bytes(const simd16 in0, - const simd16 in1) { - if (big_endian) { - const auto mask = simd16(0x00ff); - const auto t0 = in0 & mask; - const auto t1 = in1 & mask; - - return simd16::pack(t0, t1); - } else { - return simd16::pack_shifted_right<8>(in0, in1); - } -} -/* end file src/lsx/lsx_validate_utf16.cpp */ -#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING -#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING -/* begin file src/lsx/lsx_validate_utf32le.cpp */ -const char32_t *lsx_validate_utf32le(const char32_t *input, size_t size) { - const char32_t *end = input + size; - - __m128i offset = lsx_splat_u32(0xffff2000); - __m128i standardoffsetmax = lsx_splat_u32(0xfffff7ff); - __m128i standardmax = lsx_splat_u32(0x10ffff); - __m128i currentmax = lsx_splat_u32(0); - __m128i currentoffsetmax = lsx_splat_u32(0); - - while (input + 4 < end) { - __m128i in = __lsx_vld(reinterpret_cast(input), 0); - currentmax = __lsx_vmax_wu(in, currentmax); - // 0xD8__ + 0x2000 = 0xF8__ => 0xF8__ > 0xF7FF - currentoffsetmax = - __lsx_vmax_wu(__lsx_vadd_w(in, offset), currentoffsetmax); - - input += 4; - } - - __m128i is_zero = - __lsx_vxor_v(__lsx_vmax_wu(currentmax, standardmax), standardmax); - if (__lsx_bnz_v(is_zero)) { - return nullptr; - } - - is_zero = __lsx_vxor_v(__lsx_vmax_wu(currentoffsetmax, standardoffsetmax), - standardoffsetmax); - if (__lsx_bnz_v(is_zero)) { - return nullptr; - } - - return input; -} - -const result lsx_validate_utf32le_with_errors(const char32_t *input, - size_t size) { - const char32_t *start = input; - const char32_t *end = input + size; - - __m128i offset = lsx_splat_u32(0xffff2000); - __m128i standardoffsetmax = lsx_splat_u32(0xfffff7ff); - __m128i standardmax = lsx_splat_u32(0x10ffff); - __m128i currentmax = lsx_splat_u32(0); - __m128i currentoffsetmax = lsx_splat_u32(0); - - while (input + 4 < end) { - __m128i in = __lsx_vld(reinterpret_cast(input), 0); - currentmax = __lsx_vmax_wu(in, currentmax); - currentoffsetmax = - __lsx_vmax_wu(__lsx_vadd_w(in, offset), currentoffsetmax); - - __m128i is_zero = - __lsx_vxor_v(__lsx_vmax_wu(currentmax, standardmax), standardmax); - if (__lsx_bnz_v(is_zero)) { - return result(error_code::TOO_LARGE, input - start); - } - - is_zero = __lsx_vxor_v(__lsx_vmax_wu(currentoffsetmax, standardoffsetmax), - standardoffsetmax); - if (__lsx_bnz_v(is_zero)) { - return result(error_code::SURROGATE, input - start); - } - - input += 4; - } - - return result(error_code::SUCCESS, input - start); -} -/* end file src/lsx/lsx_validate_utf32le.cpp */ -#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/lsx/lsx_convert_latin1_to_utf8.cpp */ -/* - Returns a pair: the first unprocessed byte from buf and utf8_output - A scalar routing should carry on the conversion of the tail. -*/ - -std::pair -lsx_convert_latin1_to_utf8(const char *latin1_input, size_t len, - char *utf8_out) { - uint8_t *utf8_output = reinterpret_cast(utf8_out); - const char *end = latin1_input + len; - - __m128i zero = __lsx_vldi(0); - // We always write 16 bytes, of which more than the first 8 bytes - // are valid. A safety margin of 8 is more than sufficient. - while (end - latin1_input >= 16) { - __m128i in8 = __lsx_vld(reinterpret_cast(latin1_input), 0); - uint32_t ascii = __lsx_vpickve2gr_hu(__lsx_vmskgez_b(in8), 0); - if (ascii == 0xffff) { // ASCII fast path!!!! - __lsx_vst(in8, utf8_output, 0); - utf8_output += 16; - latin1_input += 16; - continue; - } - // We just fallback on UTF-16 code. This could be optimized/simplified - // further. - __m128i in16 = __lsx_vilvl_b(zero, in8); - // 1. prepare 2-byte values - // input 8-bit word : [aabb|bbbb] x 8 - // expected output : [1100|00aa|10bb|bbbb] x 8 - // t0 = [0000|00aa|bbbb|bb00] - __m128i t0 = __lsx_vslli_h(in16, 2); - // t1 = [0000|00aa|0000|0000] - __m128i t1 = __lsx_vand_v(t0, lsx_splat_u16(0x300)); - // t3 = [0000|00aa|00bb|bbbb] - __m128i t2 = __lsx_vbitsel_v(t1, in16, __lsx_vrepli_h(0x3f)); - // t4 = [1100|00aa|10bb|bbbb] - __m128i t3 = __lsx_vor_v(t2, __lsx_vreplgr2vr_h(uint16_t(0xc080))); - // merge ASCII and 2-byte codewords - __m128i one_byte_bytemask = __lsx_vsle_hu(in16, __lsx_vrepli_h(0x7F)); - __m128i utf8_unpacked = __lsx_vbitsel_v(t3, in16, one_byte_bytemask); - - const uint8_t *row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes - [lsx_1_2_utf8_bytes_mask[(ascii & 0xff)]][0]; - __m128i shuffle = __lsx_vld(row + 1, 0); - __m128i utf8_packed = __lsx_vshuf_b(zero, utf8_unpacked, shuffle); - - // store bytes - __lsx_vst(utf8_packed, utf8_output, 0); - // adjust pointers - latin1_input += 8; - utf8_output += row[0]; - - } // while - - return std::make_pair(latin1_input, reinterpret_cast(utf8_output)); -} -/* end file src/lsx/lsx_convert_latin1_to_utf8.cpp */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/lsx/lsx_convert_latin1_to_utf16.cpp */ -std::pair -lsx_convert_latin1_to_utf16le(const char *buf, size_t len, - char16_t *utf16_output) { - const char *end = buf + len; - - __m128i zero = __lsx_vldi(0); - while (end - buf >= 16) { - __m128i in8 = __lsx_vld(reinterpret_cast(buf), 0); - - __m128i inlow = __lsx_vilvl_b(zero, in8); - __m128i inhigh = __lsx_vilvh_b(zero, in8); - __lsx_vst(inlow, reinterpret_cast(utf16_output), 0); - __lsx_vst(inhigh, reinterpret_cast(utf16_output), 16); - - utf16_output += 16; - buf += 16; - } - - return std::make_pair(buf, utf16_output); -} - -std::pair -lsx_convert_latin1_to_utf16be(const char *buf, size_t len, - char16_t *utf16_output) { - const char *end = buf + len; - __m128i zero = __lsx_vldi(0); - while (end - buf >= 16) { - __m128i in8 = __lsx_vld(reinterpret_cast(buf), 0); - - __m128i inlow = __lsx_vilvl_b(in8, zero); - __m128i inhigh = __lsx_vilvh_b(in8, zero); - __lsx_vst(inlow, reinterpret_cast(utf16_output), 0); - __lsx_vst(inhigh, reinterpret_cast(utf16_output), 16); - utf16_output += 16; - buf += 16; - } - - return std::make_pair(buf, utf16_output); -} -/* end file src/lsx/lsx_convert_latin1_to_utf16.cpp */ -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/lsx/lsx_convert_latin1_to_utf32.cpp */ -std::pair -lsx_convert_latin1_to_utf32(const char *buf, size_t len, - char32_t *utf32_output) { - const char *end = buf + len; - - while (end - buf >= 16) { - __m128i in8 = __lsx_vld(reinterpret_cast(buf), 0); - - __m128i zero = __lsx_vldi(0); - __m128i in16low = __lsx_vilvl_b(zero, in8); - __m128i in16high = __lsx_vilvh_b(zero, in8); - __m128i in32_0 = __lsx_vilvl_h(zero, in16low); - __m128i in32_1 = __lsx_vilvh_h(zero, in16low); - __m128i in32_2 = __lsx_vilvl_h(zero, in16high); - __m128i in32_3 = __lsx_vilvh_h(zero, in16high); - - __lsx_vst(in32_0, reinterpret_cast(utf32_output), 0); - __lsx_vst(in32_1, reinterpret_cast(utf32_output + 4), 0); - __lsx_vst(in32_2, reinterpret_cast(utf32_output + 8), 0); - __lsx_vst(in32_3, reinterpret_cast(utf32_output + 12), 0); - - utf32_output += 16; - buf += 16; - } - - return std::make_pair(buf, utf32_output); -} -/* end file src/lsx/lsx_convert_latin1_to_utf32.cpp */ -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -/* begin file src/lsx/lsx_convert_utf8_to_utf16.cpp */ -// Convert up to 16 bytes from utf8 to utf16 using a mask indicating the -// end of the code points. Only the least significant 12 bits of the mask -// are accessed. -// It returns how many bytes were consumed (up to 16, usually 12). -template -size_t convert_masked_utf8_to_utf16(const char *input, - uint64_t utf8_end_of_code_point_mask, - char16_t *&utf16_output) { - // we use an approach where we try to process up to 12 input bytes. - // Why 12 input bytes and not 16? Because we are concerned with the size of - // the lookup tables. Also 12 is nicely divisible by two and three. - // - __m128i in = __lsx_vld(reinterpret_cast(input), 0); - const uint16_t input_utf8_end_of_code_point_mask = - utf8_end_of_code_point_mask & 0xfff; - // - // Optimization note: our main path below is load-latency dependent. Thus it - // is maybe beneficial to have fast paths that depend on branch prediction but - // have less latency. This results in more instructions but, potentially, also - // higher speeds. - - // We first try a few fast paths. - // The obvious first test is ASCII, which actually consumes the full 16. - if ((utf8_end_of_code_point_mask & 0xFFFF) == 0xFFFF) { - // We process in chunks of 16 bytes - // The routine in simd.h is reused. - simd8 temp{in}; - temp.store_ascii_as_utf16(utf16_output); - utf16_output += 16; // We wrote 16 16-bit characters. - return 16; // We consumed 16 bytes. - } - - uint64_t buffer[2]; - // 3 byte sequences are the next most common, as seen in CJK, which has long - // sequences of these. - if (input_utf8_end_of_code_point_mask == 0x924) { - // We want to take 4 3-byte UTF-8 code units and turn them into 4 2-byte - // UTF-16 code units. - __m128i composed = convert_utf8_3_byte_to_utf16(in); - // Byte swap if necessary - if (!match_system(big_endian)) { - composed = lsx_swap_bytes(composed); - } - - __lsx_vst(composed, reinterpret_cast(utf16_output), 0); - utf16_output += 4; // We wrote 4 16-bit characters. - return 12; // We consumed 12 bytes. - } - - // 2 byte sequences occur in short bursts in languages like Greek and Russian. - if ((utf8_end_of_code_point_mask & 0xFFFF) == 0xAAAA) { - // We want to take 6 2-byte UTF-8 code units and turn them into 6 2-byte - // UTF-16 code units. - __m128i composed = convert_utf8_2_byte_to_utf16(in); - // Byte swap if necessary - if (!match_system(big_endian)) { - composed = lsx_swap_bytes(composed); - } - - __lsx_vst(composed, reinterpret_cast(utf16_output), 0); - utf16_output += 6; // We wrote 6 16-bit characters. - return 12; // We consumed 12 bytes. - } - - /// We do not have a fast path available, or the fast path is unimportant, so - /// we fallback. - const uint8_t idx = simdutf::tables::utf8_to_utf16::utf8bigindex - [input_utf8_end_of_code_point_mask][0]; - - const uint8_t consumed = simdutf::tables::utf8_to_utf16::utf8bigindex - [input_utf8_end_of_code_point_mask][1]; - const __m128i zero = __lsx_vldi(0); - if (idx < 64) { - // SIX (6) input code-code units - // Convert to UTF-16 - __m128i composed = convert_utf8_1_to_2_byte_to_utf16(in, idx); - // Byte swap if necessary - if (!match_system(big_endian)) { - composed = lsx_swap_bytes(composed); - } - // Store - __lsx_vst(composed, reinterpret_cast(utf16_output), 0); - utf16_output += 6; // We wrote 6 16-bit characters. - return consumed; - } else if (idx < 145) { - // FOUR (4) input code-code units - // UTF-16 and UTF-32 use similar algorithms, but UTF-32 skips the narrowing. - __m128i sh = __lsx_vld(reinterpret_cast( - simdutf::tables::utf8_to_utf16::shufutf8[idx]), - 0); - // XXX: depending on the system scalar instructions might be faster. - // 1 byte: 00000000 00000000 0ccccccc - // 2 byte: 00000000 110bbbbb 10cccccc - // 3 byte: 1110aaaa 10bbbbbb 10cccccc - sh = __lsx_vand_v(sh, __lsx_vldi(0x1f)); - __m128i perm = __lsx_vshuf_b(zero, in, sh); - // 1 byte: 00000000 0ccccccc - // 2 byte: xx0bbbbb x0cccccc - // 3 byte: xxbbbbbb x0cccccc - __m128i lowperm = __lsx_vpickev_h(perm, perm); - // 1 byte: 00000000 00000000 - // 2 byte: 00000000 00000000 - // 3 byte: 00000000 1110aaaa - __m128i highperm = __lsx_vpickod_h(perm, perm); - // 3 byte: aaaa0000 00000000 - highperm = __lsx_vslli_h(highperm, 12); - // ASCII - // 1 byte: 00000000 0ccccccc - // 2+byte: 00000000 00cccccc - __m128i ascii = __lsx_vand_v(lowperm, __lsx_vrepli_h(0x7f)); - // 1 byte: 00000000 00000000 - // 2 byte: xx0bbbbb 00000000 - // 3 byte: xxbbbbbb 00000000 - __m128i middlebyte = __lsx_vand_v(lowperm, lsx_splat_u16(0xFF00)); - // 1 byte: 00000000 0ccccccc - // 2 byte: 0010bbbb bbcccccc - // 3 byte: 0010bbbb bbcccccc - __m128i composed = __lsx_vor_v(__lsx_vsrli_h(middlebyte, 2), ascii); - - __m128i v0fff = __lsx_vreplgr2vr_h(uint16_t(0xfff)); - // aaaabbbb bbcccccc - composed = __lsx_vbitsel_v(highperm, composed, v0fff); - - if (!match_system(big_endian)) { - composed = lsx_swap_bytes(composed); - } - - __lsx_vst(composed, reinterpret_cast(utf16_output), 0); - utf16_output += 4; // We wrote 4 16-bit codepoints - return consumed; - } else if (idx < 209) { - // THREE (3) input code-code units - if (input_utf8_end_of_code_point_mask == 0x888) { - // We want to take 3 4-byte UTF-8 code units and turn them into 3 4-byte - // UTF-16 pairs. Generating surrogate pairs is a little tricky though, but - // it is easier when we can assume they are all pairs. This version does - // not use the LUT, but 4 byte sequences are less common and the overhead - // of the extra memory access is less important than the early branch - // overhead in shorter sequences. - - __m128i expected_mask = - (__m128i)v16u8{0xf8, 0xc0, 0xc0, 0xc0, 0xf8, 0xc0, 0xc0, 0xc0, - 0xf8, 0xc0, 0xc0, 0xc0, 0x0, 0x0, 0x0, 0x0}; - __m128i expected = - (__m128i)v16u8{0xf0, 0x80, 0x80, 0x80, 0xf0, 0x80, 0x80, 0x80, - 0xf0, 0x80, 0x80, 0x80, 0x0, 0x0, 0x0, 0x0}; - __m128i check = __lsx_vseq_b(__lsx_vand_v(in, expected_mask), expected); - if (__lsx_bz_b(check)) - return 12; - // Swap byte pairs - // 10dddddd 10cccccc|10bbbbbb 11110aaa - // 10cccccc 10dddddd|11110aaa 10bbbbbb - __m128i swap = lsx_swap_bytes(in); - // Shift left 2 bits - // cccccc00 dddddd00 xxxxxxxx bbbbbb00 - __m128i shift = __lsx_vslli_b(swap, 2); - // Create a magic number containing the low 2 bits of the trail surrogate - // and all the corrections needed to create the pair. UTF-8 4b prefix = - // -0x0000|0xF000 surrogate offset = -0x0000|0x0040 (0x10000 << 6) - // surrogate high = +0x0000|0xD800 - // surrogate low = +0xDC00|0x0000 - // ------------------------------- - // = +0xDC00|0xE7C0 - __m128i magic = __lsx_vreplgr2vr_w(uint32_t(0xDC00E7C0)); - // Generate unadjusted trail surrogate minus lowest 2 bits - // xxxxxxxx xxxxxxxx|11110aaa bbbbbb00 - __m128i trail = __lsx_vbitsel_v(shift, swap, lsx_splat_u32(0x0000ff00)); - // Insert low 2 bits of trail surrogate to magic number for later - // 11011100 00000000 11100111 110000cc - __m128i magic_with_low_2 = __lsx_vor_v(__lsx_vsrli_w(shift, 30), magic); - - // Generate lead surrogate - // xxxxcccc ccdddddd|xxxxxxxx xxxxxxxx - // 000000cc ccdddddd|xxxxxxxx xxxxxxxx - __m128i lead = __lsx_vbitsel_v( - __lsx_vsrli_h(__lsx_vand_v(shift, __lsx_vldi(0x3F)), 4), swap, - __lsx_vrepli_h(0x3f /* 0x003f*/)); - - // Blend pairs - // 000000cc ccdddddd|11110aaa bbbbbb00 - __m128i blend = __lsx_vbitsel_v(lead, trail, lsx_splat_u32(0x0000FFFF)); - - // Add magic number to finish the result - // 110111CC CCDDDDDD|110110AA BBBBBBCC - __m128i composed = __lsx_vadd_h(blend, magic_with_low_2); - // Byte swap if necessary - if (!match_system(big_endian)) { - composed = lsx_swap_bytes(composed); - } - // __lsx_vst(composed, reinterpret_cast(utf16_output), 0); - __lsx_vst(composed, reinterpret_cast(buffer), 0); - std::memcpy(utf16_output, buffer, 12); - utf16_output += 6; // We 3 32-bit surrogate pairs. - return 12; // We consumed 12 bytes. - } - // 3 1-4 byte sequences - __m128i sh = __lsx_vld(reinterpret_cast( - simdutf::tables::utf8_to_utf16::shufutf8[idx]), - 0); - // 1 byte: 00000000 00000000 00000000 0ddddddd - // 3 byte: 00000000 00000000 110ccccc 10dddddd - // 3 byte: 00000000 1110bbbb 10cccccc 10dddddd - // 4 byte: 11110aaa 10bbbbbb 10cccccc 10dddddd - sh = __lsx_vand_v(sh, __lsx_vldi(0x1f)); - __m128i perm = __lsx_vshuf_b(zero, in, sh); - // added to fix issue https://github.com/simdutf/simdutf/issues/514 - // We only want to write 2 * 16-bit code units when that is actually what we - // have. Unfortunately, we cannot trust the input. So it is possible to get - // 0xff as an input byte and it should not result in a surrogate pair. We - // need to check for that. - uint32_t permbuffer[4]; - __lsx_vst(perm, permbuffer, 0); - // Mask the low and middle bytes - // 00000000 00000000 00000000 0ddddddd - __m128i ascii = __lsx_vand_v(perm, __lsx_vrepli_w(0x7f)); - // Because the surrogates need more work, the high surrogate is computed - // first. - __m128i middlehigh = __lsx_vslli_w(perm, 2); - // 00000000 00000000 00cccccc 00000000 - __m128i middlebyte = __lsx_vand_v(perm, lsx_splat_u32(0x00003F00)); - // Start assembling the sequence. Since the 4th byte is in the same position - // as it would be in a surrogate and there is no dependency, shift left - // instead of right. 3 byte: 00000000 10bbbbxx xxxxxxxx xxxxxxxx 4 byte: - // 11110aaa bbbbbbxx xxxxxxxx xxxxxxxx - __m128i ab = __lsx_vbitsel_v(middlehigh, perm, lsx_splat_u32(0xFF000000)); - // Top 16 bits contains the high ten bits of the surrogate pair before - // correction 3 byte: 00000000 10bbbbcc|cccc0000 00000000 4 byte: 11110aaa - // bbbbbbcc|cccc0000 00000000 - high 10 bits correct w/o correction - __m128i v_fffc0000 = __lsx_vreplgr2vr_w(uint32_t(0xFFFC0000)); - __m128i abc = __lsx_vbitsel_v(__lsx_vslli_w(middlebyte, 4), ab, v_fffc0000); - // Combine the low 6 or 7 bits by a shift right accumulate - // 3 byte: 00000000 00000010|bbbbcccc ccdddddd - low 16 bits correct - // 4 byte: 00000011 110aaabb|bbbbcccc ccdddddd - low 10 bits correct w/o - // correction - __m128i composed = __lsx_vor_v(ascii, __lsx_vsrli_w(abc, 6)); - // After this is for surrogates - // Blend the low and high surrogates - // 4 byte: 11110aaa bbbbbbcc|bbbbcccc ccdddddd - __m128i mixed = __lsx_vbitsel_v(abc, composed, lsx_splat_u32(0x0000FFFF)); - // Clear the upper 6 bits of the low surrogate. Don't clear the upper bits - // yet as 0x10000 was not subtracted from the codepoint yet. 4 byte: - // 11110aaa bbbbbbcc|000000cc ccdddddd - __m128i v_ffff03ff = __lsx_vreplgr2vr_w(uint32_t(0xFFFF03FF)); - __m128i masked_pair = __lsx_vand_v(mixed, v_ffff03ff); - // Correct the remaining UTF-8 prefix, surrogate offset, and add the - // surrogate prefixes in one magic 16-bit addition. similar magic number but - // without the continue byte adjust and halfword swapped UTF-8 4b prefix = - // -0xF000|0x0000 surrogate offset = -0x0040|0x0000 (0x10000 << 6) - // surrogate high = +0xD800|0x0000 - // surrogate low = +0x0000|0xDC00 - // ----------------------------------- - // = +0xE7C0|0xDC00 - __m128i magic = __lsx_vreplgr2vr_w(uint32_t(0xE7C0DC00)); - // 4 byte: 110110AA BBBBBBCC|110111CC CCDDDDDD - surrogate pair complete - __m128i surrogates = __lsx_vadd_w(masked_pair, magic); - // If the high bit is 1 (s32 less than zero), this needs a surrogate pair - __m128i is_pair = __lsx_vslt_w(perm, zero); - // Select either the 4 byte surrogate pair or the 2 byte solo codepoint - // 3 byte: 0xxxxxxx xxxxxxxx|bbbbcccc ccdddddd - // 4 byte: 110110AA BBBBBBCC|110111CC CCDDDDDD - __m128i selected = __lsx_vbitsel_v(composed, surrogates, is_pair); - // Byte swap if necessary - if (!match_system(big_endian)) { - selected = lsx_swap_bytes(selected); - } - // Attempting to shuffle and store would be complex, just scalarize. - uint32_t buffer_tmp[4]; - __lsx_vst(selected, buffer_tmp, 0); - // Test for the top bit of the surrogate mask. Remove due to issue 514 - // const uint32_t SURROGATE_MASK = match_system(big_endian) ? 0x80000000 : - // 0x00800000; - for (size_t i = 0; i < 3; i++) { - // Surrogate - // Used to be if (buffer[i] & SURROGATE_MASK) { - // See discussion above. - // patch for issue https://github.com/simdutf/simdutf/issues/514 - if ((permbuffer[i] & 0xf8000000) == 0xf0000000) { - utf16_output[0] = uint16_t(buffer_tmp[i] >> 16); - utf16_output[1] = uint16_t(buffer_tmp[i] & 0xFFFF); - utf16_output += 2; - } else { - utf16_output[0] = uint16_t(buffer_tmp[i] & 0xFFFF); - utf16_output++; - } - } - return consumed; - } else { - // here we know that there is an error but we do not handle errors - return 12; - } -} -/* end file src/lsx/lsx_convert_utf8_to_utf16.cpp */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -/* begin file src/lsx/lsx_convert_utf8_to_utf32.cpp */ -// Convert up to 12 bytes from utf8 to utf32 using a mask indicating the -// end of the code points. Only the least significant 12 bits of the mask -// are accessed. -// It returns how many bytes were consumed (up to 12). -size_t convert_masked_utf8_to_utf32(const char *input, - uint64_t utf8_end_of_code_point_mask, - char32_t *&utf32_out) { - // we use an approach where we try to process up to 12 input bytes. - // Why 12 input bytes and not 16? Because we are concerned with the size of - // the lookup tables. Also 12 is nicely divisible by two and three. - // - uint32_t *&utf32_output = reinterpret_cast(utf32_out); - __m128i in = __lsx_vld(reinterpret_cast(input), 0); - const uint16_t input_utf8_end_of_code_point_mask = - utf8_end_of_code_point_mask & 0xFFF; - // - // Optimization note: our main path below is load-latency dependent. Thus it - // is maybe beneficial to have fast paths that depend on branch prediction but - // have less latency. This results in more instructions but, potentially, also - // higher speeds. - // - // We first try a few fast paths. - if ((utf8_end_of_code_point_mask & 0xffff) == 0xffff) { - // We process in chunks of 16 bytes. - // use fast implementation in src/simdutf/arm64/simd.h - // Ideally the compiler can keep the tables in registers. - simd8 temp{in}; - temp.store_ascii_as_utf32_tbl(utf32_out); - utf32_output += 16; // We wrote 16 32-bit characters. - return 16; // We consumed 16 bytes. - } - __m128i zero = __lsx_vldi(0); - if (input_utf8_end_of_code_point_mask == 0x924) { - // We want to take 4 3-byte UTF-8 code units and turn them into 4 4-byte - // UTF-32 code units. Convert to UTF-16 - __m128i composed_utf16 = convert_utf8_3_byte_to_utf16(in); - __m128i utf32_low = __lsx_vilvl_h(zero, composed_utf16); - - __lsx_vst(utf32_low, reinterpret_cast(utf32_output), 0); - utf32_output += 4; // We wrote 4 32-bit characters. - return 12; // We consumed 12 bytes. - } - // 2 byte sequences occur in short bursts in languages like Greek and Russian. - if (input_utf8_end_of_code_point_mask == 0xaaa) { - // We want to take 6 2-byte UTF-8 code units and turn them into 6 4-byte - // UTF-32 code units. Convert to UTF-16 - __m128i composed_utf16 = convert_utf8_2_byte_to_utf16(in); - - __m128i utf32_low = __lsx_vilvl_h(zero, composed_utf16); - __m128i utf32_high = __lsx_vilvh_h(zero, composed_utf16); - - __lsx_vst(utf32_low, reinterpret_cast(utf32_output), 0); - __lsx_vst(utf32_high, reinterpret_cast(utf32_output), 16); - utf32_output += 6; - return 12; // We consumed 12 bytes. - } - /// Either no fast path or an unimportant fast path. - - const uint8_t idx = simdutf::tables::utf8_to_utf16::utf8bigindex - [input_utf8_end_of_code_point_mask][0]; - const uint8_t consumed = simdutf::tables::utf8_to_utf16::utf8bigindex - [input_utf8_end_of_code_point_mask][1]; - - if (idx < 64) { - // SIX (6) input code-code units - // Convert to UTF-16 - __m128i composed_utf16 = convert_utf8_1_to_2_byte_to_utf16(in, idx); - __m128i utf32_low = __lsx_vilvl_h(zero, composed_utf16); - __m128i utf32_high = __lsx_vilvh_h(zero, composed_utf16); - - __lsx_vst(utf32_low, reinterpret_cast(utf32_output), 0); - __lsx_vst(utf32_high, reinterpret_cast(utf32_output), 16); - utf32_output += 6; - return consumed; - } else if (idx < 145) { - // FOUR (4) input code-code units - // UTF-16 and UTF-32 use similar algorithms, but UTF-32 skips the narrowing. - __m128i sh = __lsx_vld(reinterpret_cast( - simdutf::tables::utf8_to_utf16::shufutf8[idx]), - 0); - // Shuffle - // 1 byte: 00000000 00000000 0ccccccc - // 2 byte: 00000000 110bbbbb 10cccccc - // 3 byte: 1110aaaa 10bbbbbb 10cccccc - sh = __lsx_vand_v(sh, __lsx_vldi(0x1f)); - __m128i perm = __lsx_vshuf_b(zero, in, sh); - // Split - // 00000000 00000000 0ccccccc - __m128i ascii = __lsx_vand_v(perm, __lsx_vrepli_w(0x7F)); // 6 or 7 bits - // Note: unmasked - // xxxxxxxx aaaaxxxx xxxxxxxx - __m128i high = - __lsx_vsrli_w(__lsx_vand_v(perm, __lsx_vldi(0xf)), 4); // 4 bits - // Use 16 bit bic instead of and. - // The top bits will be corrected later in the bsl - // 00000000 10bbbbbb 00000000 - __m128i middle = - __lsx_vand_v(perm, lsx_splat_u32(0x0000FF00)); // 5 or 6 bits - // Combine low and middle with shift right accumulate - // 00000000 00xxbbbb bbcccccc - __m128i lowmid = __lsx_vor_v(ascii, __lsx_vsrli_w(middle, 2)); - // Insert top 4 bits from high byte with bitwise select - // 00000000 aaaabbbb bbcccccc - __m128i composed = __lsx_vbitsel_v(lowmid, high, lsx_splat_u32(0x0000F000)); - __lsx_vst(composed, utf32_output, 0); - utf32_output += 4; // We wrote 4 32-bit characters. - return consumed; - } else if (idx < 209) { - // THREE (3) input code-code units - if (input_utf8_end_of_code_point_mask == 0x888) { - // We want to take 3 4-byte UTF-8 code units and turn them into 3 4-byte - // UTF-32 code units. This uses the same method as the fixed 3 byte - // version, reversing and shift left insert. However, there is no need for - // a shuffle mask now, just rev16 and rev32. - // - // This version does not use the LUT, but 4 byte sequences are less common - // and the overhead of the extra memory access is less important than the - // early branch overhead in shorter sequences, so it comes last. - - // Swap pairs of bytes - // 10dddddd|10cccccc|10bbbbbb|11110aaa - // 10cccccc 10dddddd|11110aaa 10bbbbbb - __m128i swap = lsx_swap_bytes(in); - // Shift left and insert - // xxxxcccc ccdddddd|xxxxxxxa aabbbbbb - __m128i merge1 = __lsx_vbitsel_v(__lsx_vsrli_h(swap, 2), swap, - __lsx_vrepli_h(0x3f /*0x003F*/)); - // Shift insert again - // xxxxxxxx xxxaaabb bbbbcccc ccdddddd - __m128i merge2 = - __lsx_vbitsel_v(__lsx_vslli_w(merge1, 12), /* merge1 << 12 */ - __lsx_vsrli_w(merge1, 16), /* merge1 >> 16 */ - lsx_splat_u32(0x00000FFF)); - // Clear the garbage - // 00000000 000aaabb bbbbcccc ccdddddd - __m128i composed = __lsx_vand_v(merge2, lsx_splat_u32(0x1FFFFF)); - // Store - __lsx_vst(composed, utf32_output, 0); - utf32_output += 3; // We wrote 3 32-bit characters. - return 12; // We consumed 12 bytes. - } - // Unlike UTF-16, doing a fast codepath doesn't have nearly as much benefit - // due to surrogates no longer being involved. - __m128i sh = __lsx_vld(reinterpret_cast( - simdutf::tables::utf8_to_utf16::shufutf8[idx]), - 0); - // 1 byte: 00000000 00000000 00000000 0ddddddd - // 2 byte: 00000000 00000000 110ccccc 10dddddd - // 3 byte: 00000000 1110bbbb 10cccccc 10dddddd - // 4 byte: 11110aaa 10bbbbbb 10cccccc 10dddddd - sh = __lsx_vand_v(sh, __lsx_vldi(0x1f)); - __m128i perm = __lsx_vshuf_b(zero, in, sh); - - // Ascii - __m128i ascii = __lsx_vand_v(perm, __lsx_vrepli_w(0x7F)); - __m128i middle = __lsx_vand_v(perm, lsx_splat_u32(0x00003f00)); - // 00000000 00000000 0000cccc ccdddddd - __m128i cd = __lsx_vor_v(__lsx_vsrli_w(middle, 2), ascii); - - __m128i correction = __lsx_vand_v(perm, lsx_splat_u32(0x00400000)); - __m128i corrected = __lsx_vadd_b(perm, __lsx_vsrli_w(correction, 1)); - // Insert twice - // 00000000 000aaabb bbbbxxxx xxxxxxxx - __m128i corrected_srli2 = - __lsx_vsrli_w(__lsx_vand_v(corrected, __lsx_vrepli_b(0x7)), 2); - __m128i ab = - __lsx_vbitsel_v(corrected_srli2, corrected, __lsx_vrepli_h(0x3f)); - ab = __lsx_vsrli_w(ab, 4); - // 00000000 000aaabb bbbbcccc ccdddddd - __m128i composed = __lsx_vbitsel_v(ab, cd, lsx_splat_u32(0x00000FFF)); - // Store - __lsx_vst(composed, utf32_output, 0); - utf32_output += 3; // We wrote 3 32-bit characters. - return consumed; - } else { - // here we know that there is an error but we do not handle errors - return 12; - } -} -/* end file src/lsx/lsx_convert_utf8_to_utf32.cpp */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/lsx/lsx_convert_utf8_to_latin1.cpp */ -size_t convert_masked_utf8_to_latin1(const char *input, - uint64_t utf8_end_of_code_point_mask, - char *&latin1_output) { - // we use an approach where we try to process up to 12 input bytes. - // Why 12 input bytes and not 16? Because we are concerned with the size of - // the lookup tables. Also 12 is nicely divisible by two and three. - // - __m128i in = __lsx_vld(reinterpret_cast(input), 0); - - const uint16_t input_utf8_end_of_code_point_mask = - utf8_end_of_code_point_mask & 0xfff; - // Optimization note: our main path below is load-latency dependent. Thus it - // is maybe beneficial to have fast paths that depend on branch prediction but - // have less latency. This results in more instructions but, potentially, also - // higher speeds. - - // We first try a few fast paths. - // The obvious first test is ASCII, which actually consumes the full 16. - if ((utf8_end_of_code_point_mask & 0xFFFF) == 0xFFFF) { - // We process in chunks of 16 bytes - __lsx_vst(in, reinterpret_cast(latin1_output), 0); - latin1_output += 16; // We wrote 16 18-bit characters. - return 16; // We consumed 16 bytes. - } - /// We do not have a fast path available, or the fast path is unimportant, so - /// we fallback. - const uint8_t idx = simdutf::tables::utf8_to_utf16::utf8bigindex - [input_utf8_end_of_code_point_mask][0]; - - const uint8_t consumed = simdutf::tables::utf8_to_utf16::utf8bigindex - [input_utf8_end_of_code_point_mask][1]; - // this indicates an invalid input: - if (idx >= 64) { - return consumed; - } - // Here we should have (idx < 64), if not, there is a bug in the validation or - // elsewhere. SIX (6) input code-code units this is a relatively easy scenario - // we process SIX (6) input code-code units. The max length in bytes of six - // code code units spanning between 1 and 2 bytes each is 12 bytes. Converts 6 - // 1-2 byte UTF-8 characters to 6 UTF-16 characters. This is a relatively easy - // scenario we process SIX (6) input code-code units. The max length in bytes - // of six code code units spanning between 1 and 2 bytes each is 12 bytes. - __m128i sh = __lsx_vld(reinterpret_cast( - simdutf::tables::utf8_to_utf16::shufutf8[idx]), - 0); - // Shuffle - // 1 byte: 00000000 0bbbbbbb - // 2 byte: 110aaaaa 10bbbbbb - sh = __lsx_vand_v(sh, __lsx_vldi(0x1f)); - __m128i perm = __lsx_vshuf_b(__lsx_vldi(0), in, sh); - // ascii mask - // 1 byte: 11111111 11111111 - // 2 byte: 00000000 00000000 - __m128i ascii_mask = __lsx_vslt_bu(perm, __lsx_vldi(0x80)); - // utf8 mask - // 1 byte: 00000000 00000000 - // 2 byte: 00111111 00111111 - __m128i utf8_mask = __lsx_vand_v(__lsx_vsle_bu(__lsx_vldi(0x80), perm), - __lsx_vldi(0b00111111)); - // mask - // 1 byte: 11111111 11111111 - // 2 byte: 00111111 00111111 - __m128i mask = __lsx_vor_v(utf8_mask, ascii_mask); - - __m128i composed = __lsx_vbitsel_v(__lsx_vsrli_h(perm, 2), perm, mask); - // writing 8 bytes even though we only care about the first 6 bytes. - __m128i latin1_packed = __lsx_vpickev_b(__lsx_vldi(0), composed); - - uint64_t buffer[2]; - // __lsx_vst(latin1_packed, reinterpret_cast(latin1_output), 0); - __lsx_vst(latin1_packed, reinterpret_cast(buffer), 0); - std::memcpy(latin1_output, buffer, 6); - latin1_output += 6; // We wrote 6 bytes. - return consumed; -} -/* end file src/lsx/lsx_convert_utf8_to_latin1.cpp */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/lsx/lsx_convert_utf16_to_latin1.cpp */ -template -std::pair -lsx_convert_utf16_to_latin1(const char16_t *buf, size_t len, - char *latin1_output) { - const char16_t *end = buf + len; - while (end - buf >= 16) { - __m128i in = __lsx_vld(reinterpret_cast(buf), 0); - __m128i in1 = __lsx_vld(reinterpret_cast(buf), 16); - if (!match_system(big_endian)) { - in = lsx_swap_bytes(in); - in1 = lsx_swap_bytes(in1); - } - if (__lsx_bz_v(__lsx_vpickod_b(in1, in))) { - // 1. pack the bytes - __m128i latin1_packed = __lsx_vpickev_b(in1, in); - // 2. store (8 bytes) - __lsx_vst(latin1_packed, reinterpret_cast(latin1_output), 0); - // 3. adjust pointers - buf += 16; - latin1_output += 16; - } else { - return std::make_pair(nullptr, reinterpret_cast(latin1_output)); - } - } // while - return std::make_pair(buf, latin1_output); -} - -template -std::pair -lsx_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, - char *latin1_output) { - const char16_t *start = buf; - const char16_t *end = buf + len; - while (end - buf >= 16) { - __m128i in = __lsx_vld(reinterpret_cast(buf), 0); - __m128i in1 = __lsx_vld(reinterpret_cast(buf), 16); - if (!match_system(big_endian)) { - in = lsx_swap_bytes(in); - in1 = lsx_swap_bytes(in1); - } - if (__lsx_bz_v(__lsx_vpickod_b(in1, in))) { - // 1. pack the bytes - __m128i latin1_packed = __lsx_vpickev_b(in1, in); - // 2. store (8 bytes) - __lsx_vst(latin1_packed, reinterpret_cast(latin1_output), 0); - // 3. adjust pointers - buf += 16; - latin1_output += 16; - } else { - // Let us do a scalar fallback. - for (int k = 0; k < 16; k++) { - uint16_t word = - !match_system(big_endian) ? scalar::u16_swap_bytes(buf[k]) : buf[k]; - if (word <= 0xff) { - *latin1_output++ = char(word); - } else { - return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), - latin1_output); - } - } - } - } // while - return std::make_pair(result(error_code::SUCCESS, buf - start), - latin1_output); -} -/* end file src/lsx/lsx_convert_utf16_to_latin1.cpp */ -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF8 -/* begin file src/lsx/lsx_convert_utf16_to_utf8.cpp */ -/* - The vectorized algorithm works on single SSE register i.e., it - loads eight 16-bit code units. - - We consider three cases: - 1. an input register contains no surrogates and each value - is in range 0x0000 .. 0x07ff. - 2. an input register contains no surrogates and values are - is in range 0x0000 .. 0xffff. - 3. an input register contains surrogates --- i.e. codepoints - can have 16 or 32 bits. - - Ad 1. - - When values are less than 0x0800, it means that a 16-bit code unit - can be converted into: 1) single UTF8 byte (when it's an ASCII - char) or 2) two UTF8 bytes. - - For this case we do only some shuffle to obtain these 2-byte - codes and finally compress the whole SSE register with a single - shuffle. - - We need 256-entry lookup table to get a compression pattern - and the number of output bytes in the compressed vector register. - Each entry occupies 17 bytes. - - Ad 2. - - When values fit in 16-bit code units, but are above 0x07ff, then - a single word may produce one, two or three UTF8 bytes. - - We prepare data for all these three cases in two registers. - The first register contains lower two UTF8 bytes (used in all - cases), while the second one contains just the third byte for - the three-UTF8-bytes case. - - Finally these two registers are interleaved forming eight-element - array of 32-bit values. The array spans two SSE registers. - The bytes from the registers are compressed using two shuffles. - - We need 256-entry lookup table to get a compression pattern - and the number of output bytes in the compressed vector register. - Each entry occupies 17 bytes. - - - To summarize: - - We need two 256-entry tables that have 8704 bytes in total. -*/ -/* - Returns a pair: the first unprocessed byte from buf and utf8_output - A scalar routing should carry on the conversion of the tail. -*/ -template -std::pair -lsx_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_out) { - uint8_t *utf8_output = reinterpret_cast(utf8_out); - const char16_t *end = buf + len; - - const size_t safety_margin = - 12; // to avoid overruns, see issue - // https://github.com/simdutf/simdutf/issues/92 - - __m128i v_07ff = __lsx_vreplgr2vr_h(uint16_t(0x7ff)); - while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { - __m128i in = __lsx_vld(reinterpret_cast(buf), 0); - if (!match_system(big_endian)) { - in = lsx_swap_bytes(in); - } - if (__lsx_bz_v( - __lsx_vslt_hu(__lsx_vrepli_h(0x7F), in))) { // ASCII fast path!!!! - // It is common enough that we have sequences of 16 consecutive ASCII - // characters. - __m128i nextin = __lsx_vld(reinterpret_cast(buf), 16); - if (!match_system(big_endian)) { - nextin = lsx_swap_bytes(nextin); - } - if (__lsx_bz_v(__lsx_vslt_hu(__lsx_vrepli_h(0x7F), nextin))) { - // 1. pack the bytes - // obviously suboptimal. - __m128i utf8_packed = __lsx_vpickev_b(nextin, in); - // 2. store (16 bytes) - __lsx_vst(utf8_packed, utf8_output, 0); - // 3. adjust pointers - buf += 16; - utf8_output += 16; - continue; // we are done for this round! - } else { - // 1. pack the bytes - // obviously suboptimal. - __m128i utf8_packed = __lsx_vpickev_b(in, in); - // 2. store (8 bytes) - __lsx_vst(utf8_packed, utf8_output, 0); - // 3. adjust pointers - buf += 8; - utf8_output += 8; - in = nextin; - } - } - - __m128i zero = __lsx_vldi(0); - if (__lsx_bz_v(__lsx_vslt_hu(v_07ff, in))) { - // 1. prepare 2-byte values - // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 - // expected output : [110a|aaaa|10bb|bbbb] x 8 - // t0 = [000a|aaaa|bbbb|bb00] - __m128i t0 = __lsx_vslli_h(in, 2); - // t1 = [000a|aaaa|0000|0000] - __m128i t1 = __lsx_vand_v(t0, lsx_splat_u16(0x1f00)); - // t2 = [0000|0000|00bb|bbbb] - __m128i t2 = __lsx_vand_v(in, __lsx_vrepli_h(0x3f)); - // t3 = [000a|aaaa|00bb|bbbb] - __m128i t3 = __lsx_vor_v(t1, t2); - // t4 = [110a|aaaa|10bb|bbbb] - __m128i v_c080 = __lsx_vreplgr2vr_h(uint16_t(0xc080)); - __m128i t4 = __lsx_vor_v(t3, v_c080); - // 2. merge ASCII and 2-byte codewords - __m128i one_byte_bytemask = - __lsx_vsle_hu(in, __lsx_vrepli_h(0x7F /*0x007F*/)); - __m128i utf8_unpacked = __lsx_vbitsel_v(t4, in, one_byte_bytemask); - // 3. prepare bitmask for 8-bit lookup - uint32_t m2 = __lsx_vpickve2gr_bu(__lsx_vmskltz_h(one_byte_bytemask), 0); - // 4. pack the bytes - const uint8_t *row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes - [lsx_1_2_utf8_bytes_mask[m2]][0]; - __m128i shuffle = __lsx_vld(row, 1); - __m128i utf8_packed = __lsx_vshuf_b(zero, utf8_unpacked, shuffle); - // 5. store bytes - __lsx_vst(utf8_packed, utf8_output, 0); - // 6. adjust pointers - buf += 8; - utf8_output += row[0]; - continue; - } - __m128i surrogates_bytemask = __lsx_vseq_h( - __lsx_vand_v(in, lsx_splat_u16(0xf800)), lsx_splat_u16(0xd800)); - // It might seem like checking for surrogates_bitmask == 0xc000 could help. - // However, it is likely an uncommon occurrence. - if (__lsx_bz_v(surrogates_bytemask)) { - // case: code units from register produce either 1, 2 or 3 UTF-8 bytes - /* In this branch we handle three cases: - 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - - single UFT-8 byte - 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - - two UTF-8 bytes - 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - - three UTF-8 bytes - - We expand the input word (16-bit) into two code units (32-bit), thus - we have room for four bytes. However, we need five distinct bit - layouts. Note that the last byte in cases #2 and #3 is the same. - - We precompute byte 1 for case #1 and the common byte for cases #2 & #3 - in register t2. - - We precompute byte 1 for case #3 and -- **conditionally** -- - precompute either byte 1 for case #2 or byte 2 for case #3. Note that - they differ by exactly one bit. - - Finally from these two code units we build proper UTF-8 sequence, - taking into account the case (i.e, the number of bytes to write). - */ - /** - * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: - * t2 => [0ccc|cccc] [10cc|cccc] - * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) - */ - // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - __m128i t0 = __lsx_vpickev_b(in, in); - t0 = __lsx_vilvl_b(t0, t0); - - // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|00cc|cccc] - __m128i v_3f7f = __lsx_vreplgr2vr_h(uint16_t(0x3F7F)); - __m128i t1 = __lsx_vand_v(t0, v_3f7f); - // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - __m128i t2 = __lsx_vor_v(t1, lsx_splat_u16(0x8000)); - - // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] - __m128i s0 = __lsx_vsrli_h(in, 12); - // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] - __m128i s1 = __lsx_vslli_h(in, 2); - // s1: [aabb|bbbb|cccc|cc00] => [00bb|bbbb|0000|0000] - s1 = __lsx_vand_v(s1, lsx_splat_u16(0x3f00)); - - // [00bb|bbbb|0000|aaaa] - __m128i s2 = __lsx_vor_v(s0, s1); - // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - __m128i v_c0e0 = __lsx_vreplgr2vr_h(uint16_t(0xC0E0)); - __m128i s3 = __lsx_vor_v(s2, v_c0e0); - __m128i one_or_two_bytes_bytemask = __lsx_vsle_hu(in, v_07ff); - __m128i m0 = - __lsx_vandn_v(one_or_two_bytes_bytemask, lsx_splat_u16(0x4000)); - __m128i s4 = __lsx_vxor_v(s3, m0); - - // 4. expand code units 16-bit => 32-bit - __m128i out0 = __lsx_vilvl_h(s4, t2); - __m128i out1 = __lsx_vilvh_h(s4, t2); - - // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - __m128i one_byte_bytemask = __lsx_vsle_hu(in, __lsx_vrepli_h(0x7F)); - - __m128i one_or_two_bytes_bytemask_low = - __lsx_vilvl_h(one_or_two_bytes_bytemask, zero); - __m128i one_or_two_bytes_bytemask_high = - __lsx_vilvh_h(one_or_two_bytes_bytemask, zero); - - __m128i one_byte_bytemask_low = - __lsx_vilvl_h(one_byte_bytemask, one_byte_bytemask); - __m128i one_byte_bytemask_high = - __lsx_vilvh_h(one_byte_bytemask, one_byte_bytemask); - - const uint32_t mask0 = __lsx_vpickve2gr_bu( - __lsx_vmskltz_h(__lsx_vor_v(one_or_two_bytes_bytemask_low, - one_byte_bytemask_low)), - 0); - const uint32_t mask1 = __lsx_vpickve2gr_bu( - __lsx_vmskltz_h(__lsx_vor_v(one_or_two_bytes_bytemask_high, - one_byte_bytemask_high)), - 0); - - const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; - __m128i shuffle0 = __lsx_vld(row0, 1); - __m128i utf8_0 = __lsx_vshuf_b(zero, out0, shuffle0); - - const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; - __m128i shuffle1 = __lsx_vld(row1, 1); - __m128i utf8_1 = __lsx_vshuf_b(zero, out1, shuffle1); - - __lsx_vst(utf8_0, utf8_output, 0); - utf8_output += row0[0]; - __lsx_vst(utf8_1, utf8_output, 0); - utf8_output += row1[0]; - - buf += 8; - // surrogate pair(s) in a register - } else { - // Let us do a scalar fallback. - // It may seem wasteful to use scalar code, but being efficient with SIMD - // in the presence of surrogate pairs may require non-trivial tables. - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint16_t word = - !match_system(big_endian) ? scalar::u16_swap_bytes(buf[k]) : buf[k]; - if ((word & 0xFF80) == 0) { - *utf8_output++ = char(word); - } else if ((word & 0xF800) == 0) { - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else if ((word & 0xF800) != 0xD800) { - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else { - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - uint16_t next_word = !match_system(big_endian) - ? scalar::u16_swap_bytes(buf[k + 1]) - : buf[k + 1]; - k++; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if ((diff | diff2) > 0x3FF) { - return std::make_pair(nullptr, - reinterpret_cast(utf8_output)); - } - uint32_t value = (diff << 10) + diff2 + 0x10000; - *utf8_output++ = char((value >> 18) | 0b11110000); - *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((value & 0b111111) | 0b10000000); - } - } - buf += k; - } - } // while - return std::make_pair(buf, reinterpret_cast(utf8_output)); -} - -/* - Returns a pair: a result struct and utf8_output. - If there is an error, the count field of the result is the position of the - error. Otherwise, it is the position of the first unprocessed byte in buf - (even if finished). A scalar routing should carry on the conversion of the - tail if needed. -*/ -template -std::pair -lsx_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, - char *utf8_out) { - uint8_t *utf8_output = reinterpret_cast(utf8_out); - const char16_t *start = buf; - const char16_t *end = buf + len; - - const size_t safety_margin = - 12; // to avoid overruns, see issue - // https://github.com/simdutf/simdutf/issues/92 - while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { - __m128i in = __lsx_vld(reinterpret_cast(buf), 0); - if (!match_system(big_endian)) { - in = lsx_swap_bytes(in); - } - if (__lsx_bz_v( - __lsx_vslt_hu(__lsx_vrepli_h(0x7F), in))) { // ASCII fast path!!!! - // It is common enough that we have sequences of 16 consecutive ASCII - // characters. - __m128i nextin = __lsx_vld(reinterpret_cast(buf), 16); - if (!match_system(big_endian)) { - nextin = lsx_swap_bytes(nextin); - } - if (__lsx_bz_v(__lsx_vslt_hu(__lsx_vrepli_h(0x7F), nextin))) { - // 1. pack the bytes - // obviously suboptimal. - __m128i utf8_packed = __lsx_vpickev_b(nextin, in); - // 2. store (16 bytes) - __lsx_vst(utf8_packed, utf8_output, 0); - // 3. adjust pointers - buf += 16; - utf8_output += 16; - continue; // we are done for this round! - } else { - // 1. pack the bytes - // obviously suboptimal. - __m128i utf8_packed = __lsx_vpickev_b(in, in); - // 2. store (8 bytes) - __lsx_vst(utf8_packed, utf8_output, 0); - // 3. adjust pointers - buf += 8; - utf8_output += 8; - in = nextin; - } - } - - __m128i v_07ff = __lsx_vreplgr2vr_h(uint16_t(0x7ff)); - __m128i zero = __lsx_vldi(0); - if (__lsx_bz_v(__lsx_vslt_hu(v_07ff, in))) { - // 1. prepare 2-byte values - // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 - // expected output : [110a|aaaa|10bb|bbbb] x 8 - // t0 = [000a|aaaa|bbbb|bb00] - __m128i t0 = __lsx_vslli_h(in, 2); - // t1 = [000a|aaaa|0000|0000] - __m128i t1 = __lsx_vand_v(t0, lsx_splat_u16(0x1f00)); - // t2 = [0000|0000|00bb|bbbb] - __m128i t2 = __lsx_vand_v(in, __lsx_vrepli_h(0x3f)); - // t3 = [000a|aaaa|00bb|bbbb] - __m128i t3 = __lsx_vor_v(t1, t2); - // t4 = [110a|aaaa|10bb|bbbb] - __m128i v_c080 = __lsx_vreplgr2vr_h(uint16_t(0xc080)); - __m128i t4 = __lsx_vor_v(t3, v_c080); - // 2. merge ASCII and 2-byte codewords - __m128i one_byte_bytemask = - __lsx_vsle_hu(in, __lsx_vrepli_h(0x7F /*0x007F*/)); - __m128i utf8_unpacked = __lsx_vbitsel_v(t4, in, one_byte_bytemask); - // 3. prepare bitmask for 8-bit lookup - uint32_t m2 = __lsx_vpickve2gr_bu(__lsx_vmskltz_h(one_byte_bytemask), 0); - // 4. pack the bytes - const uint8_t *row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes - [lsx_1_2_utf8_bytes_mask[m2]][0]; - __m128i shuffle = __lsx_vld(row, 1); - __m128i utf8_packed = __lsx_vshuf_b(zero, utf8_unpacked, shuffle); - // 5. store bytes - __lsx_vst(utf8_packed, utf8_output, 0); - // 6. adjust pointers - buf += 8; - utf8_output += row[0]; - continue; - } - __m128i surrogates_bytemask = __lsx_vseq_h( - __lsx_vand_v(in, lsx_splat_u16(0xf800)), lsx_splat_u16(0xd800)); - // It might seem like checking for surrogates_bitmask == 0xc000 could help. - // However, it is likely an uncommon occurrence. - if (__lsx_bz_v(surrogates_bytemask)) { - // case: code units from register produce either 1, 2 or 3 UTF-8 bytes - /* In this branch we handle three cases: - 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - - single UFT-8 byte - 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - - two UTF-8 bytes - 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - - three UTF-8 bytes - - We expand the input word (16-bit) into two code units (32-bit), thus - we have room for four bytes. However, we need five distinct bit - layouts. Note that the last byte in cases #2 and #3 is the same. - - We precompute byte 1 for case #1 and the common byte for cases #2 & #3 - in register t2. - - We precompute byte 1 for case #3 and -- **conditionally** -- - precompute either byte 1 for case #2 or byte 2 for case #3. Note that - they differ by exactly one bit. - - Finally from these two code units we build proper UTF-8 sequence, - taking into account the case (i.e, the number of bytes to write). - */ - /** - * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: - * t2 => [0ccc|cccc] [10cc|cccc] - * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) - */ - // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - __m128i t0 = __lsx_vpickev_b(in, in); - t0 = __lsx_vilvl_b(t0, t0); - - // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|00cc|cccc] - __m128i v_3f7f = __lsx_vreplgr2vr_h(uint16_t(0x3F7F)); - __m128i t1 = __lsx_vand_v(t0, v_3f7f); - // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - __m128i t2 = __lsx_vor_v(t1, lsx_splat_u16(0x8000)); - - // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] - __m128i s0 = __lsx_vsrli_h(in, 12); - // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] - __m128i s1 = __lsx_vslli_h(in, 2); - // s1: [aabb|bbbb|cccc|cc00] => [00bb|bbbb|0000|0000] - s1 = __lsx_vand_v(s1, lsx_splat_u16(0x3f00)); - - // [00bb|bbbb|0000|aaaa] - __m128i s2 = __lsx_vor_v(s0, s1); - // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - __m128i v_c0e0 = __lsx_vreplgr2vr_h(uint16_t(0xC0E0)); - __m128i s3 = __lsx_vor_v(s2, v_c0e0); - __m128i one_or_two_bytes_bytemask = __lsx_vsle_hu(in, v_07ff); - __m128i m0 = - __lsx_vandn_v(one_or_two_bytes_bytemask, lsx_splat_u16(0x4000)); - __m128i s4 = __lsx_vxor_v(s3, m0); - - // 4. expand code units 16-bit => 32-bit - __m128i out0 = __lsx_vilvl_h(s4, t2); - __m128i out1 = __lsx_vilvh_h(s4, t2); - - // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - __m128i one_byte_bytemask = __lsx_vsle_hu(in, __lsx_vrepli_h(0x7F)); - - __m128i one_or_two_bytes_bytemask_low = - __lsx_vilvl_h(one_or_two_bytes_bytemask, zero); - __m128i one_or_two_bytes_bytemask_high = - __lsx_vilvh_h(one_or_two_bytes_bytemask, zero); - - __m128i one_byte_bytemask_low = - __lsx_vilvl_h(one_byte_bytemask, one_byte_bytemask); - __m128i one_byte_bytemask_high = - __lsx_vilvh_h(one_byte_bytemask, one_byte_bytemask); - - const uint32_t mask0 = __lsx_vpickve2gr_bu( - __lsx_vmskltz_h(__lsx_vor_v(one_or_two_bytes_bytemask_low, - one_byte_bytemask_low)), - 0); - const uint32_t mask1 = __lsx_vpickve2gr_bu( - __lsx_vmskltz_h(__lsx_vor_v(one_or_two_bytes_bytemask_high, - one_byte_bytemask_high)), - 0); - - const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; - __m128i shuffle0 = __lsx_vld(row0, 1); - __m128i utf8_0 = __lsx_vshuf_b(zero, out0, shuffle0); - - const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; - __m128i shuffle1 = __lsx_vld(row1, 1); - __m128i utf8_1 = __lsx_vshuf_b(zero, out1, shuffle1); - - __lsx_vst(utf8_0, utf8_output, 0); - utf8_output += row0[0]; - __lsx_vst(utf8_1, utf8_output, 0); - utf8_output += row1[0]; - - buf += 8; - // surrogate pair(s) in a register - } else { - // Let us do a scalar fallback. - // It may seem wasteful to use scalar code, but being efficient with SIMD - // in the presence of surrogate pairs may require non-trivial tables. - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint16_t word = - !match_system(big_endian) ? scalar::u16_swap_bytes(buf[k]) : buf[k]; - if ((word & 0xFF80) == 0) { - *utf8_output++ = char(word); - } else if ((word & 0xF800) == 0) { - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else if ((word & 0xF800) != 0xD800) { - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else { - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - uint16_t next_word = !match_system(big_endian) - ? scalar::u16_swap_bytes(buf[k + 1]) - : buf[k + 1]; - k++; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if ((diff | diff2) > 0x3FF) { - return std::make_pair( - result(error_code::SURROGATE, buf - start + k - 1), - reinterpret_cast(utf8_output)); - } - uint32_t value = (diff << 10) + diff2 + 0x10000; - *utf8_output++ = char((value >> 18) | 0b11110000); - *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((value & 0b111111) | 0b10000000); - } - } - buf += k; - } - } // while - - return std::make_pair(result(error_code::SUCCESS, buf - start), - reinterpret_cast(utf8_output)); -} -/* end file src/lsx/lsx_convert_utf16_to_utf8.cpp */ -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF8 -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -/* begin file src/lsx/lsx_convert_utf16_to_utf32.cpp */ -template -std::pair -lsx_convert_utf16_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_out) { - uint32_t *utf32_output = reinterpret_cast(utf32_out); - const char16_t *end = buf + len; - - __m128i zero = __lsx_vldi(0); - __m128i v_f800 = lsx_splat_u16(0xf800); - __m128i v_d800 = lsx_splat_u16(0xd800); - - while (end - buf >= 8) { - __m128i in = __lsx_vld(reinterpret_cast(buf), 0); - if (!match_system(big_endian)) { - in = lsx_swap_bytes(in); - } - - __m128i surrogates_bytemask = - __lsx_vseq_h(__lsx_vand_v(in, v_f800), v_d800); - // It might seem like checking for surrogates_bitmask == 0xc000 could help. - // However, it is likely an uncommon occurrence. - if (__lsx_bz_v(surrogates_bytemask)) { - // case: no surrogate pairs, extend all 16-bit code units to 32-bit code - // units - __lsx_vst(__lsx_vilvl_h(zero, in), utf32_output, 0); - __lsx_vst(__lsx_vilvh_h(zero, in), utf32_output, 16); - utf32_output += 8; - buf += 8; - // surrogate pair(s) in a register - } else { - // Let us do a scalar fallback. - // It may seem wasteful to use scalar code, but being efficient with SIMD - // in the presence of surrogate pairs may require non-trivial tables. - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint16_t word = - !match_system(big_endian) ? scalar::u16_swap_bytes(buf[k]) : buf[k]; - if ((word & 0xF800) != 0xD800) { - *utf32_output++ = char32_t(word); - } else { - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - uint16_t next_word = !match_system(big_endian) - ? scalar::u16_swap_bytes(buf[k + 1]) - : buf[k + 1]; - k++; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if ((diff | diff2) > 0x3FF) { - return std::make_pair(nullptr, - reinterpret_cast(utf32_output)); - } - uint32_t value = (diff << 10) + diff2 + 0x10000; - *utf32_output++ = char32_t(value); - } - } - buf += k; - } - } // while - return std::make_pair(buf, reinterpret_cast(utf32_output)); -} - -/* - Returns a pair: a result struct and utf8_output. - If there is an error, the count field of the result is the position of the - error. Otherwise, it is the position of the first unprocessed byte in buf - (even if finished). A scalar routing should carry on the conversion of the - tail if needed. -*/ -template -std::pair -lsx_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, - char32_t *utf32_out) { - uint32_t *utf32_output = reinterpret_cast(utf32_out); - const char16_t *start = buf; - const char16_t *end = buf + len; - - __m128i zero = __lsx_vldi(0); - __m128i v_f800 = lsx_splat_u16(0xf800); - __m128i v_d800 = lsx_splat_u16(0xd800); - - while (end - buf >= 8) { - __m128i in = __lsx_vld(reinterpret_cast(buf), 0); - if (!match_system(big_endian)) { - in = lsx_swap_bytes(in); - } - - __m128i surrogates_bytemask = - __lsx_vseq_h(__lsx_vand_v(in, v_f800), v_d800); - if (__lsx_bz_v(surrogates_bytemask)) { - // case: no surrogate pairs, extend all 16-bit code units to 32-bit code - // units - __lsx_vst(__lsx_vilvl_h(zero, in), utf32_output, 0); - __lsx_vst(__lsx_vilvh_h(zero, in), utf32_output, 16); - utf32_output += 8; - buf += 8; - // surrogate pair(s) in a register - } else { - // Let us do a scalar fallback. - // It may seem wasteful to use scalar code, but being efficient with SIMD - // in the presence of surrogate pairs may require non-trivial tables. - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint16_t word = - !match_system(big_endian) ? scalar::u16_swap_bytes(buf[k]) : buf[k]; - if ((word & 0xF800) != 0xD800) { - *utf32_output++ = char32_t(word); - } else { - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - uint16_t next_word = !match_system(big_endian) - ? scalar::u16_swap_bytes(buf[k + 1]) - : buf[k + 1]; - k++; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if ((diff | diff2) > 0x3FF) { - return std::make_pair( - result(error_code::SURROGATE, buf - start + k - 1), - reinterpret_cast(utf32_output)); - } - uint32_t value = (diff << 10) + diff2 + 0x10000; - *utf32_output++ = char32_t(value); - } - } - buf += k; - } - } // while - return std::make_pair(result(error_code::SUCCESS, buf - start), - reinterpret_cast(utf32_output)); -} -/* end file src/lsx/lsx_convert_utf16_to_utf32.cpp */ -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/lsx/lsx_convert_utf32_to_latin1.cpp */ -std::pair -lsx_convert_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) { - const char32_t *end = buf + len; - const v16u8 shuf_mask = {0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0}; - __m128i v_ff = __lsx_vrepli_w(0xFF); - - while (end - buf >= 16) { - __m128i in1 = __lsx_vld(reinterpret_cast(buf), 0); - __m128i in2 = __lsx_vld(reinterpret_cast(buf), 16); - - __m128i in12 = __lsx_vor_v(in1, in2); - if (__lsx_bz_v(__lsx_vslt_wu(v_ff, in12))) { - // 1. pack the bytes - __m128i latin1_packed = __lsx_vshuf_b(in2, in1, (__m128i)shuf_mask); - // 2. store (8 bytes) - __lsx_vst(latin1_packed, reinterpret_cast(latin1_output), 0); - // 3. adjust pointers - buf += 8; - latin1_output += 8; - } else { - return std::make_pair(nullptr, reinterpret_cast(latin1_output)); - } - } // while - return std::make_pair(buf, latin1_output); -} - -std::pair -lsx_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, - char *latin1_output) { - const char32_t *start = buf; - const char32_t *end = buf + len; - - const v16u8 shuf_mask = {0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0}; - __m128i v_ff = __lsx_vrepli_w(0xFF); - - while (end - buf >= 16) { - __m128i in1 = __lsx_vld(reinterpret_cast(buf), 0); - __m128i in2 = __lsx_vld(reinterpret_cast(buf), 16); - - __m128i in12 = __lsx_vor_v(in1, in2); - - if (__lsx_bz_v(__lsx_vslt_wu(v_ff, in12))) { - // 1. pack the bytes - __m128i latin1_packed = __lsx_vshuf_b(in2, in1, (__m128i)shuf_mask); - // 2. store (8 bytes) - __lsx_vst(latin1_packed, reinterpret_cast(latin1_output), 0); - // 3. adjust pointers - buf += 8; - latin1_output += 8; - } else { - // Let us do a scalar fallback. - for (int k = 0; k < 8; k++) { - uint32_t word = buf[k]; - if (word <= 0xff) { - *latin1_output++ = char(word); - } else { - return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), - latin1_output); - } - } - } - } // while - return std::make_pair(result(error_code::SUCCESS, buf - start), - latin1_output); -} -/* end file src/lsx/lsx_convert_utf32_to_latin1.cpp */ -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -/* begin file src/lsx/lsx_convert_utf32_to_utf8.cpp */ -std::pair -lsx_convert_utf32_to_utf8(const char32_t *buf, size_t len, char *utf8_out) { - uint8_t *utf8_output = reinterpret_cast(utf8_out); - const char32_t *end = buf + len; - - __m128i v_c080 = lsx_splat_u16(0xc080); - __m128i v_07ff = lsx_splat_u16(0x07ff); - __m128i v_dfff = lsx_splat_u16(0xdfff); - __m128i v_d800 = lsx_splat_u16(0xd800); - __m128i forbidden_bytemask = __lsx_vldi(0x0); - - const size_t safety_margin = - 12; // to avoid overruns, see issue - // https://github.com/simdutf/simdutf/issues/92 - - while (end - buf > std::ptrdiff_t(16 + safety_margin)) { - __m128i in = __lsx_vld(reinterpret_cast(buf), 0); - __m128i nextin = __lsx_vld(reinterpret_cast(buf), 16); - - // Check if no bits set above 16th - if (__lsx_bz_v(__lsx_vpickod_h(in, nextin))) { - // Pack UTF-32 to UTF-16 safely (without surrogate pairs) - // Apply UTF-16 => UTF-8 routine (lsx_convert_utf16_to_utf8.cpp) - __m128i utf16_packed = __lsx_vpickev_h(nextin, in); - - if (__lsx_bz_v(__lsx_vslt_hu(__lsx_vrepli_h(0x7F), - utf16_packed))) { // ASCII fast path!!!! - // 1. pack the bytes - // obviously suboptimal. - __m128i utf8_packed = __lsx_vpickev_b(utf16_packed, utf16_packed); - // 2. store (8 bytes) - __lsx_vst(utf8_packed, utf8_output, 0); - // 3. adjust pointers - buf += 8; - utf8_output += 8; - continue; // we are done for this round! - } - __m128i zero = __lsx_vldi(0); - if (__lsx_bz_v(__lsx_vslt_hu(v_07ff, utf16_packed))) { - // 1. prepare 2-byte values - // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 - // expected output : [110a|aaaa|10bb|bbbb] x 8 - - // t0 = [000a|aaaa|bbbb|bb00] - const __m128i t0 = __lsx_vslli_h(utf16_packed, 2); - // t1 = [000a|aaaa|0000|0000] - const __m128i t1 = __lsx_vand_v(t0, lsx_splat_u16(0x1f00)); - // t2 = [0000|0000|00bb|bbbb] - const __m128i t2 = __lsx_vand_v(utf16_packed, __lsx_vrepli_h(0x3f)); - // t3 = [000a|aaaa|00bb|bbbb] - const __m128i t3 = __lsx_vor_v(t1, t2); - // t4 = [110a|aaaa|10bb|bbbb] - const __m128i t4 = __lsx_vor_v(t3, v_c080); - // 2. merge ASCII and 2-byte codewords - __m128i one_byte_bytemask = - __lsx_vsle_hu(utf16_packed, __lsx_vrepli_h(0x7F /*0x007F*/)); - __m128i utf8_unpacked = - __lsx_vbitsel_v(t4, utf16_packed, one_byte_bytemask); - // 3. prepare bitmask for 8-bit lookup - uint32_t m2 = - __lsx_vpickve2gr_bu(__lsx_vmskltz_h(one_byte_bytemask), 0); - // 4. pack the bytes - const uint8_t *row = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes - [lsx_1_2_utf8_bytes_mask[m2]][0]; - __m128i shuffle = __lsx_vld(row, 1); - __m128i utf8_packed = __lsx_vshuf_b(zero, utf8_unpacked, shuffle); - // 5. store bytes - __lsx_vst(utf8_packed, utf8_output, 0); - - // 6. adjust pointers - buf += 8; - utf8_output += row[0]; - continue; - } else { - // case: code units from register produce either 1, 2 or 3 UTF-8 bytes - forbidden_bytemask = __lsx_vor_v( - __lsx_vand_v( - __lsx_vsle_h(utf16_packed, v_dfff), // utf16_packed <= 0xdfff - __lsx_vsle_h(v_d800, utf16_packed)), // utf16_packed >= 0xd800 - forbidden_bytemask); - /* In this branch we handle three cases: - 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - single - UFT-8 byte - 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two - UTF-8 bytes - 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - three - UTF-8 bytes - - We expand the input word (16-bit) into two code units (32-bit), thus - we have room for four bytes. However, we need five distinct bit - layouts. Note that the last byte in cases #2 and #3 is the same. - - We precompute byte 1 for case #1 and the common byte for cases #2 & #3 - in register t2. - - We precompute byte 1 for case #3 and -- **conditionally** -- precompute - either byte 1 for case #2 or byte 2 for case #3. Note that they - differ by exactly one bit. - - Finally from these two code units we build proper UTF-8 sequence, taking - into account the case (i.e, the number of bytes to write). - */ - /** - * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: - * t2 => [0ccc|cccc] [10cc|cccc] - * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) - */ - // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - __m128i t0 = __lsx_vpickev_b(utf16_packed, utf16_packed); - t0 = __lsx_vilvl_b(t0, t0); - // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] - __m128i v_3f7f = __lsx_vreplgr2vr_h(uint16_t(0x3F7F)); - __m128i t1 = __lsx_vand_v(t0, v_3f7f); - // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - __m128i t2 = __lsx_vor_v(t1, lsx_splat_u16(0x8000)); - - // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] - __m128i s0 = __lsx_vsrli_h(utf16_packed, 12); - // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] - __m128i s1 = __lsx_vslli_h(utf16_packed, 2); - // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000] - s1 = __lsx_vand_v(s1, lsx_splat_u16(0x3F00)); - // [00bb|bbbb|0000|aaaa] - __m128i s2 = __lsx_vor_v(s0, s1); - // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - __m128i v_c0e0 = __lsx_vreplgr2vr_h(uint16_t(0xC0E0)); - __m128i s3 = __lsx_vor_v(s2, v_c0e0); - __m128i one_or_two_bytes_bytemask = __lsx_vsle_hu(utf16_packed, v_07ff); - __m128i m0 = - __lsx_vandn_v(one_or_two_bytes_bytemask, lsx_splat_u16(0x4000)); - __m128i s4 = __lsx_vxor_v(s3, m0); - - // 4. expand code units 16-bit => 32-bit - __m128i out0 = __lsx_vilvl_h(s4, t2); - __m128i out1 = __lsx_vilvh_h(s4, t2); - - // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - __m128i one_byte_bytemask = - __lsx_vsle_hu(utf16_packed, __lsx_vrepli_h(0x7F)); - - __m128i one_or_two_bytes_bytemask_u16_to_u32_low = - __lsx_vilvl_h(one_or_two_bytes_bytemask, zero); - __m128i one_or_two_bytes_bytemask_u16_to_u32_high = - __lsx_vilvh_h(one_or_two_bytes_bytemask, zero); - - __m128i one_byte_bytemask_u16_to_u32_low = - __lsx_vilvl_h(one_byte_bytemask, one_byte_bytemask); - __m128i one_byte_bytemask_u16_to_u32_high = - __lsx_vilvh_h(one_byte_bytemask, one_byte_bytemask); - - const uint32_t mask0 = - __lsx_vpickve2gr_bu(__lsx_vmskltz_h(__lsx_vor_v( - one_or_two_bytes_bytemask_u16_to_u32_low, - one_byte_bytemask_u16_to_u32_low)), - 0); - const uint32_t mask1 = - __lsx_vpickve2gr_bu(__lsx_vmskltz_h(__lsx_vor_v( - one_or_two_bytes_bytemask_u16_to_u32_high, - one_byte_bytemask_u16_to_u32_high)), - 0); - - const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; - __m128i shuffle0 = __lsx_vld(row0, 1); - __m128i utf8_0 = __lsx_vshuf_b(zero, out0, shuffle0); - - const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; - __m128i shuffle1 = __lsx_vld(row1, 1); - __m128i utf8_1 = __lsx_vshuf_b(zero, out1, shuffle1); - - __lsx_vst(utf8_0, utf8_output, 0); - utf8_output += row0[0]; - __lsx_vst(utf8_1, utf8_output, 0); - utf8_output += row1[0]; - - buf += 8; - } - // At least one 32-bit word will produce a surrogate pair in UTF-16 <=> - // will produce four UTF-8 bytes. - } else { - // Let us do a scalar fallback. - // It may seem wasteful to use scalar code, but being efficient with SIMD - // in the presence of surrogate pairs may require non-trivial tables. - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint32_t word = buf[k]; - if ((word & 0xFFFFFF80) == 0) { - *utf8_output++ = char(word); - } else if ((word & 0xFFFFF800) == 0) { - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else if ((word & 0xFFFF0000) == 0) { - if (word >= 0xD800 && word <= 0xDFFF) { - return std::make_pair(nullptr, - reinterpret_cast(utf8_output)); - } - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else { - if (word > 0x10FFFF) { - return std::make_pair(nullptr, - reinterpret_cast(utf8_output)); - } - *utf8_output++ = char((word >> 18) | 0b11110000); - *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } - } - buf += k; - } - } // while - - // check for invalid input - if (__lsx_bnz_v(forbidden_bytemask)) { - return std::make_pair(nullptr, reinterpret_cast(utf8_output)); - } - - return std::make_pair(buf, reinterpret_cast(utf8_output)); -} - -std::pair -lsx_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, - char *utf8_out) { - uint8_t *utf8_output = reinterpret_cast(utf8_out); - const char32_t *start = buf; - const char32_t *end = buf + len; - - __m128i v_c080 = lsx_splat_u16(0xc080); - __m128i v_07ff = lsx_splat_u16(0x07ff); - __m128i v_dfff = lsx_splat_u16(0xdfff); - __m128i v_d800 = lsx_splat_u16(0xd800); - __m128i forbidden_bytemask = __lsx_vldi(0x0); - const size_t safety_margin = - 12; // to avoid overruns, see issue - // https://github.com/simdutf/simdutf/issues/92 - - while (end - buf > std::ptrdiff_t(16 + safety_margin)) { - __m128i in = __lsx_vld(reinterpret_cast(buf), 0); - __m128i nextin = __lsx_vld(reinterpret_cast(buf), 16); - - // Check if no bits set above 16th - if (__lsx_bz_v(__lsx_vpickod_h(in, nextin))) { - // Pack UTF-32 to UTF-16 safely (without surrogate pairs) - // Apply UTF-16 => UTF-8 routine (lsx_convert_utf16_to_utf8.cpp) - __m128i utf16_packed = __lsx_vpickev_h(nextin, in); - - if (__lsx_bz_v(__lsx_vslt_hu(__lsx_vrepli_h(0x7F), - utf16_packed))) { // ASCII fast path!!!! - // 1. pack the bytes - // obviously suboptimal. - __m128i utf8_packed = __lsx_vpickev_b(utf16_packed, utf16_packed); - // 2. store (8 bytes) - __lsx_vst(utf8_packed, utf8_output, 0); - // 3. adjust pointers - buf += 8; - utf8_output += 8; - continue; // we are done for this round! - } - __m128i zero = __lsx_vldi(0); - if (__lsx_bz_v(__lsx_vslt_hu(v_07ff, utf16_packed))) { - // 1. prepare 2-byte values - // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 - // expected output : [110a|aaaa|10bb|bbbb] x 8 - - // t0 = [000a|aaaa|bbbb|bb00] - const __m128i t0 = __lsx_vslli_h(utf16_packed, 2); - // t1 = [000a|aaaa|0000|0000] - const __m128i t1 = __lsx_vand_v(t0, lsx_splat_u16(0x1f00)); - // t2 = [0000|0000|00bb|bbbb] - const __m128i t2 = __lsx_vand_v(utf16_packed, __lsx_vrepli_h(0x3f)); - // t3 = [000a|aaaa|00bb|bbbb] - const __m128i t3 = __lsx_vor_v(t1, t2); - // t4 = [110a|aaaa|10bb|bbbb] - const __m128i t4 = __lsx_vor_v(t3, v_c080); - // 2. merge ASCII and 2-byte codewords - __m128i one_byte_bytemask = - __lsx_vsle_hu(utf16_packed, __lsx_vrepli_h(0x7F /*0x007F*/)); - __m128i utf8_unpacked = - __lsx_vbitsel_v(t4, utf16_packed, one_byte_bytemask); - // 3. prepare bitmask for 8-bit lookup - uint32_t m2 = - __lsx_vpickve2gr_bu(__lsx_vmskltz_h(one_byte_bytemask), 0); - // 4. pack the bytes - const uint8_t *row = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes - [lsx_1_2_utf8_bytes_mask[m2]][0]; - __m128i shuffle = __lsx_vld(row, 1); - __m128i utf8_packed = __lsx_vshuf_b(zero, utf8_unpacked, shuffle); - // 5. store bytes - __lsx_vst(utf8_packed, utf8_output, 0); - - // 6. adjust pointers - buf += 8; - utf8_output += row[0]; - continue; - } else { - // case: code units from register produce either 1, 2 or 3 UTF-8 bytes - forbidden_bytemask = __lsx_vor_v( - __lsx_vand_v( - __lsx_vsle_h(utf16_packed, v_dfff), // utf16_packed <= 0xdfff - __lsx_vsle_h(v_d800, utf16_packed)), // utf16_packed >= 0xd800 - forbidden_bytemask); - if (__lsx_bnz_v(forbidden_bytemask)) { - return std::make_pair(result(error_code::SURROGATE, buf - start), - reinterpret_cast(utf8_output)); - } - /* In this branch we handle three cases: - 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - single - UFT-8 byte - 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two - UTF-8 bytes - 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - three - UTF-8 bytes - - We expand the input word (16-bit) into two code units (32-bit), thus - we have room for four bytes. However, we need five distinct bit - layouts. Note that the last byte in cases #2 and #3 is the same. - - We precompute byte 1 for case #1 and the common byte for cases #2 & #3 - in register t2. - - We precompute byte 1 for case #3 and -- **conditionally** -- precompute - either byte 1 for case #2 or byte 2 for case #3. Note that they - differ by exactly one bit. - - Finally from these two code units we build proper UTF-8 sequence, taking - into account the case (i.e, the number of bytes to write). - */ - /** - * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: - * t2 => [0ccc|cccc] [10cc|cccc] - * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) - */ - // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - __m128i t0 = __lsx_vpickev_b(utf16_packed, utf16_packed); - t0 = __lsx_vilvl_b(t0, t0); - // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] - __m128i v_3f7f = __lsx_vreplgr2vr_h(uint16_t(0x3F7F)); - __m128i t1 = __lsx_vand_v(t0, v_3f7f); - // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - __m128i t2 = __lsx_vor_v(t1, lsx_splat_u16(0x8000)); - - // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] - __m128i s0 = __lsx_vsrli_h(utf16_packed, 12); - // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] - __m128i s1 = __lsx_vslli_h(utf16_packed, 2); - // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000] - s1 = __lsx_vand_v(s1, lsx_splat_u16(0x3F00)); - // [00bb|bbbb|0000|aaaa] - __m128i s2 = __lsx_vor_v(s0, s1); - // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - __m128i v_c0e0 = __lsx_vreplgr2vr_h(uint16_t(0xC0E0)); - __m128i s3 = __lsx_vor_v(s2, v_c0e0); - // __m128i v_07ff = vmovq_n_u16((uint16_t)0x07FF); - __m128i one_or_two_bytes_bytemask = __lsx_vsle_hu(utf16_packed, v_07ff); - __m128i m0 = - __lsx_vandn_v(one_or_two_bytes_bytemask, lsx_splat_u16(0x4000)); - __m128i s4 = __lsx_vxor_v(s3, m0); - - // 4. expand code units 16-bit => 32-bit - __m128i out0 = __lsx_vilvl_h(s4, t2); - __m128i out1 = __lsx_vilvh_h(s4, t2); - - // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - __m128i one_byte_bytemask = - __lsx_vsle_hu(utf16_packed, __lsx_vrepli_h(0x7F)); - - __m128i one_or_two_bytes_bytemask_u16_to_u32_low = - __lsx_vilvl_h(one_or_two_bytes_bytemask, zero); - __m128i one_or_two_bytes_bytemask_u16_to_u32_high = - __lsx_vilvh_h(one_or_two_bytes_bytemask, zero); - - __m128i one_byte_bytemask_u16_to_u32_low = - __lsx_vilvl_h(one_byte_bytemask, one_byte_bytemask); - __m128i one_byte_bytemask_u16_to_u32_high = - __lsx_vilvh_h(one_byte_bytemask, one_byte_bytemask); - - const uint32_t mask0 = - __lsx_vpickve2gr_bu(__lsx_vmskltz_h(__lsx_vor_v( - one_or_two_bytes_bytemask_u16_to_u32_low, - one_byte_bytemask_u16_to_u32_low)), - 0); - const uint32_t mask1 = - __lsx_vpickve2gr_bu(__lsx_vmskltz_h(__lsx_vor_v( - one_or_two_bytes_bytemask_u16_to_u32_high, - one_byte_bytemask_u16_to_u32_high)), - 0); - - const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; - __m128i shuffle0 = __lsx_vld(row0, 1); - __m128i utf8_0 = __lsx_vshuf_b(zero, out0, shuffle0); - - const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0]; - __m128i shuffle1 = __lsx_vld(row1, 1); - __m128i utf8_1 = __lsx_vshuf_b(zero, out1, shuffle1); - - __lsx_vst(utf8_0, utf8_output, 0); - utf8_output += row0[0]; - __lsx_vst(utf8_1, utf8_output, 0); - utf8_output += row1[0]; - - buf += 8; - } - // At least one 32-bit word will produce a surrogate pair in UTF-16 <=> - // will produce four UTF-8 bytes. - } else { - // Let us do a scalar fallback. - // It may seem wasteful to use scalar code, but being efficient with SIMD - // in the presence of surrogate pairs may require non-trivial tables. - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint32_t word = buf[k]; - if ((word & 0xFFFFFF80) == 0) { - *utf8_output++ = char(word); - } else if ((word & 0xFFFFF800) == 0) { - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else if ((word & 0xFFFF0000) == 0) { - if (word >= 0xD800 && word <= 0xDFFF) { - return std::make_pair( - result(error_code::SURROGATE, buf - start + k), - reinterpret_cast(utf8_output)); - } - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else { - if (word > 0x10FFFF) { - return std::make_pair( - result(error_code::TOO_LARGE, buf - start + k), - reinterpret_cast(utf8_output)); - } - *utf8_output++ = char((word >> 18) | 0b11110000); - *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } - } - buf += k; - } - } // while - - return std::make_pair(result(error_code::SUCCESS, buf - start), - reinterpret_cast(utf8_output)); -} -/* end file src/lsx/lsx_convert_utf32_to_utf8.cpp */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -/* begin file src/lsx/lsx_convert_utf32_to_utf16.cpp */ -template -std::pair -lsx_convert_utf32_to_utf16(const char32_t *buf, size_t len, - char16_t *utf16_out) { - uint16_t *utf16_output = reinterpret_cast(utf16_out); - const char32_t *end = buf + len; - - __m128i forbidden_bytemask = __lsx_vrepli_h(0); - __m128i v_d800 = lsx_splat_u16(0xd800); - __m128i v_dfff = lsx_splat_u16(0xdfff); - while (end - buf >= 8) { - __m128i in0 = __lsx_vld(reinterpret_cast(buf), 0); - __m128i in1 = __lsx_vld(reinterpret_cast(buf), 16); - - // Check if no bits set above 16th - if (__lsx_bz_v(__lsx_vpickod_h(in1, in0))) { - __m128i utf16_packed = __lsx_vpickev_h(in1, in0); - forbidden_bytemask = __lsx_vor_v( - __lsx_vand_v( - __lsx_vsle_h(utf16_packed, v_dfff), // utf16_packed <= 0xdfff - __lsx_vsle_h(v_d800, utf16_packed)), // utf16_packed >= 0xd800 - forbidden_bytemask); - - if (!match_system(big_endian)) { - utf16_packed = lsx_swap_bytes(utf16_packed); - } - __lsx_vst(utf16_packed, utf16_output, 0); - utf16_output += 8; - buf += 8; - } else { - size_t forward = 3; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint32_t word = buf[k]; - if ((word & 0xFFFF0000) == 0) { - // will not generate a surrogate pair - if (word >= 0xD800 && word <= 0xDFFF) { - return std::make_pair(nullptr, - reinterpret_cast(utf16_output)); - } - *utf16_output++ = !match_system(big_endian) - ? char16_t(word >> 8 | word << 8) - : char16_t(word); - } else { - // will generate a surrogate pair - if (word > 0x10FFFF) { - return std::make_pair(nullptr, - reinterpret_cast(utf16_output)); - } - word -= 0x10000; - uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); - uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); - if (!match_system(big_endian)) { - high_surrogate = - uint16_t(high_surrogate >> 8 | high_surrogate << 8); - low_surrogate = uint16_t(low_surrogate << 8 | low_surrogate >> 8); - } - *utf16_output++ = char16_t(high_surrogate); - *utf16_output++ = char16_t(low_surrogate); - } - } - buf += k; - } - } - - // check for invalid input - if (__lsx_bnz_v(forbidden_bytemask)) { - return std::make_pair(nullptr, reinterpret_cast(utf16_output)); - } - return std::make_pair(buf, reinterpret_cast(utf16_output)); -} - -template -std::pair -lsx_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, - char16_t *utf16_out) { - uint16_t *utf16_output = reinterpret_cast(utf16_out); - const char32_t *start = buf; - const char32_t *end = buf + len; - - __m128i forbidden_bytemask = __lsx_vrepli_h(0); - __m128i v_d800 = lsx_splat_u16(0xd800); - __m128i v_dfff = lsx_splat_u16(0xdfff); - - while (end - buf >= 8) { - __m128i in0 = __lsx_vld(reinterpret_cast(buf), 0); - __m128i in1 = __lsx_vld(reinterpret_cast(buf), 16); - // Check if no bits set above 16th - if (__lsx_bz_v(__lsx_vpickod_h(in1, in0))) { - __m128i utf16_packed = __lsx_vpickev_h(in1, in0); - - forbidden_bytemask = __lsx_vor_v( - __lsx_vand_v( - __lsx_vsle_h(utf16_packed, v_dfff), // utf16_packed <= 0xdfff - __lsx_vsle_h(v_d800, utf16_packed)), // utf16_packed >= 0xd800 - forbidden_bytemask); - if (__lsx_bnz_v(forbidden_bytemask)) { - return std::make_pair(result(error_code::SURROGATE, buf - start), - reinterpret_cast(utf16_output)); - } - - if (!match_system(big_endian)) { - utf16_packed = lsx_swap_bytes(utf16_packed); - } - - __lsx_vst(utf16_packed, utf16_output, 0); - utf16_output += 8; - buf += 8; - } else { - size_t forward = 3; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint32_t word = buf[k]; - if ((word & 0xFFFF0000) == 0) { - // will not generate a surrogate pair - if (word >= 0xD800 && word <= 0xDFFF) { - return std::make_pair( - result(error_code::SURROGATE, buf - start + k), - reinterpret_cast(utf16_output)); - } - *utf16_output++ = !match_system(big_endian) - ? char16_t(word >> 8 | word << 8) - : char16_t(word); - } else { - // will generate a surrogate pair - if (word > 0x10FFFF) { - return std::make_pair( - result(error_code::TOO_LARGE, buf - start + k), - reinterpret_cast(utf16_output)); - } - word -= 0x10000; - uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); - uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); - if (!match_system(big_endian)) { - high_surrogate = - uint16_t(high_surrogate >> 8 | high_surrogate << 8); - low_surrogate = uint16_t(low_surrogate << 8 | low_surrogate >> 8); - } - *utf16_output++ = char16_t(high_surrogate); - *utf16_output++ = char16_t(low_surrogate); - } - } - buf += k; - } - } - - return std::make_pair(result(error_code::SUCCESS, buf - start), - reinterpret_cast(utf16_output)); -} -/* end file src/lsx/lsx_convert_utf32_to_utf16.cpp */ -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -#if SIMDUTF_FEATURE_BASE64 -/* begin file src/lsx/lsx_base64.cpp */ -/** - * References and further reading: - * - * Wojciech Muła, Daniel Lemire, Base64 encoding and decoding at almost the - * speed of a memory copy, Software: Practice and Experience 50 (2), 2020. - * https://arxiv.org/abs/1910.05109 - * - * Wojciech Muła, Daniel Lemire, Faster Base64 Encoding and Decoding using AVX2 - * Instructions, ACM Transactions on the Web 12 (3), 2018. - * https://arxiv.org/abs/1704.00605 - * - * Simon Josefsson. 2006. The Base16, Base32, and Base64 Data Encodings. - * https://tools.ietf.org/html/rfc4648. (2006). Internet Engineering Task Force, - * Request for Comments: 4648. - * - * Alfred Klomp. 2014a. Fast Base64 encoding/decoding with SSE vectorization. - * http://www.alfredklomp.com/programming/sse-base64/. (2014). - * - * Alfred Klomp. 2014b. Fast Base64 stream encoder/decoder in C99, with SIMD - * acceleration. https://github.com/aklomp/base64. (2014). - * - * Hanson Char. 2014. A Fast and Correct Base 64 Codec. (2014). - * https://aws.amazon.com/blogs/developer/a-fast-and-correct-base-64-codec/ - * - * Nick Kopp. 2013. Base64 Encoding on a GPU. - * https://www.codeproject.com/Articles/276993/Base-Encoding-on-a-GPU. (2013). - */ - -template -size_t encode_base64(char *dst, const char *src, size_t srclen, - base64_options options) { - // credit: Wojciech Muła - // SSE (lookup: pshufb improved unrolled) - const uint8_t *input = (const uint8_t *)src; - static const char *lookup_tbl = - isbase64url - ? "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" - : "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; - uint8_t *out = (uint8_t *)dst; - - v16u8 shuf; - __m128i v_fc0fc00, v_3f03f0, shift_r, shift_l, base64_tbl0, base64_tbl1, - base64_tbl2, base64_tbl3; - if (srclen >= 16) { - shuf = v16u8{1, 0, 2, 1, 4, 3, 5, 4, 7, 6, 8, 7, 10, 9, 11, 10}; - v_fc0fc00 = __lsx_vreplgr2vr_w(uint32_t(0x0fc0fc00)); - v_3f03f0 = __lsx_vreplgr2vr_w(uint32_t(0x003f03f0)); - shift_r = __lsx_vreplgr2vr_w(uint32_t(0x0006000a)); - shift_l = __lsx_vreplgr2vr_w(uint32_t(0x00080004)); - base64_tbl0 = __lsx_vld(lookup_tbl, 0); - base64_tbl1 = __lsx_vld(lookup_tbl, 16); - base64_tbl2 = __lsx_vld(lookup_tbl, 32); - base64_tbl3 = __lsx_vld(lookup_tbl, 48); - } - - size_t i = 0; - for (; i + 52 <= srclen; i += 48) { - __m128i in0 = - __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 0); - __m128i in1 = - __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 1); - __m128i in2 = - __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 2); - __m128i in3 = - __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 3); - - in0 = __lsx_vshuf_b(in0, in0, (__m128i)shuf); - in1 = __lsx_vshuf_b(in1, in1, (__m128i)shuf); - in2 = __lsx_vshuf_b(in2, in2, (__m128i)shuf); - in3 = __lsx_vshuf_b(in3, in3, (__m128i)shuf); - - __m128i t0_0 = __lsx_vand_v(in0, v_fc0fc00); - __m128i t0_1 = __lsx_vand_v(in1, v_fc0fc00); - __m128i t0_2 = __lsx_vand_v(in2, v_fc0fc00); - __m128i t0_3 = __lsx_vand_v(in3, v_fc0fc00); - - __m128i t1_0 = __lsx_vsrl_h(t0_0, shift_r); - __m128i t1_1 = __lsx_vsrl_h(t0_1, shift_r); - __m128i t1_2 = __lsx_vsrl_h(t0_2, shift_r); - __m128i t1_3 = __lsx_vsrl_h(t0_3, shift_r); - - __m128i t2_0 = __lsx_vand_v(in0, v_3f03f0); - __m128i t2_1 = __lsx_vand_v(in1, v_3f03f0); - __m128i t2_2 = __lsx_vand_v(in2, v_3f03f0); - __m128i t2_3 = __lsx_vand_v(in3, v_3f03f0); - - __m128i t3_0 = __lsx_vsll_h(t2_0, shift_l); - __m128i t3_1 = __lsx_vsll_h(t2_1, shift_l); - __m128i t3_2 = __lsx_vsll_h(t2_2, shift_l); - __m128i t3_3 = __lsx_vsll_h(t2_3, shift_l); - - __m128i input0 = __lsx_vor_v(t1_0, t3_0); - __m128i input0_shuf0 = __lsx_vshuf_b(base64_tbl1, base64_tbl0, input0); - __m128i input0_shuf1 = __lsx_vshuf_b(base64_tbl3, base64_tbl2, - __lsx_vsub_b(input0, __lsx_vldi(32))); - __m128i input0_mask = __lsx_vslei_bu(input0, 31); - __m128i input0_result = - __lsx_vbitsel_v(input0_shuf1, input0_shuf0, input0_mask); - __lsx_vst(input0_result, reinterpret_cast<__m128i *>(out), 0); - out += 16; - - __m128i input1 = __lsx_vor_v(t1_1, t3_1); - __m128i input1_shuf0 = __lsx_vshuf_b(base64_tbl1, base64_tbl0, input1); - __m128i input1_shuf1 = __lsx_vshuf_b(base64_tbl3, base64_tbl2, - __lsx_vsub_b(input1, __lsx_vldi(32))); - __m128i input1_mask = __lsx_vslei_bu(input1, 31); - __m128i input1_result = - __lsx_vbitsel_v(input1_shuf1, input1_shuf0, input1_mask); - __lsx_vst(input1_result, reinterpret_cast<__m128i *>(out), 0); - out += 16; - - __m128i input2 = __lsx_vor_v(t1_2, t3_2); - __m128i input2_shuf0 = __lsx_vshuf_b(base64_tbl1, base64_tbl0, input2); - __m128i input2_shuf1 = __lsx_vshuf_b(base64_tbl3, base64_tbl2, - __lsx_vsub_b(input2, __lsx_vldi(32))); - __m128i input2_mask = __lsx_vslei_bu(input2, 31); - __m128i input2_result = - __lsx_vbitsel_v(input2_shuf1, input2_shuf0, input2_mask); - __lsx_vst(input2_result, reinterpret_cast<__m128i *>(out), 0); - out += 16; - - __m128i input3 = __lsx_vor_v(t1_3, t3_3); - __m128i input3_shuf0 = __lsx_vshuf_b(base64_tbl1, base64_tbl0, input3); - __m128i input3_shuf1 = __lsx_vshuf_b(base64_tbl3, base64_tbl2, - __lsx_vsub_b(input3, __lsx_vldi(32))); - __m128i input3_mask = __lsx_vslei_bu(input3, 31); - __m128i input3_result = - __lsx_vbitsel_v(input3_shuf1, input3_shuf0, input3_mask); - __lsx_vst(input3_result, reinterpret_cast<__m128i *>(out), 0); - out += 16; - } - for (; i + 16 <= srclen; i += 12) { - - __m128i in = __lsx_vld(reinterpret_cast(input + i), 0); - - // bytes from groups A, B and C are needed in separate 32-bit lanes - // in = [DDDD|CCCC|BBBB|AAAA] - // - // an input triplet has layout - // [????????|ccdddddd|bbbbcccc|aaaaaabb] - // byte 3 byte 2 byte 1 byte 0 -- byte 3 comes from the next - // triplet - // - // shuffling changes the order of bytes: 1, 0, 2, 1 - // [bbbbcccc|ccdddddd|aaaaaabb|bbbbcccc] - // ^^^^ ^^^^^^^^ ^^^^^^^^ ^^^^ - // processed bits - in = __lsx_vshuf_b(in, in, (__m128i)shuf); - - // unpacking - // t0 = [0000cccc|cc000000|aaaaaa00|00000000] - __m128i t0 = __lsx_vand_v(in, v_fc0fc00); - // t1 = [00000000|00cccccc|00000000|00aaaaaa] - // ((c >> 6), (a >> 10)) - __m128i t1 = __lsx_vsrl_h(t0, shift_r); - - // t2 = [00000000|00dddddd|000000bb|bbbb0000] - __m128i t2 = __lsx_vand_v(in, v_3f03f0); - // t3 = [00dddddd|00000000|00bbbbbb|00000000] - // ((d << 8), (b << 4)) - __m128i t3 = __lsx_vsll_h(t2, shift_l); - - // res = [00dddddd|00cccccc|00bbbbbb|00aaaaaa] = t1 | t3 - __m128i indices = __lsx_vor_v(t1, t3); - - __m128i indices_shuf0 = __lsx_vshuf_b(base64_tbl1, base64_tbl0, indices); - __m128i indices_shuf1 = __lsx_vshuf_b( - base64_tbl3, base64_tbl2, __lsx_vsub_b(indices, __lsx_vldi(32))); - __m128i indices_mask = __lsx_vslei_bu(indices, 31); - __m128i indices_result = - __lsx_vbitsel_v(indices_shuf1, indices_shuf0, indices_mask); - - __lsx_vst(indices_result, reinterpret_cast<__m128i *>(out), 0); - out += 16; - } - - return i / 3 * 4 + scalar::base64::tail_encode_base64((char *)out, src + i, - srclen - i, options); -} - -static inline void compress(__m128i data, uint16_t mask, char *output) { - if (mask == 0) { - __lsx_vst(data, reinterpret_cast<__m128i *>(output), 0); - return; - } - // this particular implementation was inspired by work done by @animetosho - // we do it in two steps, first 8 bytes and then second 8 bytes - uint8_t mask1 = uint8_t(mask); // least significant 8 bits - uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits - // next line just loads the 64-bit values thintable_epi8[mask1] and - // thintable_epi8[mask2] into a 128-bit register, using only - // two instructions on most compilers. - - v2u64 shufmask = {tables::base64::thintable_epi8[mask1], - tables::base64::thintable_epi8[mask2]}; - - // we increment by 0x08 the second half of the mask - v4u32 hi = {0, 0, 0x08080808, 0x08080808}; - __m128i shufmask1 = __lsx_vadd_b((__m128i)shufmask, (__m128i)hi); - - // this is the version "nearly pruned" - __m128i pruned = __lsx_vshuf_b(data, data, shufmask1); - // we still need to put the two halves together. - // we compute the popcount of the first half: - int pop1 = tables::base64::BitsSetTable256mul2[mask1]; - // then load the corresponding mask, what it does is to write - // only the first pop1 bytes from the first 8 bytes, and then - // it fills in with the bytes from the second 8 bytes + some filling - // at the end. - __m128i compactmask = - __lsx_vld(reinterpret_cast( - tables::base64::pshufb_combine_table + pop1 * 8), - 0); - __m128i answer = __lsx_vshuf_b(pruned, pruned, compactmask); - - __lsx_vst(answer, reinterpret_cast<__m128i *>(output), 0); -} - -struct block64 { - __m128i chunks[4]; -}; - -template -static inline uint16_t to_base64_mask(__m128i *src, bool *error) { - const v16u8 ascii_space_tbl = {0x20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x9, 0xa, 0x0, 0xc, 0xd, 0x0, 0x0}; - // credit: aqrit - /* - '0'(0x30)-'9'(0x39) => delta_values_index = 4 - 'A'(0x41)-'Z'(0x5a) => delta_values_index = 4/5/12(4+8) - 'a'(0x61)-'z'(0x7a) => delta_values_index = 6/7/14(6+8) - '+'(0x2b) => delta_values_index = 3 - '/'(0x2f) => delta_values_index = 2+8 = 10 - '-'(0x2d) => delta_values_index = 2+8 = 10 - '_'(0x5f) => delta_values_index = 5+8 = 13 - */ - v16u8 delta_asso = {0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, - 0x0, 0x0, 0x0, 0x0, 0x0, 0xF, 0x0, 0xF}; - v16i8 delta_values; - if (base64_url) { - delta_values = - v16i8{int8_t(0x00), int8_t(0x00), int8_t(0x00), int8_t(0x13), - int8_t(0x04), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), - int8_t(0xB9), int8_t(0x00), int8_t(0x11), int8_t(0xC3), - int8_t(0xBF), int8_t(0xE0), int8_t(0xB9), int8_t(0xB9)}; - } else { - delta_values = - v16i8{int8_t(0x00), int8_t(0x00), int8_t(0x00), int8_t(0x13), - int8_t(0x04), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), - int8_t(0xB9), int8_t(0x00), int8_t(0x10), int8_t(0xC3), - int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), int8_t(0xB9)}; - } - - v16u8 check_asso; - if (base64_url) { - check_asso = v16u8{0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x01, 0x01, 0x03, 0x07, 0x0B, 0x06, 0x0B, 0x12}; - } else { - check_asso = v16u8{0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x01, 0x01, 0x03, 0x07, 0x0B, 0x0B, 0x0B, 0x0F}; - } - - v16i8 check_values; - if (base64_url) { - check_values = v16i8{int8_t(0x0), int8_t(0x80), int8_t(0x80), int8_t(0x80), - int8_t(0xCF), int8_t(0xBF), int8_t(0xD3), int8_t(0xA6), - int8_t(0xB5), int8_t(0x86), int8_t(0xD0), int8_t(0x80), - int8_t(0xB0), int8_t(0x80), int8_t(0x0), int8_t(0x0)}; - } else { - check_values = - v16i8{int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), - int8_t(0xCF), int8_t(0xBF), int8_t(0xD5), int8_t(0xA6), - int8_t(0xB5), int8_t(0x86), int8_t(0xD1), int8_t(0x80), - int8_t(0xB1), int8_t(0x80), int8_t(0x91), int8_t(0x80)}; - } - - const __m128i shifted = __lsx_vsrli_b(*src, 3); - __m128i asso_index = __lsx_vand_v(*src, __lsx_vldi(0xF)); - const __m128i delta_hash = - __lsx_vavgr_bu(__lsx_vshuf_b((__m128i)delta_asso, (__m128i)delta_asso, - (__m128i)asso_index), - shifted); - const __m128i check_hash = - __lsx_vavgr_bu(__lsx_vshuf_b((__m128i)check_asso, (__m128i)check_asso, - (__m128i)asso_index), - shifted); - - const __m128i out = - __lsx_vsadd_b(__lsx_vshuf_b((__m128i)delta_values, (__m128i)delta_values, - (__m128i)delta_hash), - *src); - const __m128i chk = - __lsx_vsadd_b(__lsx_vshuf_b((__m128i)check_values, (__m128i)check_values, - (__m128i)check_hash), - *src); - unsigned int mask = __lsx_vpickve2gr_hu(__lsx_vmskltz_b(chk), 0); - if (mask) { - __m128i ascii_space = __lsx_vseq_b(__lsx_vshuf_b((__m128i)ascii_space_tbl, - (__m128i)ascii_space_tbl, - (__m128i)asso_index), - *src); - *error |= - (mask != __lsx_vpickve2gr_hu(__lsx_vmskltz_b((__m128i)ascii_space), 0)); - } - - *src = out; - return (uint16_t)mask; -} - -template -static inline uint64_t to_base64_mask(block64 *b, bool *error) { - *error = 0; - uint64_t m0 = to_base64_mask(&b->chunks[0], error); - uint64_t m1 = to_base64_mask(&b->chunks[1], error); - uint64_t m2 = to_base64_mask(&b->chunks[2], error); - uint64_t m3 = to_base64_mask(&b->chunks[3], error); - return m0 | (m1 << 16) | (m2 << 32) | (m3 << 48); -} - -static inline void copy_block(block64 *b, char *output) { - __lsx_vst(b->chunks[0], reinterpret_cast<__m128i *>(output), 0); - __lsx_vst(b->chunks[1], reinterpret_cast<__m128i *>(output), 16); - __lsx_vst(b->chunks[2], reinterpret_cast<__m128i *>(output), 32); - __lsx_vst(b->chunks[3], reinterpret_cast<__m128i *>(output), 48); -} - -static inline uint64_t compress_block(block64 *b, uint64_t mask, char *output) { - uint64_t nmask = ~mask; - uint64_t count = - __lsx_vpickve2gr_d(__lsx_vpcnt_h(__lsx_vreplgr2vr_d(nmask)), 0); - uint16_t *count_ptr = (uint16_t *)&count; - compress(b->chunks[0], uint16_t(mask), output); - compress(b->chunks[1], uint16_t(mask >> 16), output + count_ptr[0]); - compress(b->chunks[2], uint16_t(mask >> 32), - output + count_ptr[0] + count_ptr[1]); - compress(b->chunks[3], uint16_t(mask >> 48), - output + count_ptr[0] + count_ptr[1] + count_ptr[2]); - return count_ones(nmask); -} - -// The caller of this function is responsible to ensure that there are 64 bytes -// available from reading at src. The data is read into a block64 structure. -static inline void load_block(block64 *b, const char *src) { - b->chunks[0] = __lsx_vld(reinterpret_cast(src), 0); - b->chunks[1] = __lsx_vld(reinterpret_cast(src), 16); - b->chunks[2] = __lsx_vld(reinterpret_cast(src), 32); - b->chunks[3] = __lsx_vld(reinterpret_cast(src), 48); -} - -// The caller of this function is responsible to ensure that there are 128 bytes -// available from reading at src. The data is read into a block64 structure. -static inline void load_block(block64 *b, const char16_t *src) { - __m128i m1 = __lsx_vld(reinterpret_cast(src), 0); - __m128i m2 = __lsx_vld(reinterpret_cast(src), 16); - __m128i m3 = __lsx_vld(reinterpret_cast(src), 32); - __m128i m4 = __lsx_vld(reinterpret_cast(src), 48); - __m128i m5 = __lsx_vld(reinterpret_cast(src), 64); - __m128i m6 = __lsx_vld(reinterpret_cast(src), 80); - __m128i m7 = __lsx_vld(reinterpret_cast(src), 96); - __m128i m8 = __lsx_vld(reinterpret_cast(src), 112); - b->chunks[0] = __lsx_vssrlni_bu_h(m2, m1, 0); - b->chunks[1] = __lsx_vssrlni_bu_h(m4, m3, 0); - b->chunks[2] = __lsx_vssrlni_bu_h(m6, m5, 0); - b->chunks[3] = __lsx_vssrlni_bu_h(m8, m7, 0); -} - -static inline void base64_decode(char *out, __m128i str) { - __m128i t0 = __lsx_vor_v( - __lsx_vslli_w(str, 26), - __lsx_vslli_w(__lsx_vand_v(str, lsx_splat_u32(0x0000FF00)), 12)); - __m128i t1 = __lsx_vsrli_w(__lsx_vand_v(str, lsx_splat_u32(0x003F0000)), 2); - __m128i t2 = __lsx_vor_v(t0, t1); - __m128i t3 = __lsx_vor_v(t2, __lsx_vsrli_w(str, 16)); - const v16u8 pack_shuffle = {3, 2, 1, 7, 6, 5, 11, 10, - 9, 15, 14, 13, 0, 0, 0, 0}; - t3 = __lsx_vshuf_b(t3, t3, (__m128i)pack_shuffle); - - // Store the output: - // we only need 12. - __lsx_vstelm_d(t3, out, 0, 0); - __lsx_vstelm_w(t3, out + 8, 0, 2); -} -// decode 64 bytes and output 48 bytes -static inline void base64_decode_block(char *out, const char *src) { - base64_decode(out, __lsx_vld(reinterpret_cast(src), 0)); - base64_decode(out + 12, - __lsx_vld(reinterpret_cast(src), 16)); - base64_decode(out + 24, - __lsx_vld(reinterpret_cast(src), 32)); - base64_decode(out + 36, - __lsx_vld(reinterpret_cast(src), 48)); -} -static inline void base64_decode_block_safe(char *out, const char *src) { - base64_decode_block(out, src); -} -static inline void base64_decode_block(char *out, block64 *b) { - base64_decode(out, b->chunks[0]); - base64_decode(out + 12, b->chunks[1]); - base64_decode(out + 24, b->chunks[2]); - base64_decode(out + 36, b->chunks[3]); -} -static inline void base64_decode_block_safe(char *out, block64 *b) { - base64_decode_block(out, b); -} - -template -full_result -compress_decode_base64(char *dst, const char_type *src, size_t srclen, - base64_options options, - last_chunk_handling_options last_chunk_options) { - const uint8_t *to_base64 = base64_url ? tables::base64::to_base64_url_value - : tables::base64::to_base64_value; - size_t equallocation = - srclen; // location of the first padding character if any - // skip trailing spaces - while (srclen > 0 && scalar::base64::is_eight_byte(src[srclen - 1]) && - to_base64[uint8_t(src[srclen - 1])] == 64) { - srclen--; - } - size_t equalsigns = 0; - if (srclen > 0 && src[srclen - 1] == '=') { - equallocation = srclen - 1; - srclen--; - equalsigns = 1; - // skip trailing spaces - while (srclen > 0 && scalar::base64::is_eight_byte(src[srclen - 1]) && - to_base64[uint8_t(src[srclen - 1])] == 64) { - srclen--; - } - if (srclen > 0 && src[srclen - 1] == '=') { - equallocation = srclen - 1; - srclen--; - equalsigns = 2; - } - } - if (srclen == 0) { - if (!ignore_garbage && equalsigns > 0) { - if (last_chunk_options == last_chunk_handling_options::strict) { - return {BASE64_INPUT_REMAINDER, 0, 0}; - } else if (last_chunk_options == - last_chunk_handling_options::stop_before_partial) { - return {SUCCESS, 0, 0}; - } - return {INVALID_BASE64_CHARACTER, equallocation, 0}; - } - return {SUCCESS, 0, 0}; - } - const char_type *const srcinit = src; - const char *const dstinit = dst; - const char_type *const srcend = src + srclen; - - constexpr size_t block_size = 10; - char buffer[block_size * 64]; - char *bufferptr = buffer; - if (srclen >= 64) { - const char_type *const srcend64 = src + srclen - 64; - while (src <= srcend64) { - block64 b; - load_block(&b, src); - src += 64; - bool error = false; - uint64_t badcharmask = to_base64_mask(&b, &error); - if (badcharmask) { - if (error && !ignore_garbage) { - src -= 64; - while (src < srcend && scalar::base64::is_eight_byte(*src) && - to_base64[uint8_t(*src)] <= 64) { - src++; - } - if (src < srcend) { - // should never happen - } - return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), - size_t(dst - dstinit)}; - } - } - - if (badcharmask != 0) { - // optimization opportunity: check for simple masks like those made of - // continuous 1s followed by continuous 0s. And masks containing a - // single bad character. - bufferptr += compress_block(&b, badcharmask, bufferptr); - } else { - // optimization opportunity: if bufferptr == buffer and mask == 0, we - // can avoid the call to compress_block and decode directly. - copy_block(&b, bufferptr); - bufferptr += 64; - } - if (bufferptr >= (block_size - 1) * 64 + buffer) { - for (size_t i = 0; i < (block_size - 1); i++) { - base64_decode_block(dst, buffer + i * 64); - dst += 48; - } - std::memcpy(buffer, buffer + (block_size - 1) * 64, - 64); // 64 might be too much - bufferptr -= (block_size - 1) * 64; - } - } - } - char *buffer_start = buffer; - // Optimization note: if this is almost full, then it is worth our - // time, otherwise, we should just decode directly. - int last_block = (int)((bufferptr - buffer_start) % 64); - if (last_block != 0 && srcend - src + last_block >= 64) { - while ((bufferptr - buffer_start) % 64 != 0 && src < srcend) { - uint8_t val = to_base64[uint8_t(*src)]; - *bufferptr = char(val); - if ((!scalar::base64::is_eight_byte(*src) || val > 64) && - !ignore_garbage) { - return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), - size_t(dst - dstinit)}; - } - bufferptr += (val <= 63); - src++; - } - } - - for (; buffer_start + 64 <= bufferptr; buffer_start += 64) { - base64_decode_block(dst, buffer_start); - dst += 48; - } - if ((bufferptr - buffer_start) % 64 != 0) { - while (buffer_start + 4 < bufferptr) { - uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + - (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + - (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + - (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) - << 8; - triple = scalar::u32_swap_bytes(triple); - std::memcpy(dst, &triple, 4); - - dst += 3; - buffer_start += 4; - } - if (buffer_start + 4 <= bufferptr) { - uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + - (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + - (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + - (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) - << 8; - triple = scalar::u32_swap_bytes(triple); - std::memcpy(dst, &triple, 3); - - dst += 3; - buffer_start += 4; - } - // we may have 1, 2 or 3 bytes left and we need to decode them so let us - // backtrack - int leftover = int(bufferptr - buffer_start); - while (leftover > 0) { - if (!ignore_garbage) { - while (to_base64[uint8_t(*(src - 1))] == 64) { - src--; - } - } else { - while (to_base64[uint8_t(*(src - 1))] >= 64) { - src--; - } - } - src--; - leftover--; - } - } - if (src < srcend + equalsigns) { - full_result r = scalar::base64::base64_tail_decode( - dst, src, srcend - src, equalsigns, options, last_chunk_options); - r.input_count += size_t(src - srcinit); - if (r.error == error_code::INVALID_BASE64_CHARACTER || - r.error == error_code::BASE64_EXTRA_BITS) { - return r; - } else { - r.output_count += size_t(dst - dstinit); - } - if (last_chunk_options != stop_before_partial && - r.error == error_code::SUCCESS && equalsigns > 0 && !ignore_garbage) { - // additional checks - if ((r.output_count % 3 == 0) || - ((r.output_count % 3) + 1 + equalsigns != 4)) { - r.error = error_code::INVALID_BASE64_CHARACTER; - r.input_count = equallocation; - } - } - return r; - } - if (equalsigns > 0 && !ignore_garbage) { - if ((size_t(dst - dstinit) % 3 == 0) || - ((size_t(dst - dstinit) % 3) + 1 + equalsigns != 4)) { - return {INVALID_BASE64_CHARACTER, equallocation, size_t(dst - dstinit)}; - } - } - return {SUCCESS, srclen, size_t(dst - dstinit)}; -} -/* end file src/lsx/lsx_base64.cpp */ -#endif // SIMDUTF_FEATURE_BASE64 - -} // namespace -} // namespace lsx -} // namespace simdutf - -/* begin file src/generic/buf_block_reader.h */ -namespace simdutf { -namespace lsx { -namespace { - -// Walks through a buffer in block-sized increments, loading the last part with -// spaces -template struct buf_block_reader { -public: - simdutf_really_inline buf_block_reader(const uint8_t *_buf, size_t _len); - simdutf_really_inline size_t block_index(); - simdutf_really_inline bool has_full_block() const; - simdutf_really_inline const uint8_t *full_block() const; - /** - * Get the last block, padded with spaces. - * - * There will always be a last block, with at least 1 byte, unless len == 0 - * (in which case this function fills the buffer with spaces and returns 0. In - * particular, if len == STEP_SIZE there will be 0 full_blocks and 1 remainder - * block with STEP_SIZE bytes and no spaces for padding. - * - * @return the number of effective characters in the last block. - */ - simdutf_really_inline size_t get_remainder(uint8_t *dst) const; - simdutf_really_inline void advance(); - -private: - const uint8_t *buf; - const size_t len; - const size_t lenminusstep; - size_t idx; -}; - -// Routines to print masks and text for debugging bitmask operations -simdutf_unused static char *format_input_text_64(const uint8_t *text) { - static char *buf = - reinterpret_cast(malloc(sizeof(simd8x64) + 1)); - for (size_t i = 0; i < sizeof(simd8x64); i++) { - buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); - } - buf[sizeof(simd8x64)] = '\0'; - return buf; -} - -// Routines to print masks and text for debugging bitmask operations -simdutf_unused static char *format_input_text(const simd8x64 &in) { - static char *buf = - reinterpret_cast(malloc(sizeof(simd8x64) + 1)); - in.store(reinterpret_cast(buf)); - for (size_t i = 0; i < sizeof(simd8x64); i++) { - if (buf[i] < ' ') { - buf[i] = '_'; - } - } - buf[sizeof(simd8x64)] = '\0'; - return buf; -} - -simdutf_unused static char *format_mask(uint64_t mask) { - static char *buf = reinterpret_cast(malloc(64 + 1)); - for (size_t i = 0; i < 64; i++) { - buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; - } - buf[64] = '\0'; - return buf; -} - -template -simdutf_really_inline -buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) - : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, - idx{0} {} - -template -simdutf_really_inline size_t buf_block_reader::block_index() { - return idx; -} - -template -simdutf_really_inline bool buf_block_reader::has_full_block() const { - return idx < lenminusstep; -} - -template -simdutf_really_inline const uint8_t * -buf_block_reader::full_block() const { - return &buf[idx]; -} - -template -simdutf_really_inline size_t -buf_block_reader::get_remainder(uint8_t *dst) const { - if (len == idx) { - return 0; - } // memcpy(dst, null, 0) will trigger an error with some sanitizers - std::memset(dst, 0x20, - STEP_SIZE); // std::memset STEP_SIZE because it is more efficient - // to write out 8 or 16 bytes at once. - std::memcpy(dst, buf + idx, len - idx); - return len - idx; -} - -template -simdutf_really_inline void buf_block_reader::advance() { - idx += STEP_SIZE; -} - -} // unnamed namespace -} // namespace lsx -} // namespace simdutf -/* end file src/generic/buf_block_reader.h */ -#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING -/* begin file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ -namespace simdutf { -namespace lsx { -namespace { -namespace utf8_validation { - -using namespace simd; - -simdutf_really_inline simd8 -check_special_cases(const simd8 input, const simd8 prev1) { - // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) - // Bit 1 = Too Long (ASCII followed by continuation) - // Bit 2 = Overlong 3-byte - // Bit 4 = Surrogate - // Bit 5 = Overlong 2-byte - // Bit 7 = Two Continuations - constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ - // 11______ 11______ - constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ - constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ - constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ - constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ - constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ - constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ - constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; - // 11110101 1000____ - // 1111011_ 1000____ - // 11111___ 1000____ - constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ - - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // 0_______ ________ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - TOO_LONG, - // 10______ ________ - TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, - // 1100____ ________ - TOO_SHORT | OVERLONG_2, - // 1101____ ________ - TOO_SHORT, - // 1110____ ________ - TOO_SHORT | OVERLONG_3 | SURROGATE, - // 1111____ ________ - TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); - constexpr const uint8_t CARRY = - TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . - const simd8 byte_1_low = - (prev1 & 0x0F) - .lookup_16( - // ____0000 ________ - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - // ____0001 ________ - CARRY | OVERLONG_2, - // ____001_ ________ - CARRY, CARRY, - - // ____0100 ________ - CARRY | TOO_LARGE, - // ____0101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____011_ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - - // ____1___ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____1101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000); - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ________ 0_______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - TOO_SHORT, TOO_SHORT, - - // ________ 1000____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | - OVERLONG_4, - // ________ 1001____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - // ________ 101_____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - - // ________ 11______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); - return (byte_1_high & byte_1_low & byte_2_high); -} -simdutf_really_inline simd8 -check_multibyte_lengths(const simd8 input, - const simd8 prev_input, - const simd8 sc) { - simd8 prev2 = input.prev<2>(prev_input); - simd8 prev3 = input.prev<3>(prev_input); - simd8 must23 = - simd8(must_be_2_3_continuation(prev2, prev3)); - simd8 must23_80 = must23 & uint8_t(0x80); - return must23_80 ^ sc; -} - -// -// Return nonzero if there are incomplete multibyte characters at the end of the -// block: e.g. if there is a 4-byte character, but it is 3 bytes from the end. -// -simdutf_really_inline simd8 is_incomplete(const simd8 input) { - // If the previous input's last 3 bytes match this, they're too short (they - // ended at EOF): - // ... 1111____ 111_____ 11______ - static const uint8_t max_array[32] = {255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 0b11110000u - 1, - 0b11100000u - 1, - 0b11000000u - 1}; - const simd8 max_value( - &max_array[sizeof(max_array) - sizeof(simd8)]); - return input.gt_bits(max_value); -} - -struct utf8_checker { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; - // The last input we received - simd8 prev_input_block; - // Whether the last input we received was incomplete (used for ASCII fast - // path) - simd8 prev_incomplete; - - // - // Check whether the current bytes are valid UTF-8. - // - simdutf_really_inline void check_utf8_bytes(const simd8 input, - const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ - // lead bytes (2, 3, 4-byte leads become large positive numbers instead of - // small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - simd8 sc = check_special_cases(input, prev1); - this->error |= check_multibyte_lengths(input, prev_input, sc); - } - - // The only problem that can happen at EOF is that a multibyte character is - // too short or a byte value too large in the last bytes: check_special_cases - // only checks for bytes too large in the first of two bytes. - simdutf_really_inline void check_eof() { - // If the previous block had incomplete UTF-8 characters at the end, an - // ASCII block can't possibly finish them. - this->error |= this->prev_incomplete; - } - - simdutf_really_inline void check_next_input(const simd8x64 &input) { - if (simdutf_likely(is_ascii(input))) { - this->error |= this->prev_incomplete; - } else { - // you might think that a for-loop would work, but under Visual Studio, it - // is not good enough. - static_assert((simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - this->prev_incomplete = - is_incomplete(input.chunks[simd8x64::NUM_CHUNKS - 1]); - this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS - 1]; - } - } - - // do not forget to call check_eof! - simdutf_really_inline bool errors() const { - return this->error.any_bits_set_anywhere(); - } - -}; // struct utf8_checker -} // namespace utf8_validation - -using utf8_validation::utf8_checker; - -} // unnamed namespace -} // namespace lsx -} // namespace simdutf -/* end file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ -/* begin file src/generic/utf8_validation/utf8_validator.h */ -namespace simdutf { -namespace lsx { -namespace { -namespace utf8_validation { - -/** - * Validates that the string is actual UTF-8. - */ -template -bool generic_validate_utf8(const uint8_t *input, size_t length) { - checker c{}; - buf_block_reader<64> reader(input, length); - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - c.check_next_input(in); - reader.advance(); - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - c.check_next_input(in); - reader.advance(); - c.check_eof(); - return !c.errors(); -} - -bool generic_validate_utf8(const char *input, size_t length) { - return generic_validate_utf8( - reinterpret_cast(input), length); -} - -/** - * Validates that the string is actual UTF-8 and stops on errors. - */ -template -result generic_validate_utf8_with_errors(const uint8_t *input, size_t length) { - checker c{}; - buf_block_reader<64> reader(input, length); - size_t count{0}; - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - c.check_next_input(in); - if (c.errors()) { - if (count != 0) { - count--; - } // Sometimes the error is only detected in the next chunk - result res = scalar::utf8::rewind_and_validate_with_errors( - reinterpret_cast(input), - reinterpret_cast(input + count), length - count); - res.count += count; - return res; - } - reader.advance(); - count += 64; - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - c.check_next_input(in); - reader.advance(); - c.check_eof(); - if (c.errors()) { - if (count != 0) { - count--; - } // Sometimes the error is only detected in the next chunk - result res = scalar::utf8::rewind_and_validate_with_errors( - reinterpret_cast(input), - reinterpret_cast(input) + count, length - count); - res.count += count; - return res; - } else { - return result(error_code::SUCCESS, length); - } -} - -result generic_validate_utf8_with_errors(const char *input, size_t length) { - return generic_validate_utf8_with_errors( - reinterpret_cast(input), length); -} - -} // namespace utf8_validation -} // unnamed namespace -} // namespace lsx -} // namespace simdutf -/* end file src/generic/utf8_validation/utf8_validator.h */ -#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING -#if SIMDUTF_FEATURE_ASCII -/* begin file src/generic/ascii_validation.h */ -namespace simdutf { -namespace lsx { -namespace { -namespace ascii_validation { - -bool generic_validate_ascii(const char *input, size_t length) { - buf_block_reader<64> reader(reinterpret_cast(input), length); - uint8_t blocks[64]{}; - simd::simd8x64 running_or(blocks); - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - running_or |= in; - reader.advance(); - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - running_or |= in; - return running_or.is_ascii(); -} - -result generic_validate_ascii_with_errors(const char *input, size_t length) { - buf_block_reader<64> reader(reinterpret_cast(input), length); - size_t count{0}; - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - if (!in.is_ascii()) { - result res = scalar::ascii::validate_with_errors( - reinterpret_cast(input + count), length - count); - return result(res.error, count + res.count); - } - reader.advance(); - - count += 64; - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - if (!in.is_ascii()) { - result res = scalar::ascii::validate_with_errors( - reinterpret_cast(input + count), length - count); - return result(res.error, count + res.count); - } else { - return result(error_code::SUCCESS, length); - } -} - -} // namespace ascii_validation -} // unnamed namespace -} // namespace lsx -} // namespace simdutf -/* end file src/generic/ascii_validation.h */ -#endif // SIMDUTF_FEATURE_ASCII - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - // transcoding from UTF-8 to Latin 1 -/* begin file src/generic/utf8_to_latin1/utf8_to_latin1.h */ -namespace simdutf { -namespace lsx { -namespace { -namespace utf8_to_latin1 { -using namespace simd; - -simdutf_really_inline simd8 -check_special_cases(const simd8 input, const simd8 prev1) { - // For UTF-8 to Latin 1, we can allow any ASCII character, and any - // continuation byte, but the non-ASCII leading bytes must be 0b11000011 or - // 0b11000010 and nothing else. - // - // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) - // Bit 1 = Too Long (ASCII followed by continuation) - // Bit 2 = Overlong 3-byte - // Bit 4 = Surrogate - // Bit 5 = Overlong 2-byte - // Bit 7 = Two Continuations - constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ - // 11______ 11______ - constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ - constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ - constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ - constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ - constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ - constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ - constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; - // 11110101 1000____ - // 1111011_ 1000____ - // 11111___ 1000____ - constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ - constexpr const uint8_t FORBIDDEN = 0xff; - - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // 0_______ ________ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - TOO_LONG, - // 10______ ________ - TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, - // 1100____ ________ - TOO_SHORT | OVERLONG_2, - // 1101____ ________ - FORBIDDEN, - // 1110____ ________ - FORBIDDEN, - // 1111____ ________ - FORBIDDEN); - constexpr const uint8_t CARRY = - TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . - const simd8 byte_1_low = - (prev1 & 0x0F) - .lookup_16( - // ____0000 ________ - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - // ____0001 ________ - CARRY | OVERLONG_2, - // ____001_ ________ - CARRY, CARRY, - - // ____0100 ________ - FORBIDDEN, - // ____0101 ________ - FORBIDDEN, - // ____011_ ________ - FORBIDDEN, FORBIDDEN, - - // ____1___ ________ - FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN, - // ____1101 ________ - FORBIDDEN, FORBIDDEN, FORBIDDEN); - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ________ 0_______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - TOO_SHORT, TOO_SHORT, - - // ________ 1000____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | - OVERLONG_4, - // ________ 1001____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - // ________ 101_____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - - // ________ 11______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); - return (byte_1_high & byte_1_low & byte_2_high); -} - -struct validating_transcoder { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; - - validating_transcoder() : error(uint8_t(0)) {} - // - // Check whether the current bytes are valid UTF-8. - // - simdutf_really_inline void check_utf8_bytes(const simd8 input, - const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ - // lead bytes (2, 3, 4-byte leads become large positive numbers instead of - // small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - this->error |= check_special_cases(input, prev1); - } - - simdutf_really_inline size_t convert(const char *in, size_t size, - char *latin1_output) { - size_t pos = 0; - char *start{latin1_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 16 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 16; margin--) { - leading_byte += (int8_t(in[margin - 1]) > - -65); // twos complement of -65 is 1011 1111 ... - } - // If the input is long enough, then we have that margin-1 is the eight last - // leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store((int8_t *)latin1_output); - latin1_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - uint64_t utf8_continuation_mask = - input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in - // this case, we also have ASCII to account for. - if (utf8_continuation_mask & 1) { - return 0; // error - } - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_latin1( - in + pos, utf8_end_of_code_point_mask, latin1_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - return 0; - } - if (pos < size) { - size_t howmany = - scalar::utf8_to_latin1::convert(in + pos, size - pos, latin1_output); - if (howmany == 0) { - return 0; - } - latin1_output += howmany; - } - return latin1_output - start; - } - - simdutf_really_inline result convert_with_errors(const char *in, size_t size, - char *latin1_output) { - size_t pos = 0; - char *start{latin1_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 8 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > -65); - } - // If the input is long enough, then we have that margin-1 is the eight last - // leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store((int8_t *)latin1_output); - latin1_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - if (errors()) { - // rewind_and_convert_with_errors will seek a potential error from - // in+pos onward, with the ability to go back up to pos bytes, and - // read size-pos bytes forward. - result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( - pos, in + pos, size - pos, latin1_output); - res.count += pos; - return res; - } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_latin1( - in + pos, utf8_end_of_code_point_mask, latin1_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - // rewind_and_convert_with_errors will seek a potential error from in+pos - // onward, with the ability to go back up to pos bytes, and read size-pos - // bytes forward. - result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( - pos, in + pos, size - pos, latin1_output); - res.count += pos; - return res; - } - if (pos < size) { - // rewind_and_convert_with_errors will seek a potential error from in+pos - // onward, with the ability to go back up to pos bytes, and read size-pos - // bytes forward. - result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( - pos, in + pos, size - pos, latin1_output); - if (res.error) { // In case of error, we want the error position - res.count += pos; - return res; - } else { // In case of success, we want the number of word written - latin1_output += res.count; - } - } - return result(error_code::SUCCESS, latin1_output - start); - } - - simdutf_really_inline bool errors() const { - return this->error.any_bits_set_anywhere(); - } - -}; // struct utf8_checker -} // namespace utf8_to_latin1 -} // unnamed namespace -} // namespace lsx -} // namespace simdutf -/* end file src/generic/utf8_to_latin1/utf8_to_latin1.h */ -/* begin file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ -namespace simdutf { -namespace lsx { -namespace { -namespace utf8_to_latin1 { -using namespace simd; - -simdutf_really_inline size_t convert_valid(const char *in, size_t size, - char *latin1_output) { - size_t pos = 0; - char *start{latin1_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the last - // 16 bytes, and if the data is valid, then it is entirely safe because 16 - // UTF-8 bytes generate much more than 8 bytes. However, you cannot generally - // assume that you have valid UTF-8 input, so we are going to go back from the - // end counting 8 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > - -65); // twos complement of -65 is 1011 1111 ... - } - // If the input is long enough, then we have that margin-1 is the eight last - // leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store((int8_t *)latin1_output); - latin1_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, it - // is not good enough. - uint64_t utf8_continuation_mask = - input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in - // this case, we also have ASCII to account for. - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_latin1( - in + pos, utf8_end_of_code_point_mask, latin1_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (pos < size) { - size_t howmany = scalar::utf8_to_latin1::convert_valid(in + pos, size - pos, - latin1_output); - latin1_output += howmany; - } - return latin1_output - start; -} - -} // namespace utf8_to_latin1 -} // namespace -} // namespace lsx -} // namespace simdutf - // namespace simdutf -/* end file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - // transcoding from UTF-8 to UTF-16 -/* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ -namespace simdutf { -namespace lsx { -namespace { -namespace utf8_to_utf16 { - -using namespace simd; - -template -simdutf_warn_unused size_t convert_valid(const char *input, size_t size, - char16_t *utf16_output) noexcept { - // The implementation is not specific to haswell and should be moved to the - // generic directory. - size_t pos = 0; - char16_t *start{utf16_output}; - const size_t safety_margin = 16; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - // this loop could be unrolled further. For example, we could process the - // mask far more than 64 bytes. - simd8x64 in(reinterpret_cast(input + pos)); - if (in.is_ascii()) { - in.store_ascii_as_utf16(utf16_output); - utf16_output += 64; - pos += 64; - } else { - // Slow path. We hope that the compiler will recognize that this is a slow - // path. Anything that is not a continuation mask is a 'leading byte', - // that is, the start of a new code point. - uint64_t utf8_continuation_mask = in.lt(-65 + 1); - // -65 is 0b10111111 in two-complement's, so largest possible continuation - // byte - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - // The *start* of code points is not so useful, rather, we want the *end* - // of code points. - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times when using solely - // the slow/regular path, and at least four times if there are fast paths. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - // - // Thus we may allow convert_masked_utf8_to_utf16 to process - // more bytes at a time under a fast-path mode where 16 bytes - // are consumed at once (e.g., when encountering ASCII). - size_t consumed = convert_masked_utf8_to_utf16( - input + pos, utf8_end_of_code_point_mask, utf16_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - utf16_output += scalar::utf8_to_utf16::convert_valid( - input + pos, size - pos, utf16_output); - return utf16_output - start; -} - -} // namespace utf8_to_utf16 -} // unnamed namespace -} // namespace lsx -} // namespace simdutf -/* end file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ -/* begin file src/generic/utf8_to_utf16/utf8_to_utf16.h */ -namespace simdutf { -namespace lsx { -namespace { -namespace utf8_to_utf16 { -using namespace simd; - -simdutf_really_inline simd8 -check_special_cases(const simd8 input, const simd8 prev1) { - // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) - // Bit 1 = Too Long (ASCII followed by continuation) - // Bit 2 = Overlong 3-byte - // Bit 4 = Surrogate - // Bit 5 = Overlong 2-byte - // Bit 7 = Two Continuations - constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ - // 11______ 11______ - constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ - constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ - constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ - constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ - constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ - constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ - constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; - // 11110101 1000____ - // 1111011_ 1000____ - // 11111___ 1000____ - constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ - - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // 0_______ ________ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - TOO_LONG, - // 10______ ________ - TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, - // 1100____ ________ - TOO_SHORT | OVERLONG_2, - // 1101____ ________ - TOO_SHORT, - // 1110____ ________ - TOO_SHORT | OVERLONG_3 | SURROGATE, - // 1111____ ________ - TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); - constexpr const uint8_t CARRY = - TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . - const simd8 byte_1_low = - (prev1 & 0x0F) - .lookup_16( - // ____0000 ________ - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - // ____0001 ________ - CARRY | OVERLONG_2, - // ____001_ ________ - CARRY, CARRY, - - // ____0100 ________ - CARRY | TOO_LARGE, - // ____0101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____011_ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - - // ____1___ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____1101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000); - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ________ 0_______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - TOO_SHORT, TOO_SHORT, - - // ________ 1000____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | - OVERLONG_4, - // ________ 1001____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - // ________ 101_____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - - // ________ 11______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); - return (byte_1_high & byte_1_low & byte_2_high); -} -simdutf_really_inline simd8 -check_multibyte_lengths(const simd8 input, - const simd8 prev_input, - const simd8 sc) { - simd8 prev2 = input.prev<2>(prev_input); - simd8 prev3 = input.prev<3>(prev_input); - simd8 must23 = - simd8(must_be_2_3_continuation(prev2, prev3)); - simd8 must23_80 = must23 & uint8_t(0x80); - return must23_80 ^ sc; -} - -struct validating_transcoder { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; - - validating_transcoder() : error(uint8_t(0)) {} - // - // Check whether the current bytes are valid UTF-8. - // - simdutf_really_inline void check_utf8_bytes(const simd8 input, - const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ - // lead bytes (2, 3, 4-byte leads become large positive numbers instead of - // small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - simd8 sc = check_special_cases(input, prev1); - this->error |= check_multibyte_lengths(input, prev_input, sc); - } - - template - simdutf_really_inline size_t convert(const char *in, size_t size, - char16_t *utf16_output) { - size_t pos = 0; - char16_t *start{utf16_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 8 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > -65); - } - // If the input is long enough, then we have that margin-1 is the eight last - // leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store_ascii_as_utf16(utf16_output); - utf16_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - if (utf8_continuation_mask & 1) { - return 0; // error - } - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_utf16( - in + pos, utf8_end_of_code_point_mask, utf16_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - return 0; - } - if (pos < size) { - size_t howmany = scalar::utf8_to_utf16::convert( - in + pos, size - pos, utf16_output); - if (howmany == 0) { - return 0; - } - utf16_output += howmany; - } - return utf16_output - start; - } - - template - simdutf_really_inline result convert_with_errors(const char *in, size_t size, - char16_t *utf16_output) { - size_t pos = 0; - char16_t *start{utf16_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 8 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > -65); - } - // If the input is long enough, then we have that margin-1 is the eight last - // leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store_ascii_as_utf16(utf16_output); - utf16_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - if (errors() || (utf8_continuation_mask & 1)) { - // rewind_and_convert_with_errors will seek a potential error from - // in+pos onward, with the ability to go back up to pos bytes, and - // read size-pos bytes forward. - result res = - scalar::utf8_to_utf16::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf16_output); - res.count += pos; - return res; - } - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_utf16( - in + pos, utf8_end_of_code_point_mask, utf16_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - // rewind_and_convert_with_errors will seek a potential error from in+pos - // onward, with the ability to go back up to pos bytes, and read size-pos - // bytes forward. - result res = - scalar::utf8_to_utf16::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf16_output); - res.count += pos; - return res; - } - if (pos < size) { - // rewind_and_convert_with_errors will seek a potential error from in+pos - // onward, with the ability to go back up to pos bytes, and read size-pos - // bytes forward. - result res = - scalar::utf8_to_utf16::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf16_output); - if (res.error) { // In case of error, we want the error position - res.count += pos; - return res; - } else { // In case of success, we want the number of word written - utf16_output += res.count; - } - } - return result(error_code::SUCCESS, utf16_output - start); - } - - simdutf_really_inline bool errors() const { - return this->error.any_bits_set_anywhere(); - } - -}; // struct utf8_checker -} // namespace utf8_to_utf16 -} // unnamed namespace -} // namespace lsx -} // namespace simdutf -/* end file src/generic/utf8_to_utf16/utf8_to_utf16.h */ -/* begin file src/generic/utf8/utf16_length_from_utf8_bytemask.h */ -namespace simdutf { -namespace lsx { -namespace { -namespace utf8 { - -using namespace simd; - -simdutf_really_inline size_t utf16_length_from_utf8_bytemask(const char *in, - size_t size) { - using vector_i8 = simd8; - using vector_u8 = simd8; - using vector_u64 = simd64; - - constexpr size_t N = vector_i8::SIZE; - constexpr size_t max_iterations = 255 / 2; - - auto counters = vector_u64::zero(); - auto local = vector_u8::zero(); - - size_t iterations = 0; - size_t pos = 0; - size_t count = 0; - for (; pos + N <= size; pos += N) { - const auto input = - vector_i8::load(reinterpret_cast(in + pos)); - - const auto continuation = input > int8_t(-65); - const auto utf_4bytes = vector_u8(input.value) >= uint8_t(240); - - local -= vector_u8(continuation); - local -= vector_u8(utf_4bytes); - - iterations += 1; - if (iterations == max_iterations) { - counters += sum_8bytes(local); - local = vector_u8::zero(); - iterations = 0; - } - } - - if (iterations > 0) { - count += local.sum_bytes(); - } - - count += counters.sum(); - - return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos); -} - -} // namespace utf8 -} // unnamed namespace -} // namespace lsx -} // namespace simdutf -/* end file src/generic/utf8/utf16_length_from_utf8_bytemask.h */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - // transcoding from UTF-8 to UTF-32 -/* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ -namespace simdutf { -namespace lsx { -namespace { -namespace utf8_to_utf32 { - -using namespace simd; - -simdutf_warn_unused size_t convert_valid(const char *input, size_t size, - char32_t *utf32_output) noexcept { - size_t pos = 0; - char32_t *start{utf32_output}; - const size_t safety_margin = 16; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 in(reinterpret_cast(input + pos)); - if (in.is_ascii()) { - in.store_ascii_as_utf32(utf32_output); - utf32_output += 64; - pos += 64; - } else { - // -65 is 0b10111111 in two-complement's, so largest possible continuation - // byte - uint64_t utf8_continuation_mask = in.lt(-65 + 1); - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - size_t max_starting_point = (pos + 64) - 12; - while (pos < max_starting_point) { - size_t consumed = convert_masked_utf8_to_utf32( - input + pos, utf8_end_of_code_point_mask, utf32_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - } - } - utf32_output += scalar::utf8_to_utf32::convert_valid(input + pos, size - pos, - utf32_output); - return utf32_output - start; -} - -} // namespace utf8_to_utf32 -} // unnamed namespace -} // namespace lsx -} // namespace simdutf -/* end file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ -/* begin file src/generic/utf8_to_utf32/utf8_to_utf32.h */ -namespace simdutf { -namespace lsx { -namespace { -namespace utf8_to_utf32 { -using namespace simd; - -simdutf_really_inline simd8 -check_special_cases(const simd8 input, const simd8 prev1) { - // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) - // Bit 1 = Too Long (ASCII followed by continuation) - // Bit 2 = Overlong 3-byte - // Bit 4 = Surrogate - // Bit 5 = Overlong 2-byte - // Bit 7 = Two Continuations - constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ - // 11______ 11______ - constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ - constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ - constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ - constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ - constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ - constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ - constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; - // 11110101 1000____ - // 1111011_ 1000____ - // 11111___ 1000____ - constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ - - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // 0_______ ________ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - TOO_LONG, - // 10______ ________ - TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, - // 1100____ ________ - TOO_SHORT | OVERLONG_2, - // 1101____ ________ - TOO_SHORT, - // 1110____ ________ - TOO_SHORT | OVERLONG_3 | SURROGATE, - // 1111____ ________ - TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); - constexpr const uint8_t CARRY = - TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . - const simd8 byte_1_low = - (prev1 & 0x0F) - .lookup_16( - // ____0000 ________ - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - // ____0001 ________ - CARRY | OVERLONG_2, - // ____001_ ________ - CARRY, CARRY, - - // ____0100 ________ - CARRY | TOO_LARGE, - // ____0101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____011_ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - - // ____1___ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____1101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000); - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ________ 0_______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - TOO_SHORT, TOO_SHORT, - - // ________ 1000____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | - OVERLONG_4, - // ________ 1001____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - // ________ 101_____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - - // ________ 11______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); - return (byte_1_high & byte_1_low & byte_2_high); -} -simdutf_really_inline simd8 -check_multibyte_lengths(const simd8 input, - const simd8 prev_input, - const simd8 sc) { - simd8 prev2 = input.prev<2>(prev_input); - simd8 prev3 = input.prev<3>(prev_input); - simd8 must23 = - simd8(must_be_2_3_continuation(prev2, prev3)); - simd8 must23_80 = must23 & uint8_t(0x80); - return must23_80 ^ sc; -} - -struct validating_transcoder { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; - - validating_transcoder() : error(uint8_t(0)) {} - // - // Check whether the current bytes are valid UTF-8. - // - simdutf_really_inline void check_utf8_bytes(const simd8 input, - const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ - // lead bytes (2, 3, 4-byte leads become large positive numbers instead of - // small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - simd8 sc = check_special_cases(input, prev1); - this->error |= check_multibyte_lengths(input, prev_input, sc); - } - - simdutf_really_inline size_t convert(const char *in, size_t size, - char32_t *utf32_output) { - size_t pos = 0; - char32_t *start{utf32_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 words when calling convert_masked_utf8_to_utf32. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 16 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > -65); - } - // If the input is long enough, then we have that margin-1 is the fourth - // last leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store_ascii_as_utf32(utf32_output); - utf32_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - if (utf8_continuation_mask & 1) { - return 0; // we have an error - } - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_utf32( - in + pos, utf8_end_of_code_point_mask, utf32_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - return 0; - } - if (pos < size) { - size_t howmany = - scalar::utf8_to_utf32::convert(in + pos, size - pos, utf32_output); - if (howmany == 0) { - return 0; - } - utf32_output += howmany; - } - return utf32_output - start; - } - - simdutf_really_inline result convert_with_errors(const char *in, size_t size, - char32_t *utf32_output) { - size_t pos = 0; - char32_t *start{utf32_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 8 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > -65); - } - // If the input is long enough, then we have that margin-1 is the fourth - // last leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store_ascii_as_utf32(utf32_output); - utf32_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - if (errors() || (utf8_continuation_mask & 1)) { - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf32_output); - res.count += pos; - return res; - } - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_utf32( - in + pos, utf8_end_of_code_point_mask, utf32_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf32_output); - res.count += pos; - return res; - } - if (pos < size) { - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf32_output); - if (res.error) { // In case of error, we want the error position - res.count += pos; - return res; - } else { // In case of success, we want the number of word written - utf32_output += res.count; - } - } - return result(error_code::SUCCESS, utf32_output - start); - } - - simdutf_really_inline bool errors() const { - return this->error.any_bits_set_anywhere(); - } - -}; // struct utf8_checker -} // namespace utf8_to_utf32 -} // unnamed namespace -} // namespace lsx -} // namespace simdutf -/* end file src/generic/utf8_to_utf32/utf8_to_utf32.h */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 -/* begin file src/generic/utf8.h */ -namespace simdutf { -namespace lsx { -namespace { -namespace utf8 { - -using namespace simd; - -simdutf_really_inline size_t count_code_points(const char *in, size_t size) { - size_t pos = 0; - size_t count = 0; - for (; pos + 64 <= size; pos += 64) { - simd8x64 input(reinterpret_cast(in + pos)); - uint64_t utf8_continuation_mask = input.gt(-65); - count += count_ones(utf8_continuation_mask); - } - return count + scalar::utf8::count_code_points(in + pos, size - pos); -} - -#ifdef SIMDUTF_SIMD_HAS_BYTEMASK -simdutf_really_inline size_t count_code_points_bytemask(const char *in, - size_t size) { - using vector_i8 = simd8; - using vector_u8 = simd8; - using vector_u64 = simd64; - - constexpr size_t N = vector_i8::SIZE; - constexpr size_t max_iterations = 255 / 4; - - size_t pos = 0; - size_t count = 0; - - auto counters = vector_u64::zero(); - auto local = vector_u8::zero(); - size_t iterations = 0; - for (; pos + 4 * N <= size; pos += 4 * N) { - const auto input0 = - simd8::load(reinterpret_cast(in + pos + 0 * N)); - const auto input1 = - simd8::load(reinterpret_cast(in + pos + 1 * N)); - const auto input2 = - simd8::load(reinterpret_cast(in + pos + 2 * N)); - const auto input3 = - simd8::load(reinterpret_cast(in + pos + 3 * N)); - const auto mask0 = input0 > int8_t(-65); - const auto mask1 = input1 > int8_t(-65); - const auto mask2 = input2 > int8_t(-65); - const auto mask3 = input3 > int8_t(-65); - - local -= vector_u8(mask0); - local -= vector_u8(mask1); - local -= vector_u8(mask2); - local -= vector_u8(mask3); - - iterations += 1; - if (iterations == max_iterations) { - counters += sum_8bytes(local); - local = vector_u8::zero(); - iterations = 0; - } - } - - if (iterations > 0) { - count += local.sum_bytes(); - } - - count += counters.sum(); - - return count + scalar::utf8::count_code_points(in + pos, size - pos); -} -#endif // SIMDUTF_SIMD_HAS_BYTEMASK - -simdutf_really_inline size_t utf16_length_from_utf8(const char *in, - size_t size) { - size_t pos = 0; - size_t count = 0; - // This algorithm could no doubt be improved! - for (; pos + 64 <= size; pos += 64) { - simd8x64 input(reinterpret_cast(in + pos)); - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - // We count one word for anything that is not a continuation (so - // leading bytes). - count += 64 - count_ones(utf8_continuation_mask); - int64_t utf8_4byte = input.gteq_unsigned(240); - count += count_ones(utf8_4byte); - } - return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos); -} - -} // namespace utf8 -} // unnamed namespace -} // namespace lsx -} // namespace simdutf -/* end file src/generic/utf8.h */ -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_UTF16 -/* begin file src/generic/utf16/count_code_points_bytemask.h */ -namespace simdutf { -namespace lsx { -namespace { -namespace utf16 { - -using namespace simd; - -template -simdutf_really_inline size_t count_code_points(const char16_t *in, - size_t size) { - using vector_u16 = simd16; - constexpr size_t N = vector_u16::ELEMENTS; - - size_t pos = 0; - size_t count = 0; - - constexpr size_t max_itertions = 65535; - const auto one = vector_u16::splat(1); - const auto zero = vector_u16::zero(); - - size_t itertion = 0; - - auto counters = zero; - for (; pos < size / N * N; pos += N) { - auto input = vector_u16::load(in + pos); - if (!match_system(big_endian)) { - input = input.swap_bytes(); - } - - const auto t0 = input & uint16_t(0xfc00); - const auto t1 = t0 ^ uint16_t(0xdc00); - - // t2[0] == 1 iff input[0] outside range 0xdc00..dfff (the word is not a - // high surrogate) - const auto t2 = min(t1, one); - - counters += t2; - - itertion += 1; - if (itertion == max_itertions) { - count += counters.sum(); - counters = zero; - itertion = 0; - } - } - - if (itertion > 0) { - count += counters.sum(); - } - - return count + - scalar::utf16::count_code_points(in + pos, size - pos); -} - -} // namespace utf16 -} // unnamed namespace -} // namespace lsx -} // namespace simdutf -/* end file src/generic/utf16/count_code_points_bytemask.h */ -/* begin file src/generic/utf16/change_endianness.h */ -namespace simdutf { -namespace lsx { -namespace { -namespace utf16 { - -simdutf_really_inline void -change_endianness_utf16(const char16_t *in, size_t size, char16_t *output) { - size_t pos = 0; - - while (pos < size / 32 * 32) { - simd16x32 input(reinterpret_cast(in + pos)); - input.swap_bytes(); - input.store(reinterpret_cast(output)); - pos += 32; - output += 32; - } - - scalar::utf16::change_endianness_utf16(in + pos, size - pos, output); -} - -} // namespace utf16 -} // unnamed namespace -} // namespace lsx -} // namespace simdutf -/* end file src/generic/utf16/change_endianness.h */ -/* begin file src/generic/utf16/utf8_length_from_utf16_bytemask.h */ -namespace simdutf { -namespace lsx { -namespace { -namespace utf16 { - -using namespace simd; - -template -simdutf_really_inline size_t utf8_length_from_utf16_bytemask(const char16_t *in, - size_t size) { - size_t pos = 0; - - using vector_u16 = simd16; - constexpr size_t N = vector_u16::ELEMENTS; - - const auto one = vector_u16::splat(1); - - auto v_count = vector_u16::zero(); - - // each char16 yields at least one byte - size_t count = size / N * N; - - // in a single iteration the increment is 0, 1 or 2, despite we have - // three additions - constexpr size_t max_iterations = 65535 / 2; - size_t iteration = max_iterations; - - for (; pos < size / N * N; pos += N) { - auto input = vector_u16::load(reinterpret_cast(in + pos)); - if (!match_system(big_endian)) { - input = input.swap_bytes(); - } - // 0xd800 .. 0xdbff - low surrogate - // 0xdc00 .. 0xdfff - high surrogate - const auto is_surrogate = ((input & uint16_t(0xf800)) == uint16_t(0xd800)); - - // c0 - chars that yield 2- or 3-byte UTF-8 codes - const auto c0 = min(input & uint16_t(0xff80), one); - - // c1 - chars that yield 3-byte UTF-8 codes (including surrogates) - const auto c1 = min(input & uint16_t(0xf800), one); - - /* - Explanation how the counting works. - - In the case of a non-surrogate character we count: - * always 1 -- see how `count` is initialized above; - * c0 = 1 if the current char yields 2 or 3 bytes; - * c1 = 1 if the current char yields 3 bytes. - - Thus, we always have correct count for the current char: - from 1, 2 or 3 bytes. - - A trickier part is how we count surrogate pairs. Whether - we encounter a surrogate (low or high), we count it as - 3 chars and then minus 1 (`is_surrogate` is -1 or 0). - Each surrogate char yields 2. A surrogate pair, that - is a low surrogate followed by a high one, yields - the expected 4 bytes. - - It also correctly handles cases when low surrogate is - processed by the this loop, but high surrogate is counted - by the scalar procedure. The scalar procedure uses exactly - the described approach, thanks to that for valid UTF-16 - strings it always count correctly. - */ - v_count += c0; - v_count += c1; - v_count += vector_u16(is_surrogate); - - iteration -= 1; - if (iteration == 0) { - count += v_count.sum(); - v_count = vector_u16::zero(); - iteration = max_iterations; - } - } - - if (iteration > 0) { - count += v_count.sum(); - } - - return count + scalar::utf16::utf8_length_from_utf16(in + pos, - size - pos); -} - -} // namespace utf16 -} // unnamed namespace -} // namespace lsx -} // namespace simdutf -/* end file src/generic/utf16/utf8_length_from_utf16_bytemask.h */ -/* begin file src/generic/utf16/utf32_length_from_utf16.h */ -namespace simdutf { -namespace lsx { -namespace { -namespace utf16 { - -template -simdutf_really_inline size_t utf32_length_from_utf16(const char16_t *in, - size_t size) { - return count_code_points(in, size); -} - -} // namespace utf16 -} // unnamed namespace -} // namespace lsx -} // namespace simdutf -/* end file src/generic/utf16/utf32_length_from_utf16.h */ -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING -/* begin file src/generic/validate_utf16.h */ -namespace simdutf { -namespace lsx { -namespace { -namespace utf16 { -/* - UTF-16 validation - -------------------------------------------------- - - In UTF-16 code units in range 0xD800 to 0xDFFF have special meaning. - - In a vectorized algorithm we want to examine the most significant - nibble in order to select a fast path. If none of highest nibbles - are 0xD (13), than we are sure that UTF-16 chunk in a vector - register is valid. - - Let us analyze what we need to check if the nibble is 0xD. The - value of the preceding nibble determines what we have: - - 0xd000 .. 0xd7ff - a valid word - 0xd800 .. 0xdbff - low surrogate - 0xdc00 .. 0xdfff - high surrogate - - Other constraints we have to consider: - - there must not be two consecutive low surrogates (0xd800 .. 0xdbff) - - there must not be two consecutive high surrogates (0xdc00 .. 0xdfff) - - there must not be sole low surrogate nor high surrogate - - We are going to build three bitmasks based on the 3rd nibble: - - V = valid word, - - L = low surrogate (0xd800 .. 0xdbff) - - H = high surrogate (0xdc00 .. 0xdfff) - - 0 1 2 3 4 5 6 7 <--- word index - [ V | L | H | L | H | V | V | L ] - 1 0 0 0 0 1 1 0 - V = valid masks - 0 1 0 1 0 0 0 1 - L = low surrogate - 0 0 1 0 1 0 0 0 - H high surrogate - - - 1 0 0 0 0 1 1 0 V = valid masks - 0 1 0 1 0 0 0 0 a = L & (H >> 1) - 0 0 1 0 1 0 0 0 b = a << 1 - 1 1 1 1 1 1 1 0 c = V | a | b - ^ - the last bit can be zero, we just consume 7 - code units and recheck this word in the next iteration -*/ -template -const result validate_utf16_with_errors(const char16_t *input, size_t size) { - if (simdutf_unlikely(size == 0)) { - return result(error_code::SUCCESS, 0); - } - - const char16_t *start = input; - const char16_t *end = input + size; - - const auto v_d8 = simd8::splat(0xd8); - const auto v_f8 = simd8::splat(0xf8); - const auto v_fc = simd8::splat(0xfc); - const auto v_dc = simd8::splat(0xdc); - - while (input + simd16::SIZE * 2 < end) { - // 0. Load data: since the validation takes into account only higher - // byte of each word, we compress the two vectors into one which - // consists only the higher bytes. - auto in0 = simd16(input); - auto in1 = - simd16(input + simd16::SIZE / sizeof(char16_t)); - - // Function `utf16_gather_high_bytes` consumes two vectors of UTF-16 - // and yields a single vector having only higher bytes of characters. - const auto in = utf16_gather_high_bytes(in0, in1); - - // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy). - const auto surrogates_wordmask = (in & v_f8) == v_d8; - const uint16_t surrogates_bitmask = - static_cast(surrogates_wordmask.to_bitmask()); - if (surrogates_bitmask == 0x0000) { - input += 16; - } else { - // 2. We have some surrogates that have to be distinguished: - // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF) - // - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF) - // - // Fact: high surrogate has 11th bit set (3rd bit in the higher byte) - - // V - non-surrogate code units - // V = not surrogates_wordmask - const uint16_t V = static_cast(~surrogates_bitmask); - - // H - word-mask for high surrogates: the six highest bits are 0b1101'11 - const auto vH = (in & v_fc) == v_dc; - const uint16_t H = static_cast(vH.to_bitmask()); - - // L - word mask for low surrogates - // L = not H and surrogates_wordmask - const uint16_t L = static_cast(~H & surrogates_bitmask); - - const uint16_t a = static_cast( - L & (H >> 1)); // A low surrogate must be followed by high one. - // (A low surrogate placed in the 7th register's word - // is an exception we handle.) - const uint16_t b = static_cast( - a << 1); // Just mark that the opinput - startite fact is hold, - // thanks to that we have only two masks for valid case. - const uint16_t c = static_cast( - V | a | b); // Combine all the masks into the final one. - - if (c == 0xffff) { - // The whole input register contains valid UTF-16, i.e., - // either single code units or proper surrogate pairs. - input += 16; - } else if (c == 0x7fff) { - // The 15 lower code units of the input register contains valid UTF-16. - // The 15th word may be either a low or high surrogate. It the next - // iteration we 1) check if the low surrogate is followed by a high - // one, 2) reject sole high surrogate. - input += 15; - } else { - return result(error_code::SURROGATE, input - start); - } - } - } - - return result(error_code::SUCCESS, input - start); -} - -} // namespace utf16 -} // unnamed namespace -} // namespace lsx -} // namespace simdutf -/* end file src/generic/validate_utf16.h */ -#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF32 -/* begin file src/generic/utf32.h */ -#include - -namespace simdutf { -namespace lsx { -namespace { -namespace utf32 { - -template T min(T a, T b) { return a <= b ? a : b; } - -simdutf_really_inline size_t utf8_length_from_utf32(const char32_t *input, - size_t length) { - using vector_u32 = simd32; - - const char32_t *start = input; - - // we add up to three ones in a single iteration (see the vectorized loop in - // section #2 below) - const size_t max_increment = 3; - - const size_t N = vector_u32::ELEMENTS; - -#if SIMDUTF_SIMD_HAS_UNSIGNED_CMP - const auto v_0000007f = vector_u32::splat(0x0000007f); - const auto v_000007ff = vector_u32::splat(0x000007ff); - const auto v_0000ffff = vector_u32::splat(0x0000ffff); -#else - const auto v_ffffff80 = vector_u32::splat(0xffffff80); - const auto v_fffff800 = vector_u32::splat(0xfffff800); - const auto v_ffff0000 = vector_u32::splat(0xffff0000); - const auto one = vector_u32::splat(1); -#endif // SIMDUTF_SIMD_HAS_UNSIGNED_CMP - - size_t counter = 0; - - // 1. vectorized loop unrolled 4 times - { - // we use vector of uint32 counters, this is why this limit is used - const size_t max_iterations = - std::numeric_limits::max() / (max_increment * 4); - size_t blocks = length / (N * 4); - length -= blocks * (N * 4); - while (blocks != 0) { - const size_t iterations = min(blocks, max_iterations); - blocks -= iterations; - - simd32 acc = vector_u32::zero(); - for (size_t i = 0; i < iterations; i++) { - const auto in0 = vector_u32(input + 0 * N); - const auto in1 = vector_u32(input + 1 * N); - const auto in2 = vector_u32(input + 2 * N); - const auto in3 = vector_u32(input + 3 * N); - -#if SIMDUTF_SIMD_HAS_UNSIGNED_CMP - acc -= as_vector_u32(in0 > v_0000007f); - acc -= as_vector_u32(in1 > v_0000007f); - acc -= as_vector_u32(in2 > v_0000007f); - acc -= as_vector_u32(in3 > v_0000007f); - - acc -= as_vector_u32(in0 > v_000007ff); - acc -= as_vector_u32(in1 > v_000007ff); - acc -= as_vector_u32(in2 > v_000007ff); - acc -= as_vector_u32(in3 > v_000007ff); - - acc -= as_vector_u32(in0 > v_0000ffff); - acc -= as_vector_u32(in1 > v_0000ffff); - acc -= as_vector_u32(in2 > v_0000ffff); - acc -= as_vector_u32(in3 > v_0000ffff); -#else - acc += min(one, in0 & v_ffffff80); - acc += min(one, in1 & v_ffffff80); - acc += min(one, in2 & v_ffffff80); - acc += min(one, in3 & v_ffffff80); - - acc += min(one, in0 & v_fffff800); - acc += min(one, in1 & v_fffff800); - acc += min(one, in2 & v_fffff800); - acc += min(one, in3 & v_fffff800); - - acc += min(one, in0 & v_ffff0000); - acc += min(one, in1 & v_ffff0000); - acc += min(one, in2 & v_ffff0000); - acc += min(one, in3 & v_ffff0000); -#endif // SIMDUTF_SIMD_HAS_UNSIGNED_CMP - - input += 4 * N; - } - - counter += acc.sum(); - } - } - - // 2. vectorized loop for tail - { - const size_t max_iterations = - std::numeric_limits::max() / max_increment; - size_t blocks = length / N; - length -= blocks * N; - while (blocks != 0) { - const size_t iterations = min(blocks, max_iterations); - blocks -= iterations; - - auto acc = vector_u32::zero(); - for (size_t i = 0; i < iterations; i++) { - const auto in = vector_u32(input); - -#if SIMDUTF_SIMD_HAS_UNSIGNED_CMP - acc -= as_vector_u32(in > v_0000007f); - acc -= as_vector_u32(in > v_000007ff); - acc -= as_vector_u32(in > v_0000ffff); -#else - acc += min(one, in & v_ffffff80); - acc += min(one, in & v_fffff800); - acc += min(one, in & v_ffff0000); -#endif // SIMDUTF_SIMD_HAS_UNSIGNED_CMP - - input += N; - } - - counter += acc.sum(); - } - } - - const size_t consumed = input - start; - if (consumed != 0) { - // We don't count 0th bytes in the vectorized loops above, this - // is why we need to count them in the end. - counter += consumed; - } - - return counter + scalar::utf32::utf8_length_from_utf32(input, length); -} - -} // namespace utf32 -} // unnamed namespace -} // namespace lsx -} // namespace simdutf -/* end file src/generic/utf32.h */ -#endif // SIMDUTF_FEATURE_UTF32 - -// -// Implementation-specific overrides -// -namespace simdutf { -namespace lsx { - -#if SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_warn_unused int -implementation::detect_encodings(const char *input, - size_t length) const noexcept { - // If there is a BOM, then we trust it. - auto bom_encoding = simdutf::BOM::check_bom(input, length); - // todo: reimplement as a one-pass algorithm. - if (bom_encoding != encoding_type::unspecified) { - return bom_encoding; - } - int out = 0; - if (validate_utf8(input, length)) { - out |= encoding_type::UTF8; - } - if ((length % 2) == 0) { - if (validate_utf16le(reinterpret_cast(input), - length / 2)) { - out |= encoding_type::UTF16_LE; - } - } - if ((length % 4) == 0) { - if (validate_utf32(reinterpret_cast(input), length / 4)) { - out |= encoding_type::UTF32_LE; - } - } - return out; -} -#endif // SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_warn_unused bool -implementation::validate_utf8(const char *buf, size_t len) const noexcept { - return lsx::utf8_validation::generic_validate_utf8(buf, len); -} -#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 -simdutf_warn_unused result implementation::validate_utf8_with_errors( - const char *buf, size_t len) const noexcept { - return lsx::utf8_validation::generic_validate_utf8_with_errors(buf, len); -} -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_ASCII -simdutf_warn_unused bool -implementation::validate_ascii(const char *buf, size_t len) const noexcept { - return lsx::ascii_validation::generic_validate_ascii(buf, len); -} - -simdutf_warn_unused result implementation::validate_ascii_with_errors( - const char *buf, size_t len) const noexcept { - return lsx::ascii_validation::generic_validate_ascii_with_errors(buf, len); -} -#endif // SIMDUTF_FEATURE_ASCII - -#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_warn_unused bool -implementation::validate_utf16le(const char16_t *buf, - size_t len) const noexcept { - if (simdutf_unlikely(len == 0)) { - // empty input is valid. protected the implementation from nullptr. - return true; - } - const auto res = - lsx::utf16::validate_utf16_with_errors(buf, len); - - if (res.is_err()) { - return false; - } - - if (res.count != len) { - return scalar::utf16::validate(buf + res.count, - len - res.count); - } - - return true; -} -#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused bool -implementation::validate_utf16be(const char16_t *buf, - size_t len) const noexcept { - if (simdutf_unlikely(len == 0)) { - // empty input is valid. protected the implementation from nullptr. - return true; - } - const auto res = - lsx::utf16::validate_utf16_with_errors(buf, len); - - if (res.is_err()) { - return false; - } - - if (res.count != len) { - return scalar::utf16::validate(buf + res.count, - len - res.count); - } - - return true; -} - -simdutf_warn_unused result implementation::validate_utf16le_with_errors( - const char16_t *buf, size_t len) const noexcept { - if (simdutf_unlikely(len == 0)) { - return result(error_code::SUCCESS, 0); - } - const result res = - lsx::utf16::validate_utf16_with_errors(buf, len); - if (res.count != len) { - const result scalar_res = - scalar::utf16::validate_with_errors( - buf + res.count, len - res.count); - return result(scalar_res.error, res.count + scalar_res.count); - } else { - return res; - } -} - -simdutf_warn_unused result implementation::validate_utf16be_with_errors( - const char16_t *buf, size_t len) const noexcept { - if (simdutf_unlikely(len == 0)) { - return result(error_code::SUCCESS, 0); - } - const result res = - lsx::utf16::validate_utf16_with_errors(buf, len); - if (res.count != len) { - const result scalar_res = - scalar::utf16::validate_with_errors(buf + res.count, - len - res.count); - return result(scalar_res.error, res.count + scalar_res.count); - } else { - return res; - } -} - -void implementation::to_well_formed_utf16le(const char16_t *input, size_t len, - char16_t *output) const noexcept { - return scalar::utf16::to_well_formed_utf16(input, len, - output); -} - -void implementation::to_well_formed_utf16be(const char16_t *input, size_t len, - char16_t *output) const noexcept { - return scalar::utf16::to_well_formed_utf16(input, len, - output); -} -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_warn_unused bool -implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept { - if (simdutf_unlikely(len == 0)) { - // empty input is valid. protected the implementation from nullptr. - return true; - } - const char32_t *tail = lsx_validate_utf32le(buf, len); - if (tail) { - return scalar::utf32::validate(tail, len - (tail - buf)); - } else { - return false; - } -} -#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused result implementation::validate_utf32_with_errors( - const char32_t *buf, size_t len) const noexcept { - if (simdutf_unlikely(len == 0)) { - return result(error_code::SUCCESS, 0); - } - result res = lsx_validate_utf32le_with_errors(buf, len); - if (res.count != len) { - result scalar_res = - scalar::utf32::validate_with_errors(buf + res.count, len - res.count); - return result(scalar_res.error, res.count + scalar_res.count); - } else { - return res; - } -} -#endif // SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( - const char *buf, size_t len, char *utf8_output) const noexcept { - std::pair ret = - lsx_convert_latin1_to_utf8(buf, len, utf8_output); - size_t converted_chars = ret.second - utf8_output; - - if (ret.first != buf + len) { - const size_t scalar_converted_chars = scalar::latin1_to_utf8::convert( - ret.first, len - (ret.first - buf), ret.second); - converted_chars += scalar_converted_chars; - } - return converted_chars; -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - std::pair ret = - lsx_convert_latin1_to_utf16le(buf, len, utf16_output); - size_t converted_chars = ret.second - utf16_output; - if (ret.first != buf + len) { - const size_t scalar_converted_chars = - scalar::latin1_to_utf16::convert( - ret.first, len - (ret.first - buf), ret.second); - converted_chars += scalar_converted_chars; - } - return converted_chars; -} - -simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - std::pair ret = - lsx_convert_latin1_to_utf16be(buf, len, utf16_output); - size_t converted_chars = ret.second - utf16_output; - if (ret.first != buf + len) { - const size_t scalar_converted_chars = - scalar::latin1_to_utf16::convert( - ret.first, len - (ret.first - buf), ret.second); - converted_chars += scalar_converted_chars; - } - return converted_chars; -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept { - std::pair ret = - lsx_convert_latin1_to_utf32(buf, len, utf32_output); - size_t converted_chars = ret.second - utf32_output; - if (ret.first != buf + len) { - const size_t scalar_converted_chars = scalar::latin1_to_utf32::convert( - ret.first, len - (ret.first - buf), ret.second); - converted_chars += scalar_converted_chars; - } - return converted_chars; -} -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept { - utf8_to_latin1::validating_transcoder converter; - return converter.convert(buf, len, latin1_output); -} - -simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors( - const char *buf, size_t len, char *latin1_output) const noexcept { - utf8_to_latin1::validating_transcoder converter; - return converter.convert_with_errors(buf, len, latin1_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept { - return lsx::utf8_to_latin1::convert_valid(buf, len, latin1_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - utf8_to_utf16::validating_transcoder converter; - return converter.convert(buf, len, utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - utf8_to_utf16::validating_transcoder converter; - return converter.convert(buf, len, utf16_output); -} - -simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - utf8_to_utf16::validating_transcoder converter; - return converter.convert_with_errors(buf, len, - utf16_output); -} - -simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - utf8_to_utf16::validating_transcoder converter; - return converter.convert_with_errors(buf, len, utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le( - const char *input, size_t size, char16_t *utf16_output) const noexcept { - return utf8_to_utf16::convert_valid(input, size, - utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be( - const char *input, size_t size, char16_t *utf16_output) const noexcept { - return utf8_to_utf16::convert_valid(input, size, - utf16_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::convert_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept { - utf8_to_utf32::validating_transcoder converter; - return converter.convert(buf, len, utf32_output); -} - -simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors( - const char *buf, size_t len, char32_t *utf32_output) const noexcept { - utf8_to_utf32::validating_transcoder converter; - return converter.convert_with_errors(buf, len, utf32_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( - const char *input, size_t size, char32_t *utf32_output) const noexcept { - return utf8_to_utf32::convert_valid(input, size, utf32_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - std::pair ret = - lsx_convert_utf16_to_latin1(buf, len, latin1_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - latin1_output; - - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_latin1::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - std::pair ret = - lsx_convert_utf16_to_latin1(buf, len, latin1_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - latin1_output; - - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_latin1::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused result -implementation::convert_utf16le_to_latin1_with_errors( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - std::pair ret = - lsx_convert_utf16_to_latin1_with_errors( - buf, len, latin1_output); - if (ret.first.error) { - return ret.first; - } // Can return directly since scalar fallback already found correct - // ret.first.count - if (ret.first.count != len) { // All good so far, but not finished - result scalar_res = - scalar::utf16_to_latin1::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - latin1_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused result -implementation::convert_utf16be_to_latin1_with_errors( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - std::pair ret = - lsx_convert_utf16_to_latin1_with_errors(buf, len, - latin1_output); - if (ret.first.error) { - return ret.first; - } // Can return directly since scalar fallback already found correct - // ret.first.count - if (ret.first.count != len) { // All good so far, but not finished - result scalar_res = - scalar::utf16_to_latin1::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - latin1_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - // optimization opportunity: implement a custom function. - return convert_utf16be_to_latin1(buf, len, latin1_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - // optimization opportunity: implement a custom function. - return convert_utf16le_to_latin1(buf, len, latin1_output); -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - std::pair ret = - lsx_convert_utf16_to_utf8(buf, len, utf8_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf8_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_utf8::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - std::pair ret = - lsx_convert_utf16_to_utf8(buf, len, utf8_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf8_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_utf8::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - lsx_convert_utf16_to_utf8_with_errors(buf, len, - utf8_output); - if (ret.first.error) { - return ret.first; - } // Can return directly since scalar fallback already found correct - // ret.first.count - if (ret.first.count != len) { // All good so far, but not finished - result scalar_res = - scalar::utf16_to_utf8::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - utf8_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - lsx_convert_utf16_to_utf8_with_errors(buf, len, - utf8_output); - if (ret.first.error) { - return ret.first; - } // Can return directly since scalar fallback already found correct - // ret.first.count - if (ret.first.count != len) { // All good so far, but not finished - result scalar_res = - scalar::utf16_to_utf8::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - utf8_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return convert_utf16le_to_utf8(buf, len, utf8_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return convert_utf16be_to_utf8(buf, len, utf8_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_output) const noexcept { - if (simdutf_unlikely(len == 0)) { - return 0; - } - std::pair ret = - lsx_convert_utf32_to_utf8(buf, len, utf8_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf8_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = scalar::utf32_to_utf8::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( - const char32_t *buf, size_t len, char *utf8_output) const noexcept { - if (simdutf_unlikely(len == 0)) { - return result(error_code::SUCCESS, 0); - } - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - lsx_convert_utf32_to_utf8_with_errors(buf, len, utf8_output); - if (ret.first.count != len) { - result scalar_res = scalar::utf32_to_utf8::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - utf8_output; // Set count to the number of 8-bit code units written - return ret.first; -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - std::pair ret = - lsx_convert_utf16_to_utf32(buf, len, utf32_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf32_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_utf32::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - std::pair ret = - lsx_convert_utf16_to_utf32(buf, len, utf32_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf32_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_utf32::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - lsx_convert_utf16_to_utf32_with_errors(buf, len, - utf32_output); - if (ret.first.error) { - return ret.first; - } // Can return directly since scalar fallback already found correct - // ret.first.count - if (ret.first.count != len) { // All good so far, but not finished - result scalar_res = - scalar::utf16_to_utf32::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - utf32_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - lsx_convert_utf16_to_utf32_with_errors(buf, len, - utf32_output); - if (ret.first.error) { - return ret.first; - } // Can return directly since scalar fallback already found correct - // ret.first.count - if (ret.first.count != len) { // All good so far, but not finished - result scalar_res = - scalar::utf16_to_utf32::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - utf32_output; // Set count to the number of 8-bit code units written - return ret.first; -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( - const char32_t *buf, size_t len, char *latin1_output) const noexcept { - std::pair ret = - lsx_convert_utf32_to_latin1(buf, len, latin1_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - latin1_output; - - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = scalar::utf32_to_latin1::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors( - const char32_t *buf, size_t len, char *latin1_output) const noexcept { - std::pair ret = - lsx_convert_utf32_to_latin1_with_errors(buf, len, latin1_output); - if (ret.first.error) { - return ret.first; - } // Can return directly since scalar fallback already found correct - // ret.first.count - if (ret.first.count != len) { // All good so far, but not finished - result scalar_res = scalar::utf32_to_latin1::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - latin1_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1( - const char32_t *buf, size_t len, char *latin1_output) const noexcept { - std::pair ret = - lsx_convert_utf32_to_latin1(buf, len, latin1_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - latin1_output; - - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = scalar::utf32_to_latin1::convert_valid( - ret.first, len - (ret.first - buf), ret.second); - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_output) const noexcept { - // optimization opportunity: implement a custom function. - return convert_utf32_to_utf8(buf, len, utf8_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - std::pair ret = - lsx_convert_utf32_to_utf16(buf, len, utf16_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf16_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf32_to_utf16::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - - return saved_bytes; -} - -simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - std::pair ret = - lsx_convert_utf32_to_utf16(buf, len, utf16_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf16_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf32_to_utf16::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - lsx_convert_utf32_to_utf16_with_errors(buf, len, - utf16_output); - if (ret.first.count != len) { - result scalar_res = - scalar::utf32_to_utf16::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - utf16_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - lsx_convert_utf32_to_utf16_with_errors(buf, len, - utf16_output); - if (ret.first.count != len) { - result scalar_res = - scalar::utf32_to_utf16::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - utf16_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - return convert_utf32_to_utf16le(buf, len, utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - return convert_utf32_to_utf16be(buf, len, utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return convert_utf16le_to_utf32(buf, len, utf32_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return convert_utf16be_to_utf32(buf, len, utf32_output); -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 -void implementation::change_endianness_utf16(const char16_t *input, - size_t length, - char16_t *output) const noexcept { - utf16::change_endianness_utf16(input, length, output); -} - -simdutf_warn_unused size_t implementation::count_utf16le( - const char16_t *input, size_t length) const noexcept { - return utf16::count_code_points(input, length); -} - -simdutf_warn_unused size_t implementation::count_utf16be( - const char16_t *input, size_t length) const noexcept { - return utf16::count_code_points(input, length); -} -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 -simdutf_warn_unused size_t -implementation::count_utf8(const char *input, size_t length) const noexcept { - return utf8::count_code_points(input, length); -} -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::latin1_length_from_utf8( - const char *buf, size_t len) const noexcept { - return count_utf8(buf, len); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::utf8_length_from_latin1( - const char *input, size_t length) const noexcept { - const uint8_t *data = reinterpret_cast(input); - const uint8_t *data_end = data + length; - uint64_t result = 0; - while (data_end - data > 16) { - uint64_t two_bytes = 0; - __m128i input_vec = __lsx_vld(data, 0); - two_bytes = - __lsx_vpickve2gr_hu(__lsx_vpcnt_h(__lsx_vmskltz_b(input_vec)), 0); - result += 16 + two_bytes; - data += 16; - } - return result + scalar::latin1::utf8_length_from_latin1((const char *)data, - data_end - data); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t implementation::utf8_length_from_utf16le( - const char16_t *input, size_t length) const noexcept { - return utf16::utf8_length_from_utf16_bytemask(input, - length); -} - -simdutf_warn_unused size_t implementation::utf8_length_from_utf16be( - const char16_t *input, size_t length) const noexcept { - return utf16::utf8_length_from_utf16_bytemask(input, length); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::utf32_length_from_utf16le( - const char16_t *input, size_t length) const noexcept { - return utf16::utf32_length_from_utf16(input, length); -} - -simdutf_warn_unused size_t implementation::utf32_length_from_utf16be( - const char16_t *input, size_t length) const noexcept { - return utf16::utf32_length_from_utf16(input, length); -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t implementation::utf16_length_from_utf8( - const char *input, size_t length) const noexcept { - return utf8::utf16_length_from_utf8_bytemask(input, length); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::utf8_length_from_utf32( - const char32_t *input, size_t length) const noexcept { - return utf32::utf8_length_from_utf32(input, length); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::utf16_length_from_utf32( - const char32_t *input, size_t length) const noexcept { - const __m128i v_ffff = lsx_splat_u32(0x0000ffff); - size_t pos = 0; - size_t count = 0; - for (; pos + 4 <= length; pos += 4) { - __m128i in = __lsx_vld(reinterpret_cast(input + pos), 0); - const __m128i surrogate_bytemask = __lsx_vslt_wu(v_ffff, in); - size_t surrogate_count = __lsx_vpickve2gr_bu( - __lsx_vpcnt_b(__lsx_vmskltz_w(surrogate_bytemask)), 0); - count += 4 + surrogate_count; - } - return count + - scalar::utf32::utf16_length_from_utf32(input + pos, length - pos); -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::utf32_length_from_utf8( - const char *input, size_t length) const noexcept { - return utf8::count_code_points(input, length); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_BASE64 -simdutf_warn_unused result implementation::base64_to_binary( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - if (options & base64_url) { - if (options == base64_options::base64_url_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } else { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } - } else { - if (options == base64_options::base64_default_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } else { - return compress_decode_base64(output, input, length, - options, last_chunk_options); - } - } -} - -simdutf_warn_unused full_result implementation::base64_to_binary_details( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - if (options & base64_url) { - if (options == base64_options::base64_url_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } else { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } - } else { - if (options == base64_options::base64_default_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } else { - return compress_decode_base64(output, input, length, - options, last_chunk_options); - } - } -} - -simdutf_warn_unused result implementation::base64_to_binary( - const char16_t *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - if (options & base64_url) { - if (options == base64_options::base64_url_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } else { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } - } else { - if (options == base64_options::base64_default_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } else { - return compress_decode_base64(output, input, length, - options, last_chunk_options); - } - } -} - -simdutf_warn_unused full_result implementation::base64_to_binary_details( - const char16_t *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - if (options & base64_url) { - if (options == base64_options::base64_url_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } else { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } - } else { - if (options == base64_options::base64_default_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } else { - return compress_decode_base64(output, input, length, - options, last_chunk_options); - } - } -} - -size_t implementation::binary_to_base64(const char *input, size_t length, - char *output, - base64_options options) const noexcept { - if (options & base64_url) { - return encode_base64(output, input, length, options); - } else { - return encode_base64(output, input, length, options); - } -} -#endif // SIMDUTF_FEATURE_BASE64 -} // namespace lsx -} // namespace simdutf - -/* begin file src/simdutf/lsx/end.h */ -#undef SIMDUTF_SIMD_HAS_UNSIGNED_CMP -/* end file src/simdutf/lsx/end.h */ -/* end file src/lsx/implementation.cpp */ -#endif -#if SIMDUTF_IMPLEMENTATION_LASX -/* begin file src/lasx/implementation.cpp */ -/* begin file src/simdutf/lasx/begin.h */ -// redefining SIMDUTF_IMPLEMENTATION to "lasx" -// #define SIMDUTF_IMPLEMENTATION lasx -#define SIMDUTF_SIMD_HAS_UNSIGNED_CMP 1 -/* end file src/simdutf/lasx/begin.h */ -namespace simdutf { -namespace lasx { -namespace { -#ifndef SIMDUTF_LASX_H - #error "lasx.h must be included" -#endif -using namespace simd; - -#if SIMDUTF_FEATURE_UTF8 -// convert vmskltz/vmskgez/vmsknz to -// simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes index -const uint8_t lasx_1_2_utf8_bytes_mask[] = { - 0, 1, 4, 5, 16, 17, 20, 21, 64, 65, 68, 69, 80, 81, 84, - 85, 2, 3, 6, 7, 18, 19, 22, 23, 66, 67, 70, 71, 82, 83, - 86, 87, 8, 9, 12, 13, 24, 25, 28, 29, 72, 73, 76, 77, 88, - 89, 92, 93, 10, 11, 14, 15, 26, 27, 30, 31, 74, 75, 78, 79, - 90, 91, 94, 95, 32, 33, 36, 37, 48, 49, 52, 53, 96, 97, 100, - 101, 112, 113, 116, 117, 34, 35, 38, 39, 50, 51, 54, 55, 98, 99, - 102, 103, 114, 115, 118, 119, 40, 41, 44, 45, 56, 57, 60, 61, 104, - 105, 108, 109, 120, 121, 124, 125, 42, 43, 46, 47, 58, 59, 62, 63, - 106, 107, 110, 111, 122, 123, 126, 127, 128, 129, 132, 133, 144, 145, 148, - 149, 192, 193, 196, 197, 208, 209, 212, 213, 130, 131, 134, 135, 146, 147, - 150, 151, 194, 195, 198, 199, 210, 211, 214, 215, 136, 137, 140, 141, 152, - 153, 156, 157, 200, 201, 204, 205, 216, 217, 220, 221, 138, 139, 142, 143, - 154, 155, 158, 159, 202, 203, 206, 207, 218, 219, 222, 223, 160, 161, 164, - 165, 176, 177, 180, 181, 224, 225, 228, 229, 240, 241, 244, 245, 162, 163, - 166, 167, 178, 179, 182, 183, 226, 227, 230, 231, 242, 243, 246, 247, 168, - 169, 172, 173, 184, 185, 188, 189, 232, 233, 236, 237, 248, 249, 252, 253, - 170, 171, 174, 175, 186, 187, 190, 191, 234, 235, 238, 239, 250, 251, 254, - 255}; -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32 -simdutf_really_inline __m128i lsx_swap_bytes(__m128i vec) { - return __lsx_vshuf4i_b(vec, 0b10110001); -} -simdutf_really_inline __m256i lasx_swap_bytes(__m256i vec) { - return __lasx_xvshuf4i_b(vec, 0b10110001); -} -#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_ASCII || SIMDUTF_FEATURE_DETECT_ENCODING || \ - SIMDUTF_FEATURE_UTF8 -simdutf_really_inline bool is_ascii(const simd8x64 &input) { - return input.is_ascii(); -} -#endif // SIMDUTF_FEATURE_ASCII || SIMDUTF_FEATURE_DETECT_ENCODING || - // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_really_inline simd8 -must_be_2_3_continuation(const simd8 prev2, - const simd8 prev3) { - simd8 is_third_byte = prev2 >= uint8_t(0b11100000u); - simd8 is_fourth_byte = prev3 >= uint8_t(0b11110000u); - return is_third_byte ^ is_fourth_byte; -} -#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 && (SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32) -// common functions for utf8 conversions -simdutf_really_inline __m128i convert_utf8_3_byte_to_utf16(__m128i in) { - // Low half contains 10bbbbbb|10cccccc - // High half contains 1110aaaa|1110aaaa - const v16u8 sh = {2, 1, 5, 4, 8, 7, 11, 10, 0, 0, 3, 3, 6, 6, 9, 9}; - const v8u16 v0fff = {0xfff, 0xfff, 0xfff, 0xfff, 0xfff, 0xfff, 0xfff, 0xfff}; - - __m128i perm = __lsx_vshuf_b(__lsx_vldi(0), in, (__m128i)sh); - // 1110aaaa => aaaa0000 - __m128i perm_high = __lsx_vslli_b(__lsx_vbsrl_v(perm, 8), 4); - // 10bbbbbb 10cccccc => 0010bbbb bbcccccc - __m128i composed = __lsx_vbitsel_v(__lsx_vsrli_h(perm, 2), /* perm >> 2*/ - perm, __lsx_vrepli_h(0x3f) /* 0x003f */); - // 0010bbbb bbcccccc => aaaabbbb bbcccccc - composed = __lsx_vbitsel_v(perm_high, composed, (__m128i)v0fff); - - return composed; -} - -simdutf_really_inline __m128i convert_utf8_2_byte_to_utf16(__m128i in) { - // 10bbbbb 110aaaaa => 00bbbbb 000aaaaa - __m128i composed = __lsx_vand_v(in, __lsx_vldi(0x3f)); - // 00bbbbbb 000aaaaa => 00000aaa aabbbbbb - composed = __lsx_vbitsel_v( - __lsx_vsrli_h(__lsx_vslli_h(composed, 8), 2), /* (aaaaa << 8) >> 2 */ - __lsx_vsrli_h(composed, 8), /* bbbbbb >> 8 */ - __lsx_vrepli_h(0x3f)); /* 0x003f */ - return composed; -} - -simdutf_really_inline __m128i -convert_utf8_1_to_2_byte_to_utf16(__m128i in, size_t shufutf8_idx) { - // Converts 6 1-2 byte UTF-8 characters to 6 UTF-16 characters. - // This is a relatively easy scenario - // we process SIX (6) input code-code units. The max length in bytes of six - // code code units spanning between 1 and 2 bytes each is 12 bytes. - __m128i sh = - __lsx_vld(reinterpret_cast( - simdutf::tables::utf8_to_utf16::shufutf8[shufutf8_idx]), - 0); - // Shuffle - // 1 byte: 00000000 0bbbbbbb - // 2 byte: 110aaaaa 10bbbbbb - __m128i perm = __lsx_vshuf_b(__lsx_vldi(0), in, sh); - // 1 byte: 00000000 0bbbbbbb - // 2 byte: 00000000 00bbbbbb - __m128i ascii = __lsx_vand_v(perm, __lsx_vrepli_h(0x7f)); // 6 or 7 bits - // 1 byte: 00000000 00000000 - // 2 byte: 00000aaa aa000000 - __m128i v1f00 = lsx_splat_u16(0x1f00); - __m128i composed = __lsx_vsrli_h(__lsx_vand_v(perm, v1f00), 2); // 5 bits - // Combine with a shift right accumulate - // 1 byte: 00000000 0bbbbbbb - // 2 byte: 00000aaa aabbbbbb - composed = __lsx_vadd_h(ascii, composed); - return composed; -} -#endif // SIMDUTF_FEATURE_UTF8 && (SIMDUTF_FEATURE_UTF16 || - // SIMDUTF_FEATURE_UTF32) - -#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING -/* begin file src/lasx/lasx_validate_utf16.cpp */ -template -simd8 utf16_gather_high_bytes(const simd16 in0, - const simd16 in1) { - if (big_endian) { - const auto mask = simd16(0x00ff); - const auto t0 = in0 & mask; - const auto t1 = in1 & mask; - - return simd16::pack(t0, t1); - } else { - return simd16::pack_shifted_right<8>(in0, in1); - } -} -/* end file src/lasx/lasx_validate_utf16.cpp */ -#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING -#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING -/* begin file src/lasx/lasx_validate_utf32le.cpp */ -const char32_t *lasx_validate_utf32le(const char32_t *input, size_t size) { - const char32_t *end = input + size; - - // Performance degradation when memory address is not 32-byte aligned - while (((uint64_t)input & 0x1F) && input < end) { - uint32_t word = *input++; - if (word > 0x10FFFF || (word >= 0xD800 && word <= 0xDFFF)) { - return nullptr; - } - } - - __m256i offset = lasx_splat_u32(0xffff2000); - __m256i standardoffsetmax = lasx_splat_u32(0xfffff7ff); - __m256i standardmax = lasx_splat_u32(0x10ffff); - __m256i currentmax = __lasx_xvldi(0x0); - __m256i currentoffsetmax = __lasx_xvldi(0x0); - - while (input + 8 < end) { - __m256i in = __lasx_xvld(reinterpret_cast(input), 0); - currentmax = __lasx_xvmax_wu(in, currentmax); - // 0xD8__ + 0x2000 = 0xF8__ => 0xF8__ > 0xF7FF - currentoffsetmax = - __lasx_xvmax_wu(__lasx_xvadd_w(in, offset), currentoffsetmax); - input += 8; - } - __m256i is_zero = - __lasx_xvxor_v(__lasx_xvmax_wu(currentmax, standardmax), standardmax); - if (__lasx_xbnz_v(is_zero)) { - return nullptr; - } - - is_zero = __lasx_xvxor_v(__lasx_xvmax_wu(currentoffsetmax, standardoffsetmax), - standardoffsetmax); - if (__lasx_xbnz_v(is_zero)) { - return nullptr; - } - return input; -} - -const result lasx_validate_utf32le_with_errors(const char32_t *input, - size_t size) { - const char32_t *start = input; - const char32_t *end = input + size; - - // Performance degradation when memory address is not 32-byte aligned - while (((uint64_t)input & 0x1F) && input < end) { - uint32_t word = *input; - if (word > 0x10FFFF) { - return result(error_code::TOO_LARGE, input - start); - } - if (word >= 0xD800 && word <= 0xDFFF) { - return result(error_code::SURROGATE, input - start); - } - input++; - } - - __m256i offset = lasx_splat_u32(0xffff2000); - __m256i standardoffsetmax = lasx_splat_u32(0xfffff7ff); - __m256i standardmax = lasx_splat_u32(0x10ffff); - __m256i currentmax = __lasx_xvldi(0x0); - __m256i currentoffsetmax = __lasx_xvldi(0x0); - - while (input + 8 < end) { - __m256i in = __lasx_xvld(reinterpret_cast(input), 0); - currentmax = __lasx_xvmax_wu(in, currentmax); - currentoffsetmax = - __lasx_xvmax_wu(__lasx_xvadd_w(in, offset), currentoffsetmax); - - __m256i is_zero = - __lasx_xvxor_v(__lasx_xvmax_wu(currentmax, standardmax), standardmax); - if (__lasx_xbnz_v(is_zero)) { - return result(error_code::TOO_LARGE, input - start); - } - is_zero = - __lasx_xvxor_v(__lasx_xvmax_wu(currentoffsetmax, standardoffsetmax), - standardoffsetmax); - if (__lasx_xbnz_v(is_zero)) { - return result(error_code::SURROGATE, input - start); - } - input += 8; - } - - return result(error_code::SUCCESS, input - start); -} -/* end file src/lasx/lasx_validate_utf32le.cpp */ -#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/lasx/lasx_convert_latin1_to_utf8.cpp */ -/* - Returns a pair: the first unprocessed byte from buf and utf8_output - A scalar routing should carry on the conversion of the tail. -*/ - -std::pair -lasx_convert_latin1_to_utf8(const char *latin1_input, size_t len, - char *utf8_out) { - uint8_t *utf8_output = reinterpret_cast(utf8_out); - const size_t safety_margin = 12; - const char *end = latin1_input + len; - - // We always write 16 bytes, of which more than the first 8 bytes - // are valid. A safety margin of 8 is more than sufficient. - while (end - latin1_input >= std::ptrdiff_t(16 + safety_margin)) { - __m128i in8 = __lsx_vld(reinterpret_cast(latin1_input), 0); - uint32_t ascii_mask = __lsx_vpickve2gr_wu(__lsx_vmskgez_b(in8), 0); - if (ascii_mask == 0xFFFF) { - __lsx_vst(in8, utf8_output, 0); - utf8_output += 16; - latin1_input += 16; - continue; - } - // We just fallback on UTF-16 code. This could be optimized/simplified - // further. - __m256i in16 = __lasx_vext2xv_hu_bu(____m256i(in8)); - // 1. prepare 2-byte values - // input 8-bit word : [aabb|bbbb] x 16 - // expected output : [1100|00aa|10bb|bbbb] x 16 - // t0 = [0000|00aa|bbbb|bb00] - __m256i t0 = __lasx_xvslli_h(in16, 2); - // t1 = [0000|00aa|0000|0000] - __m256i t1 = __lasx_xvand_v(t0, lasx_splat_u16(0x300)); - // t3 = [0000|00aa|00bb|bbbb] - __m256i t2 = __lasx_xvbitsel_v(t1, in16, __lasx_xvrepli_h(0x3f)); - // t4 = [1100|00aa|10bb|bbbb] - __m256i t3 = __lasx_xvor_v(t2, __lasx_xvreplgr2vr_h(uint16_t(0xc080))); - // merge ASCII and 2-byte codewords - __m256i one_byte_bytemask = __lasx_xvsle_hu(in16, __lasx_xvrepli_h(0x7F)); - __m256i utf8_unpacked = __lasx_xvbitsel_v(t3, in16, one_byte_bytemask); - - const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes - [lasx_1_2_utf8_bytes_mask[(ascii_mask & 0xFF)]][0]; - __m128i shuffle0 = __lsx_vld(row0 + 1, 0); - __m128i utf8_unpacked_lo = lasx_extracti128_lo(utf8_unpacked); - __m128i utf8_packed0 = - __lsx_vshuf_b(utf8_unpacked_lo, utf8_unpacked_lo, shuffle0); - __lsx_vst(utf8_packed0, utf8_output, 0); - utf8_output += row0[0]; - - const uint8_t *row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes - [lasx_1_2_utf8_bytes_mask[(ascii_mask >> 8)]][0]; - __m128i shuffle1 = __lsx_vld(row1 + 1, 0); - __m128i utf8_unpacked_hi = lasx_extracti128_hi(utf8_unpacked); - __m128i utf8_packed1 = - __lsx_vshuf_b(utf8_unpacked_hi, utf8_unpacked_hi, shuffle1); - __lsx_vst(utf8_packed1, utf8_output, 0); - utf8_output += row1[0]; - - latin1_input += 16; - } // while - - return std::make_pair(latin1_input, reinterpret_cast(utf8_output)); -} -/* end file src/lasx/lasx_convert_latin1_to_utf8.cpp */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/lasx/lasx_convert_latin1_to_utf16.cpp */ -std::pair -lasx_convert_latin1_to_utf16le(const char *buf, size_t len, - char16_t *utf16_output) { - const char *end = buf + len; - - // Performance degradation when memory address is not 32-byte aligned - while (((uint64_t)utf16_output & 0x1F) && buf < end) { - *utf16_output++ = uint8_t(*buf) & 0xFF; - buf++; - } - - while (end - buf >= 32) { - __m256i in8 = __lasx_xvld(reinterpret_cast(buf), 0); - - __m256i inlow = __lasx_vext2xv_hu_bu(in8); - __m256i in8_high = __lasx_xvpermi_q(in8, in8, 0b00000001); - __m256i inhigh = __lasx_vext2xv_hu_bu(in8_high); - __lasx_xvst(inlow, reinterpret_cast(utf16_output), 0); - __lasx_xvst(inhigh, reinterpret_cast(utf16_output), 32); - - utf16_output += 32; - buf += 32; - } - - if (end - buf >= 16) { - __m128i zero = __lsx_vldi(0); - __m128i in8 = __lsx_vld(reinterpret_cast(buf), 0); - - __m128i inlow = __lsx_vilvl_b(zero, in8); - __m128i inhigh = __lsx_vilvh_b(zero, in8); - __lsx_vst(inlow, reinterpret_cast(utf16_output), 0); - __lsx_vst(inhigh, reinterpret_cast(utf16_output), 16); - - utf16_output += 16; - buf += 16; - } - return std::make_pair(buf, utf16_output); -} - -std::pair -lasx_convert_latin1_to_utf16be(const char *buf, size_t len, - char16_t *utf16_output) { - const char *end = buf + len; - - while (((uint64_t)utf16_output & 0x1F) && buf < end) { - *utf16_output++ = (uint16_t(*buf++) << 8); - } - - __m256i zero = __lasx_xvldi(0); - while (end - buf >= 32) { - __m256i in8 = __lasx_xvld(reinterpret_cast(buf), 0); - - __m256i in8_shuf = __lasx_xvpermi_d(in8, 0b11011000); - - __m256i inlow = __lasx_xvilvl_b(in8_shuf, zero); - __m256i inhigh = __lasx_xvilvh_b(in8_shuf, zero); - __lasx_xvst(inlow, reinterpret_cast(utf16_output), 0); - __lasx_xvst(inhigh, reinterpret_cast(utf16_output), 32); - utf16_output += 32; - buf += 32; - } - - if (end - buf >= 16) { - __m128i zero_128 = __lsx_vldi(0); - __m128i in8 = __lsx_vld(reinterpret_cast(buf), 0); - - __m128i inlow = __lsx_vilvl_b(in8, zero_128); - __m128i inhigh = __lsx_vilvh_b(in8, zero_128); - __lsx_vst(inlow, reinterpret_cast(utf16_output), 0); - __lsx_vst(inhigh, reinterpret_cast(utf16_output), 16); - utf16_output += 16; - buf += 16; - } - - return std::make_pair(buf, utf16_output); -} -/* end file src/lasx/lasx_convert_latin1_to_utf16.cpp */ -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/lasx/lasx_convert_latin1_to_utf32.cpp */ -std::pair -lasx_convert_latin1_to_utf32(const char *buf, size_t len, - char32_t *utf32_output) { - const char *end = buf + len; - - // LASX requires 32-byte alignment, otherwise performance will be degraded - while (((uint64_t)utf32_output & 0x1F) && buf < end) { - *utf32_output++ = ((uint32_t)*buf) & 0xFF; - buf++; - } - - while (end - buf >= 32) { - __m256i in8 = __lasx_xvld(reinterpret_cast(buf), 0); - - __m256i in32_0 = __lasx_vext2xv_wu_bu(in8); - __lasx_xvst(in32_0, reinterpret_cast(utf32_output), 0); - - __m256i in8_1 = __lasx_xvpermi_d(in8, 0b00000001); - __m256i in32_1 = __lasx_vext2xv_wu_bu(in8_1); - __lasx_xvst(in32_1, reinterpret_cast(utf32_output), 32); - - __m256i in8_2 = __lasx_xvpermi_d(in8, 0b00000010); - __m256i in32_2 = __lasx_vext2xv_wu_bu(in8_2); - __lasx_xvst(in32_2, reinterpret_cast(utf32_output), 64); - - __m256i in8_3 = __lasx_xvpermi_d(in8, 0b00000011); - __m256i in32_3 = __lasx_vext2xv_wu_bu(in8_3); - __lasx_xvst(in32_3, reinterpret_cast(utf32_output), 96); - - utf32_output += 32; - buf += 32; - } - - if (end - buf >= 16) { - __m128i in8 = __lsx_vld(reinterpret_cast(buf), 0); - - __m128i zero = __lsx_vldi(0); - __m128i in16low = __lsx_vilvl_b(zero, in8); - __m128i in16high = __lsx_vilvh_b(zero, in8); - __m128i in32_0 = __lsx_vilvl_h(zero, in16low); - __m128i in32_1 = __lsx_vilvh_h(zero, in16low); - __m128i in32_2 = __lsx_vilvl_h(zero, in16high); - __m128i in32_3 = __lsx_vilvh_h(zero, in16high); - - __lsx_vst(in32_0, reinterpret_cast(utf32_output), 0); - __lsx_vst(in32_1, reinterpret_cast(utf32_output), 16); - __lsx_vst(in32_2, reinterpret_cast(utf32_output), 32); - __lsx_vst(in32_3, reinterpret_cast(utf32_output), 48); - - utf32_output += 16; - buf += 16; - } - - return std::make_pair(buf, utf32_output); -} -/* end file src/lasx/lasx_convert_latin1_to_utf32.cpp */ -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -/* begin file src/lasx/lasx_convert_utf8_to_utf16.cpp */ -// Convert up to 16 bytes from utf8 to utf16 using a mask indicating the -// end of the code points. Only the least significant 12 bits of the mask -// are accessed. -// It returns how many bytes were consumed (up to 16, usually 12). -template -size_t convert_masked_utf8_to_utf16(const char *input, - uint64_t utf8_end_of_code_point_mask, - char16_t *&utf16_output) { - // we use an approach where we try to process up to 12 input bytes. - // Why 12 input bytes and not 16? Because we are concerned with the size of - // the lookup tables. Also 12 is nicely divisible by two and three. - // - __m128i in = __lsx_vld(reinterpret_cast(input), 0); - const uint16_t input_utf8_end_of_code_point_mask = - utf8_end_of_code_point_mask & 0xfff; - // - // Optimization note: our main path below is load-latency dependent. Thus it - // is maybe beneficial to have fast paths that depend on branch prediction but - // have less latency. This results in more instructions but, potentially, also - // higher speeds. - - // We first try a few fast paths. - // The obvious first test is ASCII, which actually consumes the full 16. - if ((utf8_end_of_code_point_mask & 0xFFFF) == 0xFFFF) { - __m128i zero = __lsx_vldi(0); - if (match_system(big_endian)) { - __lsx_vst(__lsx_vilvl_b(zero, in), - reinterpret_cast(utf16_output), 0); - __lsx_vst(__lsx_vilvh_b(zero, in), - reinterpret_cast(utf16_output), 16); - } else { - __lsx_vst(__lsx_vilvl_b(in, zero), - reinterpret_cast(utf16_output), 0); - __lsx_vst(__lsx_vilvh_b(in, zero), - reinterpret_cast(utf16_output), 16); - } - utf16_output += 16; // We wrote 16 16-bit characters. - return 16; // We consumed 16 bytes. - } - - // 3 byte sequences are the next most common, as seen in CJK, which has long - // sequences of these. - if (input_utf8_end_of_code_point_mask == 0x924) { - // We want to take 4 3-byte UTF-8 code units and turn them into 4 2-byte - // UTF-16 code units. - __m128i composed = convert_utf8_3_byte_to_utf16(in); - // Byte swap if necessary - if (!match_system(big_endian)) { - composed = lsx_swap_bytes(composed); - } - - __lsx_vst(composed, reinterpret_cast(utf16_output), 0); - utf16_output += 4; // We wrote 4 16-bit characters. - return 12; // We consumed 12 bytes. - } - - // 2 byte sequences occur in short bursts in languages like Greek and Russian. - if ((utf8_end_of_code_point_mask & 0xFFFF) == 0xAAAA) { - // We want to take 6 2-byte UTF-8 code units and turn them into 6 2-byte - // UTF-16 code units. - __m128i composed = convert_utf8_2_byte_to_utf16(in); - // Byte swap if necessary - if (!match_system(big_endian)) { - composed = lsx_swap_bytes(composed); - } - - __lsx_vst(composed, reinterpret_cast(utf16_output), 0); - utf16_output += 8; // We wrote 6 16-bit characters. - return 16; // We consumed 12 bytes. - } - - /// We do not have a fast path available, or the fast path is unimportant, so - /// we fallback. - const uint8_t idx = simdutf::tables::utf8_to_utf16::utf8bigindex - [input_utf8_end_of_code_point_mask][0]; - - const uint8_t consumed = simdutf::tables::utf8_to_utf16::utf8bigindex - [input_utf8_end_of_code_point_mask][1]; - const __m128i zero = __lsx_vldi(0); - if (idx < 64) { - // SIX (6) input code-code units - // Convert to UTF-16 - __m128i composed = convert_utf8_1_to_2_byte_to_utf16(in, idx); - // Byte swap if necessary - if (!match_system(big_endian)) { - composed = lsx_swap_bytes(composed); - } - // Store - __lsx_vst(composed, reinterpret_cast(utf16_output), 0); - utf16_output += 6; // We wrote 6 16-bit characters. - return consumed; - } else if (idx < 145) { - // FOUR (4) input code-code units - // UTF-16 and UTF-32 use similar algorithms, but UTF-32 skips the narrowing. - __m128i sh = __lsx_vld(reinterpret_cast( - simdutf::tables::utf8_to_utf16::shufutf8[idx]), - 0); - // XXX: depending on the system scalar instructions might be faster. - // 1 byte: 00000000 00000000 0ccccccc - // 2 byte: 00000000 110bbbbb 10cccccc - // 3 byte: 1110aaaa 10bbbbbb 10cccccc - sh = __lsx_vand_v(sh, __lsx_vldi(0x1f)); - __m128i perm = __lsx_vshuf_b(zero, in, sh); - // 1 byte: 00000000 0ccccccc - // 2 byte: xx0bbbbb x0cccccc - // 3 byte: xxbbbbbb x0cccccc - __m128i lowperm = __lsx_vpickev_h(perm, perm); - // 1 byte: 00000000 00000000 - // 2 byte: 00000000 00000000 - // 3 byte: 00000000 1110aaaa - __m128i highperm = __lsx_vpickod_h(perm, perm); - // 3 byte: aaaa0000 00000000 - highperm = __lsx_vslli_h(highperm, 12); - // ASCII - // 1 byte: 00000000 0ccccccc - // 2+byte: 00000000 00cccccc - __m128i ascii = __lsx_vand_v(lowperm, __lsx_vrepli_h(0x7f)); - // 1 byte: 00000000 00000000 - // 2 byte: xx0bbbbb 00000000 - // 3 byte: xxbbbbbb 00000000 - __m128i middlebyte = __lsx_vand_v(lowperm, lsx_splat_u16(0xFF00)); - // 1 byte: 00000000 0ccccccc - // 2 byte: 0010bbbb bbcccccc - // 3 byte: 0010bbbb bbcccccc - __m128i composed = __lsx_vor_v(__lsx_vsrli_h(middlebyte, 2), ascii); - - __m128i v0fff = __lsx_vreplgr2vr_h(uint16_t(0xfff)); - // aaaabbbb bbcccccc - composed = __lsx_vbitsel_v(highperm, composed, v0fff); - - if (!match_system(big_endian)) { - composed = lsx_swap_bytes(composed); - } - - __lsx_vst(composed, reinterpret_cast(utf16_output), 0); - utf16_output += 4; // We wrote 4 16-bit codepoints - return consumed; - } else if (idx < 209) { - // THREE (3) input code-code units - if (input_utf8_end_of_code_point_mask == 0x888) { - __m128i expected_mask = - (__m128i)v16u8{0xf8, 0xc0, 0xc0, 0xc0, 0xf8, 0xc0, 0xc0, 0xc0, - 0xf8, 0xc0, 0xc0, 0xc0, 0x0, 0x0, 0x0, 0x0}; - __m128i expected = - (__m128i)v16u8{0xf0, 0x80, 0x80, 0x80, 0xf0, 0x80, 0x80, 0x80, - 0xf0, 0x80, 0x80, 0x80, 0x0, 0x0, 0x0, 0x0}; - __m128i check = __lsx_vseq_b(__lsx_vand_v(in, expected_mask), expected); - if (__lsx_bz_b(check)) - return 12; - // We want to take 3 4-byte UTF-8 code units and turn them into 3 4-byte - // UTF-16 pairs. Generating surrogate pairs is a little tricky though, but - // it is easier when we can assume they are all pairs. This version does - // not use the LUT, but 4 byte sequences are less common and the overhead - // of the extra memory access is less important than the early branch - // overhead in shorter sequences. - - // Swap byte pairs - // 10dddddd 10cccccc|10bbbbbb 11110aaa - // 10cccccc 10dddddd|11110aaa 10bbbbbb - __m128i swap = lsx_swap_bytes(in); - // Shift left 2 bits - // cccccc00 dddddd00 xxxxxxxx bbbbbb00 - __m128i shift = __lsx_vslli_b(swap, 2); - // Create a magic number containing the low 2 bits of the trail surrogate - // and all the corrections needed to create the pair. UTF-8 4b prefix = - // -0x0000|0xF000 surrogate offset = -0x0000|0x0040 (0x10000 << 6) - // surrogate high = +0x0000|0xD800 - // surrogate low = +0xDC00|0x0000 - // ------------------------------- - // = +0xDC00|0xE7C0 - __m128i magic = __lsx_vreplgr2vr_w(uint32_t(0xDC00E7C0)); - // Generate unadjusted trail surrogate minus lowest 2 bits - // xxxxxxxx xxxxxxxx|11110aaa bbbbbb00 - __m128i trail = __lsx_vbitsel_v(shift, swap, lsx_splat_u32(0x0000FF00)); - // Insert low 2 bits of trail surrogate to magic number for later - // 11011100 00000000 11100111 110000cc - __m128i magic_with_low_2 = __lsx_vor_v(__lsx_vsrli_w(shift, 30), magic); - - // Generate lead surrogate - // xxxxcccc ccdddddd|xxxxxxxx xxxxxxxx - // 000000cc ccdddddd|xxxxxxxx xxxxxxxx - __m128i lead = __lsx_vbitsel_v( - __lsx_vsrli_h(__lsx_vand_v(shift, __lsx_vldi(0x3F)), 4), swap, - __lsx_vrepli_h(0x3f /* 0x003f*/)); - - // Blend pairs - // 000000cc ccdddddd|11110aaa bbbbbb00 - __m128i blend = __lsx_vbitsel_v(lead, trail, lsx_splat_u32(0x0000FFFF)); - - // Add magic number to finish the result - // 110111CC CCDDDDDD|110110AA BBBBBBCC - __m128i composed = __lsx_vadd_h(blend, magic_with_low_2); - // Byte swap if necessary - if (!match_system(big_endian)) { - composed = lsx_swap_bytes(composed); - } - __lsx_vst(composed, reinterpret_cast(utf16_output), 0); - utf16_output += 6; // We 3 32-bit surrogate pairs. - return 12; // We consumed 12 bytes. - } - // 3 1-4 byte sequences - __m128i sh = __lsx_vld(reinterpret_cast( - simdutf::tables::utf8_to_utf16::shufutf8[idx]), - 0); - // 1 byte: 00000000 00000000 00000000 0ddddddd - // 3 byte: 00000000 00000000 110ccccc 10dddddd - // 3 byte: 00000000 1110bbbb 10cccccc 10dddddd - // 4 byte: 11110aaa 10bbbbbb 10cccccc 10dddddd - sh = __lsx_vand_v(sh, __lsx_vldi(0x1f)); - __m128i perm = __lsx_vshuf_b(zero, in, sh); - // added to fix issue https://github.com/simdutf/simdutf/issues/514 - // We only want to write 2 * 16-bit code units when that is actually what we - // have. Unfortunately, we cannot trust the input. So it is possible to get - // 0xff as an input byte and it should not result in a surrogate pair. We - // need to check for that. - uint32_t permbuffer[4]; - __lsx_vst(perm, permbuffer, 0); - // Mask the low and middle bytes - // 00000000 00000000 00000000 0ddddddd - __m128i ascii = __lsx_vand_v(perm, __lsx_vrepli_w(0x7f)); - // Because the surrogates need more work, the high surrogate is computed - // first. - __m128i middlehigh = __lsx_vslli_w(perm, 2); - // 00000000 00000000 00cccccc 00000000 - __m128i middlebyte = __lsx_vand_v(perm, lsx_splat_u32(0x00003F00)); - // Start assembling the sequence. Since the 4th byte is in the same position - // as it would be in a surrogate and there is no dependency, shift left - // instead of right. 3 byte: 00000000 10bbbbxx xxxxxxxx xxxxxxxx 4 byte: - // 11110aaa bbbbbbxx xxxxxxxx xxxxxxxx - __m128i ab = __lsx_vbitsel_v(middlehigh, perm, lsx_splat_u32(0xFF000000)); - // Top 16 bits contains the high ten bits of the surrogate pair before - // correction 3 byte: 00000000 10bbbbcc|cccc0000 00000000 4 byte: 11110aaa - // bbbbbbcc|cccc0000 00000000 - high 10 bits correct w/o correction - __m128i v_fffc0000 = __lsx_vreplgr2vr_w(uint32_t(0xFFFC0000)); - __m128i abc = __lsx_vbitsel_v(__lsx_vslli_w(middlebyte, 4), ab, v_fffc0000); - // Combine the low 6 or 7 bits by a shift right accumulate - // 3 byte: 00000000 00000010|bbbbcccc ccdddddd - low 16 bits correct - // 4 byte: 00000011 110aaabb|bbbbcccc ccdddddd - low 10 bits correct w/o - // correction - __m128i composed = __lsx_vor_v(ascii, __lsx_vsrli_w(abc, 6)); - // After this is for surrogates - // Blend the low and high surrogates - // 4 byte: 11110aaa bbbbbbcc|bbbbcccc ccdddddd - __m128i mixed = __lsx_vbitsel_v(abc, composed, lsx_splat_u32(0x0000FFFF)); - // Clear the upper 6 bits of the low surrogate. Don't clear the upper bits - // yet as 0x10000 was not subtracted from the codepoint yet. 4 byte: - // 11110aaa bbbbbbcc|000000cc ccdddddd - __m128i v_ffff03ff = __lsx_vreplgr2vr_w(uint32_t(0xFFFF03FF)); - __m128i masked_pair = __lsx_vand_v(mixed, v_ffff03ff); - // Correct the remaining UTF-8 prefix, surrogate offset, and add the - // surrogate prefixes in one magic 16-bit addition. similar magic number but - // without the continue byte adjust and halfword swapped UTF-8 4b prefix = - // -0xF000|0x0000 surrogate offset = -0x0040|0x0000 (0x10000 << 6) - // surrogate high = +0xD800|0x0000 - // surrogate low = +0x0000|0xDC00 - // ----------------------------------- - // = +0xE7C0|0xDC00 - __m128i magic = __lsx_vreplgr2vr_w(uint32_t(0xE7C0DC00)); - // 4 byte: 110110AA BBBBBBCC|110111CC CCDDDDDD - surrogate pair complete - __m128i surrogates = __lsx_vadd_w(masked_pair, magic); - // If the high bit is 1 (s32 less than zero), this needs a surrogate pair - __m128i is_pair = __lsx_vslt_w(perm, zero); - // Select either the 4 byte surrogate pair or the 2 byte solo codepoint - // 3 byte: 0xxxxxxx xxxxxxxx|bbbbcccc ccdddddd - // 4 byte: 110110AA BBBBBBCC|110111CC CCDDDDDD - __m128i selected = __lsx_vbitsel_v(composed, surrogates, is_pair); - // Byte swap if necessary - if (!match_system(big_endian)) { - selected = lsx_swap_bytes(selected); - } - // Attempting to shuffle and store would be complex, just scalarize. - uint32_t buffer_tmp[4]; - __lsx_vst(selected, buffer_tmp, 0); - // Test for the top bit of the surrogate mask. Remove due to issue 514 - // const uint32_t SURROGATE_MASK = match_system(big_endian) ? 0x80000000 : - // 0x00800000; - for (size_t i = 0; i < 3; i++) { - // Surrogate - // Used to be if (buffer[i] & SURROGATE_MASK) { - // See discussion above. - // patch for issue https://github.com/simdutf/simdutf/issues/514 - if ((permbuffer[i] & 0xf8000000) == 0xf0000000) { - utf16_output[0] = uint16_t(buffer_tmp[i] >> 16); - utf16_output[1] = uint16_t(buffer_tmp[i] & 0xFFFF); - utf16_output += 2; - } else { - utf16_output[0] = uint16_t(buffer_tmp[i] & 0xFFFF); - utf16_output++; - } - } - return consumed; - } else { - // here we know that there is an error but we do not handle errors - return 12; - } -} -/* end file src/lasx/lasx_convert_utf8_to_utf16.cpp */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -/* begin file src/lasx/lasx_convert_utf8_to_utf32.cpp */ -// Convert up to 12 bytes from utf8 to utf32 using a mask indicating the -// end of the code points. Only the least significant 12 bits of the mask -// are accessed. -// It returns how many bytes were consumed (up to 12). -size_t convert_masked_utf8_to_utf32(const char *input, - uint64_t utf8_end_of_code_point_mask, - char32_t *&utf32_out) { - // we use an approach where we try to process up to 12 input bytes. - // Why 12 input bytes and not 16? Because we are concerned with the size of - // the lookup tables. Also 12 is nicely divisible by two and three. - // - uint32_t *&utf32_output = reinterpret_cast(utf32_out); - __m128i in = __lsx_vld(reinterpret_cast(input), 0); - const uint16_t input_utf8_end_of_code_point_mask = - utf8_end_of_code_point_mask & 0xFFF; - // - // Optimization note: our main path below is load-latency dependent. Thus it - // is maybe beneficial to have fast paths that depend on branch prediction but - // have less latency. This results in more instructions but, potentially, also - // higher speeds. - // - // We first try a few fast paths. - if ((utf8_end_of_code_point_mask & 0xffff) == 0xffff) { - // We process in chunks of 16 bytes. - // use fast implementation in src/simdutf/arm64/simd.h - // Ideally the compiler can keep the tables in registers. - __m128i zero = __lsx_vldi(0); - __m128i in16low = __lsx_vilvl_b(zero, in); - __m128i in16high = __lsx_vilvh_b(zero, in); - __m128i in32_0 = __lsx_vilvl_h(zero, in16low); - __m128i in32_1 = __lsx_vilvh_h(zero, in16low); - __m128i in32_2 = __lsx_vilvl_h(zero, in16high); - __m128i in32_3 = __lsx_vilvh_h(zero, in16high); - - __lsx_vst(in32_0, reinterpret_cast(utf32_output), 0); - __lsx_vst(in32_1, reinterpret_cast(utf32_output), 16); - __lsx_vst(in32_2, reinterpret_cast(utf32_output), 32); - __lsx_vst(in32_3, reinterpret_cast(utf32_output), 48); - - utf32_output += 16; // We wrote 16 32-bit characters. - return 16; // We consumed 16 bytes. - } - __m128i zero = __lsx_vldi(0); - if (input_utf8_end_of_code_point_mask == 0x924) { - // We want to take 4 3-byte UTF-8 code units and turn them into 4 4-byte - // UTF-32 code units. Convert to UTF-16 - __m128i composed_utf16 = convert_utf8_3_byte_to_utf16(in); - __m128i utf32_low = __lsx_vilvl_h(zero, composed_utf16); - - __lsx_vst(utf32_low, reinterpret_cast(utf32_output), 0); - utf32_output += 4; // We wrote 4 32-bit characters. - return 12; // We consumed 12 bytes. - } - // 2 byte sequences occur in short bursts in languages like Greek and Russian. - if (input_utf8_end_of_code_point_mask == 0xaaa) { - // We want to take 6 2-byte UTF-8 code units and turn them into 6 4-byte - // UTF-32 code units. Convert to UTF-16 - __m128i composed_utf16 = convert_utf8_2_byte_to_utf16(in); - - __m128i utf32_low = __lsx_vilvl_h(zero, composed_utf16); - __m128i utf32_high = __lsx_vilvh_h(zero, composed_utf16); - - __lsx_vst(utf32_low, reinterpret_cast(utf32_output), 0); - __lsx_vst(utf32_high, reinterpret_cast(utf32_output), 16); - utf32_output += 6; - return 12; // We consumed 12 bytes. - } - // Either no fast path or an unimportant fast path. - - const uint8_t idx = simdutf::tables::utf8_to_utf16::utf8bigindex - [input_utf8_end_of_code_point_mask][0]; - const uint8_t consumed = simdutf::tables::utf8_to_utf16::utf8bigindex - [input_utf8_end_of_code_point_mask][1]; - - if (idx < 64) { - // SIX (6) input code-code units - // Convert to UTF-16 - __m128i composed_utf16 = convert_utf8_1_to_2_byte_to_utf16(in, idx); - __m128i utf32_low = __lsx_vilvl_h(zero, composed_utf16); - __m128i utf32_high = __lsx_vilvh_h(zero, composed_utf16); - - __lsx_vst(utf32_low, reinterpret_cast(utf32_output), 0); - __lsx_vst(utf32_high, reinterpret_cast(utf32_output), 16); - utf32_output += 6; - return consumed; - } else if (idx < 145) { - // FOUR (4) input code-code units - // UTF-16 and UTF-32 use similar algorithms, but UTF-32 skips the narrowing. - __m128i sh = __lsx_vld(reinterpret_cast( - simdutf::tables::utf8_to_utf16::shufutf8[idx]), - 0); - // Shuffle - // 1 byte: 00000000 00000000 0ccccccc - // 2 byte: 00000000 110bbbbb 10cccccc - // 3 byte: 1110aaaa 10bbbbbb 10cccccc - sh = __lsx_vand_v(sh, __lsx_vldi(0x1f)); - __m128i perm = __lsx_vshuf_b(zero, in, sh); - // Split - // 00000000 00000000 0ccccccc - __m128i ascii = __lsx_vand_v(perm, __lsx_vrepli_w(0x7F)); // 6 or 7 bits - // Note: unmasked - // xxxxxxxx aaaaxxxx xxxxxxxx - __m128i high = - __lsx_vsrli_w(__lsx_vand_v(perm, __lsx_vldi(0xf)), 4); // 4 bits - // Use 16 bit bic instead of and. - // The top bits will be corrected later in the bsl - // 00000000 10bbbbbb 00000000 - __m128i middle = - __lsx_vand_v(perm, lsx_splat_u32(0x0000FF00)); // 5 or 6 bits - // Combine low and middle with shift right accumulate - // 00000000 00xxbbbb bbcccccc - __m128i lowmid = __lsx_vor_v(ascii, __lsx_vsrli_w(middle, 2)); - // Insert top 4 bits from high byte with bitwise select - // 00000000 aaaabbbb bbcccccc - __m128i composed = __lsx_vbitsel_v(lowmid, high, lsx_splat_u32(0x0000F000)); - __lsx_vst(composed, utf32_output, 0); - utf32_output += 4; // We wrote 4 32-bit characters. - return consumed; - } else if (idx < 209) { - // THREE (3) input code-code units - if (input_utf8_end_of_code_point_mask == 0x888) { - // We want to take 3 4-byte UTF-8 code units and turn them into 3 4-byte - // UTF-32 code units. This uses the same method as the fixed 3 byte - // version, reversing and shift left insert. However, there is no need for - // a shuffle mask now, just rev16 and rev32. - // - // This version does not use the LUT, but 4 byte sequences are less common - // and the overhead of the extra memory access is less important than the - // early branch overhead in shorter sequences, so it comes last. - - // Swap pairs of bytes - // 10dddddd|10cccccc|10bbbbbb|11110aaa - // 10cccccc 10dddddd|11110aaa 10bbbbbb - __m128i swap = lsx_swap_bytes(in); - // Shift left and insert - // xxxxcccc ccdddddd|xxxxxxxa aabbbbbb - __m128i merge1 = __lsx_vbitsel_v(__lsx_vsrli_h(swap, 2), swap, - __lsx_vrepli_h(0x3f /*0x003F*/)); - // Shift insert again - // xxxxxxxx xxxaaabb bbbbcccc ccdddddd - __m128i merge2 = - __lsx_vbitsel_v(__lsx_vslli_w(merge1, 12), /* merge1 << 12 */ - __lsx_vsrli_w(merge1, 16), /* merge1 >> 16 */ - lsx_splat_u32(0x00000FFF)); - // Clear the garbage - // 00000000 000aaabb bbbbcccc ccdddddd - __m128i composed = __lsx_vand_v(merge2, lsx_splat_u32(0x1FFFFF)); - // Store - __lsx_vst(composed, utf32_output, 0); - utf32_output += 3; // We wrote 3 32-bit characters. - return 12; // We consumed 12 bytes. - } - // Unlike UTF-16, doing a fast codepath doesn't have nearly as much benefit - // due to surrogates no longer being involved. - __m128i sh = __lsx_vld(reinterpret_cast( - simdutf::tables::utf8_to_utf16::shufutf8[idx]), - 0); - // 1 byte: 00000000 00000000 00000000 0ddddddd - // 2 byte: 00000000 00000000 110ccccc 10dddddd - // 3 byte: 00000000 1110bbbb 10cccccc 10dddddd - // 4 byte: 11110aaa 10bbbbbb 10cccccc 10dddddd - sh = __lsx_vand_v(sh, __lsx_vldi(0x1f)); - __m128i perm = __lsx_vshuf_b(zero, in, sh); - - // Ascii - __m128i ascii = __lsx_vand_v(perm, __lsx_vrepli_w(0x7F)); - __m128i middle = __lsx_vand_v(perm, lsx_splat_u32(0x00003f00)); - // 00000000 00000000 0000cccc ccdddddd - __m128i cd = __lsx_vor_v(__lsx_vsrli_w(middle, 2), ascii); - - __m128i correction = __lsx_vand_v(perm, lsx_splat_u32(0x00400000)); - __m128i corrected = __lsx_vadd_b(perm, __lsx_vsrli_w(correction, 1)); - // Insert twice - // 00000000 000aaabb bbbbxxxx xxxxxxxx - __m128i corrected_srli2 = - __lsx_vsrli_w(__lsx_vand_v(corrected, __lsx_vrepli_b(0x7)), 2); - __m128i ab = - __lsx_vbitsel_v(corrected_srli2, corrected, __lsx_vrepli_h(0x3f)); - ab = __lsx_vsrli_w(ab, 4); - // 00000000 000aaabb bbbbcccc ccdddddd - __m128i composed = __lsx_vbitsel_v(ab, cd, lsx_splat_u32(0x00000FFF)); - // Store - __lsx_vst(composed, utf32_output, 0); - utf32_output += 3; // We wrote 3 32-bit characters. - return consumed; - } else { - // here we know that there is an error but we do not handle errors - return 12; - } -} -/* end file src/lasx/lasx_convert_utf8_to_utf32.cpp */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/lasx/lasx_convert_utf8_to_latin1.cpp */ -size_t convert_masked_utf8_to_latin1(const char *input, - uint64_t utf8_end_of_code_point_mask, - char *&latin1_output) { - // we use an approach where we try to process up to 12 input bytes. - // Why 12 input bytes and not 16? Because we are concerned with the size of - // the lookup tables. Also 12 is nicely divisible by two and three. - // - __m128i in = __lsx_vld(reinterpret_cast(input), 0); - - const uint16_t input_utf8_end_of_code_point_mask = - utf8_end_of_code_point_mask & 0xfff; - // Optimization note: our main path below is load-latency dependent. Thus it - // is maybe beneficial to have fast paths that depend on branch prediction but - // have less latency. This results in more instructions but, potentially, also - // higher speeds. - - // We first try a few fast paths. - // The obvious first test is ASCII, which actually consumes the full 16. - if ((utf8_end_of_code_point_mask & 0xFFFF) == 0xFFFF) { - // We process in chunks of 16 bytes - __lsx_vst(in, reinterpret_cast(latin1_output), 0); - latin1_output += 16; // We wrote 16 18-bit characters. - return 16; // We consumed 16 bytes. - } - /// We do not have a fast path available, or the fast path is unimportant, so - /// we fallback. - const uint8_t idx = simdutf::tables::utf8_to_utf16::utf8bigindex - [input_utf8_end_of_code_point_mask][0]; - - const uint8_t consumed = simdutf::tables::utf8_to_utf16::utf8bigindex - [input_utf8_end_of_code_point_mask][1]; - // this indicates an invalid input: - if (idx >= 64) { - return consumed; - } - // Here we should have (idx < 64), if not, there is a bug in the validation or - // elsewhere. SIX (6) input code-code units this is a relatively easy scenario - // we process SIX (6) input code-code units. The max length in bytes of six - // code code units spanning between 1 and 2 bytes each is 12 bytes. Converts 6 - // 1-2 byte UTF-8 characters to 6 UTF-16 characters. This is a relatively easy - // scenario we process SIX (6) input code-code units. The max length in bytes - // of six code code units spanning between 1 and 2 bytes each is 12 bytes. - __m128i sh = __lsx_vld(reinterpret_cast( - simdutf::tables::utf8_to_utf16::shufutf8[idx]), - 0); - // Shuffle - // 1 byte: 00000000 0bbbbbbb - // 2 byte: 110aaaaa 10bbbbbb - sh = __lsx_vand_v(sh, __lsx_vldi(0x1f)); - __m128i perm = __lsx_vshuf_b(__lsx_vldi(0), in, sh); - // ascii mask - // 1 byte: 11111111 11111111 - // 2 byte: 00000000 00000000 - __m128i ascii_mask = __lsx_vslt_bu(perm, __lsx_vldi(0x80)); - // utf8 mask - // 1 byte: 00000000 00000000 - // 2 byte: 00111111 00111111 - __m128i utf8_mask = __lsx_vand_v(__lsx_vsle_bu(__lsx_vldi(0x80), perm), - __lsx_vldi(0b00111111)); - // mask - // 1 byte: 11111111 11111111 - // 2 byte: 00111111 00111111 - __m128i mask = __lsx_vor_v(utf8_mask, ascii_mask); - - __m128i composed = __lsx_vbitsel_v(__lsx_vsrli_h(perm, 2), perm, mask); - // writing 8 bytes even though we only care about the first 6 bytes. - __m128i latin1_packed = __lsx_vpickev_b(__lsx_vldi(0), composed); - - __lsx_vst(latin1_packed, reinterpret_cast(latin1_output), 0); - latin1_output += 6; // We wrote 6 bytes. - return consumed; -} -/* end file src/lasx/lasx_convert_utf8_to_latin1.cpp */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/lasx/lasx_convert_utf16_to_latin1.cpp */ -template -std::pair -lasx_convert_utf16_to_latin1(const char16_t *buf, size_t len, - char *latin1_output) { - const char16_t *end = buf + len; - while (end - buf >= 16) { - __m128i in = __lsx_vld(reinterpret_cast(buf), 0); - __m128i in1 = __lsx_vld(reinterpret_cast(buf), 16); - if (!match_system(big_endian)) { - in = lsx_swap_bytes(in); - in1 = lsx_swap_bytes(in1); - } - if (__lsx_bz_v(__lsx_vpickod_b(in1, in))) { - // 1. pack the bytes - __m128i latin1_packed = __lsx_vpickev_b(in1, in); - // 2. store (8 bytes) - __lsx_vst(latin1_packed, reinterpret_cast(latin1_output), 0); - // 3. adjust pointers - buf += 16; - latin1_output += 16; - } else { - return std::make_pair(nullptr, reinterpret_cast(latin1_output)); - } - } // while - return std::make_pair(buf, latin1_output); -} - -template -std::pair -lasx_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len, - char *latin1_output) { - const char16_t *start = buf; - const char16_t *end = buf + len; - while (end - buf >= 16) { - __m128i in = __lsx_vld(reinterpret_cast(buf), 0); - __m128i in1 = __lsx_vld(reinterpret_cast(buf), 16); - if (!match_system(big_endian)) { - in = lsx_swap_bytes(in); - in1 = lsx_swap_bytes(in1); - } - if (__lsx_bz_v(__lsx_vpickod_b(in1, in))) { - // 1. pack the bytes - __m128i latin1_packed = __lsx_vpickev_b(in1, in); - // 2. store (8 bytes) - __lsx_vst(latin1_packed, reinterpret_cast(latin1_output), 0); - // 3. adjust pointers - buf += 16; - latin1_output += 16; - } else { - // Let us do a scalar fallback. - for (int k = 0; k < 16; k++) { - uint16_t word = - !match_system(big_endian) ? scalar::u16_swap_bytes(buf[k]) : buf[k]; - if (word <= 0xff) { - *latin1_output++ = char(word); - } else { - return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), - latin1_output); - } - } - } - } // while - return std::make_pair(result(error_code::SUCCESS, buf - start), - latin1_output); -} -/* end file src/lasx/lasx_convert_utf16_to_latin1.cpp */ -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -/* begin file src/lasx/lasx_convert_utf16_to_utf8.cpp */ -/* - The vectorized algorithm works on single LASX register i.e., it - loads eight 16-bit code units. - - We consider three cases: - 1. an input register contains no surrogates and each value - is in range 0x0000 .. 0x07ff. - 2. an input register contains no surrogates and values are - is in range 0x0000 .. 0xffff. - 3. an input register contains surrogates --- i.e. codepoints - can have 16 or 32 bits. - - Ad 1. - - When values are less than 0x0800, it means that a 16-bit code unit - can be converted into: 1) single UTF8 byte (when it's an ASCII - char) or 2) two UTF8 bytes. - - For this case we do only some shuffle to obtain these 2-byte - codes and finally compress the whole LASX register with a single - shuffle. - - We need 256-entry lookup table to get a compression pattern - and the number of output bytes in the compressed vector register. - Each entry occupies 17 bytes. - - Ad 2. - - When values fit in 16-bit code units, but are above 0x07ff, then - a single word may produce one, two or three UTF8 bytes. - - We prepare data for all these three cases in two registers. - The first register contains lower two UTF8 bytes (used in all - cases), while the second one contains just the third byte for - the three-UTF8-bytes case. - - Finally these two registers are interleaved forming eight-element - array of 32-bit values. The array spans two LASX registers. - The bytes from the registers are compressed using two shuffles. - - We need 256-entry lookup table to get a compression pattern - and the number of output bytes in the compressed vector register. - Each entry occupies 17 bytes. - - - To summarize: - - We need two 256-entry tables that have 8704 bytes in total. -*/ -/* - Returns a pair: the first unprocessed byte from buf and utf8_output - A scalar routing should carry on the conversion of the tail. -*/ - -template -std::pair -lasx_convert_utf16_to_utf8(const char16_t *buf, size_t len, char *utf8_out) { - uint8_t *utf8_output = reinterpret_cast(utf8_out); - const char16_t *end = buf + len; - - const size_t safety_margin = - 12; // to avoid overruns, see issue - // https://github.com/simdutf/simdutf/issues/92 - - __m256i v_07ff = __lasx_xvreplgr2vr_h(uint16_t(0x7ff)); - __m256i zero = __lasx_xvldi(0); - __m128i zero_128 = __lsx_vldi(0); - while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { - __m256i in = __lasx_xvld(reinterpret_cast(buf), 0); - if (!match_system(big_endian)) { - in = lasx_swap_bytes(in); - } - if (__lasx_xbnz_h(__lasx_xvslt_hu( - in, __lasx_xvrepli_h(0x7F)))) { // ASCII fast path!!!! - // 1. pack the bytes - __m256i utf8_packed = - __lasx_xvpermi_d(__lasx_xvpickev_b(in, in), 0b00001000); - // 2. store (16 bytes) - __lsx_vst(lasx_extracti128_lo(utf8_packed), utf8_output, 0); - // 3. adjust pointers - buf += 16; - utf8_output += 16; - continue; // we are done for this round! - } - - if (__lasx_xbz_v(__lasx_xvslt_hu(v_07ff, in))) { - // 1. prepare 2-byte values - // input 16-bit word : [0000|0aaa|aabb|bbbb] x 16 - // expected output : [110a|aaaa|10bb|bbbb] x 16 - // t0 = [000a|aaaa|bbbb|bb00] - __m256i t0 = __lasx_xvslli_h(in, 2); - // t1 = [000a|aaaa|0000|0000] - __m256i t1 = __lasx_xvand_v(t0, lasx_splat_u16(0x1f00)); - // t2 = [0000|0000|00bb|bbbb] - __m256i t2 = __lasx_xvand_v(in, __lasx_xvrepli_h(0x3f)); - // t3 = [000a|aaaa|00bb|bbbb] - __m256i t3 = __lasx_xvor_v(t1, t2); - // t4 = [110a|aaaa|10bb|bbbb] - __m256i v_c080 = __lasx_xvreplgr2vr_h(uint16_t(0xc080)); - __m256i t4 = __lasx_xvor_v(t3, v_c080); - // 2. merge ASCII and 2-byte codewords - __m256i one_byte_bytemask = - __lasx_xvsle_hu(in, __lasx_xvrepli_h(0x7F /*0x007F*/)); - __m256i utf8_unpacked = __lasx_xvbitsel_v(t4, in, one_byte_bytemask); - // 3. prepare bitmask for 8-bit lookup - __m256i mask = __lasx_xvmskltz_h(one_byte_bytemask); - uint32_t m1 = __lasx_xvpickve2gr_wu(mask, 0); - uint32_t m2 = __lasx_xvpickve2gr_wu(mask, 4); - // 4. pack the bytes - const uint8_t *row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes - [lasx_1_2_utf8_bytes_mask[m1]][0]; - __m128i shuffle1 = __lsx_vld(row1, 1); - __m128i utf8_packed1 = - __lsx_vshuf_b(zero_128, lasx_extracti128_lo(utf8_unpacked), shuffle1); - - const uint8_t *row2 = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes - [lasx_1_2_utf8_bytes_mask[m2]][0]; - __m128i shuffle2 = __lsx_vld(row2, 1); - __m128i utf8_packed2 = - __lsx_vshuf_b(zero_128, lasx_extracti128_hi(utf8_unpacked), shuffle2); - // 5. store bytes - __lsx_vst(utf8_packed1, utf8_output, 0); - utf8_output += row1[0]; - - __lsx_vst(utf8_packed2, utf8_output, 0); - utf8_output += row2[0]; - - buf += 16; - continue; - } - __m256i surrogates_bytemask = __lasx_xvseq_h( - __lasx_xvand_v(in, lasx_splat_u16(0xf800)), lasx_splat_u16(0xd800)); - // It might seem like checking for surrogates_bitmask == 0xc000 could help. - // However, it is likely an uncommon occurrence. - if (__lasx_xbz_v(surrogates_bytemask)) { - // case: code units from register produce either 1, 2 or 3 UTF-8 bytes - /* In this branch we handle three cases: - 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - - single UFT-8 byte - 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - - two UTF-8 bytes - 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - - three UTF-8 bytes - - We expand the input word (16-bit) into two code units (32-bit), thus - we have room for four bytes. However, we need five distinct bit - layouts. Note that the last byte in cases #2 and #3 is the same. - - We precompute byte 1 for case #1 and the common byte for cases #2 & #3 - in register t2. - - We precompute byte 1 for case #3 and -- **conditionally** -- - precompute either byte 1 for case #2 or byte 2 for case #3. Note that - they differ by exactly one bit. - - Finally from these two code units we build proper UTF-8 sequence, - taking into account the case (i.e, the number of bytes to write). - */ - /** - * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: - * t2 => [0ccc|cccc] [10cc|cccc] - * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) - */ - // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - __m256i t0 = __lasx_xvpickev_b(in, in); - t0 = __lasx_xvilvl_b(t0, t0); - - // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|00cc|cccc] - __m256i v_3f7f = __lasx_xvreplgr2vr_h(uint16_t(0x3F7F)); - __m256i t1 = __lasx_xvand_v(t0, v_3f7f); - // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - __m256i t2 = __lasx_xvor_v(t1, lasx_splat_u16(0x8000)); - - // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] - __m256i s0 = __lasx_xvsrli_h(in, 12); - // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] - __m256i s1 = __lasx_xvslli_h(in, 2); - // s1: [aabb|bbbb|cccc|cc00] => [00bb|bbbb|0000|0000] - s1 = __lasx_xvand_v(s1, lasx_splat_u16(0x3f00)); - - // [00bb|bbbb|0000|aaaa] - __m256i s2 = __lasx_xvor_v(s0, s1); - // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - __m256i v_c0e0 = __lasx_xvreplgr2vr_h(uint16_t(0xC0E0)); - __m256i s3 = __lasx_xvor_v(s2, v_c0e0); - __m256i one_or_two_bytes_bytemask = __lasx_xvsle_hu(in, v_07ff); - __m256i m0 = - __lasx_xvandn_v(one_or_two_bytes_bytemask, lasx_splat_u16(0x4000)); - __m256i s4 = __lasx_xvxor_v(s3, m0); - - // 4. expand code units 16-bit => 32-bit - __m256i out0 = __lasx_xvilvl_h(s4, t2); - __m256i out1 = __lasx_xvilvh_h(s4, t2); - - // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - __m256i one_byte_bytemask = __lasx_xvsle_hu(in, __lasx_xvrepli_h(0x7F)); - __m256i one_byte_bytemask_low = - __lasx_xvilvl_h(one_byte_bytemask, one_byte_bytemask); - __m256i one_byte_bytemask_high = - __lasx_xvilvh_h(one_byte_bytemask, one_byte_bytemask); - - __m256i one_or_two_bytes_bytemask_low = - __lasx_xvilvl_h(one_or_two_bytes_bytemask, zero); - __m256i one_or_two_bytes_bytemask_high = - __lasx_xvilvh_h(one_or_two_bytes_bytemask, zero); - - __m256i mask0 = __lasx_xvmskltz_h( - __lasx_xvor_v(one_or_two_bytes_bytemask_low, one_byte_bytemask_low)); - __m256i mask1 = __lasx_xvmskltz_h(__lasx_xvor_v( - one_or_two_bytes_bytemask_high, one_byte_bytemask_high)); - - uint32_t mask = __lasx_xvpickve2gr_wu(mask0, 0); - const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] - [0]; - __m128i shuffle0 = __lsx_vld(row0, 1); - __m128i utf8_0 = - __lsx_vshuf_b(zero_128, lasx_extracti128_lo(out0), shuffle0); - __lsx_vst(utf8_0, utf8_output, 0); - utf8_output += row0[0]; - - mask = __lasx_xvpickve2gr_wu(mask1, 0); - const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] - [0]; - __m128i shuffle1 = __lsx_vld(row1, 1); - __m128i utf8_1 = - __lsx_vshuf_b(zero_128, lasx_extracti128_lo(out1), shuffle1); - __lsx_vst(utf8_1, utf8_output, 0); - utf8_output += row1[0]; - - mask = __lasx_xvpickve2gr_wu(mask0, 4); - const uint8_t *row2 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] - [0]; - __m128i shuffle2 = __lsx_vld(row2, 1); - __m128i utf8_2 = - __lsx_vshuf_b(zero_128, lasx_extracti128_hi(out0), shuffle2); - __lsx_vst(utf8_2, utf8_output, 0); - utf8_output += row2[0]; - - mask = __lasx_xvpickve2gr_wu(mask1, 4); - const uint8_t *row3 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] - [0]; - __m128i shuffle3 = __lsx_vld(row3, 1); - __m128i utf8_3 = - __lsx_vshuf_b(zero_128, lasx_extracti128_hi(out1), shuffle3); - __lsx_vst(utf8_3, utf8_output, 0); - utf8_output += row3[0]; - - buf += 16; - // surrogate pair(s) in a register - } else { - // Let us do a scalar fallback. - // It may seem wasteful to use scalar code, but being efficient with SIMD - // in the presence of surrogate pairs may require non-trivial tables. - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint16_t word = - !match_system(big_endian) ? scalar::u16_swap_bytes(buf[k]) : buf[k]; - if ((word & 0xFF80) == 0) { - *utf8_output++ = char(word); - } else if ((word & 0xF800) == 0) { - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else if ((word & 0xF800) != 0xD800) { - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else { - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - uint16_t next_word = !match_system(big_endian) - ? scalar::u16_swap_bytes(buf[k + 1]) - : buf[k + 1]; - k++; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if ((diff | diff2) > 0x3FF) { - return std::make_pair(nullptr, - reinterpret_cast(utf8_output)); - } - uint32_t value = (diff << 10) + diff2 + 0x10000; - *utf8_output++ = char((value >> 18) | 0b11110000); - *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((value & 0b111111) | 0b10000000); - } - } - buf += k; - } - } // while - return std::make_pair(buf, reinterpret_cast(utf8_output)); -} - -/* - Returns a pair: a result struct and utf8_output. - If there is an error, the count field of the result is the position of the - error. Otherwise, it is the position of the first unprocessed byte in buf - (even if finished). A scalar routing should carry on the conversion of the - tail if needed. -*/ -template -std::pair -lasx_convert_utf16_to_utf8_with_errors(const char16_t *buf, size_t len, - char *utf8_out) { - uint8_t *utf8_output = reinterpret_cast(utf8_out); - const char16_t *start = buf; - const char16_t *end = buf + len; - - const size_t safety_margin = - 12; // to avoid overruns, see issue - // https://github.com/simdutf/simdutf/issues/92 - - __m256i v_07ff = __lasx_xvreplgr2vr_h(uint16_t(0x7ff)); - __m256i zero = __lasx_xvldi(0); - __m128i zero_128 = __lsx_vldi(0); - while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { - __m256i in = __lasx_xvld(reinterpret_cast(buf), 0); - if (!match_system(big_endian)) { - in = lasx_swap_bytes(in); - } - if (__lasx_xbnz_h(__lasx_xvslt_hu( - in, __lasx_xvrepli_h(0x7F)))) { // ASCII fast path!!!! - // 1. pack the bytes - __m256i utf8_packed = - __lasx_xvpermi_d(__lasx_xvpickev_b(in, in), 0b00001000); - // 2. store (16 bytes) - __lsx_vst(lasx_extracti128_lo(utf8_packed), utf8_output, 0); - // 3. adjust pointers - buf += 16; - utf8_output += 16; - continue; // we are done for this round! - } - - if (__lasx_xbz_v(__lasx_xvslt_hu(v_07ff, in))) { - // 1. prepare 2-byte values - // input 16-bit word : [0000|0aaa|aabb|bbbb] x 16 - // expected output : [110a|aaaa|10bb|bbbb] x 16 - // t0 = [000a|aaaa|bbbb|bb00] - __m256i t0 = __lasx_xvslli_h(in, 2); - // t1 = [000a|aaaa|0000|0000] - __m256i t1 = __lasx_xvand_v(t0, lasx_splat_u16(0x1f00)); - // t2 = [0000|0000|00bb|bbbb] - __m256i t2 = __lasx_xvand_v(in, __lasx_xvrepli_h(0x3f)); - // t3 = [000a|aaaa|00bb|bbbb] - __m256i t3 = __lasx_xvor_v(t1, t2); - // t4 = [110a|aaaa|10bb|bbbb] - __m256i v_c080 = __lasx_xvreplgr2vr_h(uint16_t(0xc080)); - __m256i t4 = __lasx_xvor_v(t3, v_c080); - // 2. merge ASCII and 2-byte codewords - __m256i one_byte_bytemask = - __lasx_xvsle_hu(in, __lasx_xvrepli_h(0x7F /*0x007F*/)); - __m256i utf8_unpacked = __lasx_xvbitsel_v(t4, in, one_byte_bytemask); - // 3. prepare bitmask for 8-bit lookup - __m256i mask = __lasx_xvmskltz_h(one_byte_bytemask); - uint32_t m1 = __lasx_xvpickve2gr_wu(mask, 0); - uint32_t m2 = __lasx_xvpickve2gr_wu(mask, 4); - // 4. pack the bytes - const uint8_t *row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes - [lasx_1_2_utf8_bytes_mask[m1]][0]; - __m128i shuffle1 = __lsx_vld(row1, 1); - __m128i utf8_packed1 = - __lsx_vshuf_b(zero_128, lasx_extracti128_lo(utf8_unpacked), shuffle1); - - const uint8_t *row2 = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes - [lasx_1_2_utf8_bytes_mask[m2]][0]; - __m128i shuffle2 = __lsx_vld(row2, 1); - __m128i utf8_packed2 = - __lsx_vshuf_b(zero_128, lasx_extracti128_hi(utf8_unpacked), shuffle2); - // 5. store bytes - __lsx_vst(utf8_packed1, utf8_output, 0); - utf8_output += row1[0]; - - __lsx_vst(utf8_packed2, utf8_output, 0); - utf8_output += row2[0]; - - buf += 16; - continue; - } - __m256i surrogates_bytemask = __lasx_xvseq_h( - __lasx_xvand_v(in, lasx_splat_u16(0xf800)), lasx_splat_u16(0xd800)); - // It might seem like checking for surrogates_bitmask == 0xc000 could help. - // However, it is likely an uncommon occurrence. - if (__lasx_xbz_v(surrogates_bytemask)) { - // case: code units from register produce either 1, 2 or 3 UTF-8 bytes - /* In this branch we handle three cases: - 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - - single UFT-8 byte - 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - - two UTF-8 bytes - 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - - three UTF-8 bytes - - We expand the input word (16-bit) into two code units (32-bit), thus - we have room for four bytes. However, we need five distinct bit - layouts. Note that the last byte in cases #2 and #3 is the same. - - We precompute byte 1 for case #1 and the common byte for cases #2 & #3 - in register t2. - - We precompute byte 1 for case #3 and -- **conditionally** -- - precompute either byte 1 for case #2 or byte 2 for case #3. Note that - they differ by exactly one bit. - - Finally from these two code units we build proper UTF-8 sequence, - taking into account the case (i.e, the number of bytes to write). - */ - /** - * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: - * t2 => [0ccc|cccc] [10cc|cccc] - * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) - */ - // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - __m256i t0 = __lasx_xvpickev_b(in, in); - t0 = __lasx_xvilvl_b(t0, t0); - - // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|00cc|cccc] - __m256i v_3f7f = __lasx_xvreplgr2vr_h(uint16_t(0x3F7F)); - __m256i t1 = __lasx_xvand_v(t0, v_3f7f); - // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - __m256i t2 = __lasx_xvor_v(t1, lasx_splat_u16(0x8000)); - - // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] - __m256i s0 = __lasx_xvsrli_h(in, 12); - // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] - __m256i s1 = __lasx_xvslli_h(in, 2); - // s1: [aabb|bbbb|cccc|cc00] => [00bb|bbbb|0000|0000] - s1 = __lasx_xvand_v(s1, lasx_splat_u16(0x3f00)); - - // [00bb|bbbb|0000|aaaa] - __m256i s2 = __lasx_xvor_v(s0, s1); - // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - __m256i v_c0e0 = __lasx_xvreplgr2vr_h(uint16_t(0xC0E0)); - __m256i s3 = __lasx_xvor_v(s2, v_c0e0); - __m256i one_or_two_bytes_bytemask = __lasx_xvsle_hu(in, v_07ff); - __m256i m0 = - __lasx_xvandn_v(one_or_two_bytes_bytemask, lasx_splat_u16(0x4000)); - __m256i s4 = __lasx_xvxor_v(s3, m0); - - // 4. expand code units 16-bit => 32-bit - __m256i out0 = __lasx_xvilvl_h(s4, t2); - __m256i out1 = __lasx_xvilvh_h(s4, t2); - - // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - __m256i one_byte_bytemask = __lasx_xvsle_hu(in, __lasx_xvrepli_h(0x7F)); - __m256i one_byte_bytemask_low = - __lasx_xvilvl_h(one_byte_bytemask, one_byte_bytemask); - __m256i one_byte_bytemask_high = - __lasx_xvilvh_h(one_byte_bytemask, one_byte_bytemask); - - __m256i one_or_two_bytes_bytemask_low = - __lasx_xvilvl_h(one_or_two_bytes_bytemask, zero); - __m256i one_or_two_bytes_bytemask_high = - __lasx_xvilvh_h(one_or_two_bytes_bytemask, zero); - - __m256i mask0 = __lasx_xvmskltz_h( - __lasx_xvor_v(one_or_two_bytes_bytemask_low, one_byte_bytemask_low)); - __m256i mask1 = __lasx_xvmskltz_h(__lasx_xvor_v( - one_or_two_bytes_bytemask_high, one_byte_bytemask_high)); - - uint32_t mask = __lasx_xvpickve2gr_wu(mask0, 0); - const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] - [0]; - __m128i shuffle0 = __lsx_vld(row0, 1); - __m128i utf8_0 = - __lsx_vshuf_b(zero_128, lasx_extracti128_lo(out0), shuffle0); - __lsx_vst(utf8_0, utf8_output, 0); - utf8_output += row0[0]; - - mask = __lasx_xvpickve2gr_wu(mask1, 0); - const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] - [0]; - __m128i shuffle1 = __lsx_vld(row1, 1); - __m128i utf8_1 = - __lsx_vshuf_b(zero_128, lasx_extracti128_lo(out1), shuffle1); - __lsx_vst(utf8_1, utf8_output, 0); - utf8_output += row1[0]; - - mask = __lasx_xvpickve2gr_wu(mask0, 4); - const uint8_t *row2 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] - [0]; - __m128i shuffle2 = __lsx_vld(row2, 1); - __m128i utf8_2 = - __lsx_vshuf_b(zero_128, lasx_extracti128_hi(out0), shuffle2); - __lsx_vst(utf8_2, utf8_output, 0); - utf8_output += row2[0]; - - mask = __lasx_xvpickve2gr_wu(mask1, 4); - const uint8_t *row3 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] - [0]; - __m128i shuffle3 = __lsx_vld(row3, 1); - __m128i utf8_3 = - __lsx_vshuf_b(zero_128, lasx_extracti128_hi(out1), shuffle3); - __lsx_vst(utf8_3, utf8_output, 0); - utf8_output += row3[0]; - - buf += 16; - // surrogate pair(s) in a register - } else { - // Let us do a scalar fallback. - // It may seem wasteful to use scalar code, but being efficient with SIMD - // in the presence of surrogate pairs may require non-trivial tables. - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint16_t word = - !match_system(big_endian) ? scalar::u16_swap_bytes(buf[k]) : buf[k]; - if ((word & 0xFF80) == 0) { - *utf8_output++ = char(word); - } else if ((word & 0xF800) == 0) { - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else if ((word & 0xF800) != 0xD800) { - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else { - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - uint16_t next_word = !match_system(big_endian) - ? scalar::u16_swap_bytes(buf[k + 1]) - : buf[k + 1]; - k++; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if ((diff | diff2) > 0x3FF) { - return std::make_pair( - result(error_code::SURROGATE, buf - start + k - 1), - reinterpret_cast(utf8_output)); - } - uint32_t value = (diff << 10) + diff2 + 0x10000; - *utf8_output++ = char((value >> 18) | 0b11110000); - *utf8_output++ = char(((value >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((value >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((value & 0b111111) | 0b10000000); - } - } - buf += k; - } - } // while - - return std::make_pair(result(error_code::SUCCESS, buf - start), - reinterpret_cast(utf8_output)); -} -/* end file src/lasx/lasx_convert_utf16_to_utf8.cpp */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -/* begin file src/lasx/lasx_convert_utf16_to_utf32.cpp */ -template -std::pair -lasx_convert_utf16_to_utf32(const char16_t *buf, size_t len, - char32_t *utf32_out) { - uint32_t *utf32_output = reinterpret_cast(utf32_out); - const char16_t *end = buf + len; - - // Performance degradation when memory address is not 32-byte aligned - while (((uint64_t)utf32_output & 0x1f) && buf < end) { - uint16_t word = - !match_system(big_endian) ? scalar::u16_swap_bytes(buf[0]) : buf[0]; - if ((word & 0xF800) != 0xD800) { - *utf32_output++ = char32_t(word); - buf++; - } else { - if (buf + 1 >= end) { - return std::make_pair(nullptr, - reinterpret_cast(utf32_output)); - } - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - uint16_t next_word = - !match_system(big_endian) ? scalar::u16_swap_bytes(buf[1]) : buf[1]; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if ((diff | diff2) > 0x3FF) { - return std::make_pair(nullptr, - reinterpret_cast(utf32_output)); - } - uint32_t value = (diff << 10) + diff2 + 0x10000; - *utf32_output++ = char32_t(value); - buf += 2; - } - } - - __m256i v_f800 = lasx_splat_u16(0xf800); - __m256i v_d800 = lasx_splat_u16(0xd800); - - while (end - buf >= 16) { - __m256i in = __lasx_xvld(reinterpret_cast(buf), 0); - if (!match_system(big_endian)) { - in = lasx_swap_bytes(in); - } - - __m256i surrogates_bytemask = - __lasx_xvseq_h(__lasx_xvand_v(in, v_f800), v_d800); - // It might seem like checking for surrogates_bitmask == 0xc000 could help. - // However, it is likely an uncommon occurrence. - if (__lasx_xbz_v(surrogates_bytemask)) { - // case: no surrogate pairs, extend all 16-bit code units to 32-bit code - // units - __m256i in_hi = __lasx_xvpermi_q(in, in, 0b00000001); - __lasx_xvst(__lasx_vext2xv_wu_hu(in), utf32_output, 0); - __lasx_xvst(__lasx_vext2xv_wu_hu(in_hi), utf32_output, 32); - utf32_output += 16; - buf += 16; - // surrogate pair(s) in a register - } else { - // Let us do a scalar fallback. - // It may seem wasteful to use scalar code, but being efficient with SIMD - // in the presence of surrogate pairs may require non-trivial tables. - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint16_t word = - !match_system(big_endian) ? scalar::u16_swap_bytes(buf[k]) : buf[k]; - if ((word & 0xF800) != 0xD800) { - *utf32_output++ = char32_t(word); - } else { - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - uint16_t next_word = !match_system(big_endian) - ? scalar::u16_swap_bytes(buf[k + 1]) - : buf[k + 1]; - k++; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if ((diff | diff2) > 0x3FF) { - return std::make_pair(nullptr, - reinterpret_cast(utf32_output)); - } - uint32_t value = (diff << 10) + diff2 + 0x10000; - *utf32_output++ = char32_t(value); - } - } - buf += k; - } - } // while - return std::make_pair(buf, reinterpret_cast(utf32_output)); -} - -/* - Returns a pair: a result struct and utf8_output. - If there is an error, the count field of the result is the position of the - error. Otherwise, it is the position of the first unprocessed byte in buf - (even if finished). A scalar routing should carry on the conversion of the - tail if needed. -*/ -template -std::pair -lasx_convert_utf16_to_utf32_with_errors(const char16_t *buf, size_t len, - char32_t *utf32_out) { - uint32_t *utf32_output = reinterpret_cast(utf32_out); - const char16_t *start = buf; - const char16_t *end = buf + len; - - // Performance degradation when memory address is not 32-byte aligned - while (((uint64_t)utf32_output & 0x1f) && buf < end) { - uint16_t word = - !match_system(big_endian) ? scalar::u16_swap_bytes(buf[0]) : buf[0]; - if ((word & 0xF800) != 0xD800) { - *utf32_output++ = char32_t(word); - buf++; - } else if (buf + 1 < end) { - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - uint16_t next_word = - !match_system(big_endian) ? scalar::u16_swap_bytes(buf[1]) : buf[1]; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if ((diff | diff2) > 0x3FF) { - return std::make_pair(result(error_code::SURROGATE, buf - start), - reinterpret_cast(utf32_output)); - } - uint32_t value = (diff << 10) + diff2 + 0x10000; - *utf32_output++ = char32_t(value); - buf += 2; - } else { - return std::make_pair(result(error_code::SURROGATE, buf - start), - reinterpret_cast(utf32_output)); - } - } - - __m256i v_f800 = lasx_splat_u16(0xf800); - __m256i v_d800 = lasx_splat_u16(0xd800); - while (end - buf >= 16) { - __m256i in = __lasx_xvld(reinterpret_cast(buf), 0); - if (!match_system(big_endian)) { - in = lasx_swap_bytes(in); - } - - __m256i surrogates_bytemask = - __lasx_xvseq_h(__lasx_xvand_v(in, v_f800), v_d800); - // It might seem like checking for surrogates_bitmask == 0xc000 could help. - // However, it is likely an uncommon occurrence. - if (__lasx_xbz_v(surrogates_bytemask)) { - // case: no surrogate pairs, extend all 16-bit code units to 32-bit code - // units - __m256i in_hi = __lasx_xvpermi_q(in, in, 0b00000001); - __lasx_xvst(__lasx_vext2xv_wu_hu(in), utf32_output, 0); - __lasx_xvst(__lasx_vext2xv_wu_hu(in_hi), utf32_output, 32); - utf32_output += 16; - buf += 16; - // surrogate pair(s) in a register - } else { - // Let us do a scalar fallback. - // It may seem wasteful to use scalar code, but being efficient with SIMD - // in the presence of surrogate pairs may require non-trivial tables. - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint16_t word = - !match_system(big_endian) ? scalar::u16_swap_bytes(buf[k]) : buf[k]; - if ((word & 0xF800) != 0xD800) { - *utf32_output++ = char32_t(word); - } else { - // must be a surrogate pair - uint16_t diff = uint16_t(word - 0xD800); - uint16_t next_word = !match_system(big_endian) - ? scalar::u16_swap_bytes(buf[k + 1]) - : buf[k + 1]; - k++; - uint16_t diff2 = uint16_t(next_word - 0xDC00); - if ((diff | diff2) > 0x3FF) { - return std::make_pair( - result(error_code::SURROGATE, buf - start + k - 1), - reinterpret_cast(utf32_output)); - } - uint32_t value = (diff << 10) + diff2 + 0x10000; - *utf32_output++ = char32_t(value); - } - } - buf += k; - } - } // while - return std::make_pair(result(error_code::SUCCESS, buf - start), - reinterpret_cast(utf32_output)); -} -/* end file src/lasx/lasx_convert_utf16_to_utf32.cpp */ -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -/* begin file src/lasx/lasx_convert_utf32_to_latin1.cpp */ -std::pair -lasx_convert_utf32_to_latin1(const char32_t *buf, size_t len, - char *latin1_output) { - const char32_t *end = buf + len; - const __m256i shuf_mask = ____m256i( - (__m128i)v16u8{0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0}); - __m256i v_ff = __lasx_xvrepli_w(0xFF); - - while (end - buf >= 16) { - __m256i in1 = __lasx_xvld(reinterpret_cast(buf), 0); - __m256i in2 = __lasx_xvld(reinterpret_cast(buf), 32); - - __m256i in12 = __lasx_xvor_v(in1, in2); - if (__lasx_xbz_v(__lasx_xvslt_wu(v_ff, in12))) { - // 1. pack the bytes - __m256i latin1_packed_tmp = __lasx_xvshuf_b(in2, in1, shuf_mask); - latin1_packed_tmp = __lasx_xvpermi_d(latin1_packed_tmp, 0b00001000); - __m128i latin1_packed = lasx_extracti128_lo(latin1_packed_tmp); - latin1_packed = __lsx_vpermi_w(latin1_packed, latin1_packed, 0b11011000); - // 2. store (8 bytes) - __lsx_vst(latin1_packed, reinterpret_cast(latin1_output), 0); - // 3. adjust pointers - buf += 16; - latin1_output += 16; - } else { - return std::make_pair(nullptr, reinterpret_cast(latin1_output)); - } - } // while - return std::make_pair(buf, latin1_output); -} - -std::pair -lasx_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len, - char *latin1_output) { - const char32_t *start = buf; - const char32_t *end = buf + len; - - const __m256i shuf_mask = ____m256i( - (__m128i)v16u8{0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0}); - __m256i v_ff = __lasx_xvrepli_w(0xFF); - - while (end - buf >= 16) { - __m256i in1 = __lasx_xvld(reinterpret_cast(buf), 0); - __m256i in2 = __lasx_xvld(reinterpret_cast(buf), 32); - - __m256i in12 = __lasx_xvor_v(in1, in2); - if (__lasx_xbz_v(__lasx_xvslt_wu(v_ff, in12))) { - // 1. pack the bytes - __m256i latin1_packed_tmp = __lasx_xvshuf_b(in2, in1, shuf_mask); - latin1_packed_tmp = __lasx_xvpermi_d(latin1_packed_tmp, 0b00001000); - __m128i latin1_packed = lasx_extracti128_lo(latin1_packed_tmp); - latin1_packed = __lsx_vpermi_w(latin1_packed, latin1_packed, 0b11011000); - // 2. store (8 bytes) - __lsx_vst(latin1_packed, reinterpret_cast(latin1_output), 0); - // 3. adjust pointers - buf += 16; - latin1_output += 16; - } else { - // Let us do a scalar fallback. - for (int k = 0; k < 16; k++) { - uint32_t word = buf[k]; - if (word <= 0xff) { - *latin1_output++ = char(word); - } else { - return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), - latin1_output); - } - } - } - } // while - return std::make_pair(result(error_code::SUCCESS, buf - start), - latin1_output); -} -/* end file src/lasx/lasx_convert_utf32_to_latin1.cpp */ -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -/* begin file src/lasx/lasx_convert_utf32_to_utf8.cpp */ -std::pair -lasx_convert_utf32_to_utf8(const char32_t *buf, size_t len, char *utf8_out) { - uint8_t *utf8_output = reinterpret_cast(utf8_out); - const char32_t *end = buf + len; - - // load addr align 32 - while (((uint64_t)buf & 0x1F) && buf < end) { - uint32_t word = *buf; - if ((word & 0xFFFFFF80) == 0) { - *utf8_output++ = char(word); - } else if ((word & 0xFFFFF800) == 0) { - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else if ((word & 0xFFFF0000) == 0) { - if (word >= 0xD800 && word <= 0xDFFF) { - return std::make_pair(nullptr, reinterpret_cast(utf8_output)); - } - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else { - if (word > 0x10FFFF) { - return std::make_pair(nullptr, reinterpret_cast(utf8_output)); - } - *utf8_output++ = char((word >> 18) | 0b11110000); - *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } - buf++; - } - - __m256i v_c080 = lasx_splat_u16(0xc080); - __m256i v_07ff = lasx_splat_u16(0x07ff); - __m256i v_dfff = lasx_splat_u16(0xdfff); - __m256i v_d800 = lasx_splat_u16(0xd800); - __m256i zero = __lasx_xvldi(0); - __m128i zero_128 = __lsx_vldi(0); - __m256i forbidden_bytemask = __lasx_xvldi(0x0); - - const size_t safety_margin = - 12; // to avoid overruns, see issue - // https://github.com/simdutf/simdutf/issues/92 - - while (end - buf > std::ptrdiff_t(16 + safety_margin)) { - __m256i in = __lasx_xvld(reinterpret_cast(buf), 0); - __m256i nextin = __lasx_xvld(reinterpret_cast(buf), 32); - - // Check if no bits set above 16th - if (__lasx_xbz_v(__lasx_xvpickod_h(in, nextin))) { - // Pack UTF-32 to UTF-16 safely (without surrogate pairs) - // Apply UTF-16 => UTF-8 routine (lasx_convert_utf16_to_utf8.cpp) - __m256i utf16_packed = - __lasx_xvpermi_d(__lasx_xvpickev_h(nextin, in), 0b11011000); - - if (__lasx_xbz_v(__lasx_xvslt_hu(__lasx_xvrepli_h(0x7F), - utf16_packed))) { // ASCII fast path!!!! - // 1. pack the bytes - // obviously suboptimal. - __m256i utf8_packed = __lasx_xvpermi_d( - __lasx_xvpickev_b(utf16_packed, utf16_packed), 0b00001000); - // 2. store (8 bytes) - __lsx_vst(lasx_extracti128_lo(utf8_packed), utf8_output, 0); - // 3. adjust pointers - buf += 16; - utf8_output += 16; - continue; // we are done for this round! - } - - if (__lasx_xbz_v(__lasx_xvslt_hu(v_07ff, utf16_packed))) { - // 1. prepare 2-byte values - // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 - // expected output : [110a|aaaa|10bb|bbbb] x 8 - - // t0 = [000a|aaaa|bbbb|bb00] - const __m256i t0 = __lasx_xvslli_h(utf16_packed, 2); - // t1 = [000a|aaaa|0000|0000] - const __m256i t1 = __lasx_xvand_v(t0, lasx_splat_u16(0x1f00)); - // t2 = [0000|0000|00bb|bbbb] - const __m256i t2 = __lasx_xvand_v(utf16_packed, __lasx_xvrepli_h(0x3f)); - // t3 = [000a|aaaa|00bb|bbbb] - const __m256i t3 = __lasx_xvor_v(t1, t2); - // t4 = [110a|aaaa|10bb|bbbb] - const __m256i t4 = __lasx_xvor_v(t3, v_c080); - // 2. merge ASCII and 2-byte codewords - __m256i one_byte_bytemask = - __lasx_xvsle_hu(utf16_packed, __lasx_xvrepli_h(0x7F /*0x007F*/)); - __m256i utf8_unpacked = - __lasx_xvbitsel_v(t4, utf16_packed, one_byte_bytemask); - // 3. prepare bitmask for 8-bit lookup - __m256i mask = __lasx_xvmskltz_h(one_byte_bytemask); - uint32_t m1 = __lasx_xvpickve2gr_wu(mask, 0); - uint32_t m2 = __lasx_xvpickve2gr_wu(mask, 4); - // 4. pack the bytes - const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes - [lasx_1_2_utf8_bytes_mask[m1]][0]; - __m128i shuffle1 = __lsx_vld(row1, 1); - __m128i utf8_packed1 = __lsx_vshuf_b( - zero_128, lasx_extracti128_lo(utf8_unpacked), shuffle1); - - const uint8_t *row2 = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes - [lasx_1_2_utf8_bytes_mask[m2]][0]; - __m128i shuffle2 = __lsx_vld(row2, 1); - __m128i utf8_packed2 = __lsx_vshuf_b( - zero_128, lasx_extracti128_hi(utf8_unpacked), shuffle2); - // 5. store bytes - __lsx_vst(utf8_packed1, utf8_output, 0); - utf8_output += row1[0]; - - __lsx_vst(utf8_packed2, utf8_output, 0); - utf8_output += row2[0]; - - buf += 16; - continue; - } else { - // case: code units from register produce either 1, 2 or 3 UTF-8 bytes - forbidden_bytemask = __lasx_xvor_v( - __lasx_xvand_v( - __lasx_xvsle_h(utf16_packed, v_dfff), // utf16_packed <= 0xdfff - __lasx_xvsle_h(v_d800, utf16_packed)), // utf16_packed >= 0xd800 - forbidden_bytemask); - /* In this branch we handle three cases: - 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - - single UFT-8 byte - 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - - two UTF-8 bytes - 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - - three UTF-8 bytes - - We expand the input word (16-bit) into two code units (32-bit), thus - we have room for four bytes. However, we need five distinct bit - layouts. Note that the last byte in cases #2 and #3 is the same. - - We precompute byte 1 for case #1 and the common byte for cases #2 & - #3 in register t2. - - We precompute byte 1 for case #3 and -- **conditionally** -- - precompute either byte 1 for case #2 or byte 2 for case #3. Note that - they differ by exactly one bit. - - Finally from these two code units we build proper UTF-8 sequence, - taking into account the case (i.e, the number of bytes to write). - */ - /** - * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: - * t2 => [0ccc|cccc] [10cc|cccc] - * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) - */ - // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - __m256i t0 = __lasx_xvpickev_b(utf16_packed, utf16_packed); - t0 = __lasx_xvilvl_b(t0, t0); - // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] - __m256i v_3f7f = __lasx_xvreplgr2vr_h(uint16_t(0x3F7F)); - __m256i t1 = __lasx_xvand_v(t0, v_3f7f); - // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - __m256i t2 = __lasx_xvor_v(t1, lasx_splat_u16(0x8000)); - - // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] - __m256i s0 = __lasx_xvsrli_h(utf16_packed, 12); - // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] - __m256i s1 = __lasx_xvslli_h(utf16_packed, 2); - // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000] - s1 = __lasx_xvand_v(s1, lasx_splat_u16(0x3f00)); - // [00bb|bbbb|0000|aaaa] - __m256i s2 = __lasx_xvor_v(s0, s1); - // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - __m256i v_c0e0 = __lasx_xvreplgr2vr_h(uint16_t(0xC0E0)); - __m256i s3 = __lasx_xvor_v(s2, v_c0e0); - // __m256i v_07ff = vmovq_n_u16((uint16_t)0x07FF); - __m256i one_or_two_bytes_bytemask = - __lasx_xvsle_hu(utf16_packed, v_07ff); - __m256i m0 = - __lasx_xvandn_v(one_or_two_bytes_bytemask, lasx_splat_u16(0x4000)); - __m256i s4 = __lasx_xvxor_v(s3, m0); - - // 4. expand code units 16-bit => 32-bit - __m256i out0 = __lasx_xvilvl_h(s4, t2); - __m256i out1 = __lasx_xvilvh_h(s4, t2); - - // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - __m256i one_byte_bytemask = - __lasx_xvsle_hu(utf16_packed, __lasx_xvrepli_h(0x7F)); - - __m256i one_or_two_bytes_bytemask_u16_to_u32_low = - __lasx_xvilvl_h(one_or_two_bytes_bytemask, zero); - __m256i one_or_two_bytes_bytemask_u16_to_u32_high = - __lasx_xvilvh_h(one_or_two_bytes_bytemask, zero); - - __m256i one_byte_bytemask_u16_to_u32_low = - __lasx_xvilvl_h(one_byte_bytemask, one_byte_bytemask); - __m256i one_byte_bytemask_u16_to_u32_high = - __lasx_xvilvh_h(one_byte_bytemask, one_byte_bytemask); - - __m256i mask0 = __lasx_xvmskltz_h( - __lasx_xvor_v(one_or_two_bytes_bytemask_u16_to_u32_low, - one_byte_bytemask_u16_to_u32_low)); - __m256i mask1 = __lasx_xvmskltz_h( - __lasx_xvor_v(one_or_two_bytes_bytemask_u16_to_u32_high, - one_byte_bytemask_u16_to_u32_high)); - - uint32_t mask = __lasx_xvpickve2gr_wu(mask0, 0); - const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] - [0]; - __m128i shuffle0 = __lsx_vld(row0, 1); - __m128i utf8_0 = - __lsx_vshuf_b(zero_128, lasx_extracti128_lo(out0), shuffle0); - __lsx_vst(utf8_0, utf8_output, 0); - utf8_output += row0[0]; - - mask = __lasx_xvpickve2gr_wu(mask1, 0); - const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] - [0]; - __m128i shuffle1 = __lsx_vld(row1, 1); - __m128i utf8_1 = - __lsx_vshuf_b(zero_128, lasx_extracti128_lo(out1), shuffle1); - __lsx_vst(utf8_1, utf8_output, 0); - utf8_output += row1[0]; - - mask = __lasx_xvpickve2gr_wu(mask0, 4); - const uint8_t *row2 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] - [0]; - __m128i shuffle2 = __lsx_vld(row2, 1); - __m128i utf8_2 = - __lsx_vshuf_b(zero_128, lasx_extracti128_hi(out0), shuffle2); - __lsx_vst(utf8_2, utf8_output, 0); - utf8_output += row2[0]; - - mask = __lasx_xvpickve2gr_wu(mask1, 4); - const uint8_t *row3 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] - [0]; - __m128i shuffle3 = __lsx_vld(row3, 1); - __m128i utf8_3 = - __lsx_vshuf_b(zero_128, lasx_extracti128_hi(out1), shuffle3); - __lsx_vst(utf8_3, utf8_output, 0); - utf8_output += row3[0]; - - buf += 16; - } - // At least one 32-bit word will produce a surrogate pair in UTF-16 <=> - // will produce four UTF-8 bytes. - } else { - // Let us do a scalar fallback. - // It may seem wasteful to use scalar code, but being efficient with SIMD - // in the presence of surrogate pairs may require non-trivial tables. - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint32_t word = buf[k]; - if ((word & 0xFFFFFF80) == 0) { - *utf8_output++ = char(word); - } else if ((word & 0xFFFFF800) == 0) { - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else if ((word & 0xFFFF0000) == 0) { - if (word >= 0xD800 && word <= 0xDFFF) { - return std::make_pair(nullptr, - reinterpret_cast(utf8_output)); - } - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else { - if (word > 0x10FFFF) { - return std::make_pair(nullptr, - reinterpret_cast(utf8_output)); - } - *utf8_output++ = char((word >> 18) | 0b11110000); - *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } - } - buf += k; - } - } // while - - // check for invalid input - if (__lasx_xbnz_v(forbidden_bytemask)) { - return std::make_pair(nullptr, reinterpret_cast(utf8_output)); - } - return std::make_pair(buf, reinterpret_cast(utf8_output)); -} - -std::pair -lasx_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len, - char *utf8_out) { - uint8_t *utf8_output = reinterpret_cast(utf8_out); - const char32_t *start = buf; - const char32_t *end = buf + len; - - // load addr align 32 - while (((uint64_t)buf & 0x1F) && buf < end) { - uint32_t word = *buf; - if ((word & 0xFFFFFF80) == 0) { - *utf8_output++ = char(word); - } else if ((word & 0xFFFFF800) == 0) { - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else if ((word & 0xFFFF0000) == 0) { - if (word >= 0xD800 && word <= 0xDFFF) { - return std::make_pair(result(error_code::SURROGATE, buf - start), - reinterpret_cast(utf8_output)); - } - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else { - if (word > 0x10FFFF) { - return std::make_pair(result(error_code::TOO_LARGE, buf - start), - reinterpret_cast(utf8_output)); - } - *utf8_output++ = char((word >> 18) | 0b11110000); - *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } - buf++; - } - - __m256i v_c080 = lasx_splat_u16(0xc080); - __m256i v_07ff = lasx_splat_u16(0x07ff); - __m256i v_dfff = lasx_splat_u16(0xdfff); - __m256i v_d800 = lasx_splat_u16(0xd800); - __m256i zero = __lasx_xvldi(0); - __m128i zero_128 = __lsx_vldi(0); - __m256i forbidden_bytemask = __lasx_xvldi(0x0); - const size_t safety_margin = - 12; // to avoid overruns, see issue - // https://github.com/simdutf/simdutf/issues/92 - - while (end - buf > std::ptrdiff_t(16 + safety_margin)) { - __m256i in = __lasx_xvld(reinterpret_cast(buf), 0); - __m256i nextin = __lasx_xvld(reinterpret_cast(buf), 32); - - // Check if no bits set above 16th - if (__lasx_xbz_v(__lasx_xvpickod_h(in, nextin))) { - // Pack UTF-32 to UTF-16 safely (without surrogate pairs) - // Apply UTF-16 => UTF-8 routine (lasx_convert_utf16_to_utf8.cpp) - __m256i utf16_packed = - __lasx_xvpermi_d(__lasx_xvpickev_h(nextin, in), 0b11011000); - - if (__lasx_xbz_v(__lasx_xvslt_hu(__lasx_xvrepli_h(0x7F), - utf16_packed))) { // ASCII fast path!!!! - // 1. pack the bytes - // obviously suboptimal. - __m256i utf8_packed = __lasx_xvpermi_d( - __lasx_xvpickev_b(utf16_packed, utf16_packed), 0b00001000); - // 2. store (8 bytes) - __lsx_vst(lasx_extracti128_lo(utf8_packed), utf8_output, 0); - // 3. adjust pointers - buf += 16; - utf8_output += 16; - continue; // we are done for this round! - } - - if (__lasx_xbz_v(__lasx_xvslt_hu(v_07ff, utf16_packed))) { - // 1. prepare 2-byte values - // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8 - // expected output : [110a|aaaa|10bb|bbbb] x 8 - - // t0 = [000a|aaaa|bbbb|bb00] - const __m256i t0 = __lasx_xvslli_h(utf16_packed, 2); - // t1 = [000a|aaaa|0000|0000] - const __m256i t1 = __lasx_xvand_v(t0, lasx_splat_u16(0x1f00)); - // t2 = [0000|0000|00bb|bbbb] - const __m256i t2 = __lasx_xvand_v(utf16_packed, __lasx_xvrepli_h(0x3f)); - // t3 = [000a|aaaa|00bb|bbbb] - const __m256i t3 = __lasx_xvor_v(t1, t2); - // t4 = [110a|aaaa|10bb|bbbb] - const __m256i t4 = __lasx_xvor_v(t3, v_c080); - // 2. merge ASCII and 2-byte codewords - __m256i one_byte_bytemask = - __lasx_xvsle_hu(utf16_packed, __lasx_xvrepli_h(0x7F /*0x007F*/)); - __m256i utf8_unpacked = - __lasx_xvbitsel_v(t4, utf16_packed, one_byte_bytemask); - // 3. prepare bitmask for 8-bit lookup - __m256i mask = __lasx_xvmskltz_h(one_byte_bytemask); - uint32_t m1 = __lasx_xvpickve2gr_wu(mask, 0); - uint32_t m2 = __lasx_xvpickve2gr_wu(mask, 4); - // 4. pack the bytes - const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes - [lasx_1_2_utf8_bytes_mask[m1]][0]; - __m128i shuffle1 = __lsx_vld(row1, 1); - __m128i utf8_packed1 = __lsx_vshuf_b( - zero_128, lasx_extracti128_lo(utf8_unpacked), shuffle1); - - const uint8_t *row2 = - &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes - [lasx_1_2_utf8_bytes_mask[m2]][0]; - __m128i shuffle2 = __lsx_vld(row2, 1); - __m128i utf8_packed2 = __lsx_vshuf_b( - zero_128, lasx_extracti128_hi(utf8_unpacked), shuffle2); - // 5. store bytes - __lsx_vst(utf8_packed1, utf8_output, 0); - utf8_output += row1[0]; - - __lsx_vst(utf8_packed2, utf8_output, 0); - utf8_output += row2[0]; - - buf += 16; - continue; - } else { - // case: code units from register produce either 1, 2 or 3 UTF-8 bytes - forbidden_bytemask = __lasx_xvor_v( - __lasx_xvand_v( - __lasx_xvsle_h(utf16_packed, v_dfff), // utf16_packed <= 0xdfff - __lasx_xvsle_h(v_d800, utf16_packed)), // utf16_packed >= 0xd800 - forbidden_bytemask); - if (__lasx_xbnz_v(forbidden_bytemask)) { - return std::make_pair(result(error_code::SURROGATE, buf - start), - reinterpret_cast(utf8_output)); - } - /* In this branch we handle three cases: - 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] - - single UFT-8 byte - 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - - two UTF-8 bytes - 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - - three UTF-8 bytes - - We expand the input word (16-bit) into two code units (32-bit), thus - we have room for four bytes. However, we need five distinct bit - layouts. Note that the last byte in cases #2 and #3 is the same. - - We precompute byte 1 for case #1 and the common byte for cases #2 & - #3 in register t2. - - We precompute byte 1 for case #3 and -- **conditionally** -- - precompute either byte 1 for case #2 or byte 2 for case #3. Note that - they differ by exactly one bit. - - Finally from these two code units we build proper UTF-8 sequence, - taking into account the case (i.e, the number of bytes to write). - */ - /** - * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce: - * t2 => [0ccc|cccc] [10cc|cccc] - * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) - */ - // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] - __m256i t0 = __lasx_xvpickev_b(utf16_packed, utf16_packed); - t0 = __lasx_xvilvl_b(t0, t0); - // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] - __m256i v_3f7f = __lasx_xvreplgr2vr_h(uint16_t(0x3F7F)); - __m256i t1 = __lasx_xvand_v(t0, v_3f7f); - // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] - __m256i t2 = __lasx_xvor_v(t1, lasx_splat_u16(0x8000)); - - // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] - __m256i s0 = __lasx_xvsrli_h(utf16_packed, 12); - // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] - __m256i s1 = __lasx_xvslli_h(utf16_packed, 2); - // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000] - s1 = __lasx_xvand_v(s1, lasx_splat_u16(0x3F00)); - // [00bb|bbbb|0000|aaaa] - __m256i s2 = __lasx_xvor_v(s0, s1); - // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] - __m256i v_c0e0 = __lasx_xvreplgr2vr_h(uint16_t(0xC0E0)); - __m256i s3 = __lasx_xvor_v(s2, v_c0e0); - // __m256i v_07ff = vmovq_n_u16((uint16_t)0x07FF); - __m256i one_or_two_bytes_bytemask = - __lasx_xvsle_hu(utf16_packed, v_07ff); - __m256i m0 = - __lasx_xvandn_v(one_or_two_bytes_bytemask, lasx_splat_u16(0x4000)); - __m256i s4 = __lasx_xvxor_v(s3, m0); - - // 4. expand code units 16-bit => 32-bit - __m256i out0 = __lasx_xvilvl_h(s4, t2); - __m256i out1 = __lasx_xvilvh_h(s4, t2); - - // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle - __m256i one_byte_bytemask = - __lasx_xvsle_hu(utf16_packed, __lasx_xvrepli_h(0x7F)); - - __m256i one_or_two_bytes_bytemask_u16_to_u32_low = - __lasx_xvilvl_h(one_or_two_bytes_bytemask, zero); - __m256i one_or_two_bytes_bytemask_u16_to_u32_high = - __lasx_xvilvh_h(one_or_two_bytes_bytemask, zero); - - __m256i one_byte_bytemask_u16_to_u32_low = - __lasx_xvilvl_h(one_byte_bytemask, one_byte_bytemask); - __m256i one_byte_bytemask_u16_to_u32_high = - __lasx_xvilvh_h(one_byte_bytemask, one_byte_bytemask); - - __m256i mask0 = __lasx_xvmskltz_h( - __lasx_xvor_v(one_or_two_bytes_bytemask_u16_to_u32_low, - one_byte_bytemask_u16_to_u32_low)); - __m256i mask1 = __lasx_xvmskltz_h( - __lasx_xvor_v(one_or_two_bytes_bytemask_u16_to_u32_high, - one_byte_bytemask_u16_to_u32_high)); - - uint32_t mask = __lasx_xvpickve2gr_wu(mask0, 0); - const uint8_t *row0 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] - [0]; - __m128i shuffle0 = __lsx_vld(row0, 1); - __m128i utf8_0 = - __lsx_vshuf_b(zero_128, lasx_extracti128_lo(out0), shuffle0); - __lsx_vst(utf8_0, utf8_output, 0); - utf8_output += row0[0]; - - mask = __lasx_xvpickve2gr_wu(mask1, 0); - const uint8_t *row1 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] - [0]; - __m128i shuffle1 = __lsx_vld(row1, 1); - __m128i utf8_1 = - __lsx_vshuf_b(zero_128, lasx_extracti128_lo(out1), shuffle1); - __lsx_vst(utf8_1, utf8_output, 0); - utf8_output += row1[0]; - - mask = __lasx_xvpickve2gr_wu(mask0, 4); - const uint8_t *row2 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] - [0]; - __m128i shuffle2 = __lsx_vld(row2, 1); - __m128i utf8_2 = - __lsx_vshuf_b(zero_128, lasx_extracti128_hi(out0), shuffle2); - __lsx_vst(utf8_2, utf8_output, 0); - utf8_output += row2[0]; - - mask = __lasx_xvpickve2gr_wu(mask1, 4); - const uint8_t *row3 = - &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask & 0xFF] - [0]; - __m128i shuffle3 = __lsx_vld(row3, 1); - __m128i utf8_3 = - __lsx_vshuf_b(zero_128, lasx_extracti128_hi(out1), shuffle3); - __lsx_vst(utf8_3, utf8_output, 0); - utf8_output += row3[0]; - - buf += 16; - } - // At least one 32-bit word will produce a surrogate pair in UTF-16 <=> - // will produce four UTF-8 bytes. - } else { - // Let us do a scalar fallback. - // It may seem wasteful to use scalar code, but being efficient with SIMD - // in the presence of surrogate pairs may require non-trivial tables. - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint32_t word = buf[k]; - if ((word & 0xFFFFFF80) == 0) { - *utf8_output++ = char(word); - } else if ((word & 0xFFFFF800) == 0) { - *utf8_output++ = char((word >> 6) | 0b11000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else if ((word & 0xFFFF0000) == 0) { - if (word >= 0xD800 && word <= 0xDFFF) { - return std::make_pair( - result(error_code::SURROGATE, buf - start + k), - reinterpret_cast(utf8_output)); - } - *utf8_output++ = char((word >> 12) | 0b11100000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } else { - if (word > 0x10FFFF) { - return std::make_pair( - result(error_code::TOO_LARGE, buf - start + k), - reinterpret_cast(utf8_output)); - } - *utf8_output++ = char((word >> 18) | 0b11110000); - *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000); - *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000); - *utf8_output++ = char((word & 0b111111) | 0b10000000); - } - } - buf += k; - } - } // while - - return std::make_pair(result(error_code::SUCCESS, buf - start), - reinterpret_cast(utf8_output)); -} -/* end file src/lasx/lasx_convert_utf32_to_utf8.cpp */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -/* begin file src/lasx/lasx_convert_utf32_to_utf16.cpp */ -template -std::pair -lasx_convert_utf32_to_utf16(const char32_t *buf, size_t len, - char16_t *utf16_out) { - uint16_t *utf16_output = reinterpret_cast(utf16_out); - const char32_t *end = buf + len; - - // Performance degradation when memory address is not 32-byte aligned - while (((uint64_t)utf16_output & 0x1F) && buf < end) { - uint32_t word = *buf++; - if ((word & 0xFFFF0000) == 0) { - // will not generate a surrogate pair - if (word >= 0xD800 && word <= 0xDFFF) { - return std::make_pair(nullptr, - reinterpret_cast(utf16_output)); - } - *utf16_output++ = !match_system(big_endian) - ? char16_t(word >> 8 | word << 8) - : char16_t(word); - // buf++; - } else { - // will generate a surrogate pair - if (word > 0x10FFFF) { - return std::make_pair(nullptr, - reinterpret_cast(utf16_output)); - } - word -= 0x10000; - uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); - uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); - if (!match_system(big_endian)) { - high_surrogate = uint16_t(high_surrogate >> 8 | high_surrogate << 8); - low_surrogate = uint16_t(low_surrogate << 8 | low_surrogate >> 8); - } - *utf16_output++ = char16_t(high_surrogate); - *utf16_output++ = char16_t(low_surrogate); - // buf++; - } - } - - __m256i forbidden_bytemask = __lasx_xvrepli_h(0); - __m256i v_d800 = lasx_splat_u16(0xd800); - __m256i v_dfff = lasx_splat_u16(0xdfff); - while (end - buf >= 16) { - __m256i in0 = __lasx_xvld(reinterpret_cast(buf), 0); - __m256i in1 = __lasx_xvld(reinterpret_cast(buf), 32); - - // Check if no bits set above 16th - if (__lasx_xbz_v(__lasx_xvpickod_h(in1, in0))) { - __m256i utf16_packed = - __lasx_xvpermi_d(__lasx_xvpickev_h(in1, in0), 0b11011000); - forbidden_bytemask = __lasx_xvor_v( - __lasx_xvand_v( - __lasx_xvsle_h(utf16_packed, v_dfff), // utf16_packed <= 0xdfff - __lasx_xvsle_h(v_d800, utf16_packed)), // utf16_packed >= 0xd800 - forbidden_bytemask); - - if (!match_system(big_endian)) { - utf16_packed = lasx_swap_bytes(utf16_packed); - } - __lasx_xvst(utf16_packed, utf16_output, 0); - utf16_output += 16; - buf += 16; - } else { - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint32_t word = buf[k]; - if ((word & 0xFFFF0000) == 0) { - // will not generate a surrogate pair - if (word >= 0xD800 && word <= 0xDFFF) { - return std::make_pair(nullptr, - reinterpret_cast(utf16_output)); - } - *utf16_output++ = !match_system(big_endian) - ? char16_t(word >> 8 | word << 8) - : char16_t(word); - } else { - // will generate a surrogate pair - if (word > 0x10FFFF) { - return std::make_pair(nullptr, - reinterpret_cast(utf16_output)); - } - word -= 0x10000; - uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); - uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); - if (!match_system(big_endian)) { - high_surrogate = - uint16_t(high_surrogate >> 8 | high_surrogate << 8); - low_surrogate = uint16_t(low_surrogate << 8 | low_surrogate >> 8); - } - *utf16_output++ = char16_t(high_surrogate); - *utf16_output++ = char16_t(low_surrogate); - } - } - buf += k; - } - } - - // check for invalid input - if (__lasx_xbnz_v(forbidden_bytemask)) { - return std::make_pair(nullptr, reinterpret_cast(utf16_output)); - } - return std::make_pair(buf, reinterpret_cast(utf16_output)); -} - -template -std::pair -lasx_convert_utf32_to_utf16_with_errors(const char32_t *buf, size_t len, - char16_t *utf16_out) { - uint16_t *utf16_output = reinterpret_cast(utf16_out); - const char32_t *start = buf; - const char32_t *end = buf + len; - - // Performance degradation when memory address is not 32-byte aligned - while (((uint64_t)utf16_output & 0x1F) && buf < end) { - uint32_t word = *buf++; - if ((word & 0xFFFF0000) == 0) { - // will not generate a surrogate pair - if (word >= 0xD800 && word <= 0xDFFF) { - return std::make_pair(result(error_code::SURROGATE, buf - start - 1), - reinterpret_cast(utf16_output)); - } - *utf16_output++ = !match_system(big_endian) - ? char16_t(word >> 8 | word << 8) - : char16_t(word); - } else { - // will generate a surrogate pair - if (word > 0x10FFFF) { - return std::make_pair(result(error_code::TOO_LARGE, buf - start - 1), - reinterpret_cast(utf16_output)); - } - word -= 0x10000; - uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); - uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); - if (!match_system(big_endian)) { - high_surrogate = uint16_t(high_surrogate >> 8 | high_surrogate << 8); - low_surrogate = uint16_t(low_surrogate << 8 | low_surrogate >> 8); - } - *utf16_output++ = char16_t(high_surrogate); - *utf16_output++ = char16_t(low_surrogate); - } - } - - __m256i forbidden_bytemask = __lasx_xvrepli_h(0); - __m256i v_d800 = lasx_splat_u16(0xd800); - __m256i v_dfff = lasx_splat_u16(0xdfff); - while (end - buf >= 16) { - __m256i in0 = __lasx_xvld(reinterpret_cast(buf), 0); - __m256i in1 = __lasx_xvld(reinterpret_cast(buf), 32); - - // Check if no bits set above 16th - if (__lasx_xbz_v(__lasx_xvpickod_h(in1, in0))) { - __m256i utf16_packed = - __lasx_xvpermi_d(__lasx_xvpickev_h(in1, in0), 0b11011000); - forbidden_bytemask = __lasx_xvor_v( - __lasx_xvand_v( - __lasx_xvsle_h(utf16_packed, v_dfff), // utf16_packed <= 0xdfff - __lasx_xvsle_h(v_d800, utf16_packed)), // utf16_packed >= 0xd800 - forbidden_bytemask); - if (__lasx_xbnz_v(forbidden_bytemask)) { - return std::make_pair(result(error_code::SURROGATE, buf - start), - reinterpret_cast(utf16_output)); - } - - if (!match_system(big_endian)) { - utf16_packed = lasx_swap_bytes(utf16_packed); - } - - __lasx_xvst(utf16_packed, utf16_output, 0); - utf16_output += 16; - buf += 16; - } else { - size_t forward = 15; - size_t k = 0; - if (size_t(end - buf) < forward + 1) { - forward = size_t(end - buf - 1); - } - for (; k < forward; k++) { - uint32_t word = buf[k]; - if ((word & 0xFFFF0000) == 0) { - // will not generate a surrogate pair - if (word >= 0xD800 && word <= 0xDFFF) { - return std::make_pair( - result(error_code::SURROGATE, buf - start + k), - reinterpret_cast(utf16_output)); - } - *utf16_output++ = !match_system(big_endian) - ? char16_t(word >> 8 | word << 8) - : char16_t(word); - } else { - // will generate a surrogate pair - if (word > 0x10FFFF) { - return std::make_pair( - result(error_code::TOO_LARGE, buf - start + k), - reinterpret_cast(utf16_output)); - } - word -= 0x10000; - uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10)); - uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF)); - if (!match_system(big_endian)) { - high_surrogate = - uint16_t(high_surrogate >> 8 | high_surrogate << 8); - low_surrogate = uint16_t(low_surrogate << 8 | low_surrogate >> 8); - } - *utf16_output++ = char16_t(high_surrogate); - *utf16_output++ = char16_t(low_surrogate); - } - } - buf += k; - } - } - - return std::make_pair(result(error_code::SUCCESS, buf - start), - reinterpret_cast(utf16_output)); -} -/* end file src/lasx/lasx_convert_utf32_to_utf16.cpp */ -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -#if SIMDUTF_FEATURE_BASE64 -/* begin file src/lasx/lasx_base64.cpp */ -/** - * References and further reading: - * - * Wojciech Muła, Daniel Lemire, Base64 encoding and decoding at almost the - * speed of a memory copy, Software: Practice and Experience 50 (2), 2020. - * https://arxiv.org/abs/1910.05109 - * - * Wojciech Muła, Daniel Lemire, Faster Base64 Encoding and Decoding using AVX2 - * Instructions, ACM Transactions on the Web 12 (3), 2018. - * https://arxiv.org/abs/1704.00605 - * - * Simon Josefsson. 2006. The Base16, Base32, and Base64 Data Encodings. - * https://tools.ietf.org/html/rfc4648. (2006). Internet Engineering Task Force, - * Request for Comments: 4648. - * - * Alfred Klomp. 2014a. Fast Base64 encoding/decoding with SSE vectorization. - * http://www.alfredklomp.com/programming/sse-base64/. (2014). - * - * Alfred Klomp. 2014b. Fast Base64 stream encoder/decoder in C99, with SIMD - * acceleration. https://github.com/aklomp/base64. (2014). - * - * Hanson Char. 2014. A Fast and Correct Base 64 Codec. (2014). - * https://aws.amazon.com/blogs/developer/a-fast-and-correct-base-64-codec/ - * - * Nick Kopp. 2013. Base64 Encoding on a GPU. - * https://www.codeproject.com/Articles/276993/Base-Encoding-on-a-GPU. (2013). - */ - -template -size_t encode_base64(char *dst, const char *src, size_t srclen, - base64_options options) { - // credit: Wojciech Muła - // SSE (lookup: pshufb improved unrolled) - const uint8_t *input = (const uint8_t *)src; - static const char *lookup_tbl = - isbase64url - ? "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" - : "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; - uint8_t *out = (uint8_t *)dst; - - v32u8 shuf; - __m256i v_fc0fc00, v_3f03f0, shift_r, shift_l, base64_tbl0, base64_tbl1, - base64_tbl2, base64_tbl3; - if (srclen >= 28) { - shuf = v32u8{1, 0, 2, 1, 4, 3, 5, 4, 7, 6, 8, 7, 10, 9, 11, 10, - 1, 0, 2, 1, 4, 3, 5, 4, 7, 6, 8, 7, 10, 9, 11, 10}; - - v_fc0fc00 = __lasx_xvreplgr2vr_w(uint32_t(0x0fc0fc00)); - v_3f03f0 = __lasx_xvreplgr2vr_w(uint32_t(0x003f03f0)); - shift_r = __lasx_xvreplgr2vr_w(uint32_t(0x0006000a)); - shift_l = __lasx_xvreplgr2vr_w(uint32_t(0x00080004)); - base64_tbl0 = ____m256i(__lsx_vld(lookup_tbl, 0)); - base64_tbl1 = ____m256i(__lsx_vld(lookup_tbl, 16)); - base64_tbl2 = ____m256i(__lsx_vld(lookup_tbl, 32)); - base64_tbl3 = ____m256i(__lsx_vld(lookup_tbl, 48)); - } - size_t i = 0; - for (; i + 100 <= srclen; i += 96) { - __m128i in0_lo = - __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 0); - __m128i in0_hi = - __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 1); - __m128i in1_lo = - __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 2); - __m128i in1_hi = - __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 3); - __m128i in2_lo = - __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 4); - __m128i in2_hi = - __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 5); - __m128i in3_lo = - __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 6); - __m128i in3_hi = - __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 7); - - __m256i in0 = lasx_set_q(in0_hi, in0_lo); - __m256i in1 = lasx_set_q(in1_hi, in1_lo); - __m256i in2 = lasx_set_q(in2_hi, in2_lo); - __m256i in3 = lasx_set_q(in3_hi, in3_lo); - - in0 = __lasx_xvshuf_b(in0, in0, (__m256i)shuf); - in1 = __lasx_xvshuf_b(in1, in1, (__m256i)shuf); - in2 = __lasx_xvshuf_b(in2, in2, (__m256i)shuf); - in3 = __lasx_xvshuf_b(in3, in3, (__m256i)shuf); - - __m256i t0_0 = __lasx_xvand_v(in0, v_fc0fc00); - __m256i t0_1 = __lasx_xvand_v(in1, v_fc0fc00); - __m256i t0_2 = __lasx_xvand_v(in2, v_fc0fc00); - __m256i t0_3 = __lasx_xvand_v(in3, v_fc0fc00); - - __m256i t1_0 = __lasx_xvsrl_h(t0_0, shift_r); - __m256i t1_1 = __lasx_xvsrl_h(t0_1, shift_r); - __m256i t1_2 = __lasx_xvsrl_h(t0_2, shift_r); - __m256i t1_3 = __lasx_xvsrl_h(t0_3, shift_r); - - __m256i t2_0 = __lasx_xvand_v(in0, v_3f03f0); - __m256i t2_1 = __lasx_xvand_v(in1, v_3f03f0); - __m256i t2_2 = __lasx_xvand_v(in2, v_3f03f0); - __m256i t2_3 = __lasx_xvand_v(in3, v_3f03f0); - - __m256i t3_0 = __lasx_xvsll_h(t2_0, shift_l); - __m256i t3_1 = __lasx_xvsll_h(t2_1, shift_l); - __m256i t3_2 = __lasx_xvsll_h(t2_2, shift_l); - __m256i t3_3 = __lasx_xvsll_h(t2_3, shift_l); - - __m256i input0 = __lasx_xvor_v(t1_0, t3_0); - __m256i input0_shuf0 = __lasx_xvshuf_b(base64_tbl1, base64_tbl0, input0); - __m256i input0_shuf1 = __lasx_xvshuf_b( - base64_tbl3, base64_tbl2, __lasx_xvsub_b(input0, __lasx_xvldi(32))); - __m256i input0_mask = __lasx_xvslei_bu(input0, 31); - __m256i input0_result = - __lasx_xvbitsel_v(input0_shuf1, input0_shuf0, input0_mask); - __lasx_xvst(input0_result, reinterpret_cast<__m256i *>(out), 0); - out += 32; - - __m256i input1 = __lasx_xvor_v(t1_1, t3_1); - __m256i input1_shuf0 = __lasx_xvshuf_b(base64_tbl1, base64_tbl0, input1); - __m256i input1_shuf1 = __lasx_xvshuf_b( - base64_tbl3, base64_tbl2, __lasx_xvsub_b(input1, __lasx_xvldi(32))); - __m256i input1_mask = __lasx_xvslei_bu(input1, 31); - __m256i input1_result = - __lasx_xvbitsel_v(input1_shuf1, input1_shuf0, input1_mask); - __lasx_xvst(input1_result, reinterpret_cast<__m256i *>(out), 0); - out += 32; - - __m256i input2 = __lasx_xvor_v(t1_2, t3_2); - __m256i input2_shuf0 = __lasx_xvshuf_b(base64_tbl1, base64_tbl0, input2); - __m256i input2_shuf1 = __lasx_xvshuf_b( - base64_tbl3, base64_tbl2, __lasx_xvsub_b(input2, __lasx_xvldi(32))); - __m256i input2_mask = __lasx_xvslei_bu(input2, 31); - __m256i input2_result = - __lasx_xvbitsel_v(input2_shuf1, input2_shuf0, input2_mask); - __lasx_xvst(input2_result, reinterpret_cast<__m256i *>(out), 0); - out += 32; - - __m256i input3 = __lasx_xvor_v(t1_3, t3_3); - __m256i input3_shuf0 = __lasx_xvshuf_b(base64_tbl1, base64_tbl0, input3); - __m256i input3_shuf1 = __lasx_xvshuf_b( - base64_tbl3, base64_tbl2, __lasx_xvsub_b(input3, __lasx_xvldi(32))); - __m256i input3_mask = __lasx_xvslei_bu(input3, 31); - __m256i input3_result = - __lasx_xvbitsel_v(input3_shuf1, input3_shuf0, input3_mask); - __lasx_xvst(input3_result, reinterpret_cast<__m256i *>(out), 0); - out += 32; - } - for (; i + 28 <= srclen; i += 24) { - - __m128i in_lo = __lsx_vld(reinterpret_cast(input + i), 0); - __m128i in_hi = - __lsx_vld(reinterpret_cast(input + i), 4 * 3 * 1); - - __m256i in = lasx_set_q(in_hi, in_lo); - - // bytes from groups A, B and C are needed in separate 32-bit lanes - // in = [DDDD|CCCC|BBBB|AAAA] - // - // an input triplet has layout - // [????????|ccdddddd|bbbbcccc|aaaaaabb] - // byte 3 byte 2 byte 1 byte 0 -- byte 3 comes from the next - // triplet - // - // shuffling changes the order of bytes: 1, 0, 2, 1 - // [bbbbcccc|ccdddddd|aaaaaabb|bbbbcccc] - // ^^^^ ^^^^^^^^ ^^^^^^^^ ^^^^ - // processed bits - in = __lasx_xvshuf_b(in, in, (__m256i)shuf); - - // unpacking - // t0 = [0000cccc|cc000000|aaaaaa00|00000000] - __m256i t0 = __lasx_xvand_v(in, v_fc0fc00); - // t1 = [00000000|00cccccc|00000000|00aaaaaa] - // ((c >> 6), (a >> 10)) - __m256i t1 = __lasx_xvsrl_h(t0, shift_r); - - // t2 = [00000000|00dddddd|000000bb|bbbb0000] - __m256i t2 = __lasx_xvand_v(in, v_3f03f0); - // t3 = [00dddddd|00000000|00bbbbbb|00000000] - // ((d << 8), (b << 4)) - __m256i t3 = __lasx_xvsll_h(t2, shift_l); - - // res = [00dddddd|00cccccc|00bbbbbb|00aaaaaa] = t1 | t3 - __m256i indices = __lasx_xvor_v(t1, t3); - - __m256i indices_shuf0 = __lasx_xvshuf_b(base64_tbl1, base64_tbl0, indices); - __m256i indices_shuf1 = __lasx_xvshuf_b( - base64_tbl3, base64_tbl2, __lasx_xvsub_b(indices, __lasx_xvldi(32))); - __m256i indices_mask = __lasx_xvslei_bu(indices, 31); - __m256i indices_result = - __lasx_xvbitsel_v(indices_shuf1, indices_shuf0, indices_mask); - __lasx_xvst(indices_result, reinterpret_cast<__m256i *>(out), 0); - out += 32; - } - - return i / 3 * 4 + scalar::base64::tail_encode_base64((char *)out, src + i, - srclen - i, options); -} - -static inline void compress(__m128i data, uint16_t mask, char *output) { - if (mask == 0) { - __lsx_vst(data, reinterpret_cast<__m128i *>(output), 0); - return; - } - // this particular implementation was inspired by work done by @animetosho - // we do it in two steps, first 8 bytes and then second 8 bytes - uint8_t mask1 = uint8_t(mask); // least significant 8 bits - uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits - // next line just loads the 64-bit values thintable_epi8[mask1] and - // thintable_epi8[mask2] into a 128-bit register, using only - // two instructions on most compilers. - - v2u64 shufmask = {tables::base64::thintable_epi8[mask1], - tables::base64::thintable_epi8[mask2]}; - - // we increment by 0x08 the second half of the mask - const v4u32 hi = {0, 0, 0x08080808, 0x08080808}; - __m128i shufmask1 = __lsx_vadd_b((__m128i)shufmask, (__m128i)hi); - - // this is the version "nearly pruned" - __m128i pruned = __lsx_vshuf_b(data, data, shufmask1); - // we still need to put the two halves together. - // we compute the popcount of the first half: - int pop1 = tables::base64::BitsSetTable256mul2[mask1]; - // then load the corresponding mask, what it does is to write - // only the first pop1 bytes from the first 8 bytes, and then - // it fills in with the bytes from the second 8 bytes + some filling - // at the end. - __m128i compactmask = - __lsx_vld(reinterpret_cast( - tables::base64::pshufb_combine_table + pop1 * 8), - 0); - __m128i answer = __lsx_vshuf_b(pruned, pruned, compactmask); - - __lsx_vst(answer, reinterpret_cast<__m128i *>(output), 0); -} - -struct block64 { - __m256i chunks[2]; -}; - -template -static inline uint32_t to_base64_mask(__m256i *src, bool *error) { - __m256i ascii_space_tbl = - ____m256i((__m128i)v16u8{0x20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x9, 0xa, 0x0, 0xc, 0xd, 0x0, 0x0}); - // credit: aqrit - __m256i delta_asso = - ____m256i((__m128i)v16u8{0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x0, 0x0, - 0x0, 0x0, 0x0, 0xF, 0x0, 0xF}); - __m256i delta_values; - if (base64_url) { - delta_values = ____m256i( - (__m128i)v16i8{int8_t(0x00), int8_t(0x00), int8_t(0x00), int8_t(0x13), - int8_t(0x04), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), - int8_t(0xB9), int8_t(0x00), int8_t(0x11), int8_t(0xC3), - int8_t(0xBF), int8_t(0xE0), int8_t(0xB9), int8_t(0xB9)}); - } else { - delta_values = ____m256i( - (__m128i)v16i8{int8_t(0x00), int8_t(0x00), int8_t(0x00), int8_t(0x13), - int8_t(0x04), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), - int8_t(0xB9), int8_t(0x00), int8_t(0x10), int8_t(0xC3), - int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), int8_t(0xB9)}); - } - - __m256i check_asso; - if (base64_url) { - check_asso = ____m256i((__m128i)v16u8{0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x01, 0x01, 0x01, 0x01, 0x03, 0x07, - 0x0B, 0x06, 0x0B, 0x12}); - } else { - check_asso = ____m256i((__m128i)v16u8{0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x01, 0x01, 0x01, 0x01, 0x03, 0x07, - 0x0B, 0x0B, 0x0B, 0x0F}); - } - - __m256i check_values; - if (base64_url) { - check_values = ____m256i( - (__m128i)v16i8{int8_t(0x0), int8_t(0x80), int8_t(0x80), int8_t(0x80), - int8_t(0xCF), int8_t(0xBF), int8_t(0xD3), int8_t(0xA6), - int8_t(0xB5), int8_t(0x86), int8_t(0xD0), int8_t(0x80), - int8_t(0xB0), int8_t(0x80), int8_t(0x0), int8_t(0x0)}); - } else { - check_values = ____m256i( - (__m128i)v16i8{int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), - int8_t(0xCF), int8_t(0xBF), int8_t(0xD5), int8_t(0xA6), - int8_t(0xB5), int8_t(0x86), int8_t(0xD1), int8_t(0x80), - int8_t(0xB1), int8_t(0x80), int8_t(0x91), int8_t(0x80)}); - } - - __m256i shifted = __lasx_xvsrli_b(*src, 3); - __m256i asso_index = __lasx_xvand_v(*src, __lasx_xvldi(0xF)); - __m256i delta_hash = __lasx_xvavgr_bu( - __lasx_xvshuf_b(delta_asso, delta_asso, asso_index), shifted); - __m256i check_hash = __lasx_xvavgr_bu( - __lasx_xvshuf_b(check_asso, check_asso, asso_index), shifted); - - __m256i out = __lasx_xvsadd_b( - __lasx_xvshuf_b(delta_values, delta_values, delta_hash), *src); - __m256i chk = __lasx_xvsadd_b( - __lasx_xvshuf_b(check_values, check_values, check_hash), *src); - __m256i chk_ltz = __lasx_xvmskltz_b(chk); - unsigned int mask = __lasx_xvpickve2gr_wu(chk_ltz, 0); - mask = mask | (__lsx_vpickve2gr_hu(lasx_extracti128_hi(chk_ltz), 0) << 16); - if (mask) { - __m256i ascii_space = __lasx_xvseq_b( - __lasx_xvshuf_b(ascii_space_tbl, ascii_space_tbl, asso_index), *src); - __m256i ascii_space_ltz = __lasx_xvmskltz_b(ascii_space); - unsigned int ascii_space_mask = __lasx_xvpickve2gr_wu(ascii_space_ltz, 0); - ascii_space_mask = - ascii_space_mask | - (__lsx_vpickve2gr_hu(lasx_extracti128_hi(ascii_space_ltz), 0) << 16); - *error |= (mask != ascii_space_mask); - } - - *src = out; - return (uint32_t)mask; -} - -template -static inline uint64_t to_base64_mask(block64 *b, bool *error) { - *error = 0; - uint64_t m0 = to_base64_mask(&b->chunks[0], error); - uint64_t m1 = to_base64_mask(&b->chunks[1], error); - return m0 | (m1 << 32); -} - -static inline void copy_block(block64 *b, char *output) { - __lasx_xvst(b->chunks[0], reinterpret_cast<__m256i *>(output), 0); - __lasx_xvst(b->chunks[1], reinterpret_cast<__m256i *>(output), 32); -} - -static inline uint64_t compress_block(block64 *b, uint64_t mask, char *output) { - uint64_t nmask = ~mask; - uint64_t count = - __lsx_vpickve2gr_d(__lsx_vpcnt_h(__lsx_vreplgr2vr_d(nmask)), 0); - uint16_t *count_ptr = (uint16_t *)&count; - compress(lasx_extracti128_lo(b->chunks[0]), uint16_t(mask), output); - compress(lasx_extracti128_hi(b->chunks[0]), uint16_t(mask >> 16), - output + count_ptr[0]); - compress(lasx_extracti128_lo(b->chunks[1]), uint16_t(mask >> 32), - output + count_ptr[0] + count_ptr[1]); - compress(lasx_extracti128_hi(b->chunks[1]), uint16_t(mask >> 48), - output + count_ptr[0] + count_ptr[1] + count_ptr[2]); - return count_ones(nmask); -} - -// The caller of this function is responsible to ensure that there are 64 bytes -// available from reading at src. The data is read into a block64 structure. -static inline void load_block(block64 *b, const char *src) { - b->chunks[0] = __lasx_xvld(reinterpret_cast(src), 0); - b->chunks[1] = __lasx_xvld(reinterpret_cast(src), 32); -} - -// The caller of this function is responsible to ensure that there are 128 bytes -// available from reading at src. The data is read into a block64 structure. -static inline void load_block(block64 *b, const char16_t *src) { - __m256i m1 = __lasx_xvld(reinterpret_cast(src), 0); - __m256i m2 = __lasx_xvld(reinterpret_cast(src), 32); - __m256i m3 = __lasx_xvld(reinterpret_cast(src), 64); - __m256i m4 = __lasx_xvld(reinterpret_cast(src), 96); - b->chunks[0] = __lasx_xvpermi_d(__lasx_xvssrlni_bu_h(m2, m1, 0), 0b11011000); - b->chunks[1] = __lasx_xvpermi_d(__lasx_xvssrlni_bu_h(m4, m3, 0), 0b11011000); -} - -static inline void base64_decode(char *out, __m256i str) { - __m256i t0 = __lasx_xvor_v( - __lasx_xvslli_w(str, 26), - __lasx_xvslli_w(__lasx_xvand_v(str, lasx_splat_u32(0x0000ff00)), 12)); - __m256i t1 = - __lasx_xvsrli_w(__lasx_xvand_v(str, lasx_splat_u32(0x003f0000)), 2); - __m256i t2 = __lasx_xvor_v(t0, t1); - __m256i t3 = __lasx_xvor_v(t2, __lasx_xvsrli_w(str, 16)); - __m256i pack_shuffle = ____m256i( - (__m128i)v16u8{3, 2, 1, 7, 6, 5, 11, 10, 9, 15, 14, 13, 0, 0, 0, 0}); - t3 = __lasx_xvshuf_b(t3, t3, (__m256i)pack_shuffle); - - // Store the output: - __lsx_vst(lasx_extracti128_lo(t3), out, 0); - __lsx_vst(lasx_extracti128_hi(t3), out, 12); -} -// decode 64 bytes and output 48 bytes -static inline void base64_decode_block(char *out, const char *src) { - base64_decode(out, __lasx_xvld(reinterpret_cast(src), 0)); - base64_decode(out + 24, - __lasx_xvld(reinterpret_cast(src), 32)); -} - -static inline void base64_decode_block_safe(char *out, const char *src) { - base64_decode(out, __lasx_xvld(reinterpret_cast(src), 0)); - char buffer[32]; - base64_decode(buffer, - __lasx_xvld(reinterpret_cast(src), 32)); - std::memcpy(out + 24, buffer, 24); -} - -static inline void base64_decode_block(char *out, block64 *b) { - base64_decode(out, b->chunks[0]); - base64_decode(out + 24, b->chunks[1]); -} -static inline void base64_decode_block_safe(char *out, block64 *b) { - base64_decode(out, b->chunks[0]); - char buffer[32]; - base64_decode(buffer, b->chunks[1]); - std::memcpy(out + 24, buffer, 24); -} - -template -full_result -compress_decode_base64(char *dst, const chartype *src, size_t srclen, - base64_options options, - last_chunk_handling_options last_chunk_options) { - const uint8_t *to_base64 = base64_url ? tables::base64::to_base64_url_value - : tables::base64::to_base64_value; - size_t equallocation = - srclen; // location of the first padding character if any - // skip trailing spaces - while (srclen > 0 && scalar::base64::is_eight_byte(src[srclen - 1]) && - to_base64[uint8_t(src[srclen - 1])] == 64) { - srclen--; - } - size_t equalsigns = 0; - if (srclen > 0 && src[srclen - 1] == '=') { - equallocation = srclen - 1; - srclen--; - equalsigns = 1; - // skip trailing spaces - while (srclen > 0 && scalar::base64::is_eight_byte(src[srclen - 1]) && - to_base64[uint8_t(src[srclen - 1])] == 64) { - srclen--; - } - if (srclen > 0 && src[srclen - 1] == '=') { - equallocation = srclen - 1; - srclen--; - equalsigns = 2; - } - } - if (srclen == 0) { - if (!ignore_garbage && equalsigns > 0) { - if (last_chunk_options == last_chunk_handling_options::strict) { - return {BASE64_INPUT_REMAINDER, 0, 0}; - } else if (last_chunk_options == - last_chunk_handling_options::stop_before_partial) { - return {SUCCESS, 0, 0}; - } - return {INVALID_BASE64_CHARACTER, equallocation, 0}; - } - return {SUCCESS, 0, 0}; - } - char *end_of_safe_64byte_zone = - (srclen + 3) / 4 * 3 >= 63 ? dst + (srclen + 3) / 4 * 3 - 63 : dst; - - const chartype *const srcinit = src; - const char *const dstinit = dst; - const chartype *const srcend = src + srclen; - - constexpr size_t block_size = 6; - static_assert(block_size >= 2, "block_size must be at least two"); - char buffer[block_size * 64]; - char *bufferptr = buffer; - if (srclen >= 64) { - const chartype *const srcend64 = src + srclen - 64; - while (src <= srcend64) { - block64 b; - load_block(&b, src); - src += 64; - bool error = false; - uint64_t badcharmask = to_base64_mask(&b, &error); - if (error && !ignore_garbage) { - src -= 64; - while (src < srcend && scalar::base64::is_eight_byte(*src) && - to_base64[uint8_t(*src)] <= 64) { - src++; - } - return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), - size_t(dst - dstinit)}; - } - if (badcharmask != 0) { - // optimization opportunity: check for simple masks like those made of - // continuous 1s followed by continuous 0s. And masks containing a - // single bad character. - bufferptr += compress_block(&b, badcharmask, bufferptr); - } else if (bufferptr != buffer) { - copy_block(&b, bufferptr); - bufferptr += 64; - } else { - if (dst >= end_of_safe_64byte_zone) { - base64_decode_block_safe(dst, &b); - } else { - base64_decode_block(dst, &b); - } - dst += 48; - } - if (bufferptr >= (block_size - 1) * 64 + buffer) { - for (size_t i = 0; i < (block_size - 2); i++) { - base64_decode_block(dst, buffer + i * 64); - dst += 48; - } - if (dst >= end_of_safe_64byte_zone) { - base64_decode_block_safe(dst, buffer + (block_size - 2) * 64); - } else { - base64_decode_block(dst, buffer + (block_size - 2) * 64); - } - dst += 48; - std::memcpy(buffer, buffer + (block_size - 1) * 64, - 64); // 64 might be too much - bufferptr -= (block_size - 1) * 64; - } - } - } - - char *buffer_start = buffer; - // Optimization note: if this is almost full, then it is worth our - // time, otherwise, we should just decode directly. - int last_block = (int)((bufferptr - buffer_start) % 64); - if (last_block != 0 && srcend - src + last_block >= 64) { - - while ((bufferptr - buffer_start) % 64 != 0 && src < srcend) { - uint8_t val = to_base64[uint8_t(*src)]; - *bufferptr = char(val); - if ((!scalar::base64::is_eight_byte(*src) || val > 64) && - !ignore_garbage) { - return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), - size_t(dst - dstinit)}; - } - bufferptr += (val <= 63); - src++; - } - } - - for (; buffer_start + 64 <= bufferptr; buffer_start += 64) { - if (dst >= end_of_safe_64byte_zone) { - base64_decode_block_safe(dst, buffer_start); - } else { - base64_decode_block(dst, buffer_start); - } - dst += 48; - } - if ((bufferptr - buffer_start) % 64 != 0) { - while (buffer_start + 4 < bufferptr) { - uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + - (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + - (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + - (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) - << 8; - triple = scalar::u32_swap_bytes(triple); - std::memcpy(dst, &triple, 4); - - dst += 3; - buffer_start += 4; - } - if (buffer_start + 4 <= bufferptr) { - uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) + - (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) + - (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) + - (uint32_t(uint8_t(buffer_start[3])) << 0 * 6)) - << 8; - triple = scalar::u32_swap_bytes(triple); - std::memcpy(dst, &triple, 3); - - dst += 3; - buffer_start += 4; - } - // we may have 1, 2 or 3 bytes left and we need to decode them so let us - // backtrack - int leftover = int(bufferptr - buffer_start); - while (leftover > 0) { - if (!ignore_garbage) { - while (to_base64[uint8_t(*(src - 1))] == 64) { - src--; - } - } else { - while (to_base64[uint8_t(*(src - 1))] >= 64) { - src--; - } - } - src--; - leftover--; - } - } - if (src < srcend + equalsigns) { - full_result r = scalar::base64::base64_tail_decode( - dst, src, srcend - src, equalsigns, options, last_chunk_options); - r.input_count += size_t(src - srcinit); - if (r.error == error_code::INVALID_BASE64_CHARACTER || - r.error == error_code::BASE64_EXTRA_BITS) { - return r; - } else { - r.output_count += size_t(dst - dstinit); - } - if (last_chunk_options != stop_before_partial && - r.error == error_code::SUCCESS && equalsigns > 0 && !ignore_garbage) { - // additional checks - if ((r.output_count % 3 == 0) || - ((r.output_count % 3) + 1 + equalsigns != 4)) { - r.error = error_code::INVALID_BASE64_CHARACTER; - r.input_count = equallocation; - } - } - return r; - } - if (equalsigns > 0 && !ignore_garbage) { - if ((size_t(dst - dstinit) % 3 == 0) || - ((size_t(dst - dstinit) % 3) + 1 + equalsigns != 4)) { - return {INVALID_BASE64_CHARACTER, equallocation, size_t(dst - dstinit)}; - } - } - return {SUCCESS, srclen, size_t(dst - dstinit)}; -} -/* end file src/lasx/lasx_base64.cpp */ -#endif // SIMDUTF_FEATURE_BASE64 - -} // namespace -} // namespace lasx -} // namespace simdutf - -/* begin file src/generic/buf_block_reader.h */ -namespace simdutf { -namespace lasx { -namespace { - -// Walks through a buffer in block-sized increments, loading the last part with -// spaces -template struct buf_block_reader { -public: - simdutf_really_inline buf_block_reader(const uint8_t *_buf, size_t _len); - simdutf_really_inline size_t block_index(); - simdutf_really_inline bool has_full_block() const; - simdutf_really_inline const uint8_t *full_block() const; - /** - * Get the last block, padded with spaces. - * - * There will always be a last block, with at least 1 byte, unless len == 0 - * (in which case this function fills the buffer with spaces and returns 0. In - * particular, if len == STEP_SIZE there will be 0 full_blocks and 1 remainder - * block with STEP_SIZE bytes and no spaces for padding. - * - * @return the number of effective characters in the last block. - */ - simdutf_really_inline size_t get_remainder(uint8_t *dst) const; - simdutf_really_inline void advance(); - -private: - const uint8_t *buf; - const size_t len; - const size_t lenminusstep; - size_t idx; -}; - -// Routines to print masks and text for debugging bitmask operations -simdutf_unused static char *format_input_text_64(const uint8_t *text) { - static char *buf = - reinterpret_cast(malloc(sizeof(simd8x64) + 1)); - for (size_t i = 0; i < sizeof(simd8x64); i++) { - buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); - } - buf[sizeof(simd8x64)] = '\0'; - return buf; -} - -// Routines to print masks and text for debugging bitmask operations -simdutf_unused static char *format_input_text(const simd8x64 &in) { - static char *buf = - reinterpret_cast(malloc(sizeof(simd8x64) + 1)); - in.store(reinterpret_cast(buf)); - for (size_t i = 0; i < sizeof(simd8x64); i++) { - if (buf[i] < ' ') { - buf[i] = '_'; - } - } - buf[sizeof(simd8x64)] = '\0'; - return buf; -} - -simdutf_unused static char *format_mask(uint64_t mask) { - static char *buf = reinterpret_cast(malloc(64 + 1)); - for (size_t i = 0; i < 64; i++) { - buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; - } - buf[64] = '\0'; - return buf; -} - -template -simdutf_really_inline -buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) - : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, - idx{0} {} - -template -simdutf_really_inline size_t buf_block_reader::block_index() { - return idx; -} - -template -simdutf_really_inline bool buf_block_reader::has_full_block() const { - return idx < lenminusstep; -} - -template -simdutf_really_inline const uint8_t * -buf_block_reader::full_block() const { - return &buf[idx]; -} - -template -simdutf_really_inline size_t -buf_block_reader::get_remainder(uint8_t *dst) const { - if (len == idx) { - return 0; - } // memcpy(dst, null, 0) will trigger an error with some sanitizers - std::memset(dst, 0x20, - STEP_SIZE); // std::memset STEP_SIZE because it is more efficient - // to write out 8 or 16 bytes at once. - std::memcpy(dst, buf + idx, len - idx); - return len - idx; -} - -template -simdutf_really_inline void buf_block_reader::advance() { - idx += STEP_SIZE; -} - -} // unnamed namespace -} // namespace lasx -} // namespace simdutf -/* end file src/generic/buf_block_reader.h */ -#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING -/* begin file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ -namespace simdutf { -namespace lasx { -namespace { -namespace utf8_validation { - -using namespace simd; - -simdutf_really_inline simd8 -check_special_cases(const simd8 input, const simd8 prev1) { - // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) - // Bit 1 = Too Long (ASCII followed by continuation) - // Bit 2 = Overlong 3-byte - // Bit 4 = Surrogate - // Bit 5 = Overlong 2-byte - // Bit 7 = Two Continuations - constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ - // 11______ 11______ - constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ - constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ - constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ - constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ - constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ - constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ - constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; - // 11110101 1000____ - // 1111011_ 1000____ - // 11111___ 1000____ - constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ - - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // 0_______ ________ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - TOO_LONG, - // 10______ ________ - TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, - // 1100____ ________ - TOO_SHORT | OVERLONG_2, - // 1101____ ________ - TOO_SHORT, - // 1110____ ________ - TOO_SHORT | OVERLONG_3 | SURROGATE, - // 1111____ ________ - TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); - constexpr const uint8_t CARRY = - TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . - const simd8 byte_1_low = - (prev1 & 0x0F) - .lookup_16( - // ____0000 ________ - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - // ____0001 ________ - CARRY | OVERLONG_2, - // ____001_ ________ - CARRY, CARRY, - - // ____0100 ________ - CARRY | TOO_LARGE, - // ____0101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____011_ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - - // ____1___ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____1101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000); - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ________ 0_______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - TOO_SHORT, TOO_SHORT, - - // ________ 1000____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | - OVERLONG_4, - // ________ 1001____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - // ________ 101_____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - - // ________ 11______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); - return (byte_1_high & byte_1_low & byte_2_high); -} -simdutf_really_inline simd8 -check_multibyte_lengths(const simd8 input, - const simd8 prev_input, - const simd8 sc) { - simd8 prev2 = input.prev<2>(prev_input); - simd8 prev3 = input.prev<3>(prev_input); - simd8 must23 = - simd8(must_be_2_3_continuation(prev2, prev3)); - simd8 must23_80 = must23 & uint8_t(0x80); - return must23_80 ^ sc; -} - -// -// Return nonzero if there are incomplete multibyte characters at the end of the -// block: e.g. if there is a 4-byte character, but it is 3 bytes from the end. -// -simdutf_really_inline simd8 is_incomplete(const simd8 input) { - // If the previous input's last 3 bytes match this, they're too short (they - // ended at EOF): - // ... 1111____ 111_____ 11______ - static const uint8_t max_array[32] = {255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 255, - 0b11110000u - 1, - 0b11100000u - 1, - 0b11000000u - 1}; - const simd8 max_value( - &max_array[sizeof(max_array) - sizeof(simd8)]); - return input.gt_bits(max_value); -} - -struct utf8_checker { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; - // The last input we received - simd8 prev_input_block; - // Whether the last input we received was incomplete (used for ASCII fast - // path) - simd8 prev_incomplete; - - // - // Check whether the current bytes are valid UTF-8. - // - simdutf_really_inline void check_utf8_bytes(const simd8 input, - const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ - // lead bytes (2, 3, 4-byte leads become large positive numbers instead of - // small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - simd8 sc = check_special_cases(input, prev1); - this->error |= check_multibyte_lengths(input, prev_input, sc); - } - - // The only problem that can happen at EOF is that a multibyte character is - // too short or a byte value too large in the last bytes: check_special_cases - // only checks for bytes too large in the first of two bytes. - simdutf_really_inline void check_eof() { - // If the previous block had incomplete UTF-8 characters at the end, an - // ASCII block can't possibly finish them. - this->error |= this->prev_incomplete; - } - - simdutf_really_inline void check_next_input(const simd8x64 &input) { - if (simdutf_likely(is_ascii(input))) { - this->error |= this->prev_incomplete; - } else { - // you might think that a for-loop would work, but under Visual Studio, it - // is not good enough. - static_assert((simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - this->prev_incomplete = - is_incomplete(input.chunks[simd8x64::NUM_CHUNKS - 1]); - this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS - 1]; - } - } - - // do not forget to call check_eof! - simdutf_really_inline bool errors() const { - return this->error.any_bits_set_anywhere(); - } - -}; // struct utf8_checker -} // namespace utf8_validation - -using utf8_validation::utf8_checker; - -} // unnamed namespace -} // namespace lasx -} // namespace simdutf -/* end file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ -/* begin file src/generic/utf8_validation/utf8_validator.h */ -namespace simdutf { -namespace lasx { -namespace { -namespace utf8_validation { - -/** - * Validates that the string is actual UTF-8. - */ -template -bool generic_validate_utf8(const uint8_t *input, size_t length) { - checker c{}; - buf_block_reader<64> reader(input, length); - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - c.check_next_input(in); - reader.advance(); - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - c.check_next_input(in); - reader.advance(); - c.check_eof(); - return !c.errors(); -} - -bool generic_validate_utf8(const char *input, size_t length) { - return generic_validate_utf8( - reinterpret_cast(input), length); -} - -/** - * Validates that the string is actual UTF-8 and stops on errors. - */ -template -result generic_validate_utf8_with_errors(const uint8_t *input, size_t length) { - checker c{}; - buf_block_reader<64> reader(input, length); - size_t count{0}; - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - c.check_next_input(in); - if (c.errors()) { - if (count != 0) { - count--; - } // Sometimes the error is only detected in the next chunk - result res = scalar::utf8::rewind_and_validate_with_errors( - reinterpret_cast(input), - reinterpret_cast(input + count), length - count); - res.count += count; - return res; - } - reader.advance(); - count += 64; - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - c.check_next_input(in); - reader.advance(); - c.check_eof(); - if (c.errors()) { - if (count != 0) { - count--; - } // Sometimes the error is only detected in the next chunk - result res = scalar::utf8::rewind_and_validate_with_errors( - reinterpret_cast(input), - reinterpret_cast(input) + count, length - count); - res.count += count; - return res; - } else { - return result(error_code::SUCCESS, length); - } -} - -result generic_validate_utf8_with_errors(const char *input, size_t length) { - return generic_validate_utf8_with_errors( - reinterpret_cast(input), length); -} - -} // namespace utf8_validation -} // unnamed namespace -} // namespace lasx -} // namespace simdutf -/* end file src/generic/utf8_validation/utf8_validator.h */ -#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING -#if SIMDUTF_FEATURE_ASCII -/* begin file src/generic/ascii_validation.h */ -namespace simdutf { -namespace lasx { -namespace { -namespace ascii_validation { - -bool generic_validate_ascii(const char *input, size_t length) { - buf_block_reader<64> reader(reinterpret_cast(input), length); - uint8_t blocks[64]{}; - simd::simd8x64 running_or(blocks); - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - running_or |= in; - reader.advance(); - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - running_or |= in; - return running_or.is_ascii(); -} - -result generic_validate_ascii_with_errors(const char *input, size_t length) { - buf_block_reader<64> reader(reinterpret_cast(input), length); - size_t count{0}; - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - if (!in.is_ascii()) { - result res = scalar::ascii::validate_with_errors( - reinterpret_cast(input + count), length - count); - return result(res.error, count + res.count); - } - reader.advance(); - - count += 64; - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - if (!in.is_ascii()) { - result res = scalar::ascii::validate_with_errors( - reinterpret_cast(input + count), length - count); - return result(res.error, count + res.count); - } else { - return result(error_code::SUCCESS, length); - } -} - -} // namespace ascii_validation -} // unnamed namespace -} // namespace lasx -} // namespace simdutf -/* end file src/generic/ascii_validation.h */ -#endif // SIMDUTF_FEATURE_ASCII - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - // transcoding from UTF-8 to Latin 1 -/* begin file src/generic/utf8_to_latin1/utf8_to_latin1.h */ -namespace simdutf { -namespace lasx { -namespace { -namespace utf8_to_latin1 { -using namespace simd; - -simdutf_really_inline simd8 -check_special_cases(const simd8 input, const simd8 prev1) { - // For UTF-8 to Latin 1, we can allow any ASCII character, and any - // continuation byte, but the non-ASCII leading bytes must be 0b11000011 or - // 0b11000010 and nothing else. - // - // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) - // Bit 1 = Too Long (ASCII followed by continuation) - // Bit 2 = Overlong 3-byte - // Bit 4 = Surrogate - // Bit 5 = Overlong 2-byte - // Bit 7 = Two Continuations - constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ - // 11______ 11______ - constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ - constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ - constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ - constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ - constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ - constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ - constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; - // 11110101 1000____ - // 1111011_ 1000____ - // 11111___ 1000____ - constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ - constexpr const uint8_t FORBIDDEN = 0xff; - - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // 0_______ ________ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - TOO_LONG, - // 10______ ________ - TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, - // 1100____ ________ - TOO_SHORT | OVERLONG_2, - // 1101____ ________ - FORBIDDEN, - // 1110____ ________ - FORBIDDEN, - // 1111____ ________ - FORBIDDEN); - constexpr const uint8_t CARRY = - TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . - const simd8 byte_1_low = - (prev1 & 0x0F) - .lookup_16( - // ____0000 ________ - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - // ____0001 ________ - CARRY | OVERLONG_2, - // ____001_ ________ - CARRY, CARRY, - - // ____0100 ________ - FORBIDDEN, - // ____0101 ________ - FORBIDDEN, - // ____011_ ________ - FORBIDDEN, FORBIDDEN, - - // ____1___ ________ - FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN, FORBIDDEN, - // ____1101 ________ - FORBIDDEN, FORBIDDEN, FORBIDDEN); - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ________ 0_______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - TOO_SHORT, TOO_SHORT, - - // ________ 1000____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | - OVERLONG_4, - // ________ 1001____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - // ________ 101_____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - - // ________ 11______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); - return (byte_1_high & byte_1_low & byte_2_high); -} - -struct validating_transcoder { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; - - validating_transcoder() : error(uint8_t(0)) {} - // - // Check whether the current bytes are valid UTF-8. - // - simdutf_really_inline void check_utf8_bytes(const simd8 input, - const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ - // lead bytes (2, 3, 4-byte leads become large positive numbers instead of - // small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - this->error |= check_special_cases(input, prev1); - } - - simdutf_really_inline size_t convert(const char *in, size_t size, - char *latin1_output) { - size_t pos = 0; - char *start{latin1_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 16 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 16; margin--) { - leading_byte += (int8_t(in[margin - 1]) > - -65); // twos complement of -65 is 1011 1111 ... - } - // If the input is long enough, then we have that margin-1 is the eight last - // leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store((int8_t *)latin1_output); - latin1_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - uint64_t utf8_continuation_mask = - input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in - // this case, we also have ASCII to account for. - if (utf8_continuation_mask & 1) { - return 0; // error - } - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_latin1( - in + pos, utf8_end_of_code_point_mask, latin1_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - return 0; - } - if (pos < size) { - size_t howmany = - scalar::utf8_to_latin1::convert(in + pos, size - pos, latin1_output); - if (howmany == 0) { - return 0; - } - latin1_output += howmany; - } - return latin1_output - start; - } - - simdutf_really_inline result convert_with_errors(const char *in, size_t size, - char *latin1_output) { - size_t pos = 0; - char *start{latin1_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 8 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > -65); - } - // If the input is long enough, then we have that margin-1 is the eight last - // leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store((int8_t *)latin1_output); - latin1_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - if (errors()) { - // rewind_and_convert_with_errors will seek a potential error from - // in+pos onward, with the ability to go back up to pos bytes, and - // read size-pos bytes forward. - result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( - pos, in + pos, size - pos, latin1_output); - res.count += pos; - return res; - } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_latin1( - in + pos, utf8_end_of_code_point_mask, latin1_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - // rewind_and_convert_with_errors will seek a potential error from in+pos - // onward, with the ability to go back up to pos bytes, and read size-pos - // bytes forward. - result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( - pos, in + pos, size - pos, latin1_output); - res.count += pos; - return res; - } - if (pos < size) { - // rewind_and_convert_with_errors will seek a potential error from in+pos - // onward, with the ability to go back up to pos bytes, and read size-pos - // bytes forward. - result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors( - pos, in + pos, size - pos, latin1_output); - if (res.error) { // In case of error, we want the error position - res.count += pos; - return res; - } else { // In case of success, we want the number of word written - latin1_output += res.count; - } - } - return result(error_code::SUCCESS, latin1_output - start); - } - - simdutf_really_inline bool errors() const { - return this->error.any_bits_set_anywhere(); - } - -}; // struct utf8_checker -} // namespace utf8_to_latin1 -} // unnamed namespace -} // namespace lasx -} // namespace simdutf -/* end file src/generic/utf8_to_latin1/utf8_to_latin1.h */ -/* begin file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ -namespace simdutf { -namespace lasx { -namespace { -namespace utf8_to_latin1 { -using namespace simd; - -simdutf_really_inline size_t convert_valid(const char *in, size_t size, - char *latin1_output) { - size_t pos = 0; - char *start{latin1_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the last - // 16 bytes, and if the data is valid, then it is entirely safe because 16 - // UTF-8 bytes generate much more than 8 bytes. However, you cannot generally - // assume that you have valid UTF-8 input, so we are going to go back from the - // end counting 8 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > - -65); // twos complement of -65 is 1011 1111 ... - } - // If the input is long enough, then we have that margin-1 is the eight last - // leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store((int8_t *)latin1_output); - latin1_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, it - // is not good enough. - uint64_t utf8_continuation_mask = - input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in - // this case, we also have ASCII to account for. - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_latin1( - in + pos, utf8_end_of_code_point_mask, latin1_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (pos < size) { - size_t howmany = scalar::utf8_to_latin1::convert_valid(in + pos, size - pos, - latin1_output); - latin1_output += howmany; - } - return latin1_output - start; -} - -} // namespace utf8_to_latin1 -} // namespace -} // namespace lasx -} // namespace simdutf - // namespace simdutf -/* end file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - // transcoding from UTF-8 to UTF-16 -/* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ -namespace simdutf { -namespace lasx { -namespace { -namespace utf8_to_utf16 { - -using namespace simd; - -template -simdutf_warn_unused size_t convert_valid(const char *input, size_t size, - char16_t *utf16_output) noexcept { - // The implementation is not specific to haswell and should be moved to the - // generic directory. - size_t pos = 0; - char16_t *start{utf16_output}; - const size_t safety_margin = 16; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - // this loop could be unrolled further. For example, we could process the - // mask far more than 64 bytes. - simd8x64 in(reinterpret_cast(input + pos)); - if (in.is_ascii()) { - in.store_ascii_as_utf16(utf16_output); - utf16_output += 64; - pos += 64; - } else { - // Slow path. We hope that the compiler will recognize that this is a slow - // path. Anything that is not a continuation mask is a 'leading byte', - // that is, the start of a new code point. - uint64_t utf8_continuation_mask = in.lt(-65 + 1); - // -65 is 0b10111111 in two-complement's, so largest possible continuation - // byte - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - // The *start* of code points is not so useful, rather, we want the *end* - // of code points. - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times when using solely - // the slow/regular path, and at least four times if there are fast paths. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - // - // Thus we may allow convert_masked_utf8_to_utf16 to process - // more bytes at a time under a fast-path mode where 16 bytes - // are consumed at once (e.g., when encountering ASCII). - size_t consumed = convert_masked_utf8_to_utf16( - input + pos, utf8_end_of_code_point_mask, utf16_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - utf16_output += scalar::utf8_to_utf16::convert_valid( - input + pos, size - pos, utf16_output); - return utf16_output - start; -} - -} // namespace utf8_to_utf16 -} // unnamed namespace -} // namespace lasx -} // namespace simdutf -/* end file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ -/* begin file src/generic/utf8_to_utf16/utf8_to_utf16.h */ -namespace simdutf { -namespace lasx { -namespace { -namespace utf8_to_utf16 { -using namespace simd; - -simdutf_really_inline simd8 -check_special_cases(const simd8 input, const simd8 prev1) { - // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) - // Bit 1 = Too Long (ASCII followed by continuation) - // Bit 2 = Overlong 3-byte - // Bit 4 = Surrogate - // Bit 5 = Overlong 2-byte - // Bit 7 = Two Continuations - constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ - // 11______ 11______ - constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ - constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ - constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ - constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ - constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ - constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ - constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; - // 11110101 1000____ - // 1111011_ 1000____ - // 11111___ 1000____ - constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ - - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // 0_______ ________ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - TOO_LONG, - // 10______ ________ - TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, - // 1100____ ________ - TOO_SHORT | OVERLONG_2, - // 1101____ ________ - TOO_SHORT, - // 1110____ ________ - TOO_SHORT | OVERLONG_3 | SURROGATE, - // 1111____ ________ - TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); - constexpr const uint8_t CARRY = - TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . - const simd8 byte_1_low = - (prev1 & 0x0F) - .lookup_16( - // ____0000 ________ - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - // ____0001 ________ - CARRY | OVERLONG_2, - // ____001_ ________ - CARRY, CARRY, - - // ____0100 ________ - CARRY | TOO_LARGE, - // ____0101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____011_ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - - // ____1___ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____1101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000); - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ________ 0_______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - TOO_SHORT, TOO_SHORT, - - // ________ 1000____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | - OVERLONG_4, - // ________ 1001____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - // ________ 101_____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - - // ________ 11______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); - return (byte_1_high & byte_1_low & byte_2_high); -} -simdutf_really_inline simd8 -check_multibyte_lengths(const simd8 input, - const simd8 prev_input, - const simd8 sc) { - simd8 prev2 = input.prev<2>(prev_input); - simd8 prev3 = input.prev<3>(prev_input); - simd8 must23 = - simd8(must_be_2_3_continuation(prev2, prev3)); - simd8 must23_80 = must23 & uint8_t(0x80); - return must23_80 ^ sc; -} - -struct validating_transcoder { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; - - validating_transcoder() : error(uint8_t(0)) {} - // - // Check whether the current bytes are valid UTF-8. - // - simdutf_really_inline void check_utf8_bytes(const simd8 input, - const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ - // lead bytes (2, 3, 4-byte leads become large positive numbers instead of - // small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - simd8 sc = check_special_cases(input, prev1); - this->error |= check_multibyte_lengths(input, prev_input, sc); - } - - template - simdutf_really_inline size_t convert(const char *in, size_t size, - char16_t *utf16_output) { - size_t pos = 0; - char16_t *start{utf16_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 8 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > -65); - } - // If the input is long enough, then we have that margin-1 is the eight last - // leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store_ascii_as_utf16(utf16_output); - utf16_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - if (utf8_continuation_mask & 1) { - return 0; // error - } - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_utf16( - in + pos, utf8_end_of_code_point_mask, utf16_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - return 0; - } - if (pos < size) { - size_t howmany = scalar::utf8_to_utf16::convert( - in + pos, size - pos, utf16_output); - if (howmany == 0) { - return 0; - } - utf16_output += howmany; - } - return utf16_output - start; - } - - template - simdutf_really_inline result convert_with_errors(const char *in, size_t size, - char16_t *utf16_output) { - size_t pos = 0; - char16_t *start{utf16_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 8 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > -65); - } - // If the input is long enough, then we have that margin-1 is the eight last - // leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store_ascii_as_utf16(utf16_output); - utf16_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - if (errors() || (utf8_continuation_mask & 1)) { - // rewind_and_convert_with_errors will seek a potential error from - // in+pos onward, with the ability to go back up to pos bytes, and - // read size-pos bytes forward. - result res = - scalar::utf8_to_utf16::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf16_output); - res.count += pos; - return res; - } - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_utf16( - in + pos, utf8_end_of_code_point_mask, utf16_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - // rewind_and_convert_with_errors will seek a potential error from in+pos - // onward, with the ability to go back up to pos bytes, and read size-pos - // bytes forward. - result res = - scalar::utf8_to_utf16::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf16_output); - res.count += pos; - return res; - } - if (pos < size) { - // rewind_and_convert_with_errors will seek a potential error from in+pos - // onward, with the ability to go back up to pos bytes, and read size-pos - // bytes forward. - result res = - scalar::utf8_to_utf16::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf16_output); - if (res.error) { // In case of error, we want the error position - res.count += pos; - return res; - } else { // In case of success, we want the number of word written - utf16_output += res.count; - } - } - return result(error_code::SUCCESS, utf16_output - start); - } - - simdutf_really_inline bool errors() const { - return this->error.any_bits_set_anywhere(); - } - -}; // struct utf8_checker -} // namespace utf8_to_utf16 -} // unnamed namespace -} // namespace lasx -} // namespace simdutf -/* end file src/generic/utf8_to_utf16/utf8_to_utf16.h */ -/* begin file src/generic/utf8/utf16_length_from_utf8_bytemask.h */ -namespace simdutf { -namespace lasx { -namespace { -namespace utf8 { - -using namespace simd; - -simdutf_really_inline size_t utf16_length_from_utf8_bytemask(const char *in, - size_t size) { - using vector_i8 = simd8; - using vector_u8 = simd8; - using vector_u64 = simd64; - - constexpr size_t N = vector_i8::SIZE; - constexpr size_t max_iterations = 255 / 2; - - auto counters = vector_u64::zero(); - auto local = vector_u8::zero(); - - size_t iterations = 0; - size_t pos = 0; - size_t count = 0; - for (; pos + N <= size; pos += N) { - const auto input = - vector_i8::load(reinterpret_cast(in + pos)); - - const auto continuation = input > int8_t(-65); - const auto utf_4bytes = vector_u8(input.value) >= uint8_t(240); - - local -= vector_u8(continuation); - local -= vector_u8(utf_4bytes); - - iterations += 1; - if (iterations == max_iterations) { - counters += sum_8bytes(local); - local = vector_u8::zero(); - iterations = 0; - } - } - - if (iterations > 0) { - count += local.sum_bytes(); - } - - count += counters.sum(); - - return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos); -} - -} // namespace utf8 -} // unnamed namespace -} // namespace lasx -} // namespace simdutf -/* end file src/generic/utf8/utf16_length_from_utf8_bytemask.h */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - // transcoding from UTF-8 to UTF-32 -/* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ -namespace simdutf { -namespace lasx { -namespace { -namespace utf8_to_utf32 { - -using namespace simd; - -simdutf_warn_unused size_t convert_valid(const char *input, size_t size, - char32_t *utf32_output) noexcept { - size_t pos = 0; - char32_t *start{utf32_output}; - const size_t safety_margin = 16; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 in(reinterpret_cast(input + pos)); - if (in.is_ascii()) { - in.store_ascii_as_utf32(utf32_output); - utf32_output += 64; - pos += 64; - } else { - // -65 is 0b10111111 in two-complement's, so largest possible continuation - // byte - uint64_t utf8_continuation_mask = in.lt(-65 + 1); - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - size_t max_starting_point = (pos + 64) - 12; - while (pos < max_starting_point) { - size_t consumed = convert_masked_utf8_to_utf32( - input + pos, utf8_end_of_code_point_mask, utf32_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - } - } - utf32_output += scalar::utf8_to_utf32::convert_valid(input + pos, size - pos, - utf32_output); - return utf32_output - start; -} - -} // namespace utf8_to_utf32 -} // unnamed namespace -} // namespace lasx -} // namespace simdutf -/* end file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ -/* begin file src/generic/utf8_to_utf32/utf8_to_utf32.h */ -namespace simdutf { -namespace lasx { -namespace { -namespace utf8_to_utf32 { -using namespace simd; - -simdutf_really_inline simd8 -check_special_cases(const simd8 input, const simd8 prev1) { - // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) - // Bit 1 = Too Long (ASCII followed by continuation) - // Bit 2 = Overlong 3-byte - // Bit 4 = Surrogate - // Bit 5 = Overlong 2-byte - // Bit 7 = Two Continuations - constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______ - // 11______ 11______ - constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______ - constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____ - constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____ - constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______ - constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______ - constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ - constexpr const uint8_t TOO_LARGE_1000 = 1 << 6; - // 11110101 1000____ - // 1111011_ 1000____ - // 11111___ 1000____ - constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____ - - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // 0_______ ________ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - TOO_LONG, - // 10______ ________ - TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, - // 1100____ ________ - TOO_SHORT | OVERLONG_2, - // 1101____ ________ - TOO_SHORT, - // 1110____ ________ - TOO_SHORT | OVERLONG_3 | SURROGATE, - // 1111____ ________ - TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4); - constexpr const uint8_t CARRY = - TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . - const simd8 byte_1_low = - (prev1 & 0x0F) - .lookup_16( - // ____0000 ________ - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - // ____0001 ________ - CARRY | OVERLONG_2, - // ____001_ ________ - CARRY, CARRY, - - // ____0100 ________ - CARRY | TOO_LARGE, - // ____0101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____011_ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - - // ____1___ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____1101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000); - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ________ 0_______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - TOO_SHORT, TOO_SHORT, - - // ________ 1000____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | - OVERLONG_4, - // ________ 1001____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - // ________ 101_____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - - // ________ 11______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT); - return (byte_1_high & byte_1_low & byte_2_high); -} -simdutf_really_inline simd8 -check_multibyte_lengths(const simd8 input, - const simd8 prev_input, - const simd8 sc) { - simd8 prev2 = input.prev<2>(prev_input); - simd8 prev3 = input.prev<3>(prev_input); - simd8 must23 = - simd8(must_be_2_3_continuation(prev2, prev3)); - simd8 must23_80 = must23 & uint8_t(0x80); - return must23_80 ^ sc; -} - -struct validating_transcoder { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; - - validating_transcoder() : error(uint8_t(0)) {} - // - // Check whether the current bytes are valid UTF-8. - // - simdutf_really_inline void check_utf8_bytes(const simd8 input, - const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ - // lead bytes (2, 3, 4-byte leads become large positive numbers instead of - // small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - simd8 sc = check_special_cases(input, prev1); - this->error |= check_multibyte_lengths(input, prev_input, sc); - } - - simdutf_really_inline size_t convert(const char *in, size_t size, - char32_t *utf32_output) { - size_t pos = 0; - char32_t *start{utf32_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 words when calling convert_masked_utf8_to_utf32. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 16 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > -65); - } - // If the input is long enough, then we have that margin-1 is the fourth - // last leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store_ascii_as_utf32(utf32_output); - utf32_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - if (utf8_continuation_mask & 1) { - return 0; // we have an error - } - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_utf32( - in + pos, utf8_end_of_code_point_mask, utf32_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - return 0; - } - if (pos < size) { - size_t howmany = - scalar::utf8_to_utf32::convert(in + pos, size - pos, utf32_output); - if (howmany == 0) { - return 0; - } - utf32_output += howmany; - } - return utf32_output - start; - } - - simdutf_really_inline result convert_with_errors(const char *in, size_t size, - char32_t *utf32_output) { - size_t pos = 0; - char32_t *start{utf32_output}; - // In the worst case, we have the haswell kernel which can cause an overflow - // of 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the - // last 16 bytes, and if the data is valid, then it is entirely safe because - // 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot - // generally assume that you have valid UTF-8 input, so we are going to go - // back from the end counting 8 leading bytes, to give us a good margin. - size_t leading_byte = 0; - size_t margin = size; - for (; margin > 0 && leading_byte < 8; margin--) { - leading_byte += (int8_t(in[margin - 1]) > -65); - } - // If the input is long enough, then we have that margin-1 is the fourth - // last leading byte. - const size_t safety_margin = size - margin + 1; // to avoid overruns! - while (pos + 64 + safety_margin <= size) { - simd8x64 input(reinterpret_cast(in + pos)); - if (input.is_ascii()) { - input.store_ascii_as_utf32(utf32_output); - utf32_output += 64; - pos += 64; - } else { - // you might think that a for-loop would work, but under Visual Studio, - // it is not good enough. - static_assert( - (simd8x64::NUM_CHUNKS == 2) || - (simd8x64::NUM_CHUNKS == 4), - "We support either two or four chunks per 64-byte block."); - auto zero = simd8{uint8_t(0)}; - if (simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if (simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], zero); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - if (errors() || (utf8_continuation_mask & 1)) { - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf32_output); - res.count += pos; - return res; - } - uint64_t utf8_leading_mask = ~utf8_continuation_mask; - uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1; - // We process in blocks of up to 12 bytes except possibly - // for fast paths which may process up to 16 bytes. For the - // slow path to work, we should have at least 12 input bytes left. - size_t max_starting_point = (pos + 64) - 12; - // Next loop is going to run at least five times. - while (pos < max_starting_point) { - // Performance note: our ability to compute 'consumed' and - // then shift and recompute is critical. If there is a - // latency of, say, 4 cycles on getting 'consumed', then - // the inner loop might have a total latency of about 6 cycles. - // Yet we process between 6 to 12 inputs bytes, thus we get - // a speed limit between 1 cycle/byte and 0.5 cycle/byte - // for this section of the code. Hence, there is a limit - // to how much we can further increase this latency before - // it seriously harms performance. - size_t consumed = convert_masked_utf8_to_utf32( - in + pos, utf8_end_of_code_point_mask, utf32_output); - pos += consumed; - utf8_end_of_code_point_mask >>= consumed; - } - // At this point there may remain between 0 and 12 bytes in the - // 64-byte block. These bytes will be processed again. So we have an - // 80% efficiency (in the worst case). In practice we expect an - // 85% to 90% efficiency. - } - } - if (errors()) { - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf32_output); - res.count += pos; - return res; - } - if (pos < size) { - result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors( - pos, in + pos, size - pos, utf32_output); - if (res.error) { // In case of error, we want the error position - res.count += pos; - return res; - } else { // In case of success, we want the number of word written - utf32_output += res.count; - } - } - return result(error_code::SUCCESS, utf32_output - start); - } - - simdutf_really_inline bool errors() const { - return this->error.any_bits_set_anywhere(); - } - -}; // struct utf8_checker -} // namespace utf8_to_utf32 -} // unnamed namespace -} // namespace lasx -} // namespace simdutf -/* end file src/generic/utf8_to_utf32/utf8_to_utf32.h */ -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 -/* begin file src/generic/utf8.h */ -namespace simdutf { -namespace lasx { -namespace { -namespace utf8 { - -using namespace simd; - -simdutf_really_inline size_t count_code_points(const char *in, size_t size) { - size_t pos = 0; - size_t count = 0; - for (; pos + 64 <= size; pos += 64) { - simd8x64 input(reinterpret_cast(in + pos)); - uint64_t utf8_continuation_mask = input.gt(-65); - count += count_ones(utf8_continuation_mask); - } - return count + scalar::utf8::count_code_points(in + pos, size - pos); -} - -#ifdef SIMDUTF_SIMD_HAS_BYTEMASK -simdutf_really_inline size_t count_code_points_bytemask(const char *in, - size_t size) { - using vector_i8 = simd8; - using vector_u8 = simd8; - using vector_u64 = simd64; - - constexpr size_t N = vector_i8::SIZE; - constexpr size_t max_iterations = 255 / 4; - - size_t pos = 0; - size_t count = 0; - - auto counters = vector_u64::zero(); - auto local = vector_u8::zero(); - size_t iterations = 0; - for (; pos + 4 * N <= size; pos += 4 * N) { - const auto input0 = - simd8::load(reinterpret_cast(in + pos + 0 * N)); - const auto input1 = - simd8::load(reinterpret_cast(in + pos + 1 * N)); - const auto input2 = - simd8::load(reinterpret_cast(in + pos + 2 * N)); - const auto input3 = - simd8::load(reinterpret_cast(in + pos + 3 * N)); - const auto mask0 = input0 > int8_t(-65); - const auto mask1 = input1 > int8_t(-65); - const auto mask2 = input2 > int8_t(-65); - const auto mask3 = input3 > int8_t(-65); - - local -= vector_u8(mask0); - local -= vector_u8(mask1); - local -= vector_u8(mask2); - local -= vector_u8(mask3); - - iterations += 1; - if (iterations == max_iterations) { - counters += sum_8bytes(local); - local = vector_u8::zero(); - iterations = 0; - } - } - - if (iterations > 0) { - count += local.sum_bytes(); - } - - count += counters.sum(); - - return count + scalar::utf8::count_code_points(in + pos, size - pos); -} -#endif // SIMDUTF_SIMD_HAS_BYTEMASK - -simdutf_really_inline size_t utf16_length_from_utf8(const char *in, - size_t size) { - size_t pos = 0; - size_t count = 0; - // This algorithm could no doubt be improved! - for (; pos + 64 <= size; pos += 64) { - simd8x64 input(reinterpret_cast(in + pos)); - uint64_t utf8_continuation_mask = input.lt(-65 + 1); - // We count one word for anything that is not a continuation (so - // leading bytes). - count += 64 - count_ones(utf8_continuation_mask); - int64_t utf8_4byte = input.gteq_unsigned(240); - count += count_ones(utf8_4byte); - } - return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos); -} - -} // namespace utf8 -} // unnamed namespace -} // namespace lasx -} // namespace simdutf -/* end file src/generic/utf8.h */ -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_UTF16 -/* begin file src/generic/utf16/count_code_points_bytemask.h */ -namespace simdutf { -namespace lasx { -namespace { -namespace utf16 { - -using namespace simd; - -template -simdutf_really_inline size_t count_code_points(const char16_t *in, - size_t size) { - using vector_u16 = simd16; - constexpr size_t N = vector_u16::ELEMENTS; - - size_t pos = 0; - size_t count = 0; - - constexpr size_t max_itertions = 65535; - const auto one = vector_u16::splat(1); - const auto zero = vector_u16::zero(); - - size_t itertion = 0; - - auto counters = zero; - for (; pos < size / N * N; pos += N) { - auto input = vector_u16::load(in + pos); - if (!match_system(big_endian)) { - input = input.swap_bytes(); - } - - const auto t0 = input & uint16_t(0xfc00); - const auto t1 = t0 ^ uint16_t(0xdc00); - - // t2[0] == 1 iff input[0] outside range 0xdc00..dfff (the word is not a - // high surrogate) - const auto t2 = min(t1, one); - - counters += t2; - - itertion += 1; - if (itertion == max_itertions) { - count += counters.sum(); - counters = zero; - itertion = 0; - } - } - - if (itertion > 0) { - count += counters.sum(); - } - - return count + - scalar::utf16::count_code_points(in + pos, size - pos); -} - -} // namespace utf16 -} // unnamed namespace -} // namespace lasx -} // namespace simdutf -/* end file src/generic/utf16/count_code_points_bytemask.h */ -/* begin file src/generic/utf16/change_endianness.h */ -namespace simdutf { -namespace lasx { -namespace { -namespace utf16 { - -simdutf_really_inline void -change_endianness_utf16(const char16_t *in, size_t size, char16_t *output) { - size_t pos = 0; - - while (pos < size / 32 * 32) { - simd16x32 input(reinterpret_cast(in + pos)); - input.swap_bytes(); - input.store(reinterpret_cast(output)); - pos += 32; - output += 32; - } - - scalar::utf16::change_endianness_utf16(in + pos, size - pos, output); -} - -} // namespace utf16 -} // unnamed namespace -} // namespace lasx -} // namespace simdutf -/* end file src/generic/utf16/change_endianness.h */ -/* begin file src/generic/utf16/utf8_length_from_utf16_bytemask.h */ -namespace simdutf { -namespace lasx { -namespace { -namespace utf16 { - -using namespace simd; - -template -simdutf_really_inline size_t utf8_length_from_utf16_bytemask(const char16_t *in, - size_t size) { - size_t pos = 0; - - using vector_u16 = simd16; - constexpr size_t N = vector_u16::ELEMENTS; - - const auto one = vector_u16::splat(1); - - auto v_count = vector_u16::zero(); - - // each char16 yields at least one byte - size_t count = size / N * N; - - // in a single iteration the increment is 0, 1 or 2, despite we have - // three additions - constexpr size_t max_iterations = 65535 / 2; - size_t iteration = max_iterations; - - for (; pos < size / N * N; pos += N) { - auto input = vector_u16::load(reinterpret_cast(in + pos)); - if (!match_system(big_endian)) { - input = input.swap_bytes(); - } - // 0xd800 .. 0xdbff - low surrogate - // 0xdc00 .. 0xdfff - high surrogate - const auto is_surrogate = ((input & uint16_t(0xf800)) == uint16_t(0xd800)); - - // c0 - chars that yield 2- or 3-byte UTF-8 codes - const auto c0 = min(input & uint16_t(0xff80), one); - - // c1 - chars that yield 3-byte UTF-8 codes (including surrogates) - const auto c1 = min(input & uint16_t(0xf800), one); - - /* - Explanation how the counting works. - - In the case of a non-surrogate character we count: - * always 1 -- see how `count` is initialized above; - * c0 = 1 if the current char yields 2 or 3 bytes; - * c1 = 1 if the current char yields 3 bytes. - - Thus, we always have correct count for the current char: - from 1, 2 or 3 bytes. - - A trickier part is how we count surrogate pairs. Whether - we encounter a surrogate (low or high), we count it as - 3 chars and then minus 1 (`is_surrogate` is -1 or 0). - Each surrogate char yields 2. A surrogate pair, that - is a low surrogate followed by a high one, yields - the expected 4 bytes. - - It also correctly handles cases when low surrogate is - processed by the this loop, but high surrogate is counted - by the scalar procedure. The scalar procedure uses exactly - the described approach, thanks to that for valid UTF-16 - strings it always count correctly. - */ - v_count += c0; - v_count += c1; - v_count += vector_u16(is_surrogate); - - iteration -= 1; - if (iteration == 0) { - count += v_count.sum(); - v_count = vector_u16::zero(); - iteration = max_iterations; - } - } - - if (iteration > 0) { - count += v_count.sum(); - } - - return count + scalar::utf16::utf8_length_from_utf16(in + pos, - size - pos); -} - -} // namespace utf16 -} // unnamed namespace -} // namespace lasx -} // namespace simdutf -/* end file src/generic/utf16/utf8_length_from_utf16_bytemask.h */ -/* begin file src/generic/utf16/utf32_length_from_utf16.h */ -namespace simdutf { -namespace lasx { -namespace { -namespace utf16 { - -template -simdutf_really_inline size_t utf32_length_from_utf16(const char16_t *in, - size_t size) { - return count_code_points(in, size); -} - -} // namespace utf16 -} // unnamed namespace -} // namespace lasx -} // namespace simdutf -/* end file src/generic/utf16/utf32_length_from_utf16.h */ -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING -/* begin file src/generic/validate_utf16.h */ -namespace simdutf { -namespace lasx { -namespace { -namespace utf16 { -/* - UTF-16 validation - -------------------------------------------------- - - In UTF-16 code units in range 0xD800 to 0xDFFF have special meaning. - - In a vectorized algorithm we want to examine the most significant - nibble in order to select a fast path. If none of highest nibbles - are 0xD (13), than we are sure that UTF-16 chunk in a vector - register is valid. - - Let us analyze what we need to check if the nibble is 0xD. The - value of the preceding nibble determines what we have: - - 0xd000 .. 0xd7ff - a valid word - 0xd800 .. 0xdbff - low surrogate - 0xdc00 .. 0xdfff - high surrogate - - Other constraints we have to consider: - - there must not be two consecutive low surrogates (0xd800 .. 0xdbff) - - there must not be two consecutive high surrogates (0xdc00 .. 0xdfff) - - there must not be sole low surrogate nor high surrogate - - We are going to build three bitmasks based on the 3rd nibble: - - V = valid word, - - L = low surrogate (0xd800 .. 0xdbff) - - H = high surrogate (0xdc00 .. 0xdfff) - - 0 1 2 3 4 5 6 7 <--- word index - [ V | L | H | L | H | V | V | L ] - 1 0 0 0 0 1 1 0 - V = valid masks - 0 1 0 1 0 0 0 1 - L = low surrogate - 0 0 1 0 1 0 0 0 - H high surrogate - - - 1 0 0 0 0 1 1 0 V = valid masks - 0 1 0 1 0 0 0 0 a = L & (H >> 1) - 0 0 1 0 1 0 0 0 b = a << 1 - 1 1 1 1 1 1 1 0 c = V | a | b - ^ - the last bit can be zero, we just consume 7 - code units and recheck this word in the next iteration -*/ -template -const result validate_utf16_with_errors(const char16_t *input, size_t size) { - if (simdutf_unlikely(size == 0)) { - return result(error_code::SUCCESS, 0); - } - - const char16_t *start = input; - const char16_t *end = input + size; - - const auto v_d8 = simd8::splat(0xd8); - const auto v_f8 = simd8::splat(0xf8); - const auto v_fc = simd8::splat(0xfc); - const auto v_dc = simd8::splat(0xdc); - - while (input + simd16::SIZE * 2 < end) { - // 0. Load data: since the validation takes into account only higher - // byte of each word, we compress the two vectors into one which - // consists only the higher bytes. - auto in0 = simd16(input); - auto in1 = - simd16(input + simd16::SIZE / sizeof(char16_t)); - - // Function `utf16_gather_high_bytes` consumes two vectors of UTF-16 - // and yields a single vector having only higher bytes of characters. - const auto in = utf16_gather_high_bytes(in0, in1); - - // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy). - const auto surrogates_wordmask = (in & v_f8) == v_d8; - const uint16_t surrogates_bitmask = - static_cast(surrogates_wordmask.to_bitmask()); - if (surrogates_bitmask == 0x0000) { - input += 16; - } else { - // 2. We have some surrogates that have to be distinguished: - // - low surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF) - // - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF) - // - // Fact: high surrogate has 11th bit set (3rd bit in the higher byte) - - // V - non-surrogate code units - // V = not surrogates_wordmask - const uint16_t V = static_cast(~surrogates_bitmask); - - // H - word-mask for high surrogates: the six highest bits are 0b1101'11 - const auto vH = (in & v_fc) == v_dc; - const uint16_t H = static_cast(vH.to_bitmask()); - - // L - word mask for low surrogates - // L = not H and surrogates_wordmask - const uint16_t L = static_cast(~H & surrogates_bitmask); - - const uint16_t a = static_cast( - L & (H >> 1)); // A low surrogate must be followed by high one. - // (A low surrogate placed in the 7th register's word - // is an exception we handle.) - const uint16_t b = static_cast( - a << 1); // Just mark that the opinput - startite fact is hold, - // thanks to that we have only two masks for valid case. - const uint16_t c = static_cast( - V | a | b); // Combine all the masks into the final one. - - if (c == 0xffff) { - // The whole input register contains valid UTF-16, i.e., - // either single code units or proper surrogate pairs. - input += 16; - } else if (c == 0x7fff) { - // The 15 lower code units of the input register contains valid UTF-16. - // The 15th word may be either a low or high surrogate. It the next - // iteration we 1) check if the low surrogate is followed by a high - // one, 2) reject sole high surrogate. - input += 15; - } else { - return result(error_code::SURROGATE, input - start); - } - } - } - - return result(error_code::SUCCESS, input - start); -} - -} // namespace utf16 -} // unnamed namespace -} // namespace lasx -} // namespace simdutf -/* end file src/generic/validate_utf16.h */ -#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF32 -/* begin file src/generic/utf32.h */ -#include - -namespace simdutf { -namespace lasx { -namespace { -namespace utf32 { - -template T min(T a, T b) { return a <= b ? a : b; } - -simdutf_really_inline size_t utf8_length_from_utf32(const char32_t *input, - size_t length) { - using vector_u32 = simd32; - - const char32_t *start = input; - - // we add up to three ones in a single iteration (see the vectorized loop in - // section #2 below) - const size_t max_increment = 3; - - const size_t N = vector_u32::ELEMENTS; - -#if SIMDUTF_SIMD_HAS_UNSIGNED_CMP - const auto v_0000007f = vector_u32::splat(0x0000007f); - const auto v_000007ff = vector_u32::splat(0x000007ff); - const auto v_0000ffff = vector_u32::splat(0x0000ffff); -#else - const auto v_ffffff80 = vector_u32::splat(0xffffff80); - const auto v_fffff800 = vector_u32::splat(0xfffff800); - const auto v_ffff0000 = vector_u32::splat(0xffff0000); - const auto one = vector_u32::splat(1); -#endif // SIMDUTF_SIMD_HAS_UNSIGNED_CMP - - size_t counter = 0; - - // 1. vectorized loop unrolled 4 times - { - // we use vector of uint32 counters, this is why this limit is used - const size_t max_iterations = - std::numeric_limits::max() / (max_increment * 4); - size_t blocks = length / (N * 4); - length -= blocks * (N * 4); - while (blocks != 0) { - const size_t iterations = min(blocks, max_iterations); - blocks -= iterations; - - simd32 acc = vector_u32::zero(); - for (size_t i = 0; i < iterations; i++) { - const auto in0 = vector_u32(input + 0 * N); - const auto in1 = vector_u32(input + 1 * N); - const auto in2 = vector_u32(input + 2 * N); - const auto in3 = vector_u32(input + 3 * N); - -#if SIMDUTF_SIMD_HAS_UNSIGNED_CMP - acc -= as_vector_u32(in0 > v_0000007f); - acc -= as_vector_u32(in1 > v_0000007f); - acc -= as_vector_u32(in2 > v_0000007f); - acc -= as_vector_u32(in3 > v_0000007f); - - acc -= as_vector_u32(in0 > v_000007ff); - acc -= as_vector_u32(in1 > v_000007ff); - acc -= as_vector_u32(in2 > v_000007ff); - acc -= as_vector_u32(in3 > v_000007ff); - - acc -= as_vector_u32(in0 > v_0000ffff); - acc -= as_vector_u32(in1 > v_0000ffff); - acc -= as_vector_u32(in2 > v_0000ffff); - acc -= as_vector_u32(in3 > v_0000ffff); -#else - acc += min(one, in0 & v_ffffff80); - acc += min(one, in1 & v_ffffff80); - acc += min(one, in2 & v_ffffff80); - acc += min(one, in3 & v_ffffff80); - - acc += min(one, in0 & v_fffff800); - acc += min(one, in1 & v_fffff800); - acc += min(one, in2 & v_fffff800); - acc += min(one, in3 & v_fffff800); - - acc += min(one, in0 & v_ffff0000); - acc += min(one, in1 & v_ffff0000); - acc += min(one, in2 & v_ffff0000); - acc += min(one, in3 & v_ffff0000); -#endif // SIMDUTF_SIMD_HAS_UNSIGNED_CMP - - input += 4 * N; - } - - counter += acc.sum(); - } - } - - // 2. vectorized loop for tail - { - const size_t max_iterations = - std::numeric_limits::max() / max_increment; - size_t blocks = length / N; - length -= blocks * N; - while (blocks != 0) { - const size_t iterations = min(blocks, max_iterations); - blocks -= iterations; - - auto acc = vector_u32::zero(); - for (size_t i = 0; i < iterations; i++) { - const auto in = vector_u32(input); - -#if SIMDUTF_SIMD_HAS_UNSIGNED_CMP - acc -= as_vector_u32(in > v_0000007f); - acc -= as_vector_u32(in > v_000007ff); - acc -= as_vector_u32(in > v_0000ffff); -#else - acc += min(one, in & v_ffffff80); - acc += min(one, in & v_fffff800); - acc += min(one, in & v_ffff0000); -#endif // SIMDUTF_SIMD_HAS_UNSIGNED_CMP - - input += N; - } - - counter += acc.sum(); - } - } - - const size_t consumed = input - start; - if (consumed != 0) { - // We don't count 0th bytes in the vectorized loops above, this - // is why we need to count them in the end. - counter += consumed; - } - - return counter + scalar::utf32::utf8_length_from_utf32(input, length); -} - -} // namespace utf32 -} // unnamed namespace -} // namespace lasx -} // namespace simdutf -/* end file src/generic/utf32.h */ -#endif // SIMDUTF_FEATURE_UTF32 - -// -// Implementation-specific overrides -// -namespace simdutf { -namespace lasx { - -#if SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_warn_unused int -implementation::detect_encodings(const char *input, - size_t length) const noexcept { - // If there is a BOM, then we trust it. - auto bom_encoding = simdutf::BOM::check_bom(input, length); - // todo: reimplement as a one-pass algorithm. - if (bom_encoding != encoding_type::unspecified) { - return bom_encoding; - } - int out = 0; - if (validate_utf8(input, length)) { - out |= encoding_type::UTF8; - } - if ((length % 2) == 0) { - if (validate_utf16le(reinterpret_cast(input), - length / 2)) { - out |= encoding_type::UTF16_LE; - } - } - if ((length % 4) == 0) { - if (validate_utf32(reinterpret_cast(input), length / 4)) { - out |= encoding_type::UTF32_LE; - } - } - return out; -} -#endif // SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_warn_unused bool -implementation::validate_utf8(const char *buf, size_t len) const noexcept { - return lasx::utf8_validation::generic_validate_utf8(buf, len); -} -#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 -simdutf_warn_unused result implementation::validate_utf8_with_errors( - const char *buf, size_t len) const noexcept { - return lasx::utf8_validation::generic_validate_utf8_with_errors(buf, len); -} -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_ASCII -simdutf_warn_unused bool -implementation::validate_ascii(const char *buf, size_t len) const noexcept { - return lasx::ascii_validation::generic_validate_ascii(buf, len); -} - -simdutf_warn_unused result implementation::validate_ascii_with_errors( - const char *buf, size_t len) const noexcept { - return lasx::ascii_validation::generic_validate_ascii_with_errors(buf, len); -} -#endif // SIMDUTF_FEATURE_ASCII - -#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_warn_unused bool -implementation::validate_utf16le(const char16_t *buf, - size_t len) const noexcept { - if (simdutf_unlikely(len == 0)) { - // empty input is valid. protected the implementation from nullptr. - return true; - } - const auto res = - lasx::utf16::validate_utf16_with_errors(buf, len); - if (res.is_err()) { - return false; - } - - if (res.count != len) { - return scalar::utf16::validate(buf + res.count, - len - res.count); - } - - return true; -} -#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused bool -implementation::validate_utf16be(const char16_t *buf, - size_t len) const noexcept { - if (simdutf_unlikely(len == 0)) { - // empty input is valid. protected the implementation from nullptr. - return true; - } - - const auto res = - lasx::utf16::validate_utf16_with_errors(buf, len); - if (res.is_err()) { - return false; - } - - if (res.count != len) { - return scalar::utf16::validate(buf + res.count, - len - res.count); - } - - return true; -} - -simdutf_warn_unused result implementation::validate_utf16le_with_errors( - const char16_t *buf, size_t len) const noexcept { - if (simdutf_unlikely(len == 0)) { - return result(error_code::SUCCESS, 0); - } - const result res = - lasx::utf16::validate_utf16_with_errors(buf, len); - if (res.count != len) { - const result scalar_res = - scalar::utf16::validate_with_errors( - buf + res.count, len - res.count); - return result(scalar_res.error, res.count + scalar_res.count); - } else { - return res; - } -} - -simdutf_warn_unused result implementation::validate_utf16be_with_errors( - const char16_t *buf, size_t len) const noexcept { - if (simdutf_unlikely(len == 0)) { - return result(error_code::SUCCESS, 0); - } - const result res = - lasx::utf16::validate_utf16_with_errors(buf, len); - if (res.count != len) { - const result scalar_res = - scalar::utf16::validate_with_errors(buf + res.count, - len - res.count); - return result(scalar_res.error, res.count + scalar_res.count); - } else { - return res; - } -} - -void implementation::to_well_formed_utf16le(const char16_t *input, size_t len, - char16_t *output) const noexcept { - return scalar::utf16::to_well_formed_utf16(input, len, - output); -} - -void implementation::to_well_formed_utf16be(const char16_t *input, size_t len, - char16_t *output) const noexcept { - return scalar::utf16::to_well_formed_utf16(input, len, - output); -} -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING -simdutf_warn_unused bool -implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept { - if (simdutf_unlikely(len == 0)) { - // empty input is valid. protected the implementation from nullptr. - return true; - } - const char32_t *tail = lasx_validate_utf32le(buf, len); - if (tail) { - return scalar::utf32::validate(tail, len - (tail - buf)); - } else { - return false; - } -} -#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused result implementation::validate_utf32_with_errors( - const char32_t *buf, size_t len) const noexcept { - if (simdutf_unlikely(len == 0)) { - return result(error_code::SUCCESS, 0); - } - result res = lasx_validate_utf32le_with_errors(buf, len); - if (res.count != len) { - result scalar_res = - scalar::utf32::validate_with_errors(buf + res.count, len - res.count); - return result(scalar_res.error, res.count + scalar_res.count); - } else { - return res; - } -} -#endif // SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_latin1_to_utf8( - const char *buf, size_t len, char *utf8_output) const noexcept { - std::pair ret = - lasx_convert_latin1_to_utf8(buf, len, utf8_output); - size_t converted_chars = ret.second - utf8_output; - - if (ret.first != buf + len) { - const size_t scalar_converted_chars = scalar::latin1_to_utf8::convert( - ret.first, len - (ret.first - buf), ret.second); - converted_chars += scalar_converted_chars; - } - return converted_chars; -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - std::pair ret = - lasx_convert_latin1_to_utf16le(buf, len, utf16_output); - size_t converted_chars = ret.second - utf16_output; - if (ret.first != buf + len) { - const size_t scalar_converted_chars = - scalar::latin1_to_utf16::convert( - ret.first, len - (ret.first - buf), ret.second); - converted_chars += scalar_converted_chars; - } - return converted_chars; -} - -simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - std::pair ret = - lasx_convert_latin1_to_utf16be(buf, len, utf16_output); - size_t converted_chars = ret.second - utf16_output; - if (ret.first != buf + len) { - const size_t scalar_converted_chars = - scalar::latin1_to_utf16::convert( - ret.first, len - (ret.first - buf), ret.second); - converted_chars += scalar_converted_chars; - } - return converted_chars; -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_latin1_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept { - std::pair ret = - lasx_convert_latin1_to_utf32(buf, len, utf32_output); - size_t converted_chars = ret.second - utf32_output; - if (ret.first != buf + len) { - const size_t scalar_converted_chars = scalar::latin1_to_utf32::convert( - ret.first, len - (ret.first - buf), ret.second); - converted_chars += scalar_converted_chars; - } - return converted_chars; -} -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept { - size_t pos = 0; - char *output_start{latin1_output}; - // Performance degradation when memory address is not 32-byte aligned - while (((uint64_t)latin1_output & 0x1F) && pos < len) { - if (buf[pos] & 0x80) { - if (pos + 1 >= len) - return 0; - if ((buf[pos] & 0b11100000) == 0b11000000) { - if ((buf[pos + 1] & 0b11000000) != 0b10000000) - return 0; - uint32_t code_point = - (buf[pos] & 0b00011111) << 6 | (buf[pos + 1] & 0b00111111); - if (code_point < 0x80 || 0xFF < code_point) { - return 0; - } - *latin1_output++ = char(code_point); - pos += 2; - } else { - return 0; - } - } else { - *latin1_output++ = char(buf[pos]); - pos++; - } - } - size_t convert_size = latin1_output - output_start; - if (pos == len) - return convert_size; - utf8_to_latin1::validating_transcoder converter; - size_t convert_result = - converter.convert(buf + pos, len - pos, latin1_output); - return convert_result ? convert_size + convert_result : 0; -} - -simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors( - const char *buf, size_t len, char *latin1_output) const noexcept { - size_t pos = 0; - char *output_start{latin1_output}; - // Performance degradation when memory address is not 32-byte aligned - while (((uint64_t)latin1_output & 0x1F) && pos < len) { - if (buf[pos] & 0x80) { - if ((buf[pos] & 0b11100000) == 0b11000000) { - if (pos + 1 >= len) - return result(error_code::TOO_SHORT, pos); - if ((buf[pos + 1] & 0b11000000) != 0b10000000) - return result(error_code::TOO_SHORT, pos); - uint32_t code_point = - (buf[pos] & 0b00011111) << 6 | (buf[pos + 1] & 0b00111111); - if (code_point < 0x80) - return result(error_code::OVERLONG, pos); - if (0xFF < code_point) - return result(error_code::TOO_LARGE, pos); - *latin1_output++ = char(code_point); - pos += 2; - } else if ((buf[pos] & 0b11110000) == 0b11100000) { - return result(error_code::TOO_LARGE, pos); - } else if ((buf[pos] & 0b11111000) == 0b11110000) { - return result(error_code::TOO_LARGE, pos); - } else { - if ((buf[pos] & 0b11000000) == 0b10000000) { - return result(error_code::TOO_LONG, pos); - } - return result(error_code::HEADER_BITS, pos); - } - } else { - *latin1_output++ = char(buf[pos]); - pos++; - } - } - size_t convert_size = latin1_output - output_start; - if (pos == len) - return result(error_code::SUCCESS, convert_size); - - utf8_to_latin1::validating_transcoder converter; - result res = - converter.convert_with_errors(buf + pos, len - pos, latin1_output); - return res.error ? result(res.error, res.count + pos) - : result(res.error, res.count + convert_size); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1( - const char *buf, size_t len, char *latin1_output) const noexcept { - size_t pos = 0; - char *output_start{latin1_output}; - // Performance degradation when memory address is not 32-byte aligned - while (((uint64_t)latin1_output & 0x1F) && pos < len) { - if (buf[pos] & 0x80) { - if (pos + 1 >= len) - break; - if ((buf[pos] & 0b11100000) == 0b11000000) { - if ((buf[pos + 1] & 0b11000000) != 0b10000000) - return 0; - uint32_t code_point = - (buf[pos] & 0b00011111) << 6 | (buf[pos + 1] & 0b00111111); - *latin1_output++ = char(code_point); - pos += 2; - } else { - return 0; - } - } else { - *latin1_output++ = char(buf[pos]); - pos++; - } - } - size_t convert_size = latin1_output - output_start; - if (pos == len) - return convert_size; - - size_t convert_result = - lasx::utf8_to_latin1::convert_valid(buf + pos, len - pos, latin1_output); - return convert_result ? convert_size + convert_result : 0; -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - utf8_to_utf16::validating_transcoder converter; - return converter.convert(buf, len, utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - utf8_to_utf16::validating_transcoder converter; - return converter.convert(buf, len, utf16_output); -} - -simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - utf8_to_utf16::validating_transcoder converter; - return converter.convert_with_errors(buf, len, - utf16_output); -} - -simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors( - const char *buf, size_t len, char16_t *utf16_output) const noexcept { - utf8_to_utf16::validating_transcoder converter; - return converter.convert_with_errors(buf, len, utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le( - const char *input, size_t size, char16_t *utf16_output) const noexcept { - return utf8_to_utf16::convert_valid(input, size, - utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be( - const char *input, size_t size, char16_t *utf16_output) const noexcept { - return utf8_to_utf16::convert_valid(input, size, - utf16_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::convert_utf8_to_utf32( - const char *buf, size_t len, char32_t *utf32_output) const noexcept { - utf8_to_utf32::validating_transcoder converter; - return converter.convert(buf, len, utf32_output); -} - -simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors( - const char *buf, size_t len, char32_t *utf32_output) const noexcept { - utf8_to_utf32::validating_transcoder converter; - return converter.convert_with_errors(buf, len, utf32_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32( - const char *input, size_t size, char32_t *utf32_output) const noexcept { - return utf8_to_utf32::convert_valid(input, size, utf32_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - std::pair ret = - lasx_convert_utf16_to_latin1(buf, len, latin1_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - latin1_output; - - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_latin1::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - std::pair ret = - lasx_convert_utf16_to_latin1(buf, len, latin1_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - latin1_output; - - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_latin1::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused result -implementation::convert_utf16le_to_latin1_with_errors( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - std::pair ret = - lasx_convert_utf16_to_latin1_with_errors( - buf, len, latin1_output); - if (ret.first.error) { - return ret.first; - } // Can return directly since scalar fallback already found correct - // ret.first.count - if (ret.first.count != len) { // All good so far, but not finished - result scalar_res = - scalar::utf16_to_latin1::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - latin1_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused result -implementation::convert_utf16be_to_latin1_with_errors( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - std::pair ret = - lasx_convert_utf16_to_latin1_with_errors(buf, len, - latin1_output); - if (ret.first.error) { - return ret.first; - } // Can return directly since scalar fallback already found correct - // ret.first.count - if (ret.first.count != len) { // All good so far, but not finished - result scalar_res = - scalar::utf16_to_latin1::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - latin1_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - // optimization opportunity: implement a custom function. - return convert_utf16be_to_latin1(buf, len, latin1_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1( - const char16_t *buf, size_t len, char *latin1_output) const noexcept { - // optimization opportunity: implement a custom function. - return convert_utf16le_to_latin1(buf, len, latin1_output); -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - std::pair ret = - lasx_convert_utf16_to_utf8(buf, len, utf8_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf8_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_utf8::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - std::pair ret = - lasx_convert_utf16_to_utf8(buf, len, utf8_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf8_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_utf8::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - lasx_convert_utf16_to_utf8_with_errors(buf, len, - utf8_output); - if (ret.first.error) { - return ret.first; - } // Can return directly since scalar fallback already found correct - // ret.first.count - if (ret.first.count != len) { // All good so far, but not finished - result scalar_res = - scalar::utf16_to_utf8::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - utf8_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - lasx_convert_utf16_to_utf8_with_errors(buf, len, - utf8_output); - if (ret.first.error) { - return ret.first; - } // Can return directly since scalar fallback already found correct - // ret.first.count - if (ret.first.count != len) { // All good so far, but not finished - result scalar_res = - scalar::utf16_to_utf8::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - utf8_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return convert_utf16le_to_utf8(buf, len, utf8_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8( - const char16_t *buf, size_t len, char *utf8_output) const noexcept { - return convert_utf16be_to_utf8(buf, len, utf8_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::convert_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_output) const noexcept { - if (simdutf_unlikely(len == 0)) { - return 0; - } - std::pair ret = - lasx_convert_utf32_to_utf8(buf, len, utf8_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf8_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = scalar::utf32_to_utf8::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors( - const char32_t *buf, size_t len, char *utf8_output) const noexcept { - if (simdutf_unlikely(len == 0)) { - return result(error_code::SUCCESS, 0); - } - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - lasx_convert_utf32_to_utf8_with_errors(buf, len, utf8_output); - if (ret.first.count != len) { - result scalar_res = scalar::utf32_to_utf8::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - utf8_output; // Set count to the number of 8-bit code units written - return ret.first; -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - std::pair ret = - lasx_convert_utf16_to_utf32(buf, len, utf32_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf32_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_utf32::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - std::pair ret = - lasx_convert_utf16_to_utf32(buf, len, utf32_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf32_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf16_to_utf32::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - lasx_convert_utf16_to_utf32_with_errors(buf, len, - utf32_output); - if (ret.first.error) { - return ret.first; - } // Can return directly since scalar fallback already found correct - // ret.first.count - if (ret.first.count != len) { // All good so far, but not finished - result scalar_res = - scalar::utf16_to_utf32::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - utf32_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - lasx_convert_utf16_to_utf32_with_errors(buf, len, - utf32_output); - if (ret.first.error) { - return ret.first; - } // Can return directly since scalar fallback already found correct - // ret.first.count - if (ret.first.count != len) { // All good so far, but not finished - result scalar_res = - scalar::utf16_to_utf32::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - utf32_output; // Set count to the number of 8-bit code units written - return ret.first; -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::convert_utf32_to_latin1( - const char32_t *buf, size_t len, char *latin1_output) const noexcept { - std::pair ret = - lasx_convert_utf32_to_latin1(buf, len, latin1_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - latin1_output; - - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = scalar::utf32_to_latin1::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors( - const char32_t *buf, size_t len, char *latin1_output) const noexcept { - std::pair ret = - lasx_convert_utf32_to_latin1_with_errors(buf, len, latin1_output); - if (ret.first.error) { - return ret.first; - } // Can return directly since scalar fallback already found correct - // ret.first.count - if (ret.first.count != len) { // All good so far, but not finished - result scalar_res = scalar::utf32_to_latin1::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - latin1_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1( - const char32_t *buf, size_t len, char *latin1_output) const noexcept { - std::pair ret = - lasx_convert_utf32_to_latin1(buf, len, latin1_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - latin1_output; - - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = scalar::utf32_to_latin1::convert_valid( - ret.first, len - (ret.first - buf), ret.second); - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8( - const char32_t *buf, size_t len, char *utf8_output) const noexcept { - // optimization opportunity: implement a custom function. - return convert_utf32_to_utf8(buf, len, utf8_output); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - std::pair ret = - lasx_convert_utf32_to_utf16(buf, len, utf16_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf16_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf32_to_utf16::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - - return saved_bytes; -} - -simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - std::pair ret = - lasx_convert_utf32_to_utf16(buf, len, utf16_output); - if (ret.first == nullptr) { - return 0; - } - size_t saved_bytes = ret.second - utf16_output; - if (ret.first != buf + len) { - const size_t scalar_saved_bytes = - scalar::utf32_to_utf16::convert( - ret.first, len - (ret.first - buf), ret.second); - if (scalar_saved_bytes == 0) { - return 0; - } - saved_bytes += scalar_saved_bytes; - } - return saved_bytes; -} - -simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - lasx_convert_utf32_to_utf16_with_errors(buf, len, - utf16_output); - if (ret.first.count != len) { - result scalar_res = - scalar::utf32_to_utf16::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - utf16_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - // ret.first.count is always the position in the buffer, not the number of - // code units written even if finished - std::pair ret = - lasx_convert_utf32_to_utf16_with_errors(buf, len, - utf16_output); - if (ret.first.count != len) { - result scalar_res = - scalar::utf32_to_utf16::convert_with_errors( - buf + ret.first.count, len - ret.first.count, ret.second); - if (scalar_res.error) { - scalar_res.count += ret.first.count; - return scalar_res; - } else { - ret.second += scalar_res.count; - } - } - ret.first.count = - ret.second - - utf16_output; // Set count to the number of 8-bit code units written - return ret.first; -} - -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - return convert_utf32_to_utf16le(buf, len, utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be( - const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept { - return convert_utf32_to_utf16be(buf, len, utf16_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return convert_utf16le_to_utf32(buf, len, utf32_output); -} - -simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32( - const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept { - return convert_utf16be_to_utf32(buf, len, utf32_output); -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 -void implementation::change_endianness_utf16(const char16_t *input, - size_t length, - char16_t *output) const noexcept { - utf16::change_endianness_utf16(input, length, output); -} - -simdutf_warn_unused size_t implementation::count_utf16le( - const char16_t *input, size_t length) const noexcept { - return utf16::count_code_points(input, length); -} - -simdutf_warn_unused size_t implementation::count_utf16be( - const char16_t *input, size_t length) const noexcept { - return utf16::count_code_points(input, length); -} -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 -simdutf_warn_unused size_t -implementation::count_utf8(const char *input, size_t length) const noexcept { - size_t pos = 0; - size_t count = 0; - // Performance degradation when memory address is not 32-byte aligned - while ((((uint64_t)input + pos) & 0x1F && pos < length)) { - if (input[pos++] > -65) { - count++; - } - } - __m256i v_bf = __lasx_xvldi(0xBF); // 0b10111111 - for (; pos + 32 <= length; pos += 32) { - __m256i in = __lasx_xvld(reinterpret_cast(input + pos), 0); - __m256i utf8_count = - __lasx_xvpcnt_h(__lasx_xvmskltz_b(__lasx_xvslt_b(v_bf, in))); - count = count + __lasx_xvpickve2gr_wu(utf8_count, 0) + - __lasx_xvpickve2gr_wu(utf8_count, 4); - } - return count + scalar::utf8::count_code_points(input + pos, length - pos); -} -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::latin1_length_from_utf8( - const char *buf, size_t len) const noexcept { - return count_utf8(buf, len); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -simdutf_warn_unused size_t implementation::utf8_length_from_latin1( - const char *input, size_t length) const noexcept { - const uint8_t *data = reinterpret_cast(input); - const uint8_t *data_end = data + length; - uint64_t result = 0; - while (data_end - data > 16) { - uint64_t two_bytes = 0; - __m128i input_vec = __lsx_vld(data, 0); - two_bytes = - __lsx_vpickve2gr_hu(__lsx_vpcnt_h(__lsx_vmskltz_b(input_vec)), 0); - result += 16 + two_bytes; - data += 16; - } - return result + scalar::latin1::utf8_length_from_latin1((const char *)data, - data_end - data); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t implementation::utf8_length_from_utf16le( - const char16_t *input, size_t length) const noexcept { - return utf16::utf8_length_from_utf16_bytemask(input, - length); -} - -simdutf_warn_unused size_t implementation::utf8_length_from_utf16be( - const char16_t *input, size_t length) const noexcept { - return utf16::utf8_length_from_utf16_bytemask(input, length); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::utf32_length_from_utf16le( - const char16_t *input, size_t length) const noexcept { - return utf16::utf32_length_from_utf16(input, length); -} - -simdutf_warn_unused size_t implementation::utf32_length_from_utf16be( - const char16_t *input, size_t length) const noexcept { - return utf16::utf32_length_from_utf16(input, length); -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -simdutf_warn_unused size_t implementation::utf16_length_from_utf8( - const char *input, size_t length) const noexcept { - return utf8::utf16_length_from_utf8_bytemask(input, length); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::utf8_length_from_utf32( - const char32_t *input, size_t length) const noexcept { - return utf32::utf8_length_from_utf32(input, length); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::utf16_length_from_utf32( - const char32_t *input, size_t length) const noexcept { - __m128i v_ffff = lsx_splat_u32(0x0000ffff); - size_t pos = 0; - size_t count = 0; - for (; pos + 4 <= length; pos += 4) { - __m128i in = __lsx_vld(reinterpret_cast(input + pos), 0); - __m128i surrogate_bytemask = __lsx_vslt_wu(v_ffff, in); - size_t surrogate_count = __lsx_vpickve2gr_bu( - __lsx_vpcnt_b(__lsx_vmskltz_w(surrogate_bytemask)), 0); - count += 4 + surrogate_count; - } - return count + - scalar::utf32::utf16_length_from_utf32(input + pos, length - pos); -} -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -simdutf_warn_unused size_t implementation::utf32_length_from_utf8( - const char *input, size_t length) const noexcept { - return utf8::count_code_points(input, length); -} -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_BASE64 -simdutf_warn_unused result implementation::base64_to_binary( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - if (options & base64_url) { - if (options == base64_options::base64_url_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } else { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } - } else { - if (options == base64_options::base64_default_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } else { - return compress_decode_base64(output, input, length, - options, last_chunk_options); - } - } -} - -simdutf_warn_unused full_result implementation::base64_to_binary_details( - const char *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - if (options & base64_url) { - if (options == base64_options::base64_url_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } else { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } - } else { - if (options == base64_options::base64_default_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } else { - return compress_decode_base64(output, input, length, - options, last_chunk_options); - } - } -} - -simdutf_warn_unused result implementation::base64_to_binary( - const char16_t *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - if (options & base64_url) { - if (options == base64_options::base64_url_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } else { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } - } else { - if (options == base64_options::base64_default_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } else { - return compress_decode_base64(output, input, length, - options, last_chunk_options); - } - } -} - -simdutf_warn_unused full_result implementation::base64_to_binary_details( - const char16_t *input, size_t length, char *output, base64_options options, - last_chunk_handling_options last_chunk_options) const noexcept { - if (options & base64_url) { - if (options == base64_options::base64_url_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } else { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } - } else { - if (options == base64_options::base64_default_accept_garbage) { - return compress_decode_base64(output, input, length, options, - last_chunk_options); - } else { - return compress_decode_base64(output, input, length, - options, last_chunk_options); - } - } -} - -size_t implementation::binary_to_base64(const char *input, size_t length, - char *output, - base64_options options) const noexcept { - if (options & base64_url) { - return encode_base64(output, input, length, options); - } else { - return encode_base64(output, input, length, options); - } -} -#endif // SIMDUTF_FEATURE_BASE64 -} // namespace lasx -} // namespace simdutf - -/* begin file src/simdutf/lasx/end.h */ -#undef SIMDUTF_SIMD_HAS_UNSIGNED_CMP -/* end file src/simdutf/lasx/end.h */ -/* end file src/lasx/implementation.cpp */ -#endif - -SIMDUTF_POP_DISABLE_WARNINGS -/* end file src/simdutf.cpp */ diff --git a/deps/simdutf/simdutf.gyp b/deps/simdutf/simdutf.gyp deleted file mode 100644 index ca8cd2fa7bc..00000000000 --- a/deps/simdutf/simdutf.gyp +++ /dev/null @@ -1,21 +0,0 @@ -{ - 'variables': { - 'simdutf_sources': [ - 'simdutf.cpp', - ] - }, - 'targets': [ - { - 'target_name': 'simdutf', - 'toolsets': ['host', 'target'], - 'type': 'static_library', - 'include_dirs': ['.'], - 'direct_dependent_settings': { - 'include_dirs': ['.'], - }, - 'sources': [ - '<@(simdutf_sources)', - ], - }, - ] -} diff --git a/deps/simdutf/simdutf.h b/deps/simdutf/simdutf.h deleted file mode 100644 index 877c7d84ada..00000000000 --- a/deps/simdutf/simdutf.h +++ /dev/null @@ -1,6085 +0,0 @@ -/* auto-generated on 2025-04-14 21:04:55 -0400. Do not edit! */ -/* begin file include/simdutf.h */ -#ifndef SIMDUTF_H -#define SIMDUTF_H -#include - -/* begin file include/simdutf/compiler_check.h */ -#ifndef SIMDUTF_COMPILER_CHECK_H -#define SIMDUTF_COMPILER_CHECK_H - -#ifndef __cplusplus - #error simdutf requires a C++ compiler -#endif - -#ifndef SIMDUTF_CPLUSPLUS - #if defined(_MSVC_LANG) && !defined(__clang__) - #define SIMDUTF_CPLUSPLUS (_MSC_VER == 1900 ? 201103L : _MSVC_LANG) - #else - #define SIMDUTF_CPLUSPLUS __cplusplus - #endif -#endif - -// C++ 23 -#if !defined(SIMDUTF_CPLUSPLUS23) && (SIMDUTF_CPLUSPLUS >= 202302L) - #define SIMDUTF_CPLUSPLUS23 1 -#endif - -// C++ 20 -#if !defined(SIMDUTF_CPLUSPLUS20) && (SIMDUTF_CPLUSPLUS >= 202002L) - #define SIMDUTF_CPLUSPLUS20 1 -#endif - -// C++ 17 -#if !defined(SIMDUTF_CPLUSPLUS17) && (SIMDUTF_CPLUSPLUS >= 201703L) - #define SIMDUTF_CPLUSPLUS17 1 -#endif - -// C++ 14 -#if !defined(SIMDUTF_CPLUSPLUS14) && (SIMDUTF_CPLUSPLUS >= 201402L) - #define SIMDUTF_CPLUSPLUS14 1 -#endif - -// C++ 11 -#if !defined(SIMDUTF_CPLUSPLUS11) && (SIMDUTF_CPLUSPLUS >= 201103L) - #define SIMDUTF_CPLUSPLUS11 1 -#endif - -#ifndef SIMDUTF_CPLUSPLUS11 - #error simdutf requires a compiler compliant with the C++11 standard -#endif - -#endif // SIMDUTF_COMPILER_CHECK_H -/* end file include/simdutf/compiler_check.h */ -/* begin file include/simdutf/common_defs.h */ -#ifndef SIMDUTF_COMMON_DEFS_H -#define SIMDUTF_COMMON_DEFS_H - -/* begin file include/simdutf/portability.h */ -#ifndef SIMDUTF_PORTABILITY_H -#define SIMDUTF_PORTABILITY_H - - -#include -#include -#include -#include -#ifndef _WIN32 - // strcasecmp, strncasecmp - #include -#endif - -#if defined(__apple_build_version__) - #if __apple_build_version__ < 14000000 - #define SIMDUTF_SPAN_DISABLED \ - 1 // apple-clang/13 doesn't support std::convertible_to - #endif -#endif - -#if SIMDUTF_CPLUSPLUS20 - #include - #if __cpp_concepts >= 201907L && __cpp_lib_span >= 202002L && \ - !defined(SIMDUTF_SPAN_DISABLED) - #define SIMDUTF_SPAN 1 - #endif // __cpp_concepts >= 201907L && __cpp_lib_span >= 202002L - #if __cpp_lib_atomic_ref >= 201806L - #define SIMDUTF_ATOMIC_REF 1 - #endif // __cpp_lib_atomic_ref -#endif - -/** - * We want to check that it is actually a little endian system at - * compile-time. - */ - -#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) - #define SIMDUTF_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) -#elif defined(_WIN32) - #define SIMDUTF_IS_BIG_ENDIAN 0 -#else - #if defined(__APPLE__) || \ - defined(__FreeBSD__) // defined __BYTE_ORDER__ && defined - // __ORDER_BIG_ENDIAN__ - #include - #elif defined(sun) || \ - defined(__sun) // defined(__APPLE__) || defined(__FreeBSD__) - #include - #else // defined(__APPLE__) || defined(__FreeBSD__) - - #ifdef __has_include - #if __has_include() - #include - #endif //__has_include() - #endif //__has_include - - #endif // defined(__APPLE__) || defined(__FreeBSD__) - - #ifndef !defined(__BYTE_ORDER__) || !defined(__ORDER_LITTLE_ENDIAN__) - #define SIMDUTF_IS_BIG_ENDIAN 0 - #endif - - #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - #define SIMDUTF_IS_BIG_ENDIAN 0 - #else // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - #define SIMDUTF_IS_BIG_ENDIAN 1 - #endif // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - -#endif // defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__ - -/** - * At this point in time, SIMDUTF_IS_BIG_ENDIAN is defined. - */ - -#ifdef _MSC_VER - #define SIMDUTF_VISUAL_STUDIO 1 - /** - * We want to differentiate carefully between - * clang under visual studio and regular visual - * studio. - * - * Under clang for Windows, we enable: - * * target pragmas so that part and only part of the - * code gets compiled for advanced instructions. - * - */ - #ifdef __clang__ - // clang under visual studio - #define SIMDUTF_CLANG_VISUAL_STUDIO 1 - #else - // just regular visual studio (best guess) - #define SIMDUTF_REGULAR_VISUAL_STUDIO 1 - #endif // __clang__ -#endif // _MSC_VER - -#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO - // https://en.wikipedia.org/wiki/C_alternative_tokens - // This header should have no effect, except maybe - // under Visual Studio. - #include -#endif - -#if (defined(__x86_64__) || defined(_M_AMD64)) && !defined(_M_ARM64EC) - #define SIMDUTF_IS_X86_64 1 -#elif defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) - #define SIMDUTF_IS_ARM64 1 -#elif defined(__PPC64__) || defined(_M_PPC64) - #if defined(__VEC__) && defined(__ALTIVEC__) - #define SIMDUTF_IS_PPC64 1 - #endif -#elif defined(__s390__) -// s390 IBM system. Big endian. -#elif (defined(__riscv) || defined(__riscv__)) && __riscv_xlen == 64 - // RISC-V 64-bit - #define SIMDUTF_IS_RISCV64 1 - - // #if __riscv_v_intrinsic >= 1000000 - // #define SIMDUTF_HAS_RVV_INTRINSICS 1 - // #define SIMDUTF_HAS_RVV_TARGET_REGION 1 - // #elif ... - // Check for special compiler versions that implement pre v1.0 intrinsics - #if __riscv_v_intrinsic >= 11000 - #define SIMDUTF_HAS_RVV_INTRINSICS 1 - #endif - - #define SIMDUTF_HAS_ZVBB_INTRINSICS \ - 0 // there is currently no way to detect this - - #if SIMDUTF_HAS_RVV_INTRINSICS && __riscv_vector && \ - __riscv_v_min_vlen >= 128 && __riscv_v_elen >= 64 - // RISC-V V extension - #define SIMDUTF_IS_RVV 1 - #if SIMDUTF_HAS_ZVBB_INTRINSICS && __riscv_zvbb >= 1000000 - // RISC-V Vector Basic Bit-manipulation - #define SIMDUTF_IS_ZVBB 1 - #endif - #endif - -#elif defined(__loongarch_lp64) - #if defined(__loongarch_sx) && defined(__loongarch_asx) - #define SIMDUTF_IS_LSX 1 - #define SIMDUTF_IS_LASX 1 - #elif defined(__loongarch_sx) - #define SIMDUTF_IS_LSX 1 - #endif -#else - // The simdutf library is designed - // for 64-bit processors and it seems that you are not - // compiling for a known 64-bit platform. Please - // use a 64-bit target such as x64 or 64-bit ARM for best performance. - #define SIMDUTF_IS_32BITS 1 - - // We do not support 32-bit platforms, but it can be - // handy to identify them. - #if defined(_M_IX86) || defined(__i386__) - #define SIMDUTF_IS_X86_32BITS 1 - #elif defined(__arm__) || defined(_M_ARM) - #define SIMDUTF_IS_ARM_32BITS 1 - #elif defined(__PPC__) || defined(_M_PPC) - #define SIMDUTF_IS_PPC_32BITS 1 - #endif - -#endif // defined(__x86_64__) || defined(_M_AMD64) - -#ifdef SIMDUTF_IS_32BITS - #ifndef SIMDUTF_NO_PORTABILITY_WARNING - // In the future, we may want to warn users of 32-bit systems that - // the simdutf does not support accelerated kernels for such systems. - #endif // SIMDUTF_NO_PORTABILITY_WARNING -#endif // SIMDUTF_IS_32BITS - -// this is almost standard? -#define SIMDUTF_STRINGIFY_IMPLEMENTATION_(a) #a -#define SIMDUTF_STRINGIFY(a) SIMDUTF_STRINGIFY_IMPLEMENTATION_(a) - -// Our fast kernels require 64-bit systems. -// -// On 32-bit x86, we lack 64-bit popcnt, lzcnt, blsr instructions. -// Furthermore, the number of SIMD registers is reduced. -// -// On 32-bit ARM, we would have smaller registers. -// -// The simdutf users should still have the fallback kernel. It is -// slower, but it should run everywhere. - -// -// Enable valid runtime implementations, and select -// SIMDUTF_BUILTIN_IMPLEMENTATION -// - -// We are going to use runtime dispatch. -#ifdef SIMDUTF_IS_X86_64 - #ifdef __clang__ - // clang does not have GCC push pop - // warning: clang attribute push can't be used within a namespace in clang - // up til 8.0 so SIMDUTF_TARGET_REGION and SIMDUTF_UNTARGET_REGION must be - // *outside* of a namespace. - #define SIMDUTF_TARGET_REGION(T) \ - _Pragma(SIMDUTF_STRINGIFY(clang attribute push( \ - __attribute__((target(T))), apply_to = function))) - #define SIMDUTF_UNTARGET_REGION _Pragma("clang attribute pop") - #elif defined(__GNUC__) - // GCC is easier - #define SIMDUTF_TARGET_REGION(T) \ - _Pragma("GCC push_options") _Pragma(SIMDUTF_STRINGIFY(GCC target(T))) - #define SIMDUTF_UNTARGET_REGION _Pragma("GCC pop_options") - #endif // clang then gcc - -#endif // x86 - -// Default target region macros don't do anything. -#ifndef SIMDUTF_TARGET_REGION - #define SIMDUTF_TARGET_REGION(T) - #define SIMDUTF_UNTARGET_REGION -#endif - -// Is threading enabled? -#if defined(_REENTRANT) || defined(_MT) - #ifndef SIMDUTF_THREADS_ENABLED - #define SIMDUTF_THREADS_ENABLED - #endif -#endif - -// workaround for large stack sizes under -O0. -// https://github.com/simdutf/simdutf/issues/691 -#ifdef __APPLE__ - #ifndef __OPTIMIZE__ - // Apple systems have small stack sizes in secondary threads. - // Lack of compiler optimization may generate high stack usage. - // Users may want to disable threads for safety, but only when - // in debug mode which we detect by the fact that the __OPTIMIZE__ - // macro is not defined. - #undef SIMDUTF_THREADS_ENABLED - #endif -#endif - -#ifdef SIMDUTF_VISUAL_STUDIO - // This is one case where we do not distinguish between - // regular visual studio and clang under visual studio. - // clang under Windows has _stricmp (like visual studio) but not strcasecmp - // (as clang normally has) - #define simdutf_strcasecmp _stricmp - #define simdutf_strncasecmp _strnicmp -#else - // The strcasecmp, strncasecmp, and strcasestr functions do not work with - // multibyte strings (e.g. UTF-8). So they are only useful for ASCII in our - // context. - // https://www.gnu.org/software/libunistring/manual/libunistring.html#char-_002a-strings - #define simdutf_strcasecmp strcasecmp - #define simdutf_strncasecmp strncasecmp -#endif - -#if defined(__GNUC__) && !defined(__clang__) - #if __GNUC__ >= 11 - #define SIMDUTF_GCC11ORMORE 1 - #endif // __GNUC__ >= 11 -#endif // defined(__GNUC__) && !defined(__clang__) - -#endif // SIMDUTF_PORTABILITY_H -/* end file include/simdutf/portability.h */ -/* begin file include/simdutf/avx512.h */ -#ifndef SIMDUTF_AVX512_H_ -#define SIMDUTF_AVX512_H_ - -/* - It's possible to override AVX512 settings with cmake DCMAKE_CXX_FLAGS. - - All preprocessor directives has form `SIMDUTF_HAS_AVX512{feature}`, - where a feature is a code name for extensions. - - Please see the listing below to find which are supported. -*/ - -#ifndef SIMDUTF_HAS_AVX512F - #if defined(__AVX512F__) && __AVX512F__ == 1 - #define SIMDUTF_HAS_AVX512F 1 - #endif -#endif - -#ifndef SIMDUTF_HAS_AVX512DQ - #if defined(__AVX512DQ__) && __AVX512DQ__ == 1 - #define SIMDUTF_HAS_AVX512DQ 1 - #endif -#endif - -#ifndef SIMDUTF_HAS_AVX512IFMA - #if defined(__AVX512IFMA__) && __AVX512IFMA__ == 1 - #define SIMDUTF_HAS_AVX512IFMA 1 - #endif -#endif - -#ifndef SIMDUTF_HAS_AVX512CD - #if defined(__AVX512CD__) && __AVX512CD__ == 1 - #define SIMDUTF_HAS_AVX512CD 1 - #endif -#endif - -#ifndef SIMDUTF_HAS_AVX512BW - #if defined(__AVX512BW__) && __AVX512BW__ == 1 - #define SIMDUTF_HAS_AVX512BW 1 - #endif -#endif - -#ifndef SIMDUTF_HAS_AVX512VL - #if defined(__AVX512VL__) && __AVX512VL__ == 1 - #define SIMDUTF_HAS_AVX512VL 1 - #endif -#endif - -#ifndef SIMDUTF_HAS_AVX512VBMI - #if defined(__AVX512VBMI__) && __AVX512VBMI__ == 1 - #define SIMDUTF_HAS_AVX512VBMI 1 - #endif -#endif - -#ifndef SIMDUTF_HAS_AVX512VBMI2 - #if defined(__AVX512VBMI2__) && __AVX512VBMI2__ == 1 - #define SIMDUTF_HAS_AVX512VBMI2 1 - #endif -#endif - -#ifndef SIMDUTF_HAS_AVX512VNNI - #if defined(__AVX512VNNI__) && __AVX512VNNI__ == 1 - #define SIMDUTF_HAS_AVX512VNNI 1 - #endif -#endif - -#ifndef SIMDUTF_HAS_AVX512BITALG - #if defined(__AVX512BITALG__) && __AVX512BITALG__ == 1 - #define SIMDUTF_HAS_AVX512BITALG 1 - #endif -#endif - -#ifndef SIMDUTF_HAS_AVX512VPOPCNTDQ - #if defined(__AVX512VPOPCNTDQ__) && __AVX512VPOPCNTDQ__ == 1 - #define SIMDUTF_HAS_AVX512VPOPCNTDQ 1 - #endif -#endif - -#endif // SIMDUTF_AVX512_H_ -/* end file include/simdutf/avx512.h */ - -#if defined(SIMDUTF_REGULAR_VISUAL_STUDIO) - #define SIMDUTF_DEPRECATED __declspec(deprecated) - - #define simdutf_really_inline __forceinline // really inline in release mode - #define simdutf_always_inline __forceinline // always inline, no matter what - #define simdutf_never_inline __declspec(noinline) - - #define simdutf_unused - #define simdutf_warn_unused - - #ifndef simdutf_likely - #define simdutf_likely(x) x - #endif - #ifndef simdutf_unlikely - #define simdutf_unlikely(x) x - #endif - - #define SIMDUTF_PUSH_DISABLE_WARNINGS __pragma(warning(push)) - #define SIMDUTF_PUSH_DISABLE_ALL_WARNINGS __pragma(warning(push, 0)) - #define SIMDUTF_DISABLE_VS_WARNING(WARNING_NUMBER) \ - __pragma(warning(disable : WARNING_NUMBER)) - // Get rid of Intellisense-only warnings (Code Analysis) - // Though __has_include is C++17, it is supported in Visual Studio 2017 or - // better (_MSC_VER>=1910). - #ifdef __has_include - #if __has_include() - #include - #define SIMDUTF_DISABLE_UNDESIRED_WARNINGS \ - SIMDUTF_DISABLE_VS_WARNING(ALL_CPPCORECHECK_WARNINGS) - #endif - #endif - - #ifndef SIMDUTF_DISABLE_UNDESIRED_WARNINGS - #define SIMDUTF_DISABLE_UNDESIRED_WARNINGS - #endif - - #define SIMDUTF_DISABLE_DEPRECATED_WARNING SIMDUTF_DISABLE_VS_WARNING(4996) - #define SIMDUTF_DISABLE_STRICT_OVERFLOW_WARNING - #define SIMDUTF_POP_DISABLE_WARNINGS __pragma(warning(pop)) - -#else // SIMDUTF_REGULAR_VISUAL_STUDIO - #if defined(__OPTIMIZE__) || defined(NDEBUG) - #define simdutf_really_inline inline __attribute__((always_inline)) - #else - #define simdutf_really_inline inline - #endif - #define simdutf_always_inline \ - inline __attribute__((always_inline)) // always inline, no matter what - #define SIMDUTF_DEPRECATED __attribute__((deprecated)) - #define simdutf_never_inline inline __attribute__((noinline)) - - #define simdutf_unused __attribute__((unused)) - #define simdutf_warn_unused __attribute__((warn_unused_result)) - - #ifndef simdutf_likely - #define simdutf_likely(x) __builtin_expect(!!(x), 1) - #endif - #ifndef simdutf_unlikely - #define simdutf_unlikely(x) __builtin_expect(!!(x), 0) - #endif - - // clang-format off - #define SIMDUTF_PUSH_DISABLE_WARNINGS _Pragma("GCC diagnostic push") - // gcc doesn't seem to disable all warnings with all and extra, add warnings - // here as necessary - #define SIMDUTF_PUSH_DISABLE_ALL_WARNINGS \ - SIMDUTF_PUSH_DISABLE_WARNINGS \ - SIMDUTF_DISABLE_GCC_WARNING(-Weffc++) \ - SIMDUTF_DISABLE_GCC_WARNING(-Wall) \ - SIMDUTF_DISABLE_GCC_WARNING(-Wconversion) \ - SIMDUTF_DISABLE_GCC_WARNING(-Wextra) \ - SIMDUTF_DISABLE_GCC_WARNING(-Wattributes) \ - SIMDUTF_DISABLE_GCC_WARNING(-Wimplicit-fallthrough) \ - SIMDUTF_DISABLE_GCC_WARNING(-Wnon-virtual-dtor) \ - SIMDUTF_DISABLE_GCC_WARNING(-Wreturn-type) \ - SIMDUTF_DISABLE_GCC_WARNING(-Wshadow) \ - SIMDUTF_DISABLE_GCC_WARNING(-Wunused-parameter) \ - SIMDUTF_DISABLE_GCC_WARNING(-Wunused-variable) - #define SIMDUTF_PRAGMA(P) _Pragma(#P) - #define SIMDUTF_DISABLE_GCC_WARNING(WARNING) \ - SIMDUTF_PRAGMA(GCC diagnostic ignored #WARNING) - #if defined(SIMDUTF_CLANG_VISUAL_STUDIO) - #define SIMDUTF_DISABLE_UNDESIRED_WARNINGS \ - SIMDUTF_DISABLE_GCC_WARNING(-Wmicrosoft-include) - #else - #define SIMDUTF_DISABLE_UNDESIRED_WARNINGS - #endif - #define SIMDUTF_DISABLE_DEPRECATED_WARNING \ - SIMDUTF_DISABLE_GCC_WARNING(-Wdeprecated-declarations) - #define SIMDUTF_DISABLE_STRICT_OVERFLOW_WARNING \ - SIMDUTF_DISABLE_GCC_WARNING(-Wstrict-overflow) - #define SIMDUTF_POP_DISABLE_WARNINGS _Pragma("GCC diagnostic pop") - // clang-format on - -#endif // MSC_VER - -#ifndef SIMDUTF_DLLIMPORTEXPORT - #if defined(SIMDUTF_VISUAL_STUDIO) - /** - * It does not matter here whether you are using - * the regular visual studio or clang under visual - * studio. - */ - #if SIMDUTF_USING_LIBRARY - #define SIMDUTF_DLLIMPORTEXPORT __declspec(dllimport) - #else - #define SIMDUTF_DLLIMPORTEXPORT __declspec(dllexport) - #endif - #else - #define SIMDUTF_DLLIMPORTEXPORT - #endif -#endif - -#endif // SIMDUTF_COMMON_DEFS_H -/* end file include/simdutf/common_defs.h */ -/* begin file include/simdutf/encoding_types.h */ -#ifndef SIMDUTF_ENCODING_TYPES_H -#define SIMDUTF_ENCODING_TYPES_H -#include - -namespace simdutf { - -enum encoding_type { - UTF8 = 1, // BOM 0xef 0xbb 0xbf - UTF16_LE = 2, // BOM 0xff 0xfe - UTF16_BE = 4, // BOM 0xfe 0xff - UTF32_LE = 8, // BOM 0xff 0xfe 0x00 0x00 - UTF32_BE = 16, // BOM 0x00 0x00 0xfe 0xff - Latin1 = 32, - - unspecified = 0 -}; - -enum endianness { LITTLE = 0, BIG = 1 }; - -bool match_system(endianness e); - -std::string to_string(encoding_type bom); - -// Note that BOM for UTF8 is discouraged. -namespace BOM { - -/** - * Checks for a BOM. If not, returns unspecified - * @param input the string to process - * @param length the length of the string in code units - * @return the corresponding encoding - */ - -encoding_type check_bom(const uint8_t *byte, size_t length); -encoding_type check_bom(const char *byte, size_t length); -/** - * Returns the size, in bytes, of the BOM for a given encoding type. - * Note that UTF8 BOM are discouraged. - * @param bom the encoding type - * @return the size in bytes of the corresponding BOM - */ -size_t bom_byte_size(encoding_type bom); - -} // namespace BOM -} // namespace simdutf -#endif -/* end file include/simdutf/encoding_types.h */ -/* begin file include/simdutf/error.h */ -#ifndef SIMDUTF_ERROR_H -#define SIMDUTF_ERROR_H -namespace simdutf { - -enum error_code { - SUCCESS = 0, - HEADER_BITS, // Any byte must have fewer than 5 header bits. - TOO_SHORT, // The leading byte must be followed by N-1 continuation bytes, - // where N is the UTF-8 character length This is also the error - // when the input is truncated. - TOO_LONG, // We either have too many consecutive continuation bytes or the - // string starts with a continuation byte. - OVERLONG, // The decoded character must be above U+7F for two-byte characters, - // U+7FF for three-byte characters, and U+FFFF for four-byte - // characters. - TOO_LARGE, // The decoded character must be less than or equal to - // U+10FFFF,less than or equal than U+7F for ASCII OR less than - // equal than U+FF for Latin1 - SURROGATE, // The decoded character must be not be in U+D800...DFFF (UTF-8 or - // UTF-32) OR a high surrogate must be followed by a low surrogate - // and a low surrogate must be preceded by a high surrogate - // (UTF-16) OR there must be no surrogate at all (Latin1) - INVALID_BASE64_CHARACTER, // Found a character that cannot be part of a valid - // base64 string. This may include a misplaced - // padding character ('='). - BASE64_INPUT_REMAINDER, // The base64 input terminates with a single - // character, excluding padding (=). - BASE64_EXTRA_BITS, // The base64 input terminates with non-zero - // padding bits. - OUTPUT_BUFFER_TOO_SMALL, // The provided buffer is too small. - OTHER // Not related to validation/transcoding. -}; - -struct result { - error_code error; - size_t count; // In case of error, indicates the position of the error. In - // case of success, indicates the number of code units - // validated/written. - - simdutf_really_inline result() : error{error_code::SUCCESS}, count{0} {} - - simdutf_really_inline result(error_code err, size_t pos) - : error{err}, count{pos} {} - - simdutf_really_inline bool is_ok() const { - return error == error_code::SUCCESS; - } - - simdutf_really_inline bool is_err() const { - return error != error_code::SUCCESS; - } -}; - -struct full_result { - error_code error; - size_t input_count; - size_t output_count; - - simdutf_really_inline full_result() - : error{error_code::SUCCESS}, input_count{0}, output_count{0} {} - - simdutf_really_inline full_result(error_code err, size_t pos_in, - size_t pos_out) - : error{err}, input_count{pos_in}, output_count{pos_out} {} - - simdutf_really_inline operator result() const noexcept { - if (error == error_code::SUCCESS || - error == error_code::BASE64_INPUT_REMAINDER) { - return result{error, output_count}; - } else { - return result{error, input_count}; - } - } -}; - -} // namespace simdutf -#endif -/* end file include/simdutf/error.h */ - -SIMDUTF_PUSH_DISABLE_WARNINGS -SIMDUTF_DISABLE_UNDESIRED_WARNINGS - -// Public API -/* begin file include/simdutf/simdutf_version.h */ -// /include/simdutf/simdutf_version.h automatically generated by release.py, -// do not change by hand -#ifndef SIMDUTF_SIMDUTF_VERSION_H -#define SIMDUTF_SIMDUTF_VERSION_H - -/** The version of simdutf being used (major.minor.revision) */ -#define SIMDUTF_VERSION "6.5.0" - -namespace simdutf { -enum { - /** - * The major version (MAJOR.minor.revision) of simdutf being used. - */ - SIMDUTF_VERSION_MAJOR = 6, - /** - * The minor version (major.MINOR.revision) of simdutf being used. - */ - SIMDUTF_VERSION_MINOR = 5, - /** - * The revision (major.minor.REVISION) of simdutf being used. - */ - SIMDUTF_VERSION_REVISION = 0 -}; -} // namespace simdutf - -#endif // SIMDUTF_SIMDUTF_VERSION_H -/* end file include/simdutf/simdutf_version.h */ -/* begin file include/simdutf/implementation.h */ -#ifndef SIMDUTF_IMPLEMENTATION_H -#define SIMDUTF_IMPLEMENTATION_H -#if !defined(SIMDUTF_NO_THREADS) - #include -#endif -#include -#include -/* begin file include/simdutf/internal/isadetection.h */ -/* From -https://github.com/endorno/pytorch/blob/master/torch/lib/TH/generic/simd/simd.h -Highly modified. - -Copyright (c) 2016- Facebook, Inc (Adam Paszke) -Copyright (c) 2014- Facebook, Inc (Soumith Chintala) -Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) -Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu) -Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) -Copyright (c) 2011-2013 NYU (Clement Farabet) -Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, -Iain Melvin, Jason Weston) Copyright (c) 2006 Idiap Research Institute -(Samy Bengio) Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, -Samy Bengio, Johnny Mariethoz) - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories -America and IDIAP Research Institute nor the names of its contributors may be - used to endorse or promote products derived from this software without - specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef SIMDutf_INTERNAL_ISADETECTION_H -#define SIMDutf_INTERNAL_ISADETECTION_H - -#include -#include -#if defined(_MSC_VER) - #include -#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID) - #include -#endif - - -// RISC-V ISA detection utilities -#if SIMDUTF_IS_RISCV64 && defined(__linux__) - #include // for syscall -// We define these ourselves, for backwards compatibility -struct simdutf_riscv_hwprobe { - int64_t key; - uint64_t value; -}; - #define simdutf_riscv_hwprobe(...) syscall(258, __VA_ARGS__) - #define SIMDUTF_RISCV_HWPROBE_KEY_IMA_EXT_0 4 - #define SIMDUTF_RISCV_HWPROBE_IMA_V (1 << 2) - #define SIMDUTF_RISCV_HWPROBE_EXT_ZVBB (1 << 17) -#endif // SIMDUTF_IS_RISCV64 && defined(__linux__) - -#if defined(__loongarch__) && defined(__linux__) - #include -// bits/hwcap.h -// #define HWCAP_LOONGARCH_LSX (1 << 4) -// #define HWCAP_LOONGARCH_LASX (1 << 5) -#endif - -namespace simdutf { -namespace internal { - -enum instruction_set { - DEFAULT = 0x0, - NEON = 0x1, - AVX2 = 0x4, - SSE42 = 0x8, - PCLMULQDQ = 0x10, - BMI1 = 0x20, - BMI2 = 0x40, - ALTIVEC = 0x80, - AVX512F = 0x100, - AVX512DQ = 0x200, - AVX512IFMA = 0x400, - AVX512PF = 0x800, - AVX512ER = 0x1000, - AVX512CD = 0x2000, - AVX512BW = 0x4000, - AVX512VL = 0x8000, - AVX512VBMI2 = 0x10000, - AVX512VPOPCNTDQ = 0x2000, - RVV = 0x4000, - ZVBB = 0x8000, - LSX = 0x40000, - LASX = 0x80000, -}; - -#if defined(__PPC64__) - -static inline uint32_t detect_supported_architectures() { - return instruction_set::ALTIVEC; -} - -#elif SIMDUTF_IS_RISCV64 - -static inline uint32_t detect_supported_architectures() { - uint32_t host_isa = instruction_set::DEFAULT; - #if SIMDUTF_IS_RVV - host_isa |= instruction_set::RVV; - #endif - #if SIMDUTF_IS_ZVBB - host_isa |= instruction_set::ZVBB; - #endif - #if defined(__linux__) - simdutf_riscv_hwprobe probes[] = {{SIMDUTF_RISCV_HWPROBE_KEY_IMA_EXT_0, 0}}; - long ret = simdutf_riscv_hwprobe(&probes, sizeof probes / sizeof *probes, 0, - nullptr, 0); - if (ret == 0) { - uint64_t extensions = probes[0].value; - if (extensions & SIMDUTF_RISCV_HWPROBE_IMA_V) - host_isa |= instruction_set::RVV; - if (extensions & SIMDUTF_RISCV_HWPROBE_EXT_ZVBB) - host_isa |= instruction_set::ZVBB; - } - #endif - #if defined(RUN_IN_SPIKE_SIMULATOR) - // Proxy Kernel does not implement yet hwprobe syscall - host_isa |= instruction_set::RVV; - #endif - return host_isa; -} - -#elif defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) - -static inline uint32_t detect_supported_architectures() { - return instruction_set::NEON; -} - -#elif defined(__x86_64__) || defined(_M_AMD64) // x64 - -namespace { -namespace cpuid_bit { -// Can be found on Intel ISA Reference for CPUID - -// EAX = 0x01 -constexpr uint32_t pclmulqdq = uint32_t(1) - << 1; ///< @private bit 1 of ECX for EAX=0x1 -constexpr uint32_t sse42 = uint32_t(1) - << 20; ///< @private bit 20 of ECX for EAX=0x1 -constexpr uint32_t osxsave = - (uint32_t(1) << 26) | - (uint32_t(1) << 27); ///< @private bits 26+27 of ECX for EAX=0x1 - -// EAX = 0x7f (Structured Extended Feature Flags), ECX = 0x00 (Sub-leaf) -// See: "Table 3-8. Information Returned by CPUID Instruction" -namespace ebx { -constexpr uint32_t bmi1 = uint32_t(1) << 3; -constexpr uint32_t avx2 = uint32_t(1) << 5; -constexpr uint32_t bmi2 = uint32_t(1) << 8; -constexpr uint32_t avx512f = uint32_t(1) << 16; -constexpr uint32_t avx512dq = uint32_t(1) << 17; -constexpr uint32_t avx512ifma = uint32_t(1) << 21; -constexpr uint32_t avx512cd = uint32_t(1) << 28; -constexpr uint32_t avx512bw = uint32_t(1) << 30; -constexpr uint32_t avx512vl = uint32_t(1) << 31; -} // namespace ebx - -namespace ecx { -constexpr uint32_t avx512vbmi = uint32_t(1) << 1; -constexpr uint32_t avx512vbmi2 = uint32_t(1) << 6; -constexpr uint32_t avx512vnni = uint32_t(1) << 11; -constexpr uint32_t avx512bitalg = uint32_t(1) << 12; -constexpr uint32_t avx512vpopcnt = uint32_t(1) << 14; -} // namespace ecx -namespace edx { -constexpr uint32_t avx512vp2intersect = uint32_t(1) << 8; -} -namespace xcr0_bit { -constexpr uint64_t avx256_saved = uint64_t(1) << 2; ///< @private bit 2 = AVX -constexpr uint64_t avx512_saved = - uint64_t(7) << 5; ///< @private bits 5,6,7 = opmask, ZMM_hi256, hi16_ZMM -} // namespace xcr0_bit -} // namespace cpuid_bit -} // namespace - -static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, - uint32_t *edx) { - #if defined(_MSC_VER) - int cpu_info[4]; - __cpuidex(cpu_info, *eax, *ecx); - *eax = cpu_info[0]; - *ebx = cpu_info[1]; - *ecx = cpu_info[2]; - *edx = cpu_info[3]; - #elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID) - uint32_t level = *eax; - __get_cpuid(level, eax, ebx, ecx, edx); - #else - uint32_t a = *eax, b, c = *ecx, d; - asm volatile("cpuid\n\t" : "+a"(a), "=b"(b), "+c"(c), "=d"(d)); - *eax = a; - *ebx = b; - *ecx = c; - *edx = d; - #endif -} - -static inline uint64_t xgetbv() { - #if defined(_MSC_VER) - return _xgetbv(0); - #else - uint32_t xcr0_lo, xcr0_hi; - asm volatile("xgetbv\n\t" : "=a"(xcr0_lo), "=d"(xcr0_hi) : "c"(0)); - return xcr0_lo | ((uint64_t)xcr0_hi << 32); - #endif -} - -static inline uint32_t detect_supported_architectures() { - uint32_t eax; - uint32_t ebx = 0; - uint32_t ecx = 0; - uint32_t edx = 0; - uint32_t host_isa = 0x0; - - // EBX for EAX=0x1 - eax = 0x1; - cpuid(&eax, &ebx, &ecx, &edx); - - if (ecx & cpuid_bit::sse42) { - host_isa |= instruction_set::SSE42; - } - - if (ecx & cpuid_bit::pclmulqdq) { - host_isa |= instruction_set::PCLMULQDQ; - } - - if ((ecx & cpuid_bit::osxsave) != cpuid_bit::osxsave) { - return host_isa; - } - - // xgetbv for checking if the OS saves registers - uint64_t xcr0 = xgetbv(); - - if ((xcr0 & cpuid_bit::xcr0_bit::avx256_saved) == 0) { - return host_isa; - } - // ECX for EAX=0x7 - eax = 0x7; - ecx = 0x0; // Sub-leaf = 0 - cpuid(&eax, &ebx, &ecx, &edx); - if (ebx & cpuid_bit::ebx::avx2) { - host_isa |= instruction_set::AVX2; - } - if (ebx & cpuid_bit::ebx::bmi1) { - host_isa |= instruction_set::BMI1; - } - if (ebx & cpuid_bit::ebx::bmi2) { - host_isa |= instruction_set::BMI2; - } - if (!((xcr0 & cpuid_bit::xcr0_bit::avx512_saved) == - cpuid_bit::xcr0_bit::avx512_saved)) { - return host_isa; - } - if (ebx & cpuid_bit::ebx::avx512f) { - host_isa |= instruction_set::AVX512F; - } - if (ebx & cpuid_bit::ebx::avx512bw) { - host_isa |= instruction_set::AVX512BW; - } - if (ebx & cpuid_bit::ebx::avx512cd) { - host_isa |= instruction_set::AVX512CD; - } - if (ebx & cpuid_bit::ebx::avx512dq) { - host_isa |= instruction_set::AVX512DQ; - } - if (ebx & cpuid_bit::ebx::avx512vl) { - host_isa |= instruction_set::AVX512VL; - } - if (ecx & cpuid_bit::ecx::avx512vbmi2) { - host_isa |= instruction_set::AVX512VBMI2; - } - if (ecx & cpuid_bit::ecx::avx512vpopcnt) { - host_isa |= instruction_set::AVX512VPOPCNTDQ; - } - return host_isa; -} -#elif defined(__loongarch__) - -static inline uint32_t detect_supported_architectures() { - uint32_t host_isa = instruction_set::DEFAULT; - #if defined(__linux__) - uint64_t hwcap = 0; - hwcap = getauxval(AT_HWCAP); - if (hwcap & HWCAP_LOONGARCH_LSX) { - host_isa |= instruction_set::LSX; - } - if (hwcap & HWCAP_LOONGARCH_LASX) { - host_isa |= instruction_set::LASX; - } - #endif - return host_isa; -} -#else // fallback - -// includes 32-bit ARM. -static inline uint32_t detect_supported_architectures() { - return instruction_set::DEFAULT; -} - -#endif // end SIMD extension detection code - -} // namespace internal -} // namespace simdutf - -#endif // SIMDutf_INTERNAL_ISADETECTION_H -/* end file include/simdutf/internal/isadetection.h */ - -#if SIMDUTF_SPAN - #include - #include - #include -#endif - -// The following defines are conditionally enabled/disabled during amalgamation. -// By default all features are enabled, regular code shouldn't check them. Only -// when user code really relies of a selected subset, it's good to verify these -// flags, like: -// -// #if !SIMDUTF_FEATURE_UTF16 -// # error("Please amalgamate simdutf with UTF-16 support") -// #endif -// -#define SIMDUTF_FEATURE_DETECT_ENCODING 1 -#define SIMDUTF_FEATURE_ASCII 1 -#define SIMDUTF_FEATURE_LATIN1 1 -#define SIMDUTF_FEATURE_UTF8 1 -#define SIMDUTF_FEATURE_UTF16 1 -#define SIMDUTF_FEATURE_UTF32 1 -#define SIMDUTF_FEATURE_BASE64 1 - -namespace simdutf { - -#if SIMDUTF_SPAN -/// helpers placed in namespace detail are not a part of the public API -namespace detail { -/** - * matches a byte, in the many ways C++ allows. note that these - * are all distinct types. - */ -template -concept byte_like = std::is_same_v || // - std::is_same_v || // - std::is_same_v || // - std::is_same_v; - -template -concept is_byte_like = byte_like>; - -template -concept is_pointer = std::is_pointer_v; - -/** - * matches anything that behaves like std::span and points to character-like - * data such as: std::byte, char, unsigned char, signed char, std::int8_t, - * std::uint8_t - */ -template -concept input_span_of_byte_like = requires(const T &t) { - { t.size() } noexcept -> std::convertible_to; - { t.data() } noexcept -> is_pointer; - { *t.data() } noexcept -> is_byte_like; -}; - -template -concept is_mutable = !std::is_const_v>; - -/** - * like span_of_byte_like, but for an output span (intended to be written to) - */ -template -concept output_span_of_byte_like = requires(T &t) { - { t.size() } noexcept -> std::convertible_to; - { t.data() } noexcept -> is_pointer; - { *t.data() } noexcept -> is_byte_like; - { *t.data() } noexcept -> is_mutable; -}; -} // namespace detail -#endif - -#if SIMDUTF_FEATURE_DETECT_ENCODING -/** - * Autodetect the encoding of the input, a single encoding is recommended. - * E.g., the function might return simdutf::encoding_type::UTF8, - * simdutf::encoding_type::UTF16_LE, simdutf::encoding_type::UTF16_BE, or - * simdutf::encoding_type::UTF32_LE. - * - * @param input the string to analyze. - * @param length the length of the string in bytes. - * @return the detected encoding type - */ -simdutf_warn_unused simdutf::encoding_type -autodetect_encoding(const char *input, size_t length) noexcept; -simdutf_really_inline simdutf_warn_unused simdutf::encoding_type -autodetect_encoding(const uint8_t *input, size_t length) noexcept { - return autodetect_encoding(reinterpret_cast(input), length); -} - #if SIMDUTF_SPAN -/** - * Autodetect the encoding of the input, a single encoding is recommended. - * E.g., the function might return simdutf::encoding_type::UTF8, - * simdutf::encoding_type::UTF16_LE, simdutf::encoding_type::UTF16_BE, or - * simdutf::encoding_type::UTF32_LE. - * - * @param input the string to analyze. can be a anything span-like that has a - * data() and size() that points to character data: std::string, - * std::string_view, std::vector, std::span etc. - * @return the detected encoding type - */ -simdutf_really_inline simdutf_warn_unused simdutf::encoding_type -autodetect_encoding( - const detail::input_span_of_byte_like auto &input) noexcept { - return autodetect_encoding(reinterpret_cast(input.data()), - input.size()); -} - #endif // SIMDUTF_SPAN - -/** - * Autodetect the possible encodings of the input in one pass. - * E.g., if the input might be UTF-16LE or UTF-8, this function returns - * the value (simdutf::encoding_type::UTF8 | simdutf::encoding_type::UTF16_LE). - * - * Overridden by each implementation. - * - * @param input the string to analyze. - * @param length the length of the string in bytes. - * @return the detected encoding type - */ -simdutf_warn_unused int detect_encodings(const char *input, - size_t length) noexcept; -simdutf_really_inline simdutf_warn_unused int -detect_encodings(const uint8_t *input, size_t length) noexcept { - return detect_encodings(reinterpret_cast(input), length); -} - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused int -detect_encodings(const detail::input_span_of_byte_like auto &input) noexcept { - return detect_encodings(reinterpret_cast(input.data()), - input.size()); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING -/** - * Validate the UTF-8 string. This function may be best when you expect - * the input to be almost always valid. Otherwise, consider using - * validate_utf8_with_errors. - * - * Overridden by each implementation. - * - * @param buf the UTF-8 string to validate. - * @param len the length of the string in bytes. - * @return true if and only if the string is valid UTF-8. - */ -simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused bool -validate_utf8(const detail::input_span_of_byte_like auto &input) noexcept { - return validate_utf8(reinterpret_cast(input.data()), - input.size()); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 -/** - * Validate the UTF-8 string and stop on error. - * - * Overridden by each implementation. - * - * @param buf the UTF-8 string to validate. - * @param len the length of the string in bytes. - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of code units validated if - * successful. - */ -simdutf_warn_unused result validate_utf8_with_errors(const char *buf, - size_t len) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result validate_utf8_with_errors( - const detail::input_span_of_byte_like auto &input) noexcept { - return validate_utf8_with_errors(reinterpret_cast(input.data()), - input.size()); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_ASCII -/** - * Validate the ASCII string. - * - * Overridden by each implementation. - * - * @param buf the ASCII string to validate. - * @param len the length of the string in bytes. - * @return true if and only if the string is valid ASCII. - */ -simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused bool -validate_ascii(const detail::input_span_of_byte_like auto &input) noexcept { - return validate_ascii(reinterpret_cast(input.data()), - input.size()); -} - #endif // SIMDUTF_SPAN - -/** - * Validate the ASCII string and stop on error. It might be faster than - * validate_utf8 when an error is expected to occur early. - * - * Overridden by each implementation. - * - * @param buf the ASCII string to validate. - * @param len the length of the string in bytes. - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of code units validated if - * successful. - */ -simdutf_warn_unused result validate_ascii_with_errors(const char *buf, - size_t len) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result validate_ascii_with_errors( - const detail::input_span_of_byte_like auto &input) noexcept { - return validate_ascii_with_errors( - reinterpret_cast(input.data()), input.size()); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_ASCII - -#if SIMDUTF_FEATURE_UTF16 -/** - * Using native endianness; Validate the UTF-16 string. - * This function may be best when you expect the input to be almost always - * valid. Otherwise, consider using validate_utf16_with_errors. - * - * Overridden by each implementation. - * - * This function is not BOM-aware. - * - * @param buf the UTF-16 string to validate. - * @param len the length of the string in number of 2-byte code units - * (char16_t). - * @return true if and only if the string is valid UTF-16. - */ -simdutf_warn_unused bool validate_utf16(const char16_t *buf, - size_t len) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused bool -validate_utf16(std::span input) noexcept { - return validate_utf16(input.data(), input.size()); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING -/** - * Validate the UTF-16LE string. This function may be best when you expect - * the input to be almost always valid. Otherwise, consider using - * validate_utf16le_with_errors. - * - * Overridden by each implementation. - * - * This function is not BOM-aware. - * - * @param buf the UTF-16LE string to validate. - * @param len the length of the string in number of 2-byte code units - * (char16_t). - * @return true if and only if the string is valid UTF-16LE. - */ -simdutf_warn_unused bool validate_utf16le(const char16_t *buf, - size_t len) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused bool -validate_utf16le(std::span input) noexcept { - return validate_utf16le(input.data(), input.size()); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF16 -/** - * Validate the UTF-16BE string. This function may be best when you expect - * the input to be almost always valid. Otherwise, consider using - * validate_utf16be_with_errors. - * - * Overridden by each implementation. - * - * This function is not BOM-aware. - * - * @param buf the UTF-16BE string to validate. - * @param len the length of the string in number of 2-byte code units - * (char16_t). - * @return true if and only if the string is valid UTF-16BE. - */ -simdutf_warn_unused bool validate_utf16be(const char16_t *buf, - size_t len) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused bool -validate_utf16be(std::span input) noexcept { - return validate_utf16be(input.data(), input.size()); -} - #endif // SIMDUTF_SPAN - -/** - * Using native endianness; Validate the UTF-16 string and stop on error. - * It might be faster than validate_utf16 when an error is expected to occur - * early. - * - * Overridden by each implementation. - * - * This function is not BOM-aware. - * - * @param buf the UTF-16 string to validate. - * @param len the length of the string in number of 2-byte code units - * (char16_t). - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of code units validated if - * successful. - */ -simdutf_warn_unused result validate_utf16_with_errors(const char16_t *buf, - size_t len) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result -validate_utf16_with_errors(std::span input) noexcept { - return validate_utf16_with_errors(input.data(), input.size()); -} - #endif // SIMDUTF_SPAN - -/** - * Validate the UTF-16LE string and stop on error. It might be faster than - * validate_utf16le when an error is expected to occur early. - * - * Overridden by each implementation. - * - * This function is not BOM-aware. - * - * @param buf the UTF-16LE string to validate. - * @param len the length of the string in number of 2-byte code units - * (char16_t). - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of code units validated if - * successful. - */ -simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, - size_t len) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result -validate_utf16le_with_errors(std::span input) noexcept { - return validate_utf16le_with_errors(input.data(), input.size()); -} - #endif // SIMDUTF_SPAN - -/** - * Validate the UTF-16BE string and stop on error. It might be faster than - * validate_utf16be when an error is expected to occur early. - * - * Overridden by each implementation. - * - * This function is not BOM-aware. - * - * @param buf the UTF-16BE string to validate. - * @param len the length of the string in number of 2-byte code units - * (char16_t). - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of code units validated if - * successful. - */ -simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, - size_t len) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result -validate_utf16be_with_errors(std::span input) noexcept { - return validate_utf16be_with_errors(input.data(), input.size()); -} - #endif // SIMDUTF_SPAN - -/** - * Fixes an ill-formed UTF-16LE string by replacing mismatched surrogates with - * the Unicode replacement character U+FFFD. If input and output points to - * different memory areas, the procedure copies string, and it's expected that - * output memory is at least as big as the input. It's also possible to set - * input equal output, that makes replacements an in-place operation. - * - * @param input the UTF-16LE string to correct. - * @param len the length of the string in number of 2-byte code units - * (char16_t). - * @param output the output buffer. - */ -void to_well_formed_utf16le(const char16_t *input, size_t len, - char16_t *output) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline void -to_well_formed_utf16le(std::span input, - std::span output) noexcept { - to_well_formed_utf16le(input.data(), input.size(), output.data()); -} - #endif // SIMDUTF_SPAN - -/** - * Fixes an ill-formed UTF-16BE string by replacing mismatched surrogates with - * the Unicode replacement character U+FFFD. If input and output points to - * different memory areas, the procedure copies string, and it's expected that - * output memory is at least as big as the input. It's also possible to set - * input equal output, that makes replacements an in-place operation. - * - * @param input the UTF-16BE string to correct. - * @param len the length of the string in number of 2-byte code units - * (char16_t). - * @param output the output buffer. - */ -void to_well_formed_utf16be(const char16_t *input, size_t len, - char16_t *output) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline void -to_well_formed_utf16be(std::span input, - std::span output) noexcept { - to_well_formed_utf16be(input.data(), input.size(), output.data()); -} - #endif // SIMDUTF_SPAN - -/** - * Fixes an ill-formed UTF-16 string by replacing mismatched surrogates with the - * Unicode replacement character U+FFFD. If input and output points to different - * memory areas, the procedure copies string, and it's expected that output - * memory is at least as big as the input. It's also possible to set input equal - * output, that makes replacements an in-place operation. - * - * @param input the UTF-16 string to correct. - * @param len the length of the string in number of 2-byte code units - * (char16_t). - * @param output the output buffer. - */ -void to_well_formed_utf16(const char16_t *input, size_t len, - char16_t *output) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline void -to_well_formed_utf16(std::span input, - std::span output) noexcept { - to_well_formed_utf16(input.data(), input.size(), output.data()); -} - #endif // SIMDUTF_SPAN - -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING -/** - * Validate the UTF-32 string. This function may be best when you expect - * the input to be almost always valid. Otherwise, consider using - * validate_utf32_with_errors. - * - * Overridden by each implementation. - * - * This function is not BOM-aware. - * - * @param buf the UTF-32 string to validate. - * @param len the length of the string in number of 4-byte code units - * (char32_t). - * @return true if and only if the string is valid UTF-32. - */ -simdutf_warn_unused bool validate_utf32(const char32_t *buf, - size_t len) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused bool -validate_utf32(std::span input) noexcept { - return validate_utf32(input.data(), input.size()); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF32 -/** - * Validate the UTF-32 string and stop on error. It might be faster than - * validate_utf32 when an error is expected to occur early. - * - * Overridden by each implementation. - * - * This function is not BOM-aware. - * - * @param buf the UTF-32 string to validate. - * @param len the length of the string in number of 4-byte code units - * (char32_t). - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of code units validated if - * successful. - */ -simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, - size_t len) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result -validate_utf32_with_errors(std::span input) noexcept { - return validate_utf32_with_errors(input.data(), input.size()); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -/** - * Convert Latin1 string into UTF8 string. - * - * This function is suitable to work with inputs from untrusted sources. - * - * @param input the Latin1 string to convert - * @param length the length of the string in bytes - * @param utf8_output the pointer to buffer that can hold conversion result - * @return the number of written char; 0 if conversion is not possible - */ -simdutf_warn_unused size_t convert_latin1_to_utf8(const char *input, - size_t length, - char *utf8_output) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_latin1_to_utf8( - const detail::input_span_of_byte_like auto &latin1_input, - detail::output_span_of_byte_like auto &&utf8_output) noexcept { - return convert_latin1_to_utf8( - reinterpret_cast(latin1_input.data()), latin1_input.size(), - utf8_output.data()); -} - #endif // SIMDUTF_SPAN - -/** - * Convert Latin1 string into UTF8 string with output limit. - * - * This function is suitable to work with inputs from untrusted sources. - * - * @param input the Latin1 string to convert - * @param length the length of the string in bytes - * @param utf8_output the pointer to buffer that can hold conversion result - * @param utf8_len the maximum output length - * @return the number of written char; 0 if conversion is not possible - */ -simdutf_warn_unused size_t -convert_latin1_to_utf8_safe(const char *input, size_t length, char *utf8_output, - size_t utf8_len) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_latin1_to_utf8_safe( - const detail::input_span_of_byte_like auto &input, - detail::output_span_of_byte_like auto &&utf8_output) noexcept { - // implementation note: outputspan is a forwarding ref to avoid copying and - // allow both lvalues and rvalues. std::span can be copied without problems, - // but std::vector should not, and this function should accept both. it will - // allow using an owning rvalue ref (example: passing a temporary std::string) - // as output, but the user will quickly find out that he has no way of getting - // the data out of the object in that case. - return convert_latin1_to_utf8_safe( - input.data(), input.size(), reinterpret_cast(utf8_output.data()), - utf8_output.size()); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -/** - * Convert possibly Latin1 string into UTF-16LE string. - * - * This function is suitable to work with inputs from untrusted sources. - * - * @param input the Latin1 string to convert - * @param length the length of the string in bytes - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return the number of written char16_t; 0 if conversion is not possible - */ -simdutf_warn_unused size_t convert_latin1_to_utf16le( - const char *input, size_t length, char16_t *utf16_output) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_latin1_to_utf16le( - const detail::input_span_of_byte_like auto &latin1_input, - std::span utf16_output) noexcept { - return convert_latin1_to_utf16le( - reinterpret_cast(latin1_input.data()), latin1_input.size(), - utf16_output.data()); -} - #endif // SIMDUTF_SPAN - -/** - * Convert Latin1 string into UTF-16BE string. - * - * This function is suitable to work with inputs from untrusted sources. - * - * @param input the Latin1 string to convert - * @param length the length of the string in bytes - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return the number of written char16_t; 0 if conversion is not possible - */ -simdutf_warn_unused size_t convert_latin1_to_utf16be( - const char *input, size_t length, char16_t *utf16_output) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t -convert_latin1_to_utf16be(const detail::input_span_of_byte_like auto &input, - std::span output) noexcept { - return convert_latin1_to_utf16be(reinterpret_cast(input.data()), - input.size(), output.data()); -} - #endif // SIMDUTF_SPAN -/** - * Compute the number of bytes that this UTF-16 string would require in Latin1 - * format. - * - * @param length the length of the string in Latin1 code units (char) - * @return the length of the string in Latin1 code units (char) required to - * encode the UTF-16 string as Latin1 - */ -simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) noexcept; - -/** - * Compute the number of code units that this Latin1 string would require in - * UTF-16 format. - * - * @param length the length of the string in Latin1 code units (char) - * @return the length of the string in 2-byte code units (char16_t) required to - * encode the Latin1 string as UTF-16 - */ -simdutf_warn_unused size_t utf16_length_from_latin1(size_t length) noexcept; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -/** - * Convert Latin1 string into UTF-32 string. - * - * This function is suitable to work with inputs from untrusted sources. - * - * @param input the Latin1 string to convert - * @param length the length of the string in bytes - * @param utf32_buffer the pointer to buffer that can hold conversion result - * @return the number of written char32_t; 0 if conversion is not possible - */ -simdutf_warn_unused size_t convert_latin1_to_utf32( - const char *input, size_t length, char32_t *utf32_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_latin1_to_utf32( - const detail::input_span_of_byte_like auto &latin1_input, - std::span utf32_output) noexcept { - return convert_latin1_to_utf32( - reinterpret_cast(latin1_input.data()), latin1_input.size(), - utf32_output.data()); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -/** - * Convert possibly broken UTF-8 string into latin1 string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param latin1_output the pointer to buffer that can hold conversion result - * @return the number of written char; 0 if the input was not valid UTF-8 string - * or if it cannot be represented as Latin1 - */ -simdutf_warn_unused size_t convert_utf8_to_latin1(const char *input, - size_t length, - char *latin1_output) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_utf8_to_latin1( - const detail::input_span_of_byte_like auto &input, - detail::output_span_of_byte_like auto &&output) noexcept { - return convert_utf8_to_latin1(reinterpret_cast(input.data()), - input.size(), - reinterpret_cast(output.data())); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -/** - * Using native endianness, convert possibly broken UTF-8 string into a UTF-16 - * string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return the number of written char16_t; 0 if the input was not valid UTF-8 - * string - */ -simdutf_warn_unused size_t convert_utf8_to_utf16( - const char *input, size_t length, char16_t *utf16_output) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t -convert_utf8_to_utf16(const detail::input_span_of_byte_like auto &input, - std::span output) noexcept { - return convert_utf8_to_utf16(reinterpret_cast(input.data()), - input.size(), output.data()); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -/** - * Using native endianness, convert a Latin1 string into a UTF-16 string. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return the number of written char16_t. - */ -simdutf_warn_unused size_t convert_latin1_to_utf16( - const char *input, size_t length, char16_t *utf16_output) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t -convert_latin1_to_utf16(const detail::input_span_of_byte_like auto &input, - std::span output) noexcept { - return convert_latin1_to_utf16(reinterpret_cast(input.data()), - input.size(), output.data()); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -/** - * Convert possibly broken UTF-8 string into UTF-16LE string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return the number of written char16_t; 0 if the input was not valid UTF-8 - * string - */ -simdutf_warn_unused size_t convert_utf8_to_utf16le( - const char *input, size_t length, char16_t *utf16_output) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t -convert_utf8_to_utf16le(const detail::input_span_of_byte_like auto &utf8_input, - std::span utf16_output) noexcept { - return convert_utf8_to_utf16le( - reinterpret_cast(utf8_input.data()), utf8_input.size(), - utf16_output.data()); -} - #endif // SIMDUTF_SPAN - -/** - * Convert possibly broken UTF-8 string into UTF-16BE string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return the number of written char16_t; 0 if the input was not valid UTF-8 - * string - */ -simdutf_warn_unused size_t convert_utf8_to_utf16be( - const char *input, size_t length, char16_t *utf16_output) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t -convert_utf8_to_utf16be(const detail::input_span_of_byte_like auto &utf8_input, - std::span utf16_output) noexcept { - return convert_utf8_to_utf16be( - reinterpret_cast(utf8_input.data()), utf8_input.size(), - utf16_output.data()); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -/** - * Convert possibly broken UTF-8 string into latin1 string with errors. - * If the string cannot be represented as Latin1, an error - * code is returned. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param latin1_output the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of code units validated if - * successful. - */ -simdutf_warn_unused result convert_utf8_to_latin1_with_errors( - const char *input, size_t length, char *latin1_output) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result -convert_utf8_to_latin1_with_errors( - const detail::input_span_of_byte_like auto &utf8_input, - detail::output_span_of_byte_like auto &&latin1_output) noexcept { - return convert_utf8_to_latin1_with_errors( - reinterpret_cast(utf8_input.data()), utf8_input.size(), - reinterpret_cast(latin1_output.data())); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -/** - * Using native endianness, convert possibly broken UTF-8 string into UTF-16 - * string and stop on error. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of char16_t written if - * successful. - */ -simdutf_warn_unused result convert_utf8_to_utf16_with_errors( - const char *input, size_t length, char16_t *utf16_output) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result -convert_utf8_to_utf16_with_errors( - const detail::input_span_of_byte_like auto &utf8_input, - std::span utf16_output) noexcept { - return convert_utf8_to_utf16_with_errors( - reinterpret_cast(utf8_input.data()), utf8_input.size(), - utf16_output.data()); -} - #endif // SIMDUTF_SPAN - -/** - * Convert possibly broken UTF-8 string into UTF-16LE string and stop on error. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of char16_t written if - * successful. - */ -simdutf_warn_unused result convert_utf8_to_utf16le_with_errors( - const char *input, size_t length, char16_t *utf16_output) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result -convert_utf8_to_utf16le_with_errors( - const detail::input_span_of_byte_like auto &utf8_input, - std::span utf16_output) noexcept { - return convert_utf8_to_utf16le_with_errors( - reinterpret_cast(utf8_input.data()), utf8_input.size(), - utf16_output.data()); -} - #endif // SIMDUTF_SPAN - -/** - * Convert possibly broken UTF-8 string into UTF-16BE string and stop on error. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of char16_t written if - * successful. - */ -simdutf_warn_unused result convert_utf8_to_utf16be_with_errors( - const char *input, size_t length, char16_t *utf16_output) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result -convert_utf8_to_utf16be_with_errors( - const detail::input_span_of_byte_like auto &utf8_input, - std::span utf16_output) noexcept { - return convert_utf8_to_utf16be_with_errors( - reinterpret_cast(utf8_input.data()), utf8_input.size(), - utf16_output.data()); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -/** - * Convert possibly broken UTF-8 string into UTF-32 string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param utf32_buffer the pointer to buffer that can hold conversion result - * @return the number of written char32_t; 0 if the input was not valid UTF-8 - * string - */ -simdutf_warn_unused size_t convert_utf8_to_utf32( - const char *input, size_t length, char32_t *utf32_output) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t -convert_utf8_to_utf32(const detail::input_span_of_byte_like auto &utf8_input, - std::span utf32_output) noexcept { - return convert_utf8_to_utf32( - reinterpret_cast(utf8_input.data()), utf8_input.size(), - utf32_output.data()); -} - #endif // SIMDUTF_SPAN - -/** - * Convert possibly broken UTF-8 string into UTF-32 string and stop on error. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param utf32_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of char32_t written if - * successful. - */ -simdutf_warn_unused result convert_utf8_to_utf32_with_errors( - const char *input, size_t length, char32_t *utf32_output) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result -convert_utf8_to_utf32_with_errors( - const detail::input_span_of_byte_like auto &utf8_input, - std::span utf32_output) noexcept { - return convert_utf8_to_utf32_with_errors( - reinterpret_cast(utf8_input.data()), utf8_input.size(), - utf32_output.data()); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -/** - * Convert valid UTF-8 string into latin1 string. - * - * This function assumes that the input string is valid UTF-8 and that it can be - * represented as Latin1. If you violate this assumption, the result is - * implementation defined and may include system-dependent behavior such as - * crashes. - * - * This function is for expert users only and not part of our public API. Use - * convert_utf8_to_latin1 instead. The function may be removed from the library - * in the future. - * - * This function is not BOM-aware. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param latin1_output the pointer to buffer that can hold conversion result - * @return the number of written char; 0 if the input was not valid UTF-8 string - */ -simdutf_warn_unused size_t convert_valid_utf8_to_latin1( - const char *input, size_t length, char *latin1_output) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_latin1( - const detail::input_span_of_byte_like auto &valid_utf8_input, - detail::output_span_of_byte_like auto &&latin1_output) noexcept { - return convert_valid_utf8_to_latin1( - reinterpret_cast(valid_utf8_input.data()), - valid_utf8_input.size(), latin1_output.data()); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -/** - * Using native endianness, convert valid UTF-8 string into a UTF-16 string. - * - * This function assumes that the input string is valid UTF-8. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return the number of written char16_t - */ -simdutf_warn_unused size_t convert_valid_utf8_to_utf16( - const char *input, size_t length, char16_t *utf16_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_utf16( - const detail::input_span_of_byte_like auto &valid_utf8_input, - std::span utf16_output) noexcept { - return convert_valid_utf8_to_utf16( - reinterpret_cast(valid_utf8_input.data()), - valid_utf8_input.size(), utf16_output.data()); -} - #endif // SIMDUTF_SPAN - -/** - * Convert valid UTF-8 string into UTF-16LE string. - * - * This function assumes that the input string is valid UTF-8. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return the number of written char16_t - */ -simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( - const char *input, size_t length, char16_t *utf16_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_utf16le( - const detail::input_span_of_byte_like auto &valid_utf8_input, - std::span utf16_output) noexcept { - return convert_valid_utf8_to_utf16le( - reinterpret_cast(valid_utf8_input.data()), - valid_utf8_input.size(), utf16_output.data()); -} - #endif // SIMDUTF_SPAN - -/** - * Convert valid UTF-8 string into UTF-16BE string. - * - * This function assumes that the input string is valid UTF-8. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return the number of written char16_t - */ -simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( - const char *input, size_t length, char16_t *utf16_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_utf16be( - const detail::input_span_of_byte_like auto &valid_utf8_input, - std::span utf16_output) noexcept { - return convert_valid_utf8_to_utf16be( - reinterpret_cast(valid_utf8_input.data()), - valid_utf8_input.size(), utf16_output.data()); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -/** - * Convert valid UTF-8 string into UTF-32 string. - * - * This function assumes that the input string is valid UTF-8. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param utf32_buffer the pointer to buffer that can hold conversion result - * @return the number of written char32_t - */ -simdutf_warn_unused size_t convert_valid_utf8_to_utf32( - const char *input, size_t length, char32_t *utf32_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf8_to_utf32( - const detail::input_span_of_byte_like auto &valid_utf8_input, - std::span utf32_output) noexcept { - return convert_valid_utf8_to_utf32( - reinterpret_cast(valid_utf8_input.data()), - valid_utf8_input.size(), utf32_output.data()); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 -/** - * Return the number of bytes that this Latin1 string would require in UTF-8 - * format. - * - * @param input the Latin1 string to convert - * @param length the length of the string bytes - * @return the number of bytes required to encode the Latin1 string as UTF-8 - */ -simdutf_warn_unused size_t utf8_length_from_latin1(const char *input, - size_t length) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t utf8_length_from_latin1( - const detail::input_span_of_byte_like auto &latin1_input) noexcept { - return utf8_length_from_latin1( - reinterpret_cast(latin1_input.data()), latin1_input.size()); -} - #endif // SIMDUTF_SPAN - -/** - * Compute the number of bytes that this UTF-8 string would require in Latin1 - * format. - * - * This function does not validate the input. It is acceptable to pass invalid - * UTF-8 strings but in such cases the result is implementation defined. - * - * This function is not BOM-aware. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in byte - * @return the number of bytes required to encode the UTF-8 string as Latin1 - */ -simdutf_warn_unused size_t latin1_length_from_utf8(const char *input, - size_t length) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t latin1_length_from_utf8( - const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept { - return latin1_length_from_utf8( - reinterpret_cast(valid_utf8_input.data()), - valid_utf8_input.size()); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -/** - * Compute the number of 2-byte code units that this UTF-8 string would require - * in UTF-16LE format. - * - * This function does not validate the input. It is acceptable to pass invalid - * UTF-8 strings but in such cases the result is implementation defined. - * - * This function is not BOM-aware. - * - * @param input the UTF-8 string to process - * @param length the length of the string in bytes - * @return the number of char16_t code units required to encode the UTF-8 string - * as UTF-16LE - */ -simdutf_warn_unused size_t utf16_length_from_utf8(const char *input, - size_t length) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t utf16_length_from_utf8( - const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept { - return utf16_length_from_utf8( - reinterpret_cast(valid_utf8_input.data()), - valid_utf8_input.size()); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -/** - * Compute the number of 4-byte code units that this UTF-8 string would require - * in UTF-32 format. - * - * This function is equivalent to count_utf8 - * - * This function does not validate the input. It is acceptable to pass invalid - * UTF-8 strings but in such cases the result is implementation defined. - * - * This function is not BOM-aware. - * - * @param input the UTF-8 string to process - * @param length the length of the string in bytes - * @return the number of char32_t code units required to encode the UTF-8 string - * as UTF-32 - */ -simdutf_warn_unused size_t utf32_length_from_utf8(const char *input, - size_t length) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t utf32_length_from_utf8( - const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept { - return utf32_length_from_utf8( - reinterpret_cast(valid_utf8_input.data()), - valid_utf8_input.size()); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -/** - * Using native endianness, convert possibly broken UTF-16 string into UTF-8 - * string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-16 string to convert - * @param length the length of the string in 2-byte code units (char16_t) - * @param utf8_buffer the pointer to buffer that can hold conversion result - * @return number of written code units; 0 if input is not a valid UTF-16LE - * string - */ -simdutf_warn_unused size_t convert_utf16_to_utf8(const char16_t *input, - size_t length, - char *utf8_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_utf16_to_utf8( - std::span utf16_input, - detail::output_span_of_byte_like auto &&utf8_output) noexcept { - return convert_utf16_to_utf8(utf16_input.data(), utf16_input.size(), - reinterpret_cast(utf8_output.data())); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -/** - * Using native endianness, convert possibly broken UTF-16 string into Latin1 - * string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-16 string to convert - * @param length the length of the string in 2-byte code units (char16_t) - * @param latin1_buffer the pointer to buffer that can hold conversion result - * @return number of written code units; 0 if input is not a valid UTF-16 string - * or if it cannot be represented as Latin1 - */ -simdutf_warn_unused size_t convert_utf16_to_latin1( - const char16_t *input, size_t length, char *latin1_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_utf16_to_latin1( - std::span utf16_input, - detail::output_span_of_byte_like auto &&latin1_output) noexcept { - return convert_utf16_to_latin1( - utf16_input.data(), utf16_input.size(), - reinterpret_cast(latin1_output.data())); -} - #endif // SIMDUTF_SPAN - -/** - * Convert possibly broken UTF-16LE string into Latin1 string. - * If the string cannot be represented as Latin1, an error - * is returned. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-16LE string to convert - * @param length the length of the string in 2-byte code units (char16_t) - * @param latin1_buffer the pointer to buffer that can hold conversion result - * @return number of written code units; 0 if input is not a valid UTF-16LE - * string or if it cannot be represented as Latin1 - */ -simdutf_warn_unused size_t convert_utf16le_to_latin1( - const char16_t *input, size_t length, char *latin1_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_utf16le_to_latin1( - std::span utf16_input, - detail::output_span_of_byte_like auto &&latin1_output) noexcept { - return convert_utf16le_to_latin1( - utf16_input.data(), utf16_input.size(), - reinterpret_cast(latin1_output.data())); -} - #endif // SIMDUTF_SPAN - -/** - * Convert possibly broken UTF-16BE string into Latin1 string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-16BE string to convert - * @param length the length of the string in 2-byte code units (char16_t) - * @param latin1_buffer the pointer to buffer that can hold conversion result - * @return number of written code units; 0 if input is not a valid UTF-16BE - * string or if it cannot be represented as Latin1 - */ -simdutf_warn_unused size_t convert_utf16be_to_latin1( - const char16_t *input, size_t length, char *latin1_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_utf16be_to_latin1( - std::span utf16_input, - detail::output_span_of_byte_like auto &&latin1_output) noexcept { - return convert_utf16be_to_latin1( - utf16_input.data(), utf16_input.size(), - reinterpret_cast(latin1_output.data())); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -/** - * Convert possibly broken UTF-16LE string into UTF-8 string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-16LE string to convert - * @param length the length of the string in 2-byte code units (char16_t) - * @param utf8_buffer the pointer to buffer that can hold conversion result - * @return number of written code units; 0 if input is not a valid UTF-16LE - * string - */ -simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t *input, - size_t length, - char *utf8_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_utf16le_to_utf8( - std::span utf16_input, - detail::output_span_of_byte_like auto &&utf8_output) noexcept { - return convert_utf16le_to_utf8(utf16_input.data(), utf16_input.size(), - reinterpret_cast(utf8_output.data())); -} - #endif // SIMDUTF_SPAN - -/** - * Convert possibly broken UTF-16BE string into UTF-8 string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-16BE string to convert - * @param length the length of the string in 2-byte code units (char16_t) - * @param utf8_buffer the pointer to buffer that can hold conversion result - * @return number of written code units; 0 if input is not a valid UTF-16LE - * string - */ -simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t *input, - size_t length, - char *utf8_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_utf16be_to_utf8( - std::span utf16_input, - detail::output_span_of_byte_like auto &&utf8_output) noexcept { - return convert_utf16be_to_utf8(utf16_input.data(), utf16_input.size(), - reinterpret_cast(utf8_output.data())); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -/** - * Using native endianness, convert possibly broken UTF-16 string into Latin1 - * string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * This function is not BOM-aware. - * - * @param input the UTF-16 string to convert - * @param length the length of the string in 2-byte code units (char16_t) - * @param latin1_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of char written if - * successful. - */ -simdutf_warn_unused result convert_utf16_to_latin1_with_errors( - const char16_t *input, size_t length, char *latin1_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result -convert_utf16_to_latin1_with_errors( - std::span utf16_input, - detail::output_span_of_byte_like auto &&latin1_output) noexcept { - return convert_utf16_to_latin1_with_errors( - utf16_input.data(), utf16_input.size(), - reinterpret_cast(latin1_output.data())); -} - #endif // SIMDUTF_SPAN - -/** - * Convert possibly broken UTF-16LE string into Latin1 string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * This function is not BOM-aware. - * - * @param input the UTF-16LE string to convert - * @param length the length of the string in 2-byte code units (char16_t) - * @param latin1_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of char written if - * successful. - */ -simdutf_warn_unused result convert_utf16le_to_latin1_with_errors( - const char16_t *input, size_t length, char *latin1_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result -convert_utf16le_to_latin1_with_errors( - std::span utf16_input, - detail::output_span_of_byte_like auto &&latin1_output) noexcept { - return convert_utf16le_to_latin1_with_errors( - utf16_input.data(), utf16_input.size(), - reinterpret_cast(latin1_output.data())); -} - #endif // SIMDUTF_SPAN - -/** - * Convert possibly broken UTF-16BE string into Latin1 string. - * If the string cannot be represented as Latin1, an error - * is returned. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * This function is not BOM-aware. - * - * @param input the UTF-16BE string to convert - * @param length the length of the string in 2-byte code units (char16_t) - * @param latin1_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of char written if - * successful. - */ -simdutf_warn_unused result convert_utf16be_to_latin1_with_errors( - const char16_t *input, size_t length, char *latin1_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result -convert_utf16be_to_latin1_with_errors( - std::span utf16_input, - detail::output_span_of_byte_like auto &&latin1_output) noexcept { - return convert_utf16be_to_latin1_with_errors( - utf16_input.data(), utf16_input.size(), - reinterpret_cast(latin1_output.data())); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -/** - * Using native endianness, convert possibly broken UTF-16 string into UTF-8 - * string and stop on error. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-16 string to convert - * @param length the length of the string in 2-byte code units (char16_t) - * @param utf8_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of char written if - * successful. - */ -simdutf_warn_unused result convert_utf16_to_utf8_with_errors( - const char16_t *input, size_t length, char *utf8_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result -convert_utf16_to_utf8_with_errors( - std::span utf16_input, - detail::output_span_of_byte_like auto &&utf8_output) noexcept { - return convert_utf16_to_utf8_with_errors( - utf16_input.data(), utf16_input.size(), - reinterpret_cast(utf8_output.data())); -} - #endif // SIMDUTF_SPAN - -/** - * Convert possibly broken UTF-16LE string into UTF-8 string and stop on error. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-16LE string to convert - * @param length the length of the string in 2-byte code units (char16_t) - * @param utf8_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of char written if - * successful. - */ -simdutf_warn_unused result convert_utf16le_to_utf8_with_errors( - const char16_t *input, size_t length, char *utf8_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result -convert_utf16le_to_utf8_with_errors( - std::span utf16_input, - detail::output_span_of_byte_like auto &&utf8_output) noexcept { - return convert_utf16le_to_utf8_with_errors( - utf16_input.data(), utf16_input.size(), - reinterpret_cast(utf8_output.data())); -} - #endif // SIMDUTF_SPAN - -/** - * Convert possibly broken UTF-16BE string into UTF-8 string and stop on error. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-16BE string to convert - * @param length the length of the string in 2-byte code units (char16_t) - * @param utf8_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of char written if - * successful. - */ -simdutf_warn_unused result convert_utf16be_to_utf8_with_errors( - const char16_t *input, size_t length, char *utf8_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result -convert_utf16be_to_utf8_with_errors( - std::span utf16_input, - detail::output_span_of_byte_like auto &&utf8_output) noexcept { - return convert_utf16be_to_utf8_with_errors( - utf16_input.data(), utf16_input.size(), - reinterpret_cast(utf8_output.data())); -} - #endif // SIMDUTF_SPAN - -/** - * Using native endianness, convert valid UTF-16 string into UTF-8 string. - * - * This function assumes that the input string is valid UTF-16LE. - * - * This function is not BOM-aware. - * - * @param input the UTF-16 string to convert - * @param length the length of the string in 2-byte code units (char16_t) - * @param utf8_buffer the pointer to a buffer that can hold the conversion - * result - * @return number of written code units; 0 if conversion is not possible - */ -simdutf_warn_unused size_t convert_valid_utf16_to_utf8( - const char16_t *input, size_t length, char *utf8_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16_to_utf8( - std::span valid_utf16_input, - detail::output_span_of_byte_like auto &&utf8_output) noexcept { - return convert_valid_utf16_to_utf8( - valid_utf16_input.data(), valid_utf16_input.size(), - reinterpret_cast(utf8_output.data())); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -/** - * Using native endianness, convert UTF-16 string into Latin1 string. - * - * This function assumes that the input string is valid UTF-16 and that it can - * be represented as Latin1. If you violate this assumption, the result is - * implementation defined and may include system-dependent behavior such as - * crashes. - * - * This function is for expert users only and not part of our public API. Use - * convert_utf16_to_latin1 instead. The function may be removed from the library - * in the future. - * - * This function is not BOM-aware. - * - * @param input the UTF-16 string to convert - * @param length the length of the string in 2-byte code units (char16_t) - * @param latin1_buffer the pointer to buffer that can hold conversion result - * @return number of written code units; 0 if conversion is not possible - */ -simdutf_warn_unused size_t convert_valid_utf16_to_latin1( - const char16_t *input, size_t length, char *latin1_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16_to_latin1( - std::span valid_utf16_input, - detail::output_span_of_byte_like auto &&latin1_output) noexcept { - return convert_valid_utf16_to_latin1( - valid_utf16_input.data(), valid_utf16_input.size(), - reinterpret_cast(latin1_output.data())); -} - #endif // SIMDUTF_SPAN - -/** - * Convert valid UTF-16LE string into Latin1 string. - * - * This function assumes that the input string is valid UTF-16LE and that it can - * be represented as Latin1. If you violate this assumption, the result is - * implementation defined and may include system-dependent behavior such as - * crashes. - * - * This function is for expert users only and not part of our public API. Use - * convert_utf16le_to_latin1 instead. The function may be removed from the - * library in the future. - * - * This function is not BOM-aware. - * - * @param input the UTF-16LE string to convert - * @param length the length of the string in 2-byte code units (char16_t) - * @param latin1_buffer the pointer to buffer that can hold conversion result - * @return number of written code units; 0 if conversion is not possible - */ -simdutf_warn_unused size_t convert_valid_utf16le_to_latin1( - const char16_t *input, size_t length, char *latin1_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t -convert_valid_utf16le_to_latin1( - std::span valid_utf16_input, - detail::output_span_of_byte_like auto &&latin1_output) noexcept { - return convert_valid_utf16le_to_latin1( - valid_utf16_input.data(), valid_utf16_input.size(), - reinterpret_cast(latin1_output.data())); -} - #endif // SIMDUTF_SPAN - -/** - * Convert valid UTF-16BE string into Latin1 string. - * - * This function assumes that the input string is valid UTF-16BE and that it can - * be represented as Latin1. If you violate this assumption, the result is - * implementation defined and may include system-dependent behavior such as - * crashes. - * - * This function is for expert users only and not part of our public API. Use - * convert_utf16be_to_latin1 instead. The function may be removed from the - * library in the future. - * - * This function is not BOM-aware. - * - * @param input the UTF-16BE string to convert - * @param length the length of the string in 2-byte code units (char16_t) - * @param latin1_buffer the pointer to buffer that can hold conversion result - * @return number of written code units; 0 if conversion is not possible - */ -simdutf_warn_unused size_t convert_valid_utf16be_to_latin1( - const char16_t *input, size_t length, char *latin1_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t -convert_valid_utf16be_to_latin1( - std::span valid_utf16_input, - detail::output_span_of_byte_like auto &&latin1_output) noexcept { - return convert_valid_utf16be_to_latin1( - valid_utf16_input.data(), valid_utf16_input.size(), - reinterpret_cast(latin1_output.data())); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -/** - * Convert valid UTF-16LE string into UTF-8 string. - * - * This function assumes that the input string is valid UTF-16LE and that it can - * be represented as Latin1. - * - * This function is not BOM-aware. - * - * @param input the UTF-16LE string to convert - * @param length the length of the string in 2-byte code units (char16_t) - * @param utf8_buffer the pointer to a buffer that can hold the conversion - * result - * @return number of written code units; 0 if conversion is not possible - */ -simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( - const char16_t *input, size_t length, char *utf8_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16le_to_utf8( - std::span valid_utf16_input, - detail::output_span_of_byte_like auto &&utf8_output) noexcept { - return convert_valid_utf16le_to_utf8( - valid_utf16_input.data(), valid_utf16_input.size(), - reinterpret_cast(utf8_output.data())); -} - #endif // SIMDUTF_SPAN - -/** - * Convert valid UTF-16BE string into UTF-8 string. - * - * This function assumes that the input string is valid UTF-16BE. - * - * This function is not BOM-aware. - * - * @param input the UTF-16BE string to convert - * @param length the length of the string in 2-byte code units (char16_t) - * @param utf8_buffer the pointer to a buffer that can hold the conversion - * result - * @return number of written code units; 0 if conversion is not possible - */ -simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( - const char16_t *input, size_t length, char *utf8_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf16be_to_utf8( - std::span valid_utf16_input, - detail::output_span_of_byte_like auto &&utf8_output) noexcept { - return convert_valid_utf16be_to_utf8( - valid_utf16_input.data(), valid_utf16_input.size(), - reinterpret_cast(utf8_output.data())); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -/** - * Using native endianness, convert possibly broken UTF-16 string into UTF-32 - * string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-16 string to convert - * @param length the length of the string in 2-byte code units (char16_t) - * @param utf32_buffer the pointer to buffer that can hold conversion result - * @return number of written code units; 0 if input is not a valid UTF-16LE - * string - */ -simdutf_warn_unused size_t convert_utf16_to_utf32( - const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t -convert_utf16_to_utf32(std::span utf16_input, - std::span utf32_output) noexcept { - return convert_utf16_to_utf32(utf16_input.data(), utf16_input.size(), - utf32_output.data()); -} - #endif // SIMDUTF_SPAN - -/** - * Convert possibly broken UTF-16LE string into UTF-32 string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-16LE string to convert - * @param length the length of the string in 2-byte code units (char16_t) - * @param utf32_buffer the pointer to buffer that can hold conversion result - * @return number of written code units; 0 if input is not a valid UTF-16LE - * string - */ -simdutf_warn_unused size_t convert_utf16le_to_utf32( - const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t -convert_utf16le_to_utf32(std::span utf16_input, - std::span utf32_output) noexcept { - return convert_utf16le_to_utf32(utf16_input.data(), utf16_input.size(), - utf32_output.data()); -} - #endif // SIMDUTF_SPAN - -/** - * Convert possibly broken UTF-16BE string into UTF-32 string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-16BE string to convert - * @param length the length of the string in 2-byte code units (char16_t) - * @param utf32_buffer the pointer to buffer that can hold conversion result - * @return number of written code units; 0 if input is not a valid UTF-16LE - * string - */ -simdutf_warn_unused size_t convert_utf16be_to_utf32( - const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t -convert_utf16be_to_utf32(std::span utf16_input, - std::span utf32_output) noexcept { - return convert_utf16be_to_utf32(utf16_input.data(), utf16_input.size(), - utf32_output.data()); -} - #endif // SIMDUTF_SPAN - -/** - * Using native endianness, convert possibly broken UTF-16 string into - * UTF-32 string and stop on error. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-16 string to convert - * @param length the length of the string in 2-byte code units (char16_t) - * @param utf32_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of char32_t written if - * successful. - */ -simdutf_warn_unused result convert_utf16_to_utf32_with_errors( - const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result -convert_utf16_to_utf32_with_errors(std::span utf16_input, - std::span utf32_output) noexcept { - return convert_utf16_to_utf32_with_errors( - utf16_input.data(), utf16_input.size(), utf32_output.data()); -} - #endif // SIMDUTF_SPAN - -/** - * Convert possibly broken UTF-16LE string into UTF-32 string and stop on error. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-16LE string to convert - * @param length the length of the string in 2-byte code units (char16_t) - * @param utf32_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of char32_t written if - * successful. - */ -simdutf_warn_unused result convert_utf16le_to_utf32_with_errors( - const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result -convert_utf16le_to_utf32_with_errors( - std::span utf16_input, - std::span utf32_output) noexcept { - return convert_utf16le_to_utf32_with_errors( - utf16_input.data(), utf16_input.size(), utf32_output.data()); -} - #endif // SIMDUTF_SPAN - -/** - * Convert possibly broken UTF-16BE string into UTF-32 string and stop on error. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-16BE string to convert - * @param length the length of the string in 2-byte code units (char16_t) - * @param utf32_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of char32_t written if - * successful. - */ -simdutf_warn_unused result convert_utf16be_to_utf32_with_errors( - const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result -convert_utf16be_to_utf32_with_errors( - std::span utf16_input, - std::span utf32_output) noexcept { - return convert_utf16be_to_utf32_with_errors( - utf16_input.data(), utf16_input.size(), utf32_output.data()); -} - #endif // SIMDUTF_SPAN - -/** - * Using native endianness, convert valid UTF-16 string into UTF-32 string. - * - * This function assumes that the input string is valid UTF-16 (native - * endianness). - * - * This function is not BOM-aware. - * - * @param input the UTF-16 string to convert - * @param length the length of the string in 2-byte code units (char16_t) - * @param utf32_buffer the pointer to a buffer that can hold the conversion - * result - * @return number of written code units; 0 if conversion is not possible - */ -simdutf_warn_unused size_t convert_valid_utf16_to_utf32( - const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t -convert_valid_utf16_to_utf32(std::span valid_utf16_input, - std::span utf32_output) noexcept { - return convert_valid_utf16_to_utf32( - valid_utf16_input.data(), valid_utf16_input.size(), utf32_output.data()); -} - #endif // SIMDUTF_SPAN - -/** - * Convert valid UTF-16LE string into UTF-32 string. - * - * This function assumes that the input string is valid UTF-16LE. - * - * This function is not BOM-aware. - * - * @param input the UTF-16LE string to convert - * @param length the length of the string in 2-byte code units (char16_t) - * @param utf32_buffer the pointer to a buffer that can hold the conversion - * result - * @return number of written code units; 0 if conversion is not possible - */ -simdutf_warn_unused size_t convert_valid_utf16le_to_utf32( - const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t -convert_valid_utf16le_to_utf32(std::span valid_utf16_input, - std::span utf32_output) noexcept { - return convert_valid_utf16le_to_utf32( - valid_utf16_input.data(), valid_utf16_input.size(), utf32_output.data()); -} - #endif // SIMDUTF_SPAN - -/** - * Convert valid UTF-16BE string into UTF-32 string. - * - * This function assumes that the input string is valid UTF-16LE. - * - * This function is not BOM-aware. - * - * @param input the UTF-16BE string to convert - * @param length the length of the string in 2-byte code units (char16_t) - * @param utf32_buffer the pointer to a buffer that can hold the conversion - * result - * @return number of written code units; 0 if conversion is not possible - */ -simdutf_warn_unused size_t convert_valid_utf16be_to_utf32( - const char16_t *input, size_t length, char32_t *utf32_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t -convert_valid_utf16be_to_utf32(std::span valid_utf16_input, - std::span utf32_output) noexcept { - return convert_valid_utf16be_to_utf32( - valid_utf16_input.data(), valid_utf16_input.size(), utf32_output.data()); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 -/** - * Compute the number of bytes that this UTF-16LE/BE string would require in - * Latin1 format. - * - * This function does not validate the input. It is acceptable to pass invalid - * UTF-16 strings but in such cases the result is implementation defined. - * - * This function is not BOM-aware. - * - * @param length the length of the string in 2-byte code units (char16_t) - * @return the number of bytes required to encode the UTF-16LE string as Latin1 - */ -simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) noexcept; - -/** - * Using native endianness; Compute the number of bytes that this UTF-16 - * string would require in UTF-8 format. - * - * This function does not validate the input. It is acceptable to pass invalid - * UTF-16 strings but in such cases the result is implementation defined. - * - * @param input the UTF-16 string to convert - * @param length the length of the string in 2-byte code units (char16_t) - * @return the number of bytes required to encode the UTF-16LE string as UTF-8 - */ -simdutf_warn_unused size_t utf8_length_from_utf16(const char16_t *input, - size_t length) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t -utf8_length_from_utf16(std::span valid_utf16_input) noexcept { - return utf8_length_from_utf16(valid_utf16_input.data(), - valid_utf16_input.size()); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 -/** - * Compute the number of bytes that this UTF-16LE string would require in UTF-8 - * format. - * - * This function does not validate the input. It is acceptable to pass invalid - * UTF-16 strings but in such cases the result is implementation defined. - * - * @param input the UTF-16LE string to convert - * @param length the length of the string in 2-byte code units (char16_t) - * @return the number of bytes required to encode the UTF-16LE string as UTF-8 - */ -simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t *input, - size_t length) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t -utf8_length_from_utf16le(std::span valid_utf16_input) noexcept { - return utf8_length_from_utf16le(valid_utf16_input.data(), - valid_utf16_input.size()); -} - #endif // SIMDUTF_SPAN - -/** - * Compute the number of bytes that this UTF-16BE string would require in UTF-8 - * format. - * - * This function does not validate the input. It is acceptable to pass invalid - * UTF-16 strings but in such cases the result is implementation defined. - * - * @param input the UTF-16BE string to convert - * @param length the length of the string in 2-byte code units (char16_t) - * @return the number of bytes required to encode the UTF-16BE string as UTF-8 - */ -simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t *input, - size_t length) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t -utf8_length_from_utf16be(std::span valid_utf16_input) noexcept { - return utf8_length_from_utf16be(valid_utf16_input.data(), - valid_utf16_input.size()); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -/** - * Convert possibly broken UTF-32 string into UTF-8 string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte code units (char32_t) - * @param utf8_buffer the pointer to buffer that can hold conversion result - * @return number of written code units; 0 if input is not a valid UTF-32 string - */ -simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t *input, - size_t length, - char *utf8_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_utf32_to_utf8( - std::span utf32_input, - detail::output_span_of_byte_like auto &&utf8_output) noexcept { - return convert_utf32_to_utf8(utf32_input.data(), utf32_input.size(), - reinterpret_cast(utf8_output.data())); -} - #endif // SIMDUTF_SPAN - -/** - * Convert possibly broken UTF-32 string into UTF-8 string and stop on error. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte code units (char32_t) - * @param utf8_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of char written if - * successful. - */ -simdutf_warn_unused result convert_utf32_to_utf8_with_errors( - const char32_t *input, size_t length, char *utf8_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result -convert_utf32_to_utf8_with_errors( - std::span utf32_input, - detail::output_span_of_byte_like auto &&utf8_output) noexcept { - return convert_utf32_to_utf8_with_errors( - utf32_input.data(), utf32_input.size(), - reinterpret_cast(utf8_output.data())); -} - #endif // SIMDUTF_SPAN - -/** - * Convert valid UTF-32 string into UTF-8 string. - * - * This function assumes that the input string is valid UTF-32. - * - * This function is not BOM-aware. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte code units (char32_t) - * @param utf8_buffer the pointer to a buffer that can hold the conversion - * result - * @return number of written code units; 0 if conversion is not possible - */ -simdutf_warn_unused size_t convert_valid_utf32_to_utf8( - const char32_t *input, size_t length, char *utf8_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf32_to_utf8( - std::span valid_utf32_input, - detail::output_span_of_byte_like auto &&utf8_output) noexcept { - return convert_valid_utf32_to_utf8( - valid_utf32_input.data(), valid_utf32_input.size(), - reinterpret_cast(utf8_output.data())); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -/** - * Using native endianness, convert possibly broken UTF-32 string into a UTF-16 - * string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte code units (char32_t) - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return number of written code units; 0 if input is not a valid UTF-32 string - */ -simdutf_warn_unused size_t convert_utf32_to_utf16( - const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t -convert_utf32_to_utf16(std::span utf32_input, - std::span utf16_output) noexcept { - return convert_utf32_to_utf16(utf32_input.data(), utf32_input.size(), - utf16_output.data()); -} - #endif // SIMDUTF_SPAN - -/** - * Convert possibly broken UTF-32 string into UTF-16LE string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte code units (char32_t) - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return number of written code units; 0 if input is not a valid UTF-32 string - */ -simdutf_warn_unused size_t convert_utf32_to_utf16le( - const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t -convert_utf32_to_utf16le(std::span utf32_input, - std::span utf16_output) noexcept { - return convert_utf32_to_utf16le(utf32_input.data(), utf32_input.size(), - utf16_output.data()); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 -/** - * Convert possibly broken UTF-32 string into Latin1 string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte code units (char32_t) - * @param latin1_buffer the pointer to buffer that can hold conversion result - * @return number of written code units; 0 if input is not a valid UTF-32 string - * or if it cannot be represented as Latin1 - */ -simdutf_warn_unused size_t convert_utf32_to_latin1( - const char32_t *input, size_t length, char *latin1_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_utf32_to_latin1( - std::span utf32_input, - detail::output_span_of_byte_like auto &&latin1_output) noexcept { - return convert_utf32_to_latin1( - utf32_input.data(), utf32_input.size(), - reinterpret_cast(latin1_output.data())); -} - #endif // SIMDUTF_SPAN - -/** - * Convert possibly broken UTF-32 string into Latin1 string and stop on error. - * If the string cannot be represented as Latin1, an error is returned. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte code units (char32_t) - * @param latin1_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of char written if - * successful. - */ -simdutf_warn_unused result convert_utf32_to_latin1_with_errors( - const char32_t *input, size_t length, char *latin1_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result -convert_utf32_to_latin1_with_errors( - std::span utf32_input, - detail::output_span_of_byte_like auto &&latin1_output) noexcept { - return convert_utf32_to_latin1_with_errors( - utf32_input.data(), utf32_input.size(), - reinterpret_cast(latin1_output.data())); -} - #endif // SIMDUTF_SPAN - -/** - * Convert valid UTF-32 string into Latin1 string. - * - * This function assumes that the input string is valid UTF-32 and that it can - * be represented as Latin1. If you violate this assumption, the result is - * implementation defined and may include system-dependent behavior such as - * crashes. - * - * This function is for expert users only and not part of our public API. Use - * convert_utf32_to_latin1 instead. The function may be removed from the library - * in the future. - * - * This function is not BOM-aware. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte code units (char32_t) - * @param latin1_buffer the pointer to a buffer that can hold the conversion - * result - * @return number of written code units; 0 if conversion is not possible - */ -simdutf_warn_unused size_t convert_valid_utf32_to_latin1( - const char32_t *input, size_t length, char *latin1_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t convert_valid_utf32_to_latin1( - std::span valid_utf32_input, - detail::output_span_of_byte_like auto &&latin1_output) noexcept { - return convert_valid_utf32_to_latin1( - valid_utf32_input.data(), valid_utf32_input.size(), - reinterpret_cast(latin1_output.data())); -} - #endif // SIMDUTF_SPAN - -/** - * Compute the number of bytes that this UTF-32 string would require in Latin1 - * format. - * - * This function does not validate the input. It is acceptable to pass invalid - * UTF-32 strings but in such cases the result is implementation defined. - * - * This function is not BOM-aware. - * - * @param length the length of the string in 4-byte code units (char32_t) - * @return the number of bytes required to encode the UTF-32 string as Latin1 - */ -simdutf_warn_unused size_t latin1_length_from_utf32(size_t length) noexcept; - -/** - * Compute the number of bytes that this Latin1 string would require in UTF-32 - * format. - * - * @param length the length of the string in Latin1 code units (char) - * @return the length of the string in 4-byte code units (char32_t) required to - * encode the Latin1 string as UTF-32 - */ -simdutf_warn_unused size_t utf32_length_from_latin1(size_t length) noexcept; -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -/** - * Convert possibly broken UTF-32 string into UTF-16BE string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte code units (char32_t) - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return number of written code units; 0 if input is not a valid UTF-32 string - */ -simdutf_warn_unused size_t convert_utf32_to_utf16be( - const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t -convert_utf32_to_utf16be(std::span utf32_input, - std::span utf16_output) noexcept { - return convert_utf32_to_utf16be(utf32_input.data(), utf32_input.size(), - utf16_output.data()); -} - #endif // SIMDUTF_SPAN - -/** - * Using native endianness, convert possibly broken UTF-32 string into UTF-16 - * string and stop on error. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte code units (char32_t) - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of char16_t written if - * successful. - */ -simdutf_warn_unused result convert_utf32_to_utf16_with_errors( - const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result -convert_utf32_to_utf16_with_errors(std::span utf32_input, - std::span utf16_output) noexcept { - return convert_utf32_to_utf16_with_errors( - utf32_input.data(), utf32_input.size(), utf16_output.data()); -} - #endif // SIMDUTF_SPAN - -/** - * Convert possibly broken UTF-32 string into UTF-16LE string and stop on error. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte code units (char32_t) - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of char16_t written if - * successful. - */ -simdutf_warn_unused result convert_utf32_to_utf16le_with_errors( - const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result -convert_utf32_to_utf16le_with_errors( - std::span utf32_input, - std::span utf16_output) noexcept { - return convert_utf32_to_utf16le_with_errors( - utf32_input.data(), utf32_input.size(), utf16_output.data()); -} - #endif // SIMDUTF_SPAN - -/** - * Convert possibly broken UTF-32 string into UTF-16BE string and stop on error. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte code units (char32_t) - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of char16_t written if - * successful. - */ -simdutf_warn_unused result convert_utf32_to_utf16be_with_errors( - const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result -convert_utf32_to_utf16be_with_errors( - std::span utf32_input, - std::span utf16_output) noexcept { - return convert_utf32_to_utf16be_with_errors( - utf32_input.data(), utf32_input.size(), utf16_output.data()); -} - #endif // SIMDUTF_SPAN - -/** - * Using native endianness, convert valid UTF-32 string into a UTF-16 string. - * - * This function assumes that the input string is valid UTF-32. - * - * This function is not BOM-aware. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte code units (char32_t) - * @param utf16_buffer the pointer to a buffer that can hold the conversion - * result - * @return number of written code units; 0 if conversion is not possible - */ -simdutf_warn_unused size_t convert_valid_utf32_to_utf16( - const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t -convert_valid_utf32_to_utf16(std::span valid_utf32_input, - std::span utf16_output) noexcept { - return convert_valid_utf32_to_utf16( - valid_utf32_input.data(), valid_utf32_input.size(), utf16_output.data()); -} - #endif // SIMDUTF_SPAN - -/** - * Convert valid UTF-32 string into UTF-16LE string. - * - * This function assumes that the input string is valid UTF-32. - * - * This function is not BOM-aware. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte code units (char32_t) - * @param utf16_buffer the pointer to a buffer that can hold the conversion - * result - * @return number of written code units; 0 if conversion is not possible - */ -simdutf_warn_unused size_t convert_valid_utf32_to_utf16le( - const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t -convert_valid_utf32_to_utf16le(std::span valid_utf32_input, - std::span utf16_output) noexcept { - return convert_valid_utf32_to_utf16le( - valid_utf32_input.data(), valid_utf32_input.size(), utf16_output.data()); -} - #endif // SIMDUTF_SPAN - -/** - * Convert valid UTF-32 string into UTF-16BE string. - * - * This function assumes that the input string is valid UTF-32. - * - * This function is not BOM-aware. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte code units (char32_t) - * @param utf16_buffer the pointer to a buffer that can hold the conversion - * result - * @return number of written code units; 0 if conversion is not possible - */ -simdutf_warn_unused size_t convert_valid_utf32_to_utf16be( - const char32_t *input, size_t length, char16_t *utf16_buffer) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t -convert_valid_utf32_to_utf16be(std::span valid_utf32_input, - std::span utf16_output) noexcept { - return convert_valid_utf32_to_utf16be( - valid_utf32_input.data(), valid_utf32_input.size(), utf16_output.data()); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 -/** - * Change the endianness of the input. Can be used to go from UTF-16LE to - * UTF-16BE or from UTF-16BE to UTF-16LE. - * - * This function does not validate the input. - * - * This function is not BOM-aware. - * - * @param input the UTF-16 string to process - * @param length the length of the string in 2-byte code units (char16_t) - * @param output the pointer to a buffer that can hold the conversion - * result - */ -void change_endianness_utf16(const char16_t *input, size_t length, - char16_t *output) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline void -change_endianness_utf16(std::span utf16_input, - std::span utf16_output) noexcept { - return change_endianness_utf16(utf16_input.data(), utf16_input.size(), - utf16_output.data()); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 -/** - * Compute the number of bytes that this UTF-32 string would require in UTF-8 - * format. - * - * This function does not validate the input. It is acceptable to pass invalid - * UTF-32 strings but in such cases the result is implementation defined. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte code units (char32_t) - * @return the number of bytes required to encode the UTF-32 string as UTF-8 - */ -simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t *input, - size_t length) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t -utf8_length_from_utf32(std::span valid_utf32_input) noexcept { - return utf8_length_from_utf32(valid_utf32_input.data(), - valid_utf32_input.size()); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 -/** - * Compute the number of two-byte code units that this UTF-32 string would - * require in UTF-16 format. - * - * This function does not validate the input. It is acceptable to pass invalid - * UTF-32 strings but in such cases the result is implementation defined. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte code units (char32_t) - * @return the number of bytes required to encode the UTF-32 string as UTF-16 - */ -simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t *input, - size_t length) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t -utf16_length_from_utf32(std::span valid_utf32_input) noexcept { - return utf16_length_from_utf32(valid_utf32_input.data(), - valid_utf32_input.size()); -} - #endif // SIMDUTF_SPAN - -/** - * Using native endianness; Compute the number of bytes that this UTF-16 - * string would require in UTF-32 format. - * - * This function is equivalent to count_utf16. - * - * This function does not validate the input. It is acceptable to pass invalid - * UTF-16 strings but in such cases the result is implementation defined. - * - * This function is not BOM-aware. - * - * @param input the UTF-16 string to convert - * @param length the length of the string in 2-byte code units (char16_t) - * @return the number of bytes required to encode the UTF-16LE string as UTF-32 - */ -simdutf_warn_unused size_t utf32_length_from_utf16(const char16_t *input, - size_t length) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t -utf32_length_from_utf16(std::span valid_utf16_input) noexcept { - return utf32_length_from_utf16(valid_utf16_input.data(), - valid_utf16_input.size()); -} - #endif // SIMDUTF_SPAN - -/** - * Compute the number of bytes that this UTF-16LE string would require in UTF-32 - * format. - * - * This function is equivalent to count_utf16le. - * - * This function does not validate the input. It is acceptable to pass invalid - * UTF-16 strings but in such cases the result is implementation defined. - * - * This function is not BOM-aware. - * - * @param input the UTF-16LE string to convert - * @param length the length of the string in 2-byte code units (char16_t) - * @return the number of bytes required to encode the UTF-16LE string as UTF-32 - */ -simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t *input, - size_t length) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t utf32_length_from_utf16le( - std::span valid_utf16_input) noexcept { - return utf32_length_from_utf16le(valid_utf16_input.data(), - valid_utf16_input.size()); -} - #endif // SIMDUTF_SPAN - -/** - * Compute the number of bytes that this UTF-16BE string would require in UTF-32 - * format. - * - * This function is equivalent to count_utf16be. - * - * This function does not validate the input. It is acceptable to pass invalid - * UTF-16 strings but in such cases the result is implementation defined. - * - * This function is not BOM-aware. - * - * @param input the UTF-16BE string to convert - * @param length the length of the string in 2-byte code units (char16_t) - * @return the number of bytes required to encode the UTF-16BE string as UTF-32 - */ -simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t *input, - size_t length) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t utf32_length_from_utf16be( - std::span valid_utf16_input) noexcept { - return utf32_length_from_utf16be(valid_utf16_input.data(), - valid_utf16_input.size()); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 -/** - * Count the number of code points (characters) in the string assuming that - * it is valid. - * - * This function assumes that the input string is valid UTF-16 (native - * endianness). It is acceptable to pass invalid UTF-16 strings but in such - * cases the result is implementation defined. - * - * This function is not BOM-aware. - * - * @param input the UTF-16 string to process - * @param length the length of the string in 2-byte code units (char16_t) - * @return number of code points - */ -simdutf_warn_unused size_t count_utf16(const char16_t *input, - size_t length) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t -count_utf16(std::span valid_utf16_input) noexcept { - return count_utf16(valid_utf16_input.data(), valid_utf16_input.size()); -} - #endif // SIMDUTF_SPAN - -/** - * Count the number of code points (characters) in the string assuming that - * it is valid. - * - * This function assumes that the input string is valid UTF-16LE. - * It is acceptable to pass invalid UTF-16 strings but in such cases - * the result is implementation defined. - * - * This function is not BOM-aware. - * - * @param input the UTF-16LE string to process - * @param length the length of the string in 2-byte code units (char16_t) - * @return number of code points - */ -simdutf_warn_unused size_t count_utf16le(const char16_t *input, - size_t length) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t -count_utf16le(std::span valid_utf16_input) noexcept { - return count_utf16le(valid_utf16_input.data(), valid_utf16_input.size()); -} - #endif // SIMDUTF_SPAN - -/** - * Count the number of code points (characters) in the string assuming that - * it is valid. - * - * This function assumes that the input string is valid UTF-16BE. - * It is acceptable to pass invalid UTF-16 strings but in such cases - * the result is implementation defined. - * - * This function is not BOM-aware. - * - * @param input the UTF-16BE string to process - * @param length the length of the string in 2-byte code units (char16_t) - * @return number of code points - */ -simdutf_warn_unused size_t count_utf16be(const char16_t *input, - size_t length) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t -count_utf16be(std::span valid_utf16_input) noexcept { - return count_utf16be(valid_utf16_input.data(), valid_utf16_input.size()); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 -/** - * Count the number of code points (characters) in the string assuming that - * it is valid. - * - * This function assumes that the input string is valid UTF-8. - * It is acceptable to pass invalid UTF-8 strings but in such cases - * the result is implementation defined. - * - * @param input the UTF-8 string to process - * @param length the length of the string in bytes - * @return number of code points - */ -simdutf_warn_unused size_t count_utf8(const char *input, - size_t length) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t count_utf8( - const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept { - return count_utf8(reinterpret_cast(valid_utf8_input.data()), - valid_utf8_input.size()); -} - #endif // SIMDUTF_SPAN - -/** - * Given a valid UTF-8 string having a possibly truncated last character, - * this function checks the end of string. If the last character is truncated - * (or partial), then it returns a shorter length (shorter by 1 to 3 bytes) so - * that the short UTF-8 strings only contain complete characters. If there is no - * truncated character, the original length is returned. - * - * This function assumes that the input string is valid UTF-8, but possibly - * truncated. - * - * @param input the UTF-8 string to process - * @param length the length of the string in bytes - * @return the length of the string in bytes, possibly shorter by 1 to 3 bytes - */ -simdutf_warn_unused size_t trim_partial_utf8(const char *input, size_t length); - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t trim_partial_utf8( - const detail::input_span_of_byte_like auto &valid_utf8_input) noexcept { - return trim_partial_utf8( - reinterpret_cast(valid_utf8_input.data()), - valid_utf8_input.size()); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_UTF16 -/** - * Given a valid UTF-16BE string having a possibly truncated last character, - * this function checks the end of string. If the last character is truncated - * (or partial), then it returns a shorter length (shorter by 1 unit) so that - * the short UTF-16BE strings only contain complete characters. If there is no - * truncated character, the original length is returned. - * - * This function assumes that the input string is valid UTF-16BE, but possibly - * truncated. - * - * @param input the UTF-16BE string to process - * @param length the length of the string in bytes - * @return the length of the string in bytes, possibly shorter by 1 unit - */ -simdutf_warn_unused size_t trim_partial_utf16be(const char16_t *input, - size_t length); - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t -trim_partial_utf16be(std::span valid_utf16_input) noexcept { - return trim_partial_utf16be(valid_utf16_input.data(), - valid_utf16_input.size()); -} - #endif // SIMDUTF_SPAN - -/** - * Given a valid UTF-16LE string having a possibly truncated last character, - * this function checks the end of string. If the last character is truncated - * (or partial), then it returns a shorter length (shorter by 1 unit) so that - * the short UTF-16LE strings only contain complete characters. If there is no - * truncated character, the original length is returned. - * - * This function assumes that the input string is valid UTF-16LE, but possibly - * truncated. - * - * @param input the UTF-16LE string to process - * @param length the length of the string in bytes - * @return the length of the string in unit, possibly shorter by 1 unit - */ -simdutf_warn_unused size_t trim_partial_utf16le(const char16_t *input, - size_t length); - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t -trim_partial_utf16le(std::span valid_utf16_input) noexcept { - return trim_partial_utf16le(valid_utf16_input.data(), - valid_utf16_input.size()); -} - #endif // SIMDUTF_SPAN - -/** - * Given a valid UTF-16 string having a possibly truncated last character, - * this function checks the end of string. If the last character is truncated - * (or partial), then it returns a shorter length (shorter by 1 unit) so that - * the short UTF-16 strings only contain complete characters. If there is no - * truncated character, the original length is returned. - * - * This function assumes that the input string is valid UTF-16, but possibly - * truncated. We use the native endianness. - * - * @param input the UTF-16 string to process - * @param length the length of the string in bytes - * @return the length of the string in unit, possibly shorter by 1 unit - */ -simdutf_warn_unused size_t trim_partial_utf16(const char16_t *input, - size_t length); - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t -trim_partial_utf16(std::span valid_utf16_input) noexcept { - return trim_partial_utf16(valid_utf16_input.data(), valid_utf16_input.size()); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_BASE64 - #ifndef SIMDUTF_NEED_TRAILING_ZEROES - #define SIMDUTF_NEED_TRAILING_ZEROES 1 - #endif -// base64_options are used to specify the base64 encoding options. -// ASCII spaces are ' ', '\t', '\n', '\r', '\f' -// garbage characters are characters that are not part of the base64 alphabet -// nor ASCII spaces. -enum base64_options : uint64_t { - base64_default = 0, /* standard base64 format (with padding) */ - base64_url = 1, /* base64url format (no padding) */ - base64_reverse_padding = 2, /* modifier for base64_default and base64_url */ - base64_default_no_padding = - base64_default | - base64_reverse_padding, /* standard base64 format without padding */ - base64_url_with_padding = - base64_url | base64_reverse_padding, /* base64url with padding */ - base64_default_accept_garbage = - 4, /* standard base64 format accepting garbage characters */ - base64_url_accept_garbage = - 5, /* base64url format accepting garbage characters */ -}; - -// last_chunk_handling_options are used to specify the handling of the last -// chunk in base64 decoding. -// https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64 -enum last_chunk_handling_options : uint64_t { - loose = 0, /* standard base64 format, decode partial final chunk */ - strict = 1, /* error when the last chunk is partial, 2 or 3 chars, and - unpadded, or non-zero bit padding */ - stop_before_partial = - 2, /* if the last chunk is partial (2 or 3 chars), ignore it (no error) */ -}; - -/** - * Provide the maximal binary length in bytes given the base64 input. - * In general, if the input contains ASCII spaces, the result will be less than - * the maximum length. - * - * @param input the base64 input to process - * @param length the length of the base64 input in bytes - * @return maximum number of binary bytes - */ -simdutf_warn_unused size_t -maximal_binary_length_from_base64(const char *input, size_t length) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t -maximal_binary_length_from_base64( - const detail::input_span_of_byte_like auto &input) noexcept { - return maximal_binary_length_from_base64( - reinterpret_cast(input.data()), input.size()); -} - #endif // SIMDUTF_SPAN - -/** - * Provide the maximal binary length in bytes given the base64 input. - * In general, if the input contains ASCII spaces, the result will be less than - * the maximum length. - * - * @param input the base64 input to process, in ASCII stored as 16-bit - * units - * @param length the length of the base64 input in 16-bit units - * @return maximal number of binary bytes - */ -simdutf_warn_unused size_t maximal_binary_length_from_base64( - const char16_t *input, size_t length) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t -maximal_binary_length_from_base64(std::span input) noexcept { - return maximal_binary_length_from_base64(input.data(), input.size()); -} - #endif // SIMDUTF_SPAN - -/** - * Convert a base64 input to a binary output. - * - * This function follows the WHATWG forgiving-base64 format, which means that it - * will ignore any ASCII spaces in the input. You may provide a padded input - * (with one or two equal signs at the end) or an unpadded input (without any - * equal signs at the end). - * - * See https://infra.spec.whatwg.org/#forgiving-base64-decode - * - * This function will fail in case of invalid input. When last_chunk_options = - * loose, there are two possible reasons for failure: the input contains a - * number of base64 characters that when divided by 4, leaves a single remainder - * character (BASE64_INPUT_REMAINDER), or the input contains a character that is - * not a valid base64 character (INVALID_BASE64_CHARACTER). - * - * When the error is INVALID_BASE64_CHARACTER, r.count contains the index in the - * input where the invalid character was found. When the error is - * BASE64_INPUT_REMAINDER, then r.count contains the number of bytes decoded. - * - * The default option (simdutf::base64_default) expects the characters `+` and - * `/` as part of its alphabet. The URL option (simdutf::base64_url) expects the - * characters `-` and `_` as part of its alphabet. - * - * The padding (`=`) is validated if present. There may be at most two padding - * characters at the end of the input. If there are any padding characters, the - * total number of characters (excluding spaces but including padding - * characters) must be divisible by four. - * - * You should call this function with a buffer that is at least - * maximal_binary_length_from_base64(input, length) bytes long. If you fail to - * provide that much space, the function may cause a buffer overflow. - * - * Advanced users may want to taylor how the last chunk is handled. By default, - * we use a loose (forgiving) approach but we also support a strict approach - * as well as a stop_before_partial approach, as per the following proposal: - * - * https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64 - * - * @param input the base64 string to process - * @param length the length of the string in bytes - * @param output the pointer to a buffer that can hold the conversion - * result (should be at least maximal_binary_length_from_base64(input, length) - * bytes long). - * @param options the base64 options to use, usually base64_default or - * base64_url, and base64_default by default. - * @param last_chunk_options the last chunk handling options, - * last_chunk_handling_options::loose by default - * but can also be last_chunk_handling_options::strict or - * last_chunk_handling_options::stop_before_partial. - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in bytes) if any, or the number of bytes written if successful. - */ -simdutf_warn_unused result base64_to_binary( - const char *input, size_t length, char *output, - base64_options options = base64_default, - last_chunk_handling_options last_chunk_options = loose) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result base64_to_binary( - const detail::input_span_of_byte_like auto &input, - detail::output_span_of_byte_like auto &&binary_output, - base64_options options = base64_default, - last_chunk_handling_options last_chunk_options = loose) noexcept { - return base64_to_binary(reinterpret_cast(input.data()), - input.size(), - reinterpret_cast(binary_output.data()), - options, last_chunk_options); -} - #endif // SIMDUTF_SPAN - -/** - * Provide the base64 length in bytes given the length of a binary input. - * - * @param length the length of the input in bytes - * @return number of base64 bytes - */ -simdutf_warn_unused size_t base64_length_from_binary( - size_t length, base64_options options = base64_default) noexcept; - -/** - * Convert a binary input to a base64 output. - * - * The default option (simdutf::base64_default) uses the characters `+` and `/` - * as part of its alphabet. Further, it adds padding (`=`) at the end of the - * output to ensure that the output length is a multiple of four. - * - * The URL option (simdutf::base64_url) uses the characters `-` and `_` as part - * of its alphabet. No padding is added at the end of the output. - * - * This function always succeeds. - * - * @param input the binary to process - * @param length the length of the input in bytes - * @param output the pointer to a buffer that can hold the conversion - * result (should be at least base64_length_from_binary(length) bytes long) - * @param options the base64 options to use, can be base64_default or - * base64_url, is base64_default by default. - * @return number of written bytes, will be equal to - * base64_length_from_binary(length, options) - */ -size_t binary_to_base64(const char *input, size_t length, char *output, - base64_options options = base64_default) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t -binary_to_base64(const detail::input_span_of_byte_like auto &input, - detail::output_span_of_byte_like auto &&binary_output, - base64_options options = base64_default) noexcept { - return binary_to_base64( - reinterpret_cast(input.data()), input.size(), - reinterpret_cast(binary_output.data()), options); -} - #endif // SIMDUTF_SPAN - - #if SIMDUTF_ATOMIC_REF -/** - * Convert a binary input to a base64 output, using atomic accesses. - * This function comes with a potentially significant performance - * penalty, but it may be useful in some cases where the input and - * output buffers are shared between threads, to avoid undefined - * behavior in case of data races. - * - * The function is for advanced users. Its main use case is when - * to silence sanitizer warnings. We have no documented use case - * where this function is actually necessary in terms of practical correctness. - * - * This function is only available when simdutf is compiled with - * C++20 support and __cpp_lib_atomic_ref >= 201806L. You may check - * the availability of this function by checking the macro - * SIMDUTF_ATOMIC_REF. - * - * The default option (simdutf::base64_default) uses the characters `+` and `/` - * as part of its alphabet. Further, it adds padding (`=`) at the end of the - * output to ensure that the output length is a multiple of four. - * - * The URL option (simdutf::base64_url) uses the characters `-` and `_` as part - * of its alphabet. No padding is added at the end of the output. - * - * This function always succeeds. - * - * @brief atomic_binary_to_base64 - * @param input the binary to process - * @param length the length of the input in bytes - * @param output the pointer to a buffer that can hold the conversion - * result (should be at least base64_length_from_binary(length) bytes long) - * @param options the base64 options to use, can be base64_default or - * base64_url, is base64_default by default. - * @return number of written bytes, will be equal to - * base64_length_from_binary(length, options) - */ -size_t -atomic_binary_to_base64(const char *input, size_t length, char *output, - base64_options options = base64_default) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused size_t -atomic_binary_to_base64(const detail::input_span_of_byte_like auto &input, - detail::output_span_of_byte_like auto &&binary_output, - base64_options options = base64_default) noexcept { - return atomic_binary_to_base64( - reinterpret_cast(input.data()), input.size(), - reinterpret_cast(binary_output.data()), options); -} - #endif // SIMDUTF_SPAN - #endif // SIMDUTF_ATOMIC_REF - -/** - * Convert a base64 input to a binary output. - * - * This function follows the WHATWG forgiving-base64 format, which means that it - * will ignore any ASCII spaces in the input. You may provide a padded input - * (with one or two equal signs at the end) or an unpadded input (without any - * equal signs at the end). - * - * See https://infra.spec.whatwg.org/#forgiving-base64-decode - * - * This function will fail in case of invalid input. When last_chunk_options = - * loose, there are two possible reasons for failure: the input contains a - * number of base64 characters that when divided by 4, leaves a single remainder - * character (BASE64_INPUT_REMAINDER), or the input contains a character that is - * not a valid base64 character (INVALID_BASE64_CHARACTER). - * - * When the error is INVALID_BASE64_CHARACTER, r.count contains the index in the - * input where the invalid character was found. When the error is - * BASE64_INPUT_REMAINDER, then r.count contains the number of bytes decoded. - * - * The default option (simdutf::base64_default) expects the characters `+` and - * `/` as part of its alphabet. The URL option (simdutf::base64_url) expects the - * characters `-` and `_` as part of its alphabet. - * - * The padding (`=`) is validated if present. There may be at most two padding - * characters at the end of the input. If there are any padding characters, the - * total number of characters (excluding spaces but including padding - * characters) must be divisible by four. - * - * You should call this function with a buffer that is at least - * maximal_binary_length_from_base64(input, length) bytes long. If you fail - * to provide that much space, the function may cause a buffer overflow. - * - * Advanced users may want to taylor how the last chunk is handled. By default, - * we use a loose (forgiving) approach but we also support a strict approach - * as well as a stop_before_partial approach, as per the following proposal: - * - * https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64 - * - * @param input the base64 string to process, in ASCII stored as 16-bit - * units - * @param length the length of the string in 16-bit units - * @param output the pointer to a buffer that can hold the conversion - * result (should be at least maximal_binary_length_from_base64(input, length) - * bytes long). - * @param options the base64 options to use, can be base64_default or - * base64_url, is base64_default by default. - * @param last_chunk_options the last chunk handling options, - * last_chunk_handling_options::loose by default - * but can also be last_chunk_handling_options::strict or - * last_chunk_handling_options::stop_before_partial. - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and position of the - * INVALID_BASE64_CHARACTER error (in the input in units) if any, or the number - * of bytes written if successful. - */ -simdutf_warn_unused result -base64_to_binary(const char16_t *input, size_t length, char *output, - base64_options options = base64_default, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result base64_to_binary( - std::span input, - detail::output_span_of_byte_like auto &&binary_output, - base64_options options = base64_default, - last_chunk_handling_options last_chunk_options = loose) noexcept { - return base64_to_binary(input.data(), input.size(), - reinterpret_cast(binary_output.data()), - options, last_chunk_options); -} - #endif // SIMDUTF_SPAN - -/** - * Convert a base64 input to a binary output. - * - * This function follows the WHATWG forgiving-base64 format, which means that it - * will ignore any ASCII spaces in the input. You may provide a padded input - * (with one or two equal signs at the end) or an unpadded input (without any - * equal signs at the end). - * - * See https://infra.spec.whatwg.org/#forgiving-base64-decode - * - * This function will fail in case of invalid input. When last_chunk_options = - * loose, there are three possible reasons for failure: the input contains a - * number of base64 characters that when divided by 4, leaves a single remainder - * character (BASE64_INPUT_REMAINDER), the input contains a character that is - * not a valid base64 character (INVALID_BASE64_CHARACTER), or the output buffer - * is too small (OUTPUT_BUFFER_TOO_SMALL). - * - * When OUTPUT_BUFFER_TOO_SMALL, we return both the number of bytes written - * and the number of units processed, see description of the parameters and - * returned value. - * - * When the error is INVALID_BASE64_CHARACTER, r.count contains the index in the - * input where the invalid character was found. When the error is - * BASE64_INPUT_REMAINDER, then r.count contains the number of bytes decoded. - * - * The default option (simdutf::base64_default) expects the characters `+` and - * `/` as part of its alphabet. The URL option (simdutf::base64_url) expects the - * characters `-` and `_` as part of its alphabet. - * - * The padding (`=`) is validated if present. There may be at most two padding - * characters at the end of the input. If there are any padding characters, the - * total number of characters (excluding spaces but including padding - * characters) must be divisible by four. - * - * The INVALID_BASE64_CHARACTER cases are considered fatal and you are expected - * to discard the output. - * - * Advanced users may want to taylor how the last chunk is handled. By default, - * we use a loose (forgiving) approach but we also support a strict approach - * as well as a stop_before_partial approach, as per the following proposal: - * - * https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64 - * - * @param input the base64 string to process, in ASCII stored as 8-bit - * or 16-bit units - * @param length the length of the string in 8-bit or 16-bit units. - * @param output the pointer to a buffer that can hold the conversion - * result. - * @param outlen the number of bytes that can be written in the output - * buffer. Upon return, it is modified to reflect how many bytes were written. - * @param options the base64 options to use, can be base64_default or - * base64_url, is base64_default by default. - * @param last_chunk_options the last chunk handling options, - * last_chunk_handling_options::loose by default - * but can also be last_chunk_handling_options::strict or - * last_chunk_handling_options::stop_before_partial. - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and position of the - * INVALID_BASE64_CHARACTER error (in the input in units) if any, or the number - * of units processed if successful. - */ -simdutf_warn_unused result -base64_to_binary_safe(const char *input, size_t length, char *output, - size_t &outlen, base64_options options = base64_default, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result base64_to_binary_safe( - const detail::input_span_of_byte_like auto &input, - detail::output_span_of_byte_like auto &&binary_output, - base64_options options = base64_default, - last_chunk_handling_options last_chunk_options = loose) noexcept { - // we can't write the outlen to the provided output span, the user will have - // to pick it up from the returned value instead (assuming success). we still - // get the benefit of providing info of how long the output buffer is. - size_t outlen = binary_output.size(); - return base64_to_binary_safe(reinterpret_cast(input.data()), - input.size(), - reinterpret_cast(binary_output.data()), - outlen, options, last_chunk_options); -} - #endif // SIMDUTF_SPAN - -simdutf_warn_unused result -base64_to_binary_safe(const char16_t *input, size_t length, char *output, - size_t &outlen, base64_options options = base64_default, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) noexcept; - #if SIMDUTF_SPAN -simdutf_really_inline simdutf_warn_unused result base64_to_binary_safe( - std::span input, - detail::output_span_of_byte_like auto &&binary_output, - base64_options options = base64_default, - last_chunk_handling_options last_chunk_options = loose) noexcept { - // we can't write the outlen to the provided output span, the user will have - // to pick it up from the returned value instead (assuming success). we still - // get the benefit of providing info of how long the output buffer is. - size_t outlen = binary_output.size(); - return base64_to_binary_safe(input.data(), input.size(), - reinterpret_cast(binary_output.data()), - outlen, options, last_chunk_options); -} - #endif // SIMDUTF_SPAN -#endif // SIMDUTF_FEATURE_BASE64 - -/** - * An implementation of simdutf for a particular CPU architecture. - * - * Also used to maintain the currently active implementation. The active - * implementation is automatically initialized on first use to the most advanced - * implementation supported by the host. - */ -class implementation { -public: - /** - * The name of this implementation. - * - * const implementation *impl = simdutf::active_implementation; - * cout << "simdutf is optimized for " << impl->name() << "(" << - * impl->description() << ")" << endl; - * - * @return the name of the implementation, e.g. "haswell", "westmere", "arm64" - */ - virtual std::string name() const { return std::string(_name); } - - /** - * The description of this implementation. - * - * const implementation *impl = simdutf::active_implementation; - * cout << "simdutf is optimized for " << impl->name() << "(" << - * impl->description() << ")" << endl; - * - * @return the name of the implementation, e.g. "haswell", "westmere", "arm64" - */ - virtual std::string description() const { return std::string(_description); } - - /** - * The instruction sets this implementation is compiled against - * and the current CPU match. This function may poll the current CPU/system - * and should therefore not be called too often if performance is a concern. - * - * - * @return true if the implementation can be safely used on the current system - * (determined at runtime) - */ - bool supported_by_runtime_system() const; - -#if SIMDUTF_FEATURE_DETECT_ENCODING - /** - * This function will try to detect the encoding - * @param input the string to identify - * @param length the length of the string in bytes. - * @return the encoding type detected - */ - virtual encoding_type autodetect_encoding(const char *input, - size_t length) const noexcept; - - /** - * This function will try to detect the possible encodings in one pass - * @param input the string to identify - * @param length the length of the string in bytes. - * @return the encoding type detected - */ - virtual int detect_encodings(const char *input, - size_t length) const noexcept = 0; -#endif // SIMDUTF_FEATURE_DETECT_ENCODING - - /** - * @private For internal implementation use - * - * The instruction sets this implementation is compiled against. - * - * @return a mask of all required `internal::instruction_set::` values - */ - virtual uint32_t required_instruction_sets() const { - return _required_instruction_sets; - } - -#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING - /** - * Validate the UTF-8 string. - * - * Overridden by each implementation. - * - * @param buf the UTF-8 string to validate. - * @param len the length of the string in bytes. - * @return true if and only if the string is valid UTF-8. - */ - simdutf_warn_unused virtual bool validate_utf8(const char *buf, - size_t len) const noexcept = 0; -#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF8 - /** - * Validate the UTF-8 string and stop on errors. - * - * Overridden by each implementation. - * - * @param buf the UTF-8 string to validate. - * @param len the length of the string in bytes. - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of code units validated - * if successful. - */ - simdutf_warn_unused virtual result - validate_utf8_with_errors(const char *buf, size_t len) const noexcept = 0; -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_ASCII - /** - * Validate the ASCII string. - * - * Overridden by each implementation. - * - * @param buf the ASCII string to validate. - * @param len the length of the string in bytes. - * @return true if and only if the string is valid ASCII. - */ - simdutf_warn_unused virtual bool - validate_ascii(const char *buf, size_t len) const noexcept = 0; - - /** - * Validate the ASCII string and stop on error. - * - * Overridden by each implementation. - * - * @param buf the ASCII string to validate. - * @param len the length of the string in bytes. - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of code units validated - * if successful. - */ - simdutf_warn_unused virtual result - validate_ascii_with_errors(const char *buf, size_t len) const noexcept = 0; -#endif // SIMDUTF_FEATURE_ASCII - -#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING - /** - * Validate the UTF-16LE string.This function may be best when you expect - * the input to be almost always valid. Otherwise, consider using - * validate_utf16le_with_errors. - * - * Overridden by each implementation. - * - * This function is not BOM-aware. - * - * @param buf the UTF-16LE string to validate. - * @param len the length of the string in number of 2-byte code units - * (char16_t). - * @return true if and only if the string is valid UTF-16LE. - */ - simdutf_warn_unused virtual bool - validate_utf16le(const char16_t *buf, size_t len) const noexcept = 0; -#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF16 - /** - * Validate the UTF-16BE string. This function may be best when you expect - * the input to be almost always valid. Otherwise, consider using - * validate_utf16be_with_errors. - * - * Overridden by each implementation. - * - * This function is not BOM-aware. - * - * @param buf the UTF-16BE string to validate. - * @param len the length of the string in number of 2-byte code units - * (char16_t). - * @return true if and only if the string is valid UTF-16BE. - */ - simdutf_warn_unused virtual bool - validate_utf16be(const char16_t *buf, size_t len) const noexcept = 0; - - /** - * Validate the UTF-16LE string and stop on error. It might be faster than - * validate_utf16le when an error is expected to occur early. - * - * Overridden by each implementation. - * - * This function is not BOM-aware. - * - * @param buf the UTF-16LE string to validate. - * @param len the length of the string in number of 2-byte code units - * (char16_t). - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of code units validated - * if successful. - */ - simdutf_warn_unused virtual result - validate_utf16le_with_errors(const char16_t *buf, - size_t len) const noexcept = 0; - - /** - * Validate the UTF-16BE string and stop on error. It might be faster than - * validate_utf16be when an error is expected to occur early. - * - * Overridden by each implementation. - * - * This function is not BOM-aware. - * - * @param buf the UTF-16BE string to validate. - * @param len the length of the string in number of 2-byte code units - * (char16_t). - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of code units validated - * if successful. - */ - simdutf_warn_unused virtual result - validate_utf16be_with_errors(const char16_t *buf, - size_t len) const noexcept = 0; - /** - * Copies the UTF-16LE string while replacing mismatched surrogates with the - * Unicode replacement character U+FFFD. We allow the input and output to be - * the same buffer so that the correction is done in-place. - * - * Overridden by each implementation. - * - * @param input the UTF-16LE string to correct. - * @param len the length of the string in number of 2-byte code units - * (char16_t). - * @param output the output buffer. - */ - virtual void to_well_formed_utf16le(const char16_t *input, size_t len, - char16_t *output) const noexcept = 0; - /** - * Copies the UTF-16BE string while replacing mismatched surrogates with the - * Unicode replacement character U+FFFD. We allow the input and output to be - * the same buffer so that the correction is done in-place. - * - * Overridden by each implementation. - * - * @param input the UTF-16BE string to correct. - * @param len the length of the string in number of 2-byte code units - * (char16_t). - * @param output the output buffer. - */ - virtual void to_well_formed_utf16be(const char16_t *input, size_t len, - char16_t *output) const noexcept = 0; -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING - /** - * Validate the UTF-32 string. - * - * Overridden by each implementation. - * - * This function is not BOM-aware. - * - * @param buf the UTF-32 string to validate. - * @param len the length of the string in number of 4-byte code units - * (char32_t). - * @return true if and only if the string is valid UTF-32. - */ - simdutf_warn_unused virtual bool - validate_utf32(const char32_t *buf, size_t len) const noexcept = 0; -#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING - -#if SIMDUTF_FEATURE_UTF32 - /** - * Validate the UTF-32 string and stop on error. - * - * Overridden by each implementation. - * - * This function is not BOM-aware. - * - * @param buf the UTF-32 string to validate. - * @param len the length of the string in number of 4-byte code units - * (char32_t). - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of code units validated - * if successful. - */ - simdutf_warn_unused virtual result - validate_utf32_with_errors(const char32_t *buf, - size_t len) const noexcept = 0; -#endif // SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - /** - * Convert Latin1 string into UTF8 string. - * - * This function is suitable to work with inputs from untrusted sources. - * - * @param input the Latin1 string to convert - * @param length the length of the string in bytes - * @param utf8_output the pointer to buffer that can hold conversion result - * @return the number of written char; 0 if conversion is not possible - */ - simdutf_warn_unused virtual size_t - convert_latin1_to_utf8(const char *input, size_t length, - char *utf8_output) const noexcept = 0; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - /** - * Convert possibly Latin1 string into UTF-16LE string. - * - * This function is suitable to work with inputs from untrusted sources. - * - * @param input the Latin1 string to convert - * @param length the length of the string in bytes - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return the number of written char16_t; 0 if conversion is not possible - */ - simdutf_warn_unused virtual size_t - convert_latin1_to_utf16le(const char *input, size_t length, - char16_t *utf16_output) const noexcept = 0; - - /** - * Convert Latin1 string into UTF-16BE string. - * - * This function is suitable to work with inputs from untrusted sources. - * - * @param input the Latin1 string to convert - * @param length the length of the string in bytes - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return the number of written char16_t; 0 if conversion is not possible - */ - simdutf_warn_unused virtual size_t - convert_latin1_to_utf16be(const char *input, size_t length, - char16_t *utf16_output) const noexcept = 0; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - /** - * Convert Latin1 string into UTF-32 string. - * - * This function is suitable to work with inputs from untrusted sources. - * - * @param input the Latin1 string to convert - * @param length the length of the string in bytes - * @param utf32_buffer the pointer to buffer that can hold conversion result - * @return the number of written char32_t; 0 if conversion is not possible - */ - simdutf_warn_unused virtual size_t - convert_latin1_to_utf32(const char *input, size_t length, - char32_t *utf32_buffer) const noexcept = 0; -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - /** - * Convert possibly broken UTF-8 string into latin1 string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param latin1_output the pointer to buffer that can hold conversion result - * @return the number of written char; 0 if the input was not valid UTF-8 - * string or if it cannot be represented as Latin1 - */ - simdutf_warn_unused virtual size_t - convert_utf8_to_latin1(const char *input, size_t length, - char *latin1_output) const noexcept = 0; - - /** - * Convert possibly broken UTF-8 string into latin1 string with errors. - * If the string cannot be represented as Latin1, an error - * code is returned. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param latin1_output the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of code units validated - * if successful. - */ - simdutf_warn_unused virtual result - convert_utf8_to_latin1_with_errors(const char *input, size_t length, - char *latin1_output) const noexcept = 0; - - /** - * Convert valid UTF-8 string into latin1 string. - * - * This function assumes that the input string is valid UTF-8 and that it can - * be represented as Latin1. If you violate this assumption, the result is - * implementation defined and may include system-dependent behavior such as - * crashes. - * - * This function is for expert users only and not part of our public API. Use - * convert_utf8_to_latin1 instead. - * - * This function is not BOM-aware. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param latin1_output the pointer to buffer that can hold conversion result - * @return the number of written char; 0 if the input was not valid UTF-8 - * string - */ - simdutf_warn_unused virtual size_t - convert_valid_utf8_to_latin1(const char *input, size_t length, - char *latin1_output) const noexcept = 0; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - /** - * Convert possibly broken UTF-8 string into UTF-16LE string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return the number of written char16_t; 0 if the input was not valid UTF-8 - * string - */ - simdutf_warn_unused virtual size_t - convert_utf8_to_utf16le(const char *input, size_t length, - char16_t *utf16_output) const noexcept = 0; - - /** - * Convert possibly broken UTF-8 string into UTF-16BE string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return the number of written char16_t; 0 if the input was not valid UTF-8 - * string - */ - simdutf_warn_unused virtual size_t - convert_utf8_to_utf16be(const char *input, size_t length, - char16_t *utf16_output) const noexcept = 0; - - /** - * Convert possibly broken UTF-8 string into UTF-16LE string and stop on - * error. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of code units validated - * if successful. - */ - simdutf_warn_unused virtual result convert_utf8_to_utf16le_with_errors( - const char *input, size_t length, - char16_t *utf16_output) const noexcept = 0; - - /** - * Convert possibly broken UTF-8 string into UTF-16BE string and stop on - * error. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of code units validated - * if successful. - */ - simdutf_warn_unused virtual result convert_utf8_to_utf16be_with_errors( - const char *input, size_t length, - char16_t *utf16_output) const noexcept = 0; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - /** - * Convert possibly broken UTF-8 string into UTF-32 string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param utf32_buffer the pointer to buffer that can hold conversion result - * @return the number of written char16_t; 0 if the input was not valid UTF-8 - * string - */ - simdutf_warn_unused virtual size_t - convert_utf8_to_utf32(const char *input, size_t length, - char32_t *utf32_output) const noexcept = 0; - - /** - * Convert possibly broken UTF-8 string into UTF-32 string and stop on error. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param utf32_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of char32_t written if - * successful. - */ - simdutf_warn_unused virtual result - convert_utf8_to_utf32_with_errors(const char *input, size_t length, - char32_t *utf32_output) const noexcept = 0; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - /** - * Convert valid UTF-8 string into UTF-16LE string. - * - * This function assumes that the input string is valid UTF-8. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return the number of written char16_t - */ - simdutf_warn_unused virtual size_t - convert_valid_utf8_to_utf16le(const char *input, size_t length, - char16_t *utf16_buffer) const noexcept = 0; - - /** - * Convert valid UTF-8 string into UTF-16BE string. - * - * This function assumes that the input string is valid UTF-8. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return the number of written char16_t - */ - simdutf_warn_unused virtual size_t - convert_valid_utf8_to_utf16be(const char *input, size_t length, - char16_t *utf16_buffer) const noexcept = 0; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - /** - * Convert valid UTF-8 string into UTF-32 string. - * - * This function assumes that the input string is valid UTF-8. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in bytes - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return the number of written char32_t - */ - simdutf_warn_unused virtual size_t - convert_valid_utf8_to_utf32(const char *input, size_t length, - char32_t *utf32_buffer) const noexcept = 0; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - /** - * Compute the number of 2-byte code units that this UTF-8 string would - * require in UTF-16LE format. - * - * This function does not validate the input. It is acceptable to pass invalid - * UTF-8 strings but in such cases the result is implementation defined. - * - * @param input the UTF-8 string to process - * @param length the length of the string in bytes - * @return the number of char16_t code units required to encode the UTF-8 - * string as UTF-16LE - */ - simdutf_warn_unused virtual size_t - utf16_length_from_utf8(const char *input, size_t length) const noexcept = 0; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - /** - * Compute the number of 4-byte code units that this UTF-8 string would - * require in UTF-32 format. - * - * This function is equivalent to count_utf8. It is acceptable to pass invalid - * UTF-8 strings but in such cases the result is implementation defined. - * - * This function does not validate the input. - * - * @param input the UTF-8 string to process - * @param length the length of the string in bytes - * @return the number of char32_t code units required to encode the UTF-8 - * string as UTF-32 - */ - simdutf_warn_unused virtual size_t - utf32_length_from_utf8(const char *input, size_t length) const noexcept = 0; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - /** - * Convert possibly broken UTF-16LE string into Latin1 string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-16LE string to convert - * @param length the length of the string in 2-byte code units - * (char16_t) - * @param latin1_buffer the pointer to buffer that can hold conversion - * result - * @return number of written code units; 0 if input is not a valid UTF-16LE - * string or if it cannot be represented as Latin1 - */ - simdutf_warn_unused virtual size_t - convert_utf16le_to_latin1(const char16_t *input, size_t length, - char *latin1_buffer) const noexcept = 0; - - /** - * Convert possibly broken UTF-16BE string into Latin1 string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-16BE string to convert - * @param length the length of the string in 2-byte code units - * (char16_t) - * @param latin1_buffer the pointer to buffer that can hold conversion - * result - * @return number of written code units; 0 if input is not a valid UTF-16BE - * string or if it cannot be represented as Latin1 - */ - simdutf_warn_unused virtual size_t - convert_utf16be_to_latin1(const char16_t *input, size_t length, - char *latin1_buffer) const noexcept = 0; - - /** - * Convert possibly broken UTF-16LE string into Latin1 string. - * If the string cannot be represented as Latin1, an error - * is returned. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * This function is not BOM-aware. - * - * @param input the UTF-16LE string to convert - * @param length the length of the string in 2-byte code units - * (char16_t) - * @param latin1_buffer the pointer to buffer that can hold conversion - * result - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of char written if - * successful. - */ - simdutf_warn_unused virtual result - convert_utf16le_to_latin1_with_errors(const char16_t *input, size_t length, - char *latin1_buffer) const noexcept = 0; - - /** - * Convert possibly broken UTF-16BE string into Latin1 string. - * If the string cannot be represented as Latin1, an error - * is returned. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * This function is not BOM-aware. - * - * @param input the UTF-16BE string to convert - * @param length the length of the string in 2-byte code units - * (char16_t) - * @param latin1_buffer the pointer to buffer that can hold conversion - * result - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of char written if - * successful. - */ - simdutf_warn_unused virtual result - convert_utf16be_to_latin1_with_errors(const char16_t *input, size_t length, - char *latin1_buffer) const noexcept = 0; - - /** - * Convert valid UTF-16LE string into Latin1 string. - * - * This function assumes that the input string is valid UTF-L16LE and that it - * can be represented as Latin1. If you violate this assumption, the result is - * implementation defined and may include system-dependent behavior such as - * crashes. - * - * This function is for expert users only and not part of our public API. Use - * convert_utf16le_to_latin1 instead. - * - * This function is not BOM-aware. - * - * @param input the UTF-16LE string to convert - * @param length the length of the string in 2-byte code units - * (char16_t) - * @param latin1_buffer the pointer to buffer that can hold conversion - * result - * @return number of written code units; 0 if conversion is not possible - */ - simdutf_warn_unused virtual size_t - convert_valid_utf16le_to_latin1(const char16_t *input, size_t length, - char *latin1_buffer) const noexcept = 0; - - /** - * Convert valid UTF-16BE string into Latin1 string. - * - * This function assumes that the input string is valid UTF16-BE and that it - * can be represented as Latin1. If you violate this assumption, the result is - * implementation defined and may include system-dependent behavior such as - * crashes. - * - * This function is for expert users only and not part of our public API. Use - * convert_utf16be_to_latin1 instead. - * - * This function is not BOM-aware. - * - * @param input the UTF-16BE string to convert - * @param length the length of the string in 2-byte code units - * (char16_t) - * @param latin1_buffer the pointer to buffer that can hold conversion - * result - * @return number of written code units; 0 if conversion is not possible - */ - simdutf_warn_unused virtual size_t - convert_valid_utf16be_to_latin1(const char16_t *input, size_t length, - char *latin1_buffer) const noexcept = 0; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - /** - * Convert possibly broken UTF-16LE string into UTF-8 string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-16LE string to convert - * @param length the length of the string in 2-byte code units - * (char16_t) - * @param utf8_buffer the pointer to buffer that can hold conversion result - * @return number of written code units; 0 if input is not a valid UTF-16LE - * string - */ - simdutf_warn_unused virtual size_t - convert_utf16le_to_utf8(const char16_t *input, size_t length, - char *utf8_buffer) const noexcept = 0; - - /** - * Convert possibly broken UTF-16BE string into UTF-8 string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-16BE string to convert - * @param length the length of the string in 2-byte code units - * (char16_t) - * @param utf8_buffer the pointer to buffer that can hold conversion result - * @return number of written code units; 0 if input is not a valid UTF-16BE - * string - */ - simdutf_warn_unused virtual size_t - convert_utf16be_to_utf8(const char16_t *input, size_t length, - char *utf8_buffer) const noexcept = 0; - - /** - * Convert possibly broken UTF-16LE string into UTF-8 string and stop on - * error. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-16LE string to convert - * @param length the length of the string in 2-byte code units - * (char16_t) - * @param utf8_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of char written if - * successful. - */ - simdutf_warn_unused virtual result - convert_utf16le_to_utf8_with_errors(const char16_t *input, size_t length, - char *utf8_buffer) const noexcept = 0; - - /** - * Convert possibly broken UTF-16BE string into UTF-8 string and stop on - * error. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-16BE string to convert - * @param length the length of the string in 2-byte code units - * (char16_t) - * @param utf8_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of char written if - * successful. - */ - simdutf_warn_unused virtual result - convert_utf16be_to_utf8_with_errors(const char16_t *input, size_t length, - char *utf8_buffer) const noexcept = 0; - - /** - * Convert valid UTF-16LE string into UTF-8 string. - * - * This function assumes that the input string is valid UTF-16LE. - * - * This function is not BOM-aware. - * - * @param input the UTF-16LE string to convert - * @param length the length of the string in 2-byte code units - * (char16_t) - * @param utf8_buffer the pointer to a buffer that can hold the conversion - * result - * @return number of written code units; 0 if conversion is not possible - */ - simdutf_warn_unused virtual size_t - convert_valid_utf16le_to_utf8(const char16_t *input, size_t length, - char *utf8_buffer) const noexcept = 0; - - /** - * Convert valid UTF-16BE string into UTF-8 string. - * - * This function assumes that the input string is valid UTF-16BE. - * - * This function is not BOM-aware. - * - * @param input the UTF-16BE string to convert - * @param length the length of the string in 2-byte code units - * (char16_t) - * @param utf8_buffer the pointer to a buffer that can hold the conversion - * result - * @return number of written code units; 0 if conversion is not possible - */ - simdutf_warn_unused virtual size_t - convert_valid_utf16be_to_utf8(const char16_t *input, size_t length, - char *utf8_buffer) const noexcept = 0; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - /** - * Convert possibly broken UTF-16LE string into UTF-32 string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-16LE string to convert - * @param length the length of the string in 2-byte code units - * (char16_t) - * @param utf32_buffer the pointer to buffer that can hold conversion result - * @return number of written code units; 0 if input is not a valid UTF-16LE - * string - */ - simdutf_warn_unused virtual size_t - convert_utf16le_to_utf32(const char16_t *input, size_t length, - char32_t *utf32_buffer) const noexcept = 0; - - /** - * Convert possibly broken UTF-16BE string into UTF-32 string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-16BE string to convert - * @param length the length of the string in 2-byte code units - * (char16_t) - * @param utf32_buffer the pointer to buffer that can hold conversion result - * @return number of written code units; 0 if input is not a valid UTF-16BE - * string - */ - simdutf_warn_unused virtual size_t - convert_utf16be_to_utf32(const char16_t *input, size_t length, - char32_t *utf32_buffer) const noexcept = 0; - - /** - * Convert possibly broken UTF-16LE string into UTF-32 string and stop on - * error. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-16LE string to convert - * @param length the length of the string in 2-byte code units - * (char16_t) - * @param utf32_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of char32_t written if - * successful. - */ - simdutf_warn_unused virtual result convert_utf16le_to_utf32_with_errors( - const char16_t *input, size_t length, - char32_t *utf32_buffer) const noexcept = 0; - - /** - * Convert possibly broken UTF-16BE string into UTF-32 string and stop on - * error. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-16BE string to convert - * @param length the length of the string in 2-byte code units - * (char16_t) - * @param utf32_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of char32_t written if - * successful. - */ - simdutf_warn_unused virtual result convert_utf16be_to_utf32_with_errors( - const char16_t *input, size_t length, - char32_t *utf32_buffer) const noexcept = 0; - - /** - * Convert valid UTF-16LE string into UTF-32 string. - * - * This function assumes that the input string is valid UTF-16LE. - * - * This function is not BOM-aware. - * - * @param input the UTF-16LE string to convert - * @param length the length of the string in 2-byte code units - * (char16_t) - * @param utf32_buffer the pointer to a buffer that can hold the conversion - * result - * @return number of written code units; 0 if conversion is not possible - */ - simdutf_warn_unused virtual size_t - convert_valid_utf16le_to_utf32(const char16_t *input, size_t length, - char32_t *utf32_buffer) const noexcept = 0; - - /** - * Convert valid UTF-16LE string into UTF-32BE string. - * - * This function assumes that the input string is valid UTF-16BE. - * - * This function is not BOM-aware. - * - * @param input the UTF-16BE string to convert - * @param length the length of the string in 2-byte code units - * (char16_t) - * @param utf32_buffer the pointer to a buffer that can hold the conversion - * result - * @return number of written code units; 0 if conversion is not possible - */ - simdutf_warn_unused virtual size_t - convert_valid_utf16be_to_utf32(const char16_t *input, size_t length, - char32_t *utf32_buffer) const noexcept = 0; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - /** - * Compute the number of bytes that this UTF-16LE string would require in - * UTF-8 format. - * - * This function does not validate the input. It is acceptable to pass invalid - * UTF-16 strings but in such cases the result is implementation defined. - * - * This function is not BOM-aware. - * - * @param input the UTF-16LE string to convert - * @param length the length of the string in 2-byte code units - * (char16_t) - * @return the number of bytes required to encode the UTF-16LE string as UTF-8 - */ - simdutf_warn_unused virtual size_t - utf8_length_from_utf16le(const char16_t *input, - size_t length) const noexcept = 0; - - /** - * Compute the number of bytes that this UTF-16BE string would require in - * UTF-8 format. - * - * This function does not validate the input. It is acceptable to pass invalid - * UTF-16 strings but in such cases the result is implementation defined. - * - * This function is not BOM-aware. - * - * @param input the UTF-16BE string to convert - * @param length the length of the string in 2-byte code units - * (char16_t) - * @return the number of bytes required to encode the UTF-16BE string as UTF-8 - */ - simdutf_warn_unused virtual size_t - utf8_length_from_utf16be(const char16_t *input, - size_t length) const noexcept = 0; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - /** - * Convert possibly broken UTF-32 string into Latin1 string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte code units - * (char32_t) - * @param latin1_buffer the pointer to buffer that can hold conversion - * result - * @return number of written code units; 0 if input is not a valid UTF-32 - * string - */ - simdutf_warn_unused virtual size_t - convert_utf32_to_latin1(const char32_t *input, size_t length, - char *latin1_buffer) const noexcept = 0; -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - /** - * Convert possibly broken UTF-32 string into Latin1 string and stop on error. - * If the string cannot be represented as Latin1, an error is returned. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte code units - * (char32_t) - * @param latin1_buffer the pointer to buffer that can hold conversion - * result - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of char written if - * successful. - */ - simdutf_warn_unused virtual result - convert_utf32_to_latin1_with_errors(const char32_t *input, size_t length, - char *latin1_buffer) const noexcept = 0; - - /** - * Convert valid UTF-32 string into Latin1 string. - * - * This function assumes that the input string is valid UTF-32 and can be - * represented as Latin1. If you violate this assumption, the result is - * implementation defined and may include system-dependent behavior such as - * crashes. - * - * This function is for expert users only and not part of our public API. Use - * convert_utf32_to_latin1 instead. - * - * This function is not BOM-aware. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte code units - * (char32_t) - * @param latin1_buffer the pointer to a buffer that can hold the conversion - * result - * @return number of written code units; 0 if conversion is not possible - */ - simdutf_warn_unused virtual size_t - convert_valid_utf32_to_latin1(const char32_t *input, size_t length, - char *latin1_buffer) const noexcept = 0; -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - /** - * Convert possibly broken UTF-32 string into UTF-8 string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte code units - * (char32_t) - * @param utf8_buffer the pointer to buffer that can hold conversion result - * @return number of written code units; 0 if input is not a valid UTF-32 - * string - */ - simdutf_warn_unused virtual size_t - convert_utf32_to_utf8(const char32_t *input, size_t length, - char *utf8_buffer) const noexcept = 0; - - /** - * Convert possibly broken UTF-32 string into UTF-8 string and stop on error. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte code units - * (char32_t) - * @param utf8_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of char written if - * successful. - */ - simdutf_warn_unused virtual result - convert_utf32_to_utf8_with_errors(const char32_t *input, size_t length, - char *utf8_buffer) const noexcept = 0; - - /** - * Convert valid UTF-32 string into UTF-8 string. - * - * This function assumes that the input string is valid UTF-32. - * - * This function is not BOM-aware. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte code units - * (char32_t) - * @param utf8_buffer the pointer to a buffer that can hold the conversion - * result - * @return number of written code units; 0 if conversion is not possible - */ - simdutf_warn_unused virtual size_t - convert_valid_utf32_to_utf8(const char32_t *input, size_t length, - char *utf8_buffer) const noexcept = 0; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - /** - * Return the number of bytes that this UTF-16 string would require in Latin1 - * format. - * - * - * @param input the UTF-16 string to convert - * @param length the length of the string in 2-byte code units - * (char16_t) - * @return the number of bytes required to encode the UTF-16 string as Latin1 - */ - simdutf_warn_unused virtual size_t - utf16_length_from_latin1(size_t length) const noexcept { - return length; - } -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - /** - * Convert possibly broken UTF-32 string into UTF-16LE string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte code units - * (char32_t) - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return number of written code units; 0 if input is not a valid UTF-32 - * string - */ - simdutf_warn_unused virtual size_t - convert_utf32_to_utf16le(const char32_t *input, size_t length, - char16_t *utf16_buffer) const noexcept = 0; - - /** - * Convert possibly broken UTF-32 string into UTF-16BE string. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte code units - * (char32_t) - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return number of written code units; 0 if input is not a valid UTF-32 - * string - */ - simdutf_warn_unused virtual size_t - convert_utf32_to_utf16be(const char32_t *input, size_t length, - char16_t *utf16_buffer) const noexcept = 0; - - /** - * Convert possibly broken UTF-32 string into UTF-16LE string and stop on - * error. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte code units - * (char32_t) - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of char16_t written if - * successful. - */ - simdutf_warn_unused virtual result convert_utf32_to_utf16le_with_errors( - const char32_t *input, size_t length, - char16_t *utf16_buffer) const noexcept = 0; - - /** - * Convert possibly broken UTF-32 string into UTF-16BE string and stop on - * error. - * - * During the conversion also validation of the input string is done. - * This function is suitable to work with inputs from untrusted sources. - * - * This function is not BOM-aware. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte code units - * (char32_t) - * @param utf16_buffer the pointer to buffer that can hold conversion result - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in code units) if any, or the number of char16_t written if - * successful. - */ - simdutf_warn_unused virtual result convert_utf32_to_utf16be_with_errors( - const char32_t *input, size_t length, - char16_t *utf16_buffer) const noexcept = 0; - - /** - * Convert valid UTF-32 string into UTF-16LE string. - * - * This function assumes that the input string is valid UTF-32. - * - * This function is not BOM-aware. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte code units - * (char32_t) - * @param utf16_buffer the pointer to a buffer that can hold the conversion - * result - * @return number of written code units; 0 if conversion is not possible - */ - simdutf_warn_unused virtual size_t - convert_valid_utf32_to_utf16le(const char32_t *input, size_t length, - char16_t *utf16_buffer) const noexcept = 0; - - /** - * Convert valid UTF-32 string into UTF-16BE string. - * - * This function assumes that the input string is valid UTF-32. - * - * This function is not BOM-aware. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte code units - * (char32_t) - * @param utf16_buffer the pointer to a buffer that can hold the conversion - * result - * @return number of written code units; 0 if conversion is not possible - */ - simdutf_warn_unused virtual size_t - convert_valid_utf32_to_utf16be(const char32_t *input, size_t length, - char16_t *utf16_buffer) const noexcept = 0; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 - /** - * Change the endianness of the input. Can be used to go from UTF-16LE to - * UTF-16BE or from UTF-16BE to UTF-16LE. - * - * This function does not validate the input. - * - * This function is not BOM-aware. - * - * @param input the UTF-16 string to process - * @param length the length of the string in 2-byte code units - * (char16_t) - * @param output the pointer to a buffer that can hold the conversion - * result - */ - virtual void change_endianness_utf16(const char16_t *input, size_t length, - char16_t *output) const noexcept = 0; -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - /** - * Return the number of bytes that this Latin1 string would require in UTF-8 - * format. - * - * @param input the Latin1 string to convert - * @param length the length of the string bytes - * @return the number of bytes required to encode the Latin1 string as UTF-8 - */ - simdutf_warn_unused virtual size_t - utf8_length_from_latin1(const char *input, size_t length) const noexcept = 0; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - /** - * Compute the number of bytes that this UTF-32 string would require in UTF-8 - * format. - * - * This function does not validate the input. It is acceptable to pass invalid - * UTF-32 strings but in such cases the result is implementation defined. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte code units - * (char32_t) - * @return the number of bytes required to encode the UTF-32 string as UTF-8 - */ - simdutf_warn_unused virtual size_t - utf8_length_from_utf32(const char32_t *input, - size_t length) const noexcept = 0; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - /** - * Compute the number of bytes that this UTF-32 string would require in Latin1 - * format. - * - * This function does not validate the input. It is acceptable to pass invalid - * UTF-32 strings but in such cases the result is implementation defined. - * - * @param length the length of the string in 4-byte code units - * (char32_t) - * @return the number of bytes required to encode the UTF-32 string as Latin1 - */ - simdutf_warn_unused virtual size_t - latin1_length_from_utf32(size_t length) const noexcept { - return length; - } -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - /** - * Compute the number of bytes that this UTF-8 string would require in Latin1 - * format. - * - * This function does not validate the input. It is acceptable to pass invalid - * UTF-8 strings but in such cases the result is implementation defined. - * - * @param input the UTF-8 string to convert - * @param length the length of the string in byte - * @return the number of bytes required to encode the UTF-8 string as Latin1 - */ - simdutf_warn_unused virtual size_t - latin1_length_from_utf8(const char *input, size_t length) const noexcept = 0; -#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - /** - * Compute the number of bytes that this UTF-16LE/BE string would require in - * Latin1 format. - * - * This function does not validate the input. It is acceptable to pass invalid - * UTF-16 strings but in such cases the result is implementation defined. - * - * This function is not BOM-aware. - * - * @param input the UTF-16LE string to convert - * @param length the length of the string in 2-byte code units - * (char16_t) - * @return the number of bytes required to encode the UTF-16LE string as - * Latin1 - */ - simdutf_warn_unused virtual size_t - latin1_length_from_utf16(size_t length) const noexcept { - return length; - } -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - /** - * Compute the number of two-byte code units that this UTF-32 string would - * require in UTF-16 format. - * - * This function does not validate the input. It is acceptable to pass invalid - * UTF-32 strings but in such cases the result is implementation defined. - * - * @param input the UTF-32 string to convert - * @param length the length of the string in 4-byte code units - * (char32_t) - * @return the number of bytes required to encode the UTF-32 string as UTF-16 - */ - simdutf_warn_unused virtual size_t - utf16_length_from_utf32(const char32_t *input, - size_t length) const noexcept = 0; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - /** - * Return the number of bytes that this UTF-32 string would require in Latin1 - * format. - * - * @param length the length of the string in 4-byte code units - * (char32_t) - * @return the number of bytes required to encode the UTF-32 string as Latin1 - */ - simdutf_warn_unused virtual size_t - utf32_length_from_latin1(size_t length) const noexcept { - return length; - } -#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1 - -#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - /** - * Compute the number of bytes that this UTF-16LE string would require in - * UTF-32 format. - * - * This function is equivalent to count_utf16le. - * - * This function does not validate the input. It is acceptable to pass invalid - * UTF-16 strings but in such cases the result is implementation defined. - * - * This function is not BOM-aware. - * - * @param input the UTF-16LE string to convert - * @param length the length of the string in 2-byte code units - * (char16_t) - * @return the number of bytes required to encode the UTF-16LE string as - * UTF-32 - */ - simdutf_warn_unused virtual size_t - utf32_length_from_utf16le(const char16_t *input, - size_t length) const noexcept = 0; - - /** - * Compute the number of bytes that this UTF-16BE string would require in - * UTF-32 format. - * - * This function is equivalent to count_utf16be. - * - * This function does not validate the input. It is acceptable to pass invalid - * UTF-16 strings but in such cases the result is implementation defined. - * - * This function is not BOM-aware. - * - * @param input the UTF-16BE string to convert - * @param length the length of the string in 2-byte code units - * (char16_t) - * @return the number of bytes required to encode the UTF-16BE string as - * UTF-32 - */ - simdutf_warn_unused virtual size_t - utf32_length_from_utf16be(const char16_t *input, - size_t length) const noexcept = 0; -#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32 - -#if SIMDUTF_FEATURE_UTF16 - /** - * Count the number of code points (characters) in the string assuming that - * it is valid. - * - * This function assumes that the input string is valid UTF-16LE. - * It is acceptable to pass invalid UTF-16 strings but in such cases - * the result is implementation defined. - * - * This function is not BOM-aware. - * - * @param input the UTF-16LE string to process - * @param length the length of the string in 2-byte code units - * (char16_t) - * @return number of code points - */ - simdutf_warn_unused virtual size_t - count_utf16le(const char16_t *input, size_t length) const noexcept = 0; - - /** - * Count the number of code points (characters) in the string assuming that - * it is valid. - * - * This function assumes that the input string is valid UTF-16BE. - * It is acceptable to pass invalid UTF-16 strings but in such cases - * the result is implementation defined. - * - * This function is not BOM-aware. - * - * @param input the UTF-16BE string to process - * @param length the length of the string in 2-byte code units - * (char16_t) - * @return number of code points - */ - simdutf_warn_unused virtual size_t - count_utf16be(const char16_t *input, size_t length) const noexcept = 0; -#endif // SIMDUTF_FEATURE_UTF16 - -#if SIMDUTF_FEATURE_UTF8 - /** - * Count the number of code points (characters) in the string assuming that - * it is valid. - * - * This function assumes that the input string is valid UTF-8. - * It is acceptable to pass invalid UTF-8 strings but in such cases - * the result is implementation defined. - * - * @param input the UTF-8 string to process - * @param length the length of the string in bytes - * @return number of code points - */ - simdutf_warn_unused virtual size_t - count_utf8(const char *input, size_t length) const noexcept = 0; -#endif // SIMDUTF_FEATURE_UTF8 - -#if SIMDUTF_FEATURE_BASE64 - /** - * Provide the maximal binary length in bytes given the base64 input. - * In general, if the input contains ASCII spaces, the result will be less - * than the maximum length. It is acceptable to pass invalid base64 strings - * but in such cases the result is implementation defined. - * - * @param input the base64 input to process - * @param length the length of the base64 input in bytes - * @return maximal number of binary bytes - */ - simdutf_warn_unused size_t maximal_binary_length_from_base64( - const char *input, size_t length) const noexcept; - - /** - * Provide the maximal binary length in bytes given the base64 input. - * In general, if the input contains ASCII spaces, the result will be less - * than the maximum length. It is acceptable to pass invalid base64 strings - * but in such cases the result is implementation defined. - * - * @param input the base64 input to process, in ASCII stored as 16-bit - * units - * @param length the length of the base64 input in 16-bit units - * @return maximal number of binary bytes - */ - simdutf_warn_unused size_t maximal_binary_length_from_base64( - const char16_t *input, size_t length) const noexcept; - - /** - * Convert a base64 input to a binary output. - * - * This function follows the WHATWG forgiving-base64 format, which means that - * it will ignore any ASCII spaces in the input. You may provide a padded - * input (with one or two equal signs at the end) or an unpadded input - * (without any equal signs at the end). - * - * See https://infra.spec.whatwg.org/#forgiving-base64-decode - * - * This function will fail in case of invalid input. When last_chunk_options = - * loose, there are two possible reasons for failure: the input contains a - * number of base64 characters that when divided by 4, leaves a single - * remainder character (BASE64_INPUT_REMAINDER), or the input contains a - * character that is not a valid base64 character (INVALID_BASE64_CHARACTER). - * - * You should call this function with a buffer that is at least - * maximal_binary_length_from_base64(input, length) bytes long. If you fail to - * provide that much space, the function may cause a buffer overflow. - * - * @param input the base64 string to process - * @param length the length of the string in bytes - * @param output the pointer to a buffer that can hold the conversion - * result (should be at least maximal_binary_length_from_base64(input, length) - * bytes long). - * @param options the base64 options to use, can be base64_default or - * base64_url, is base64_default by default. - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and either position of the error - * (in the input in bytes) if any, or the number of bytes written if - * successful. - */ - simdutf_warn_unused virtual result - base64_to_binary(const char *input, size_t length, char *output, - base64_options options = base64_default, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept = 0; - - /** - * Convert a base64 input to a binary output while returning more details - * than base64_to_binary. - * - * This function follows the WHATWG forgiving-base64 format, which means that - * it will ignore any ASCII spaces in the input. You may provide a padded - * input (with one or two equal signs at the end) or an unpadded input - * (without any equal signs at the end). - * - * See https://infra.spec.whatwg.org/#forgiving-base64-decode - * - * This function will fail in case of invalid input. When last_chunk_options = - * loose, there are two possible reasons for failure: the input contains a - * number of base64 characters that when divided by 4, leaves a single - * remainder character (BASE64_INPUT_REMAINDER), or the input contains a - * character that is not a valid base64 character (INVALID_BASE64_CHARACTER). - * - * You should call this function with a buffer that is at least - * maximal_binary_length_from_base64(input, length) bytes long. If you fail to - * provide that much space, the function may cause a buffer overflow. - * - * @param input the base64 string to process - * @param length the length of the string in bytes - * @param output the pointer to a buffer that can hold the conversion - * result (should be at least maximal_binary_length_from_base64(input, length) - * bytes long). - * @param options the base64 options to use, can be base64_default or - * base64_url, is base64_default by default. - * @return a full_result pair struct (of type simdutf::result containing the - * three fields error, input_count and output_count). - */ - simdutf_warn_unused virtual full_result base64_to_binary_details( - const char *input, size_t length, char *output, - base64_options options = base64_default, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept = 0; - /** - * Convert a base64 input to a binary output. - * - * This function follows the WHATWG forgiving-base64 format, which means that - * it will ignore any ASCII spaces in the input. You may provide a padded - * input (with one or two equal signs at the end) or an unpadded input - * (without any equal signs at the end). - * - * See https://infra.spec.whatwg.org/#forgiving-base64-decode - * - * This function will fail in case of invalid input. When last_chunk_options = - * loose, there are two possible reasons for failure: the input contains a - * number of base64 characters that when divided by 4, leaves a single - * remainder character (BASE64_INPUT_REMAINDER), or the input contains a - * character that is not a valid base64 character (INVALID_BASE64_CHARACTER). - * - * You should call this function with a buffer that is at least - * maximal_binary_length_from_base64(input, length) bytes long. If you - * fail to provide that much space, the function may cause a buffer overflow. - * - * @param input the base64 string to process, in ASCII stored as - * 16-bit units - * @param length the length of the string in 16-bit units - * @param output the pointer to a buffer that can hold the conversion - * result (should be at least maximal_binary_length_from_base64(input, length) - * bytes long). - * @param options the base64 options to use, can be base64_default or - * base64_url, is base64_default by default. - * @return a result pair struct (of type simdutf::result containing the two - * fields error and count) with an error code and position of the - * INVALID_BASE64_CHARACTER error (in the input in units) if any, or the - * number of bytes written if successful. - */ - simdutf_warn_unused virtual result - base64_to_binary(const char16_t *input, size_t length, char *output, - base64_options options = base64_default, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept = 0; - - /** - * Convert a base64 input to a binary output while returning more details - * than base64_to_binary. - * - * This function follows the WHATWG forgiving-base64 format, which means that - * it will ignore any ASCII spaces in the input. You may provide a padded - * input (with one or two equal signs at the end) or an unpadded input - * (without any equal signs at the end). - * - * See https://infra.spec.whatwg.org/#forgiving-base64-decode - * - * This function will fail in case of invalid input. When last_chunk_options = - * loose, there are two possible reasons for failure: the input contains a - * number of base64 characters that when divided by 4, leaves a single - * remainder character (BASE64_INPUT_REMAINDER), or the input contains a - * character that is not a valid base64 character (INVALID_BASE64_CHARACTER). - * - * You should call this function with a buffer that is at least - * maximal_binary_length_from_base64(input, length) bytes long. If you fail to - * provide that much space, the function may cause a buffer overflow. - * - * @param input the base64 string to process - * @param length the length of the string in bytes - * @param output the pointer to a buffer that can hold the conversion - * result (should be at least maximal_binary_length_from_base64(input, length) - * bytes long). - * @param options the base64 options to use, can be base64_default or - * base64_url, is base64_default by default. - * @return a full_result pair struct (of type simdutf::result containing the - * three fields error, input_count and output_count). - */ - simdutf_warn_unused virtual full_result base64_to_binary_details( - const char16_t *input, size_t length, char *output, - base64_options options = base64_default, - last_chunk_handling_options last_chunk_options = - last_chunk_handling_options::loose) const noexcept = 0; - /** - * Provide the base64 length in bytes given the length of a binary input. - * - * @param length the length of the input in bytes - * @parem options the base64 options to use, can be base64_default or - * base64_url, is base64_default by default. - * @return number of base64 bytes - */ - simdutf_warn_unused size_t base64_length_from_binary( - size_t length, base64_options options = base64_default) const noexcept; - - /** - * Convert a binary input to a base64 output. - * - * The default option (simdutf::base64_default) uses the characters `+` and - * `/` as part of its alphabet. Further, it adds padding (`=`) at the end of - * the output to ensure that the output length is a multiple of four. - * - * The URL option (simdutf::base64_url) uses the characters `-` and `_` as - * part of its alphabet. No padding is added at the end of the output. - * - * This function always succeeds. - * - * @param input the binary to process - * @param length the length of the input in bytes - * @param output the pointer to a buffer that can hold the conversion - * result (should be at least base64_length_from_binary(length) bytes long) - * @param options the base64 options to use, can be base64_default or - * base64_url, is base64_default by default. - * @return number of written bytes, will be equal to - * base64_length_from_binary(length, options) - */ - virtual size_t - binary_to_base64(const char *input, size_t length, char *output, - base64_options options = base64_default) const noexcept = 0; -#endif // SIMDUTF_FEATURE_BASE64 - -#ifdef SIMDUTF_INTERNAL_TESTS - // This method is exported only in developer mode, its purpose - // is to expose some internal test procedures from the given - // implementation and then use them through our standard test - // framework. - // - // Regular users should not use it, the tests of the public - // API are enough. - - struct TestProcedure { - // display name - std::string name; - - // procedure should return whether given test pass or not - void (*procedure)(const implementation &); - }; - - virtual std::vector internal_tests() const; -#endif - -protected: - /** @private Construct an implementation with the given name and description. - * For subclasses. */ - simdutf_really_inline implementation(const char *name, - const char *description, - uint32_t required_instruction_sets) - : _name(name), _description(description), - _required_instruction_sets(required_instruction_sets) {} - -protected: - ~implementation() = default; - -private: - /** - * The name of this implementation. - */ - const char *_name; - - /** - * The description of this implementation. - */ - const char *_description; - - /** - * Instruction sets required for this implementation. - */ - const uint32_t _required_instruction_sets; -}; - -/** @private */ -namespace internal { - -/** - * The list of available implementations compiled into simdutf. - */ -class available_implementation_list { -public: - /** Get the list of available implementations compiled into simdutf */ - simdutf_really_inline available_implementation_list() {} - /** Number of implementations */ - size_t size() const noexcept; - /** STL const begin() iterator */ - const implementation *const *begin() const noexcept; - /** STL const end() iterator */ - const implementation *const *end() const noexcept; - - /** - * Get the implementation with the given name. - * - * Case sensitive. - * - * const implementation *impl = - * simdutf::available_implementations["westmere"]; if (!impl) { exit(1); } if - * (!imp->supported_by_runtime_system()) { exit(1); } - * simdutf::active_implementation = impl; - * - * @param name the implementation to find, e.g. "westmere", "haswell", "arm64" - * @return the implementation, or nullptr if the parse failed. - */ - const implementation *operator[](const std::string &name) const noexcept { - for (const implementation *impl : *this) { - if (impl->name() == name) { - return impl; - } - } - return nullptr; - } - - /** - * Detect the most advanced implementation supported by the current host. - * - * This is used to initialize the implementation on startup. - * - * const implementation *impl = - * simdutf::available_implementation::detect_best_supported(); - * simdutf::active_implementation = impl; - * - * @return the most advanced supported implementation for the current host, or - * an implementation that returns UNSUPPORTED_ARCHITECTURE if there is no - * supported implementation. Will never return nullptr. - */ - const implementation *detect_best_supported() const noexcept; -}; - -template class atomic_ptr { -public: - atomic_ptr(T *_ptr) : ptr{_ptr} {} - -#if defined(SIMDUTF_NO_THREADS) - operator const T *() const { return ptr; } - const T &operator*() const { return *ptr; } - const T *operator->() const { return ptr; } - - operator T *() { return ptr; } - T &operator*() { return *ptr; } - T *operator->() { return ptr; } - atomic_ptr &operator=(T *_ptr) { - ptr = _ptr; - return *this; - } - -#else - operator const T *() const { return ptr.load(); } - const T &operator*() const { return *ptr; } - const T *operator->() const { return ptr.load(); } - - operator T *() { return ptr.load(); } - T &operator*() { return *ptr; } - T *operator->() { return ptr.load(); } - atomic_ptr &operator=(T *_ptr) { - ptr = _ptr; - return *this; - } - -#endif - -private: -#if defined(SIMDUTF_NO_THREADS) - T *ptr; -#else - std::atomic ptr; -#endif -}; - -class detect_best_supported_implementation_on_first_use; - -} // namespace internal - -/** - * The list of available implementations compiled into simdutf. - */ -extern SIMDUTF_DLLIMPORTEXPORT const internal::available_implementation_list & -get_available_implementations(); - -/** - * The active implementation. - * - * Automatically initialized on first use to the most advanced implementation - * supported by this hardware. - */ -extern SIMDUTF_DLLIMPORTEXPORT internal::atomic_ptr & -get_active_implementation(); - -} // namespace simdutf - -#endif // SIMDUTF_IMPLEMENTATION_H -/* end file include/simdutf/implementation.h */ - -// Implementation-internal files (must be included before the implementations -// themselves, to keep amalgamation working--otherwise, the first time a file is -// included, it might be put inside the #ifdef -// SIMDUTF_IMPLEMENTATION_ARM64/FALLBACK/etc., which means the other -// implementations can't compile unless that implementation is turned on). - -SIMDUTF_POP_DISABLE_WARNINGS - -#endif // SIMDUTF_H -/* end file include/simdutf.h */ diff --git a/deps/simdutf/unofficial.gni b/deps/simdutf/unofficial.gni deleted file mode 100644 index 544466c5344..00000000000 --- a/deps/simdutf/unofficial.gni +++ /dev/null @@ -1,32 +0,0 @@ -# This file is used by GN for building, which is NOT the build system used for -# building official binaries. -# Please edit the gyp files if you are making changes to build system. - -# The actual configurations are put inside a template in unofficial.gni to -# prevent accidental edits from contributors. -template("simdutf_gn_build") { - config("simdutf_config") { - include_dirs = [ "." ] - } - - gypi_values = exec_script("../../tools/gypi_to_gn.py", - [ rebase_path("simdutf.gyp") ], - "scope", - [ "simdutf.gyp" ]) - - source_set(target_name) { - forward_variables_from(invoker, "*") - public_configs = [ ":simdutf_config" ] - sources = gypi_values.simdutf_sources - if (is_clang || !is_win) { - cflags_cc = [ - "-Wno-#pragma-messages", - "-Wno-ambiguous-reversed-operator", - "-Wno-unreachable-code-break", - "-Wno-unused-const-variable", - "-Wno-unused-function", - "-Wno-c++98-compat-extra-semi", - ] - } - } -} diff --git a/doc/contributing/maintaining/maintaining-dependencies.md b/doc/contributing/maintaining/maintaining-dependencies.md index ba709c9a894..7cc3e483d26 100644 --- a/doc/contributing/maintaining/maintaining-dependencies.md +++ b/doc/contributing/maintaining/maintaining-dependencies.md @@ -30,7 +30,6 @@ This a list of all the dependencies: * [openssl][] * [postject][] * [simdjson][] -* [simdutf][] * [sqlite][] * [undici][] * [uvwasi][] @@ -364,11 +363,6 @@ The [postject](https://github.com/nodejs/postject) dependency is used for the The [simdjson](https://github.com/simdjson/simdjson) dependency is a C++ library for fast JSON parsing. -### simdutf - -The [simdutf](https://github.com/simdutf/simdutf) dependency is -a C++ library for fast UTF-8 decoding and encoding. - ### sqlite The [sqlite](https://github.com/sqlite/sqlite) dependency is @@ -435,7 +429,6 @@ according to [RFC 8878](https://datatracker.ietf.org/doc/html/rfc8878). [openssl]: #openssl [postject]: #postject [simdjson]: #simdjson -[simdutf]: #simdutf [sqlite]: #sqlite [undici]: #undici [update-openssl-action]: ../../../.github/workflows/update-openssl.yml diff --git a/tools/dep_updaters/README.md b/tools/dep_updaters/README.md index 32f2d5a628a..ba7a47d8a50 100644 --- a/tools/dep_updaters/README.md +++ b/tools/dep_updaters/README.md @@ -36,31 +36,6 @@ been created with the changes), do the following: [`c61870c`]: https://github.com/nodejs/node/commit/c61870c376e2f5b0dbaa939972c46745e21cdbdd -## simdutf - -The `update-simdutf.sh` script takes the target version to update as its only -argument, downloads it from the [GitHub repo](https://github.com/simdutf/simdutf) -and uses it to replace the contents of `deps/simdutf/`. The contents are replaced -entirely except for the `*.gyp` and `*.gypi` build files, which are part of the -Node.js build definitions and are not present in the upstream repo. - -For example, in order to update to version `2.0.7`, the following command can -be run: - -```bash -./tools/dep_updaters/update-simdutf.sh 2.0.7 -``` - -Once the script has run (either manually, or by CI in which case a PR will have -been created with the changes), do the following: - -1. Check the [changelog](https://github.com/simdutf/simdutf/releases/tag/v2.0.7) for - things that might require changes in Node.js. -2. If necessary, update `simdutf.gyp` with build-related changes. -3. Check that Node.js compiles without errors and the tests pass. -4. Create a commit for the update and in the commit message include the - important/relevant items from the changelog. - ## OpenSSL The `update-openssl.sh` script automates the steps described in diff --git a/tools/dep_updaters/update-simdutf.sh b/tools/dep_updaters/update-simdutf.sh deleted file mode 100755 index bb4ea276548..00000000000 --- a/tools/dep_updaters/update-simdutf.sh +++ /dev/null @@ -1,65 +0,0 @@ -#!/bin/sh -set -e -# Shell script to update simdutf in the source tree to a specific version - -BASE_DIR=$(cd "$(dirname "$0")/../.." && pwd) -DEPS_DIR="$BASE_DIR/deps" -[ -z "$NODE" ] && NODE="$BASE_DIR/out/Release/node" -[ -x "$NODE" ] || NODE=$(command -v node) - -# shellcheck disable=SC1091 -. "$BASE_DIR/tools/dep_updaters/utils.sh" - -NEW_VERSION="$("$NODE" --input-type=module <<'EOF' -const res = await fetch('https://api.github.com/repos/simdutf/simdutf/releases/latest', - process.env.GITHUB_TOKEN && { - headers: { - "Authorization": `Bearer ${process.env.GITHUB_TOKEN}` - }, - }); -if (!res.ok) throw new Error(`FetchError: ${res.status} ${res.statusText}`, { cause: res }); -const { tag_name } = await res.json(); -console.log(tag_name.replace('v', '')); -EOF -)" -CURRENT_VERSION=$(grep "#define SIMDUTF_VERSION" "$DEPS_DIR/simdutf/simdutf.h" | sed -n "s/^.*VERSION \"\(.*\)\"/\1/p") - -# This function exit with 0 if new version and current version are the same -compare_dependency_version "simdutf" "$NEW_VERSION" "$CURRENT_VERSION" - -echo "Making temporary workspace..." - -WORKSPACE=$(mktemp -d 2> /dev/null || mktemp -d -t 'tmp') - -cleanup () { - EXIT_CODE=$? - [ -d "$WORKSPACE" ] && rm -rf "$WORKSPACE" - exit $EXIT_CODE -} - -trap cleanup INT TERM EXIT - -SIMDUTF_REF="v$NEW_VERSION" -SIMDUTF_ZIP="simdutf-$SIMDUTF_REF.zip" -SIMDUTF_LICENSE="LICENSE-MIT" - -cd "$WORKSPACE" - -echo "Fetching simdutf source archive..." -curl -sL -o "$SIMDUTF_ZIP" "https://github.com/simdutf/simdutf/releases/download/$SIMDUTF_REF/singleheader.zip" -log_and_verify_sha256sum "simdutf" "$SIMDUTF_ZIP" -unzip "$SIMDUTF_ZIP" -rm "$SIMDUTF_ZIP" -rm ./*_demo.cpp - -curl -sL -o "$SIMDUTF_LICENSE" "https://raw.githubusercontent.com/simdutf/simdutf/HEAD/LICENSE-MIT" - -echo "Replacing existing simdutf (except GYP and GN build files)" -mv "$DEPS_DIR/simdutf/"*.gyp "$DEPS_DIR/simdutf/"*.gn "$DEPS_DIR/simdutf/"*.gni "$DEPS_DIR/simdutf/README.md" "$WORKSPACE/" -rm -rf "$DEPS_DIR/simdutf" -mv "$WORKSPACE" "$DEPS_DIR/simdutf" - -# Update the version number on maintaining-dependencies.md -# and print the new version as the last line of the script as we need -# to add it to $GITHUB_ENV variable -finalize_version_update "simdutf" "$NEW_VERSION" diff --git a/tools/license-builder.sh b/tools/license-builder.sh index 1731c2d3a78..ceee01f5f09 100755 --- a/tools/license-builder.sh +++ b/tools/license-builder.sh @@ -85,8 +85,8 @@ licenseText="$(sed -e '/The data format used by the zlib library/,$d' -e 's/^\/\ addlicense "zlib" "deps/zlib" "$licenseText" licenseText="$(cat "${rootdir}/deps/simdjson/LICENSE")" addlicense "simdjson" "deps/simdjson" "$licenseText" -licenseText="$(cat "${rootdir}/deps/simdutf/LICENSE-MIT")" -addlicense "simdutf" "deps/simdutf" "$licenseText" +licenseText="$(cat "${rootdir}/deps/v8/third_party/simdutf/LICENSE")" +addlicense "simdutf" "deps/v8/third_party/simdutf" "$licenseText" licenseText="$(curl -sL https://raw.githubusercontent.com/ada-url/ada/HEAD/LICENSE-MIT)" addlicense "ada" "deps/ada" "$licenseText" licenseText="$(cat "${rootdir}/deps/minimatch/LICENSE")"