diff --git a/stdlib/public/core/UTF8EncodingError.swift b/stdlib/public/core/UTF8EncodingError.swift index 49b8904a76bc6..80ea1885ee679 100644 --- a/stdlib/public/core/UTF8EncodingError.swift +++ b/stdlib/public/core/UTF8EncodingError.swift @@ -1,3 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2025 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// + extension Unicode.UTF8 { /** @@ -5,21 +17,17 @@ extension Unicode.UTF8 { Valid UTF-8 is represented by this table: - ``` - ╔════════════════════╦════════╦════════╦════════╦════════╗ - ║ Scalar value ║ Byte 0 ║ Byte 1 ║ Byte 2 ║ Byte 3 ║ - ╠════════════════════╬════════╬════════╬════════╬════════╣ - ║ U+0000..U+007F ║ 00..7F ║ ║ ║ ║ - ║ U+0080..U+07FF ║ C2..DF ║ 80..BF ║ ║ ║ - ║ U+0800..U+0FFF ║ E0 ║ A0..BF ║ 80..BF ║ ║ - ║ U+1000..U+CFFF ║ E1..EC ║ 80..BF ║ 80..BF ║ ║ - ║ U+D000..U+D7FF ║ ED ║ 80..9F ║ 80..BF ║ ║ - ║ U+E000..U+FFFF ║ EE..EF ║ 80..BF ║ 80..BF ║ ║ - ║ U+10000..U+3FFFF ║ F0 ║ 90..BF ║ 80..BF ║ 80..BF ║ - ║ U+40000..U+FFFFF ║ F1..F3 ║ 80..BF ║ 80..BF ║ 80..BF ║ - ║ U+100000..U+10FFFF ║ F4 ║ 80..8F ║ 80..BF ║ 80..BF ║ - ╚════════════════════╩════════╩════════╩════════╩════════╝ - ``` + | Scalar value | Byte 0 | Byte 1 | Byte 2 | Byte 3 | + | ------------------ | ------ | ------ | ------ | ------ | + | U+0000..U+007F | 00..7F | | | | + | U+0080..U+07FF | C2..DF | 80..BF | | | + | U+0800..U+0FFF | E0 | A0..BF | 80..BF | | + | U+1000..U+CFFF | E1..EC | 80..BF | 80..BF | | + | U+D000..U+D7FF | ED | 80..9F | 80..BF | | + | U+E000..U+FFFF | EE..EF | 80..BF | 80..BF | | + | U+10000..U+3FFFF | 
F0 | 90..BF | 80..BF | 80..BF | + | U+40000..U+FFFFF | F1..F3 | 80..BF | 80..BF | 80..BF | + | U+100000..U+10FFFF | F4 | 80..8F | 80..BF | 80..BF | ### Classifying errors @@ -49,8 +57,8 @@ extension Unicode.UTF8 { encodings are invalid UTF-8 and can lead to security issues if not correctly detected: - - https://nvd.nist.gov/vuln/detail/CVE-2008-2938 - - https://nvd.nist.gov/vuln/detail/CVE-2000-0884 + - <https://nvd.nist.gov/vuln/detail/CVE-2008-2938> + - <https://nvd.nist.gov/vuln/detail/CVE-2000-0884> An overlong encoding of `NUL`, `0xC0 0x80`, is used in Java's Modified UTF-8 but is invalid UTF-8. Overlong encoding errors often catch attempts @@ -85,15 +93,11 @@ extension Unicode.UTF8 { the reported range. Similarly, constructing a single error for the longest invalid byte range can be constructed by joining adjacent error ranges. - ``` - ╔═════════════════╦══════╦═════╦═════╦═════╦═════╦═════╦═════╦══════╗ - ║ ║ 61 ║ F1 ║ 80 ║ 80 ║ E1 ║ 80 ║ C2 ║ 62 ║ - ╠═════════════════╬══════╬═════╬═════╬═════╬═════╬═════╬═════╬══════╣ - ║ Longest range ║ U+61 ║ err ║ ║ ║ ║ ║ ║ U+62 ║ - ║ Maximal subpart ║ U+61 ║ err ║ ║ ║ err ║ ║ err ║ U+62 ║ - ║ Error per byte ║ U+61 ║ err ║ err ║ err ║ err ║ err ║ err ║ U+62 ║ - ╚═════════════════╩══════╩═════╩═════╩═════╩═════╩═════╩═════╩══════╝ - ``` + | Algorithm | 61 | F1 | 80 | 80 | E1 | 80 | C2 | 62 | + | --------------- | ---- | --- | --- | --- | --- | --- | --- | ---- | + | Longest range | U+61 | err | | | | | | U+62 | + | Maximal subpart | U+61 | err | | | err | | err | U+62 | + | Error per byte | U+61 | err | err | err | err | err | err | U+62 | */ @available(SwiftStdlib 6.2, *) diff --git a/stdlib/public/core/UTF8Span.swift b/stdlib/public/core/UTF8Span.swift index 8d45e7f817ec0..ebecccd52edd8 100644 --- a/stdlib/public/core/UTF8Span.swift +++ b/stdlib/public/core/UTF8Span.swift @@ -1,7 +1,17 @@ -// TODO: comment header - - -/// A borrowed view into contiguous memory that contains validly-encoded UTF-8 code units. 
+//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2025 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// + +/// A borrowed view into contiguous memory that contains validly-encoded UTF-8 +/// code units. @frozen @safe @available(SwiftStdlib 6.2, *) @@ -13,12 +23,12 @@ public struct UTF8Span: Copyable, ~Escapable, BitwiseCopyable { A bit-packed count and flags (such as isASCII) ╔═══════╦═════╦══════════╦═══════╗ - ║ b63 ║ b62 ║ b61:56 ║ b56:0 ║ + ║ b63 ║ b62 ║ b61:56 ║ b55:0 ║ ╠═══════╬═════╬══════════╬═══════╣ ║ ASCII ║ NFC ║ reserved ║ count ║ ╚═══════╩═════╩══════════╩═══════╝ - ASCII means the contents are known to be all-ASCII (<0x7F). + ASCII means the contents are known to be all-ASCII (<=0x7F). NFC means contents are known to be in normal form C for fast comparisons. */ @usableFromInline @@ -200,7 +210,8 @@ extension UTF8Span { extension String { /// Creates a new string, copying the specified code units. /// - /// This initializer skips UTF-8 validation because `codeUnits` must contain valid UTF-8. + /// This initializer skips UTF-8 validation because `codeUnits` must contain + /// valid UTF-8. /// /// - Complexity: O(n) @available(SwiftStdlib 6.2, *) @@ -241,17 +252,17 @@ extension String { } #if !(os(watchOS) && _pointerBitWidth(_32)) - /// A UTF8span over the code units that make up this string. + /// A UTF-8 span over the code units that make up this string. /// - /// - Note: In the case of bridged UTF16 String instances (on Apple - /// platforms,) this property transcodes the code units the first time - /// it is called. 
The transcoded buffer is cached, and subsequent calls - /// to `span` can reuse the buffer. + /// - Note: In the case of bridged UTF-16 string instances (on Apple + /// platforms) this property transcodes the code units the first time + /// it's called. The transcoded buffer is cached, and subsequent calls + /// can reuse the buffer. /// - /// Returns: a `UTF8Span` over the code units of this String. + /// - Returns: A `UTF8Span` over the code units of this string. /// - /// Complexity: O(1) for native UTF8 Strings, - /// amortized O(1) for bridged UTF16 Strings. + /// - Complexity: O(1) for native UTF-8 strings, amortized O(1) for bridged + /// UTF-16 strings. @available(SwiftStdlib 6.2, *) public var utf8Span: UTF8Span { @lifetime(borrow self) @@ -262,17 +273,17 @@ extension String { } } - /// A UTF8span over the code units that make up this string. + /// A UTF-8 span over the code units that make up this string. /// - /// - Note: In the case of bridged UTF16 String instances (on Apple - /// platforms,) this property transcodes the code units the first time - /// it is called. The transcoded buffer is cached, and subsequent calls - /// to `span` can reuse the buffer. + /// - Note: In the case of bridged UTF-16 string instances (on Apple + /// platforms) this property transcodes the code units the first time + /// it's called. The transcoded buffer is cached, and subsequent calls + /// can reuse the buffer. /// - /// Returns: a `UTF8Span` over the code units of this String. + /// - Returns: A `UTF8Span` over the code units of this string. /// - /// Complexity: O(1) for native UTF8 Strings, - /// amortized O(1) for bridged UTF16 Strings. + /// - Complexity: O(1) for native UTF-8 strings, amortized O(1) for bridged + /// UTF-16 strings. @available(SwiftStdlib 6.2, *) public var _utf8Span: UTF8Span? 
{ @_alwaysEmitIntoClient @inline(__always) @@ -287,18 +298,18 @@ extension String { fatalError("\(#function) unavailable on 32-bit watchOS") } - /// A UTF8span over the code units that make up this string. + /// A UTF-8 span over the code units that make up this string. /// - /// - Note: In the case of bridged UTF16 String instances (on Apple - /// platforms,) this property transcodes the code units the first time - /// it is called. The transcoded buffer is cached, and subsequent calls - /// to `span` can reuse the buffer. + /// - Note: In the case of bridged UTF-16 string instances (on Apple + /// platforms) this property transcodes the code units the first time + /// it's called. The transcoded buffer is cached, and subsequent calls + /// can reuse the buffer. /// - /// Returns: a `UTF8Span` over the code units of this String, or `nil` - /// if the String does not have a contiguous representation. + /// - Returns: A `UTF8Span` over the code units of this string, or `nil` + /// if the string does not have a contiguous representation. /// - /// Complexity: O(1) for native UTF8 Strings, - /// amortized O(1) for bridged UTF16 Strings. + /// - Complexity: O(1) for native UTF-8 strings, amortized O(1) for bridged + /// UTF-16 strings. @available(SwiftStdlib 6.2, *) public var _utf8Span: UTF8Span? { @lifetime(borrow self) @@ -346,27 +357,34 @@ extension Substring { } #if !(os(watchOS) && _pointerBitWidth(_32)) - /// A UTF8Span over the code units that make up this substring. + /// A UTF-8 span over the code units that make up this substring. + /// + /// - Note: In the case of bridged UTF-16 string instances (on Apple + /// platforms) this property needs to transcode the code units every time + /// it's called. 
+ /// + /// For example, if `string` has the bridged UTF-16 representation, + /// the following code is accidentally quadratic because of this issue: /// - /// - Note: In the case of bridged UTF16 String instances (on Apple - /// platforms,) this property needs to transcode the code units every time - /// it is called. - /// For example, if `string` has the bridged UTF16 representation, /// for word in string.split(separator: " ") { /// useSpan(word.span) /// } - /// is accidentally quadratic because of this issue. A workaround is to - /// explicitly convert the string into its native UTF8 representation: - /// var nativeString = consume string - /// nativeString.makeContiguousUTF8() - /// for word in nativeString.split(separator: " ") { - /// useSpan(word.span) - /// } - /// This second option has linear time complexity, as expected. - /// - /// Returns: a `UTF8Span` over the code units of this Substring. - /// - /// Complexity: O(1) for native UTF8 Strings, O(n) for bridged UTF16 Strings. + /// + /// A workaround is to explicitly convert the string into its native UTF-8 + /// representation: + /// + /// var nativeString = consume string + /// nativeString.makeContiguousUTF8() + /// for word in nativeString.split(separator: " ") { + /// useSpan(word.span) + /// } + /// + /// This second option has linear time complexity, as expected. + /// + /// - Returns: A `UTF8Span` over the code units of this substring. + /// + /// - Complexity: O(1) for native UTF-8 strings, O(n) for bridged UTF-16 + /// strings. @available(SwiftStdlib 6.2, *) public var utf8Span: UTF8Span { @lifetime(borrow self) @@ -377,27 +395,34 @@ extension Substring { } } - /// A UTF8Span over the code units that make up this substring. + /// A UTF-8 span over the code units that make up this substring. + /// + /// - Note: In the case of bridged UTF-16 string instances (on Apple + /// platforms) this property needs to transcode the code units every time + /// it's called. 
+ /// + /// For example, if `string` has the bridged UTF-16 representation, + /// the following code is accidentally quadratic because of this issue: /// - /// - Note: In the case of bridged UTF16 String instances (on Apple - /// platforms,) this property needs to transcode the code units every time - /// it is called. - /// For example, if `string` has the bridged UTF16 representation, /// for word in string.split(separator: " ") { /// useSpan(word.span) /// } - /// is accidentally quadratic because of this issue. A workaround is to - /// explicitly convert the string into its native UTF8 representation: - /// var nativeString = consume string - /// nativeString.makeContiguousUTF8() - /// for word in nativeString.split(separator: " ") { - /// useSpan(word.span) - /// } - /// This second option has linear time complexity, as expected. - /// - /// Returns: a `UTF8Span` over the code units of this Substring. - /// - /// Complexity: O(1) for native UTF8 Strings, O(n) for bridged UTF16 Strings. + /// + /// A workaround is to explicitly convert the string into its native UTF-8 + /// representation: + /// + /// var nativeString = consume string + /// nativeString.makeContiguousUTF8() + /// for word in nativeString.split(separator: " ") { + /// useSpan(word.span) + /// } + /// + /// This second option has linear time complexity, as expected. + /// + /// - Returns: A `UTF8Span` over the code units of this substring. + /// + /// - Complexity: O(1) for native UTF-8 strings, O(n) for bridged UTF-16 + /// strings. @available(SwiftStdlib 6.2, *) public var _utf8Span: UTF8Span? { @_alwaysEmitIntoClient @inline(__always) @@ -412,28 +437,35 @@ extension Substring { fatalError("\(#function) unavailable on 32-bit watchOS") } - /// A UTF8Span over the code units that make up this substring. + /// A UTF-8 span over the code units that make up this substring. 
+ /// + /// - Note: In the case of bridged UTF-16 string instances (on Apple + /// platforms) this property needs to transcode the code units every time + /// it's called. + /// + /// For example, if `string` has the bridged UTF-16 representation, + /// the following code is accidentally quadratic because of this issue: /// - /// - Note: In the case of bridged UTF16 String instances (on Apple - /// platforms,) this property needs to transcode the code units every time - /// it is called. - /// For example, if `string` has the bridged UTF16 representation, /// for word in string.split(separator: " ") { /// useSpan(word.span) /// } - /// is accidentally quadratic because of this issue. A workaround is to - /// explicitly convert the string into its native UTF8 representation: - /// var nativeString = consume string - /// nativeString.makeContiguousUTF8() - /// for word in nativeString.split(separator: " ") { - /// useSpan(word.span) - /// } - /// This second option has linear time complexity, as expected. - /// - /// Returns: a `UTF8Span` over the code units of this Substring, or `nil` - /// if the Substring does not have a contiguous representation. - /// - /// Complexity: O(1) for native UTF8 Strings, O(n) for bridged UTF16 Strings. + /// + /// A workaround is to explicitly convert the string into its native UTF-8 + /// representation: + /// + /// var nativeString = consume string + /// nativeString.makeContiguousUTF8() + /// for word in nativeString.split(separator: " ") { + /// useSpan(word.span) + /// } + /// + /// This second option has linear time complexity, as expected. + /// + /// - Returns: A `UTF8Span` over the code units of this substring, or `nil` + /// if the substring does not have a contiguous representation. + /// + /// - Complexity: O(1) for native UTF-8 strings, O(n) for bridged UTF-16 + /// strings. @available(SwiftStdlib 6.2, *) public var _utf8Span: UTF8Span? 
{ @lifetime(borrow self) diff --git a/stdlib/public/core/UTF8SpanBits.swift b/stdlib/public/core/UTF8SpanBits.swift index 1d54106725a7d..1cda0c5cb8353 100644 --- a/stdlib/public/core/UTF8SpanBits.swift +++ b/stdlib/public/core/UTF8SpanBits.swift @@ -1,3 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2025 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// + @available(SwiftStdlib 6.2, *) extension UTF8Span { /// Returns whether contents are known to be all-ASCII. A return value of diff --git a/stdlib/public/core/UTF8SpanComparisons.swift b/stdlib/public/core/UTF8SpanComparisons.swift index 7f06963867fbf..6dee10378d75e 100644 --- a/stdlib/public/core/UTF8SpanComparisons.swift +++ b/stdlib/public/core/UTF8SpanComparisons.swift @@ -1,5 +1,14 @@ -// TODO: comment header - +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2025 Apple Inc. 
and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// @available(SwiftStdlib 6.2, *) extension UTF8Span { diff --git a/stdlib/public/core/UTF8SpanFundamentals.swift b/stdlib/public/core/UTF8SpanFundamentals.swift index 5b8c9b08b0c18..defddbdfe033c 100644 --- a/stdlib/public/core/UTF8SpanFundamentals.swift +++ b/stdlib/public/core/UTF8SpanFundamentals.swift @@ -1,3 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2025 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// + // Core Scalar API @available(SwiftStdlib 6.2, *) extension UTF8Span { diff --git a/stdlib/public/core/UTF8SpanInternalHelpers.swift b/stdlib/public/core/UTF8SpanInternalHelpers.swift index 9f7a251804213..f0b57b4612e80 100644 --- a/stdlib/public/core/UTF8SpanInternalHelpers.swift +++ b/stdlib/public/core/UTF8SpanInternalHelpers.swift @@ -1,8 +1,14 @@ -/* - - Additional helpers build on stdlibDuplicates.swift - - */ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2025 Apple Inc. 
and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// // TODO: Should we update our unicode helpers file to call these instead? diff --git a/stdlib/public/core/UTF8SpanIterators.swift b/stdlib/public/core/UTF8SpanIterators.swift index 2355f35188a7e..2aebaaa54a518 100644 --- a/stdlib/public/core/UTF8SpanIterators.swift +++ b/stdlib/public/core/UTF8SpanIterators.swift @@ -1,3 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2025 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// + @available(SwiftStdlib 6.2, *) extension UTF8Span { /// Returns an iterator that will decode the code units into