From aef29367c79bc4aabd70341eb1b0a5dee12bb4e3 Mon Sep 17 00:00:00 2001 From: Jeffrey Stedfast Date: Sat, 26 Aug 2023 13:23:50 -0400 Subject: [PATCH 1/8] Added an ImapToken cache to try and reduce ImapToken allocations --- MailKit/ByteArrayBuilder.cs | 9 + MailKit/MailKit.csproj | 2 + MailKit/MailKitLite.csproj | 2 + MailKit/Net/Imap/HashCode.cs | 521 +++++++++++++++++++++++++++++ MailKit/Net/Imap/ImapToken.cs | 31 +- MailKit/Net/Imap/ImapTokenCache.cs | 170 ++++++++++ 6 files changed, 734 insertions(+), 1 deletion(-) create mode 100644 MailKit/Net/Imap/HashCode.cs create mode 100644 MailKit/Net/Imap/ImapTokenCache.cs diff --git a/MailKit/ByteArrayBuilder.cs b/MailKit/ByteArrayBuilder.cs index ed3226414c..44f78a73c9 100644 --- a/MailKit/ByteArrayBuilder.cs +++ b/MailKit/ByteArrayBuilder.cs @@ -45,6 +45,15 @@ public int Length { get { return length; } } + public byte this[int index] { + get { return buffer[index]; } + } + + public byte[] GetBuffer () + { + return buffer; + } + void EnsureCapacity (int capacity) { if (capacity > buffer.Length) { diff --git a/MailKit/MailKit.csproj b/MailKit/MailKit.csproj index 6f63165ccc..8b1bc0fcf9 100644 --- a/MailKit/MailKit.csproj +++ b/MailKit/MailKit.csproj @@ -72,6 +72,7 @@ + @@ -99,6 +100,7 @@ + diff --git a/MailKit/MailKitLite.csproj b/MailKit/MailKitLite.csproj index f039a53f2d..d4389897a2 100644 --- a/MailKit/MailKitLite.csproj +++ b/MailKit/MailKitLite.csproj @@ -73,6 +73,7 @@ + @@ -100,6 +101,7 @@ + diff --git a/MailKit/Net/Imap/HashCode.cs b/MailKit/Net/Imap/HashCode.cs new file mode 100644 index 0000000000..52e91dd9a5 --- /dev/null +++ b/MailKit/Net/Imap/HashCode.cs @@ -0,0 +1,521 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +/* + +The xxHash32 implementation is based on the code published by Yann Collet: +https://raw.githubusercontent.com/Cyan4973/xxHash/5c174cfa4e45a42f94082dc0d4539b39696afea1/xxhash.c + + xxHash - Fast Hash algorithm + Copyright (C) 2012-2016, Yann Collet + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - xxHash homepage: http://www.xxhash.com + - xxHash source repository : https://github.com/Cyan4973/xxHash + +*/ + +using System; +using System.Collections.Generic; +using System.ComponentModel; +using System.Diagnostics; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Security.Cryptography; + + +#pragma warning disable CA1066 // Implement IEquatable when overriding Object.Equals + +namespace MailKit.Net.Imap +{ + // xxHash32 is used for the hash code. + // https://github.com/Cyan4973/xxHash + + struct HashCode + { + private static readonly uint s_seed = GenerateGlobalSeed (); + + private const uint Prime1 = 2654435761U; + private const uint Prime2 = 2246822519U; + private const uint Prime3 = 3266489917U; + private const uint Prime4 = 668265263U; + private const uint Prime5 = 374761393U; + + private uint _v1, _v2, _v3, _v4; + private uint _queue1, _queue2, _queue3; + private uint _length; + + private static unsafe uint GenerateGlobalSeed () + { + var data = new byte[sizeof (uint)]; + uint result = 0; + + using (var random = RandomNumberGenerator.Create ()) + random.GetBytes (data); + + for (int i = 0; i < data.Length; i++) + result = (result << 8) | data[i]; // pack each random byte into the seed; "result <<= data[i]" kept shifting 0 and always produced a seed of 0 + + return result; + } + + public static int Combine (T1 value1) + { + // Provide a way of diffusing bits from something with a limited + // input hash space. For example, many enums only have a few + // possible hashes, only using the bottom few bits of the code. Some + // collections are built on the assumption that hashes are spread + // over a larger space, so diffusing the bits may help the + // collection work more efficiently. + + uint hc1 = (uint) (value1?.GetHashCode () ?? 0); + + uint hash = MixEmptyState (); + hash += 4; + + hash = QueueRound (hash, hc1); + + hash = MixFinal (hash); + return (int) hash; + } + + public static int Combine (T1 value1, T2 value2) + { + uint hc1 = (uint) (value1?.GetHashCode () ?? 
0); + uint hc2 = (uint) (value2?.GetHashCode () ?? 0); + + uint hash = MixEmptyState (); + hash += 8; + + hash = QueueRound (hash, hc1); + hash = QueueRound (hash, hc2); + + hash = MixFinal (hash); + return (int) hash; + } + + public static int Combine (T1 value1, T2 value2, T3 value3) + { + uint hc1 = (uint) (value1?.GetHashCode () ?? 0); + uint hc2 = (uint) (value2?.GetHashCode () ?? 0); + uint hc3 = (uint) (value3?.GetHashCode () ?? 0); + + uint hash = MixEmptyState (); + hash += 12; + + hash = QueueRound (hash, hc1); + hash = QueueRound (hash, hc2); + hash = QueueRound (hash, hc3); + + hash = MixFinal (hash); + return (int) hash; + } + + public static int Combine (T1 value1, T2 value2, T3 value3, T4 value4) + { + uint hc1 = (uint) (value1?.GetHashCode () ?? 0); + uint hc2 = (uint) (value2?.GetHashCode () ?? 0); + uint hc3 = (uint) (value3?.GetHashCode () ?? 0); + uint hc4 = (uint) (value4?.GetHashCode () ?? 0); + + Initialize (out uint v1, out uint v2, out uint v3, out uint v4); + + v1 = Round (v1, hc1); + v2 = Round (v2, hc2); + v3 = Round (v3, hc3); + v4 = Round (v4, hc4); + + uint hash = MixState (v1, v2, v3, v4); + hash += 16; + + hash = MixFinal (hash); + return (int) hash; + } + + public static int Combine (T1 value1, T2 value2, T3 value3, T4 value4, T5 value5) + { + uint hc1 = (uint) (value1?.GetHashCode () ?? 0); + uint hc2 = (uint) (value2?.GetHashCode () ?? 0); + uint hc3 = (uint) (value3?.GetHashCode () ?? 0); + uint hc4 = (uint) (value4?.GetHashCode () ?? 0); + uint hc5 = (uint) (value5?.GetHashCode () ?? 
0); + + Initialize (out uint v1, out uint v2, out uint v3, out uint v4); + + v1 = Round (v1, hc1); + v2 = Round (v2, hc2); + v3 = Round (v3, hc3); + v4 = Round (v4, hc4); + + uint hash = MixState (v1, v2, v3, v4); + hash += 20; + + hash = QueueRound (hash, hc5); + + hash = MixFinal (hash); + return (int) hash; + } + + public static int Combine (T1 value1, T2 value2, T3 value3, T4 value4, T5 value5, T6 value6) + { + uint hc1 = (uint) (value1?.GetHashCode () ?? 0); + uint hc2 = (uint) (value2?.GetHashCode () ?? 0); + uint hc3 = (uint) (value3?.GetHashCode () ?? 0); + uint hc4 = (uint) (value4?.GetHashCode () ?? 0); + uint hc5 = (uint) (value5?.GetHashCode () ?? 0); + uint hc6 = (uint) (value6?.GetHashCode () ?? 0); + + Initialize (out uint v1, out uint v2, out uint v3, out uint v4); + + v1 = Round (v1, hc1); + v2 = Round (v2, hc2); + v3 = Round (v3, hc3); + v4 = Round (v4, hc4); + + uint hash = MixState (v1, v2, v3, v4); + hash += 24; + + hash = QueueRound (hash, hc5); + hash = QueueRound (hash, hc6); + + hash = MixFinal (hash); + return (int) hash; + } + + public static int Combine (T1 value1, T2 value2, T3 value3, T4 value4, T5 value5, T6 value6, T7 value7) + { + uint hc1 = (uint) (value1?.GetHashCode () ?? 0); + uint hc2 = (uint) (value2?.GetHashCode () ?? 0); + uint hc3 = (uint) (value3?.GetHashCode () ?? 0); + uint hc4 = (uint) (value4?.GetHashCode () ?? 0); + uint hc5 = (uint) (value5?.GetHashCode () ?? 0); + uint hc6 = (uint) (value6?.GetHashCode () ?? 0); + uint hc7 = (uint) (value7?.GetHashCode () ?? 
0); + + Initialize (out uint v1, out uint v2, out uint v3, out uint v4); + + v1 = Round (v1, hc1); + v2 = Round (v2, hc2); + v3 = Round (v3, hc3); + v4 = Round (v4, hc4); + + uint hash = MixState (v1, v2, v3, v4); + hash += 28; + + hash = QueueRound (hash, hc5); + hash = QueueRound (hash, hc6); + hash = QueueRound (hash, hc7); + + hash = MixFinal (hash); + return (int) hash; + } + + public static int Combine (T1 value1, T2 value2, T3 value3, T4 value4, T5 value5, T6 value6, T7 value7, T8 value8) + { + uint hc1 = (uint) (value1?.GetHashCode () ?? 0); + uint hc2 = (uint) (value2?.GetHashCode () ?? 0); + uint hc3 = (uint) (value3?.GetHashCode () ?? 0); + uint hc4 = (uint) (value4?.GetHashCode () ?? 0); + uint hc5 = (uint) (value5?.GetHashCode () ?? 0); + uint hc6 = (uint) (value6?.GetHashCode () ?? 0); + uint hc7 = (uint) (value7?.GetHashCode () ?? 0); + uint hc8 = (uint) (value8?.GetHashCode () ?? 0); + + Initialize (out uint v1, out uint v2, out uint v3, out uint v4); + + v1 = Round (v1, hc1); + v2 = Round (v2, hc2); + v3 = Round (v3, hc3); + v4 = Round (v4, hc4); + + v1 = Round (v1, hc5); + v2 = Round (v2, hc6); + v3 = Round (v3, hc7); + v4 = Round (v4, hc8); + + uint hash = MixState (v1, v2, v3, v4); + hash += 32; + + hash = MixFinal (hash); + return (int) hash; + } + + [MethodImpl (MethodImplOptions.AggressiveInlining)] + private static void Initialize (out uint v1, out uint v2, out uint v3, out uint v4) + { + v1 = s_seed + Prime1 + Prime2; + v2 = s_seed + Prime2; + v3 = s_seed; + v4 = s_seed - Prime1; + } + + [MethodImpl (MethodImplOptions.AggressiveInlining)] + static uint RotateLeft (uint value, int offset) + { + return (value << offset) | (value >> (32 - offset)); + } + + [MethodImpl (MethodImplOptions.AggressiveInlining)] + private static uint Round (uint hash, uint input) + { + return RotateLeft (hash + input * Prime2, 13) * Prime1; + } + + [MethodImpl (MethodImplOptions.AggressiveInlining)] + private static uint QueueRound (uint hash, uint queuedValue) + { 
+ return RotateLeft (hash + queuedValue * Prime3, 17) * Prime4; + } + + [MethodImpl (MethodImplOptions.AggressiveInlining)] + private static uint MixState (uint v1, uint v2, uint v3, uint v4) + { + return RotateLeft (v1, 1) + RotateLeft (v2, 7) + RotateLeft (v3, 12) + RotateLeft (v4, 18); + } + + private static uint MixEmptyState () + { + return s_seed + Prime5; + } + + [MethodImpl (MethodImplOptions.AggressiveInlining)] + private static uint MixFinal (uint hash) + { + hash ^= hash >> 15; + hash *= Prime2; + hash ^= hash >> 13; + hash *= Prime3; + hash ^= hash >> 16; + return hash; + } + + public void Add (T value) + { + Add (value?.GetHashCode () ?? 0); + } + + public void Add (T value, IEqualityComparer? comparer) + { + Add (value is null ? 0 : (comparer?.GetHashCode (value) ?? value.GetHashCode ())); + } + + static int ByteOffset (ref byte origin, ref byte target) + { + // Distance in bytes from origin to target. Writing "target - origin" would dereference + // both refs and subtract the byte *values*, not the addresses they point at. + return (int) Unsafe.ByteOffset (ref origin, ref target); + } + + /// Adds a span of bytes to the hash code. + /// The span. + /// + /// This method does not guarantee that the result of adding a span of bytes will match + /// the result of adding the same bytes individually. + /// + public void AddBytes (ReadOnlySpan value) + { + ref byte pos = ref MemoryMarshal.GetReference (value); + ref byte end = ref Unsafe.Add (ref pos, value.Length); + + if (value.Length < (sizeof (int) * 4)) { + goto Small; + } + + // Usually Add calls Initialize but if we haven't used HashCode before it won't have been called. + if (_length == 0) { + Initialize (out _v1, out _v2, out _v3, out _v4); + } else { + // If we have at least 16 bytes to hash, we can add them in 16-byte batches, + // but we first have to add enough data to flush any queued values. 
+ switch (_length % 4) { + case 1: + //Debug.Assert (Unsafe.ByteOffset (ref pos, ref end) >= sizeof (int)); + Add (Unsafe.ReadUnaligned (ref pos)); + pos = ref Unsafe.Add (ref pos, sizeof (int)); + goto case 2; + case 2: + //Debug.Assert (Unsafe.ByteOffset (ref pos, ref end) >= sizeof (int)); + Add (Unsafe.ReadUnaligned (ref pos)); + pos = ref Unsafe.Add (ref pos, sizeof (int)); + goto case 3; + case 3: + //Debug.Assert (Unsafe.ByteOffset (ref pos, ref end) >= sizeof (int)); + Add (Unsafe.ReadUnaligned (ref pos)); + pos = ref Unsafe.Add (ref pos, sizeof (int)); + break; + } + } + + // With the queue clear, we add sixteen bytes at a time until the input has fewer than sixteen bytes remaining. + // We first have to round the end pointer to the nearest 16-byte block from the offset. This makes the loop's condition simpler. + ref byte blockEnd = ref Unsafe.Subtract (ref end, ByteOffset (ref pos, ref end) % (sizeof (int) * 4)); + while (Unsafe.IsAddressLessThan (ref pos, ref blockEnd)) { + //Debug.Assert (Unsafe.ByteOffset (ref pos, ref blockEnd) >= (sizeof (int) * 4)); + uint v1 = Unsafe.ReadUnaligned (ref pos); + _v1 = Round (_v1, v1); + uint v2 = Unsafe.ReadUnaligned (ref Unsafe.Add (ref pos, sizeof (int) * 1)); + _v2 = Round (_v2, v2); + uint v3 = Unsafe.ReadUnaligned (ref Unsafe.Add (ref pos, sizeof (int) * 2)); + _v3 = Round (_v3, v3); + uint v4 = Unsafe.ReadUnaligned (ref Unsafe.Add (ref pos, sizeof (int) * 3)); + _v4 = Round (_v4, v4); + + _length += 4; + pos = ref Unsafe.Add (ref pos, sizeof (int) * 4); + } + + Small: + // Add four bytes at a time until the input has fewer than four bytes remaining. + while (ByteOffset (ref pos, ref end) >= sizeof (int)) { + Add (Unsafe.ReadUnaligned (ref pos)); + pos = ref Unsafe.Add (ref pos, sizeof (int)); + } + + // Add the remaining bytes a single byte at a time. 
+ while (Unsafe.IsAddressLessThan (ref pos, ref end)) { + Add ((int) pos); + pos = ref Unsafe.Add (ref pos, 1); + } + } + + private void Add (int value) + { + // The original xxHash works as follows: + // 0. Initialize immediately. We can't do this in a struct (no + // default ctor). + // 1. Accumulate blocks of length 16 (4 uints) into 4 accumulators. + // 2. Accumulate remaining blocks of length 4 (1 uint) into the + // hash. + // 3. Accumulate remaining blocks of length 1 into the hash. + + // There is no need for #3 as this type only accepts ints. _queue1, + // _queue2 and _queue3 are basically a buffer so that when + // ToHashCode is called we can execute #2 correctly. + + // We need to initialize the xxHash32 state (_v1 to _v4) lazily (see + // #0) and the last place that can be done if you look at the + // original code is just before the first block of 16 bytes is mixed + // in. The xxHash32 state is never used for streams containing fewer + // than 16 bytes. + + // To see what's really going on here, have a look at the Combine + // methods. + + uint val = (uint) value; + + // Storing the value of _length locally shaves off quite a few bytes + // in the resulting machine code. + uint previousLength = _length++; + uint position = previousLength % 4; + + // Switch can't be inlined. + + if (position == 0) + _queue1 = val; + else if (position == 1) + _queue2 = val; + else if (position == 2) + _queue3 = val; + else // position == 3 + { + if (previousLength == 3) + Initialize (out _v1, out _v2, out _v3, out _v4); + + _v1 = Round (_v1, _queue1); + _v2 = Round (_v2, _queue2); + _v3 = Round (_v3, _queue3); + _v4 = Round (_v4, val); + } + } + + public int ToHashCode () + { + // Storing the value of _length locally shaves off quite a few bytes + // in the resulting machine code. 
+ uint length = _length; + + // position refers to the *next* queue position in this method, so + // position == 1 means that _queue1 is populated; _queue2 would have + // been populated on the next call to Add. + uint position = length % 4; + + // If the length is less than 4, _v1 to _v4 don't contain anything + // yet. xxHash32 treats this differently. + + uint hash = length < 4 ? MixEmptyState () : MixState (_v1, _v2, _v3, _v4); + + // _length is incremented once per Add(Int32) and is therefore 4 + // times too small (xxHash length is in bytes, not ints). + + hash += length * 4; + + // Mix what remains in the queue + + // Switch can't be inlined right now, so use as few branches as + // possible by manually excluding impossible scenarios (position > 1 + // is always false if position is not > 0). + if (position > 0) { + hash = QueueRound (hash, _queue1); + if (position > 1) { + hash = QueueRound (hash, _queue2); + if (position > 2) + hash = QueueRound (hash, _queue3); + } + } + + hash = MixFinal (hash); + return (int) hash; + } + +#pragma warning disable 0809 + // Obsolete member 'memberA' overrides non-obsolete member 'memberB'. + // Disallowing GetHashCode and Equals is by design + + // * We decided to not override GetHashCode() to produce the hash code + // as this would be weird, both naming-wise as well as from a + // behavioral standpoint (GetHashCode() should return the object's + // hash code, not the one being computed). + + // * Even though ToHashCode() can be called safely multiple times on + // this implementation, it is not part of the contract. If the + // implementation has to change in the future we don't want to worry + // about people who might have incorrectly used this type. + + [Obsolete ("HashCode is a mutable struct and should not be compared with other HashCodes. 
Use ToHashCode to retrieve the computed hash code.", error: true)] + [EditorBrowsable (EditorBrowsableState.Never)] + public override int GetHashCode () => throw new NotSupportedException (); + + [Obsolete ("HashCode is a mutable struct and should not be compared with other HashCodes.", error: true)] + [EditorBrowsable (EditorBrowsableState.Never)] + public override bool Equals (object? obj) => throw new NotSupportedException (); +#pragma warning restore 0809 + } +} + +#pragma warning restore CA1066 diff --git a/MailKit/Net/Imap/ImapToken.cs b/MailKit/Net/Imap/ImapToken.cs index d3284f5993..d61ea6e0a5 100644 --- a/MailKit/Net/Imap/ImapToken.cs +++ b/MailKit/Net/Imap/ImapToken.cs @@ -94,10 +94,12 @@ class ImapToken static readonly ImapToken XGMMsgId = new ImapToken (ImapTokenType.Atom, "X-GM-MSGID"); static readonly ImapToken XGMThrId = new ImapToken (ImapTokenType.Atom, "X-GM-THRID"); + static readonly ImapTokenCache Cache = new ImapTokenCache (); + public readonly ImapTokenType Type; public readonly object Value; - ImapToken (ImapTokenType type, object value = null) + internal ImapToken (ImapTokenType type, object value = null) { Value = value; Type = type; @@ -125,6 +127,30 @@ public static ImapToken Create (ImapTokenType type, int literalLength) return new ImapToken (type, literalLength); } + static bool IsCacheable (ByteArrayBuilder builder) + { + if (builder.Length < 2 || builder.Length > 32) + return false; + + // Any atom token that starts with a digit is likely to be an integer value, so don't cache it. + if (builder[0] >= (byte) '0' && builder[0] <= (byte) '9') + return false; + + // Any atom token that starts with 'A'->'Z' and is followed by digits is a tag token. Ignore. + if (builder[0] >= (byte) 'A' && builder[0] <= (byte) 'Z' && builder[1] >= (byte) '0' && builder[1] <= (byte) '9') + return false; + + for (int i = 0; i < builder.Length; i++) { + byte c = (byte) builder[i]; + + // Disregard any non-ASCII "atoms". 
+ if (c <= 32 || c >= 127) + return false; + } + + return true; + } + public static ImapToken Create (ImapTokenType type, ByteArrayBuilder builder) { string value; @@ -183,6 +209,9 @@ public static ImapToken Create (ImapTokenType type, ByteArrayBuilder builder) return XGMThrId; } + if (IsCacheable (builder)) + return Cache.AddOrGet (type, builder); + value = builder.ToString (); return new ImapToken (type, value); diff --git a/MailKit/Net/Imap/ImapTokenCache.cs b/MailKit/Net/Imap/ImapTokenCache.cs new file mode 100644 index 0000000000..55e45904b8 --- /dev/null +++ b/MailKit/Net/Imap/ImapTokenCache.cs @@ -0,0 +1,170 @@ +// +// ImapTokenCache.cs +// +// Author: Jeffrey Stedfast +// +// Copyright (c) 2013-2023 .NET Foundation and Contributors +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+// + +using System; +using System.Text; +using System.Collections.Generic; + +namespace MailKit.Net.Imap +{ + class ImapTokenCache + { + const int capacity = 128; + + readonly Dictionary> cache; + readonly LinkedList list; + + public ImapTokenCache () + { + cache = new Dictionary> (); + list = new LinkedList (); + } + + public ImapToken AddOrGet (ImapTokenType type, ByteArrayBuilder builder) + { + // Note: This ImapTokenKey .ctor does not duplicate the buffer and is meant as a temporary key + // in order to avoid memory allocations for lookup purposes. + var key = new ImapTokenKey (builder.GetBuffer (), builder.Length); + + lock (cache) { + if (cache.TryGetValue (key, out var node)) { + // move the node to the head of the list + list.Remove (node); + list.AddFirst (node); + + return node.Value.Token; + } + + if (cache.Count >= capacity) { + // remove the least recently used token + node = list.Last; + list.RemoveLast (); + cache.Remove (node.Value.Key); + } + + var token = new ImapToken (type, builder.ToString ()); + + // Note: We recreate the key here so we have a permanent key. Also this allows for reuse of the token's Value string. 
+ key = new ImapTokenKey ((string) token.Value); + + var item = new ImapTokenItem (key, token); + + node = new LinkedListNode (item); + cache.Add (key, node); + list.AddFirst (node); + + return token; + } + } + + class ImapTokenKey + { + readonly byte[] byteArrayKey; + readonly string stringKey; + readonly int length; + readonly int hashCode; + + public ImapTokenKey (byte[] key, int len) + { + byteArrayKey = key; + length = len; + + var hash = new HashCode (); + for (int i = 0; i < length; i++) + hash.Add ((char) key[i]); + + hashCode = hash.ToHashCode (); + } + + public ImapTokenKey (string key) + { + stringKey = key; + length = key.Length; + + var hash = new HashCode (); + for (int i = 0; i < length; i++) + hash.Add (key[i]); + + hashCode = hash.ToHashCode (); + } + + static bool Equals (string str, byte[] bytes) + { + for (int i = 0; i < str.Length; i++) { + if (str[i] != (char) bytes[i]) + return false; + } + + return true; + } + + static bool Equals (ImapTokenKey self, ImapTokenKey other) + { + if (self.length != other.length) + return false; + + if (self.stringKey != null) { + if (other.stringKey != null) + return self.stringKey.Equals (other.stringKey, StringComparison.Ordinal); + + return Equals (self.stringKey, other.byteArrayKey); + } + + if (other.stringKey != null) + return Equals (other.stringKey, self.byteArrayKey); + + for (int i = 0; i < self.length; i++) { + if (self.byteArrayKey[i] != other.byteArrayKey[i]) + return false; + } + + return true; + } + + public override bool Equals (object obj) + { + return obj is ImapTokenKey other && Equals (this, other); + } + + public override int GetHashCode () + { + return hashCode; + } + } + + class ImapTokenItem + { + public readonly ImapTokenKey Key; + public readonly ImapToken Token; + + public ImapTokenItem (ImapTokenKey key, ImapToken token) + { + Key = key; + Token = token; + } + } + } +} From a18ff54275f8de2950df86eb839aed6b47817f9c Mon Sep 17 00:00:00 2001 From: Jeffrey Stedfast Date: Sat, 26 Aug 2023 
16:30:19 -0400 Subject: [PATCH 2/8] Modified the ImapToken caching logic to include qstring tokens Also reduced memory allocations in ImapTokenCache.AddOrGet() --- MailKit/Net/Imap/ImapStream.cs | 12 ++-- MailKit/Net/Imap/ImapToken.cs | 36 ++++++++---- MailKit/Net/Imap/ImapTokenCache.cs | 89 ++++++++++++++++++++---------- 3 files changed, 89 insertions(+), 48 deletions(-) diff --git a/MailKit/Net/Imap/ImapStream.cs b/MailKit/Net/Imap/ImapStream.cs index 0393741564..b9464aff5f 100644 --- a/MailKit/Net/Imap/ImapStream.cs +++ b/MailKit/Net/Imap/ImapStream.cs @@ -587,9 +587,7 @@ ImapToken ReadQuotedStringToken (CancellationToken cancellationToken) while (!TryReadQuotedString (builder, ref escaped)) ReadAhead (2, cancellationToken); - var qstring = builder.ToString (); - - return ImapToken.Create (ImapTokenType.QString, qstring); + return ImapToken.Create (ImapTokenType.QString, builder); } } @@ -604,9 +602,7 @@ async ValueTask ReadQuotedStringTokenAsync (CancellationToken cancell while (!TryReadQuotedString (builder, ref escaped)) await ReadAheadAsync (2, cancellationToken).ConfigureAwait (false); - var qstring = builder.ToString (); - - return ImapToken.Create (ImapTokenType.QString, qstring); + return ImapToken.Create (ImapTokenType.QString, builder); } } @@ -739,7 +735,7 @@ ImapToken ReadLiteralToken (CancellationToken cancellationToken) inputIndex++; if (!builder.TryParse (1, endIndex, out literalDataLeft)) - return ImapToken.Create (ImapTokenType.Error, builder.ToString ()); + return ImapToken.CreateError (builder); Mode = ImapStreamMode.Literal; @@ -781,7 +777,7 @@ async ValueTask ReadLiteralTokenAsync (CancellationToken cancellation inputIndex++; if (!builder.TryParse (1, endIndex, out literalDataLeft) || literalDataLeft < 0) - return ImapToken.Create (ImapTokenType.Error, builder.ToString ()); + return ImapToken.CreateError (builder); Mode = ImapStreamMode.Literal; diff --git a/MailKit/Net/Imap/ImapToken.cs b/MailKit/Net/Imap/ImapToken.cs index 
d61ea6e0a5..ba7d774ba3 100644 --- a/MailKit/Net/Imap/ImapToken.cs +++ b/MailKit/Net/Imap/ImapToken.cs @@ -127,6 +127,19 @@ public static ImapToken Create (ImapTokenType type, int literalLength) return new ImapToken (type, literalLength); } + static bool IsAscii (ByteArrayBuilder builder) + { + for (int i = 0; i < builder.Length; i++) { + byte c = builder[i]; + + // Disregard any non-ASCII tokens. + if (c < 32 || c >= 127) + return false; + } + + return true; + } + static bool IsCacheable (ByteArrayBuilder builder) { if (builder.Length < 2 || builder.Length > 32) @@ -140,19 +153,12 @@ static bool IsCacheable (ByteArrayBuilder builder) if (builder[0] >= (byte) 'A' && builder[0] <= (byte) 'Z' && builder[1] >= (byte) '0' && builder[1] <= (byte) '9') return false; - for (int i = 0; i < builder.Length; i++) { - byte c = (byte) builder[i]; - - // Disregard any non-ASCII "atoms". - if (c <= 32 || c >= 127) - return false; - } - - return true; + return IsAscii (builder); } public static ImapToken Create (ImapTokenType type, ByteArrayBuilder builder) { + bool cachable = false; string value; if (type == ImapTokenType.Flag) { @@ -162,6 +168,8 @@ public static ImapToken Create (ImapTokenType type, ByteArrayBuilder builder) if (builder.Equals (value, true)) return token; } + + cachable = IsAscii (builder); } else if (type == ImapTokenType.Atom) { if (builder.Equals ("NIL", true)) { // Look for the cached NIL token that matches this capitalization. 
@@ -207,9 +215,13 @@ public static ImapToken Create (ImapTokenType type, ByteArrayBuilder builder) return XGMMsgId; if (builder.Equals ("X-GM-THRID", false)) return XGMThrId; + + cachable = IsCacheable (builder); + } else if (type == ImapTokenType.QString) { + cachable = IsAscii (builder); } - if (IsCacheable (builder)) + if (cachable) return Cache.AddOrGet (type, builder); value = builder.ToString (); @@ -217,9 +229,9 @@ public static ImapToken Create (ImapTokenType type, ByteArrayBuilder builder) return new ImapToken (type, value); } - public static ImapToken Create (ImapTokenType type, string value) + public static ImapToken CreateError (ByteArrayBuilder builder) { - return new ImapToken (type, value); + return new ImapToken (ImapTokenType.Error, builder.ToString ()); } public override string ToString () diff --git a/MailKit/Net/Imap/ImapTokenCache.cs b/MailKit/Net/Imap/ImapTokenCache.cs index 55e45904b8..1f64946284 100644 --- a/MailKit/Net/Imap/ImapTokenCache.cs +++ b/MailKit/Net/Imap/ImapTokenCache.cs @@ -36,44 +36,49 @@ class ImapTokenCache readonly Dictionary> cache; readonly LinkedList list; + readonly ImapTokenKey lookupKey; public ImapTokenCache () { cache = new Dictionary> (); list = new LinkedList (); + lookupKey = new ImapTokenKey (); } public ImapToken AddOrGet (ImapTokenType type, ByteArrayBuilder builder) { - // Note: This ImapTokenKey .ctor does not duplicate the buffer and is meant as a temporary key - // in order to avoid memory allocations for lookup purposes. 
- var key = new ImapTokenKey (builder.GetBuffer (), builder.Length); - lock (cache) { - if (cache.TryGetValue (key, out var node)) { + // lookupKey is a pre-allocated key used for lookups + lookupKey.Init (type, builder.GetBuffer (), builder.Length); + + if (cache.TryGetValue (lookupKey, out var node)) { // move the node to the head of the list list.Remove (node); list.AddFirst (node); + node.Value.Count++; return node.Value.Token; } + var token = new ImapToken (type, builder.ToString ()); + if (cache.Count >= capacity) { // remove the least recently used token node = list.Last; list.RemoveLast (); cache.Remove (node.Value.Key); - } - var token = new ImapToken (type, builder.ToString ()); - - // Note: We recreate the key here so we have a permanent key. Also this allows for reuse of the token's Value string. - key = new ImapTokenKey ((string) token.Value); + // re-use the node, item and key to avoid allocations + node.Value.Key.Init (type, (string) token.Value); + node.Value.Token = token; + } else { + var key = new ImapTokenKey (type, (string) token.Value); + var item = new ImapTokenItem (key, token); - var item = new ImapTokenItem (key, token); + node = new LinkedListNode (item); + } - node = new LinkedListNode (item); - cache.Add (key, node); + cache.Add (node.Value.Key, node); list.AddFirst (node); return token; @@ -82,33 +87,49 @@ public ImapToken AddOrGet (ImapTokenType type, ByteArrayBuilder builder) class ImapTokenKey { - readonly byte[] byteArrayKey; - readonly string stringKey; - readonly int length; - readonly int hashCode; + ImapTokenType type; + byte[] byteArrayKey; + string stringKey; + int length; + int hashCode; + + public ImapTokenKey () + { + } - public ImapTokenKey (byte[] key, int len) + public ImapTokenKey (ImapTokenType type, string key) { - byteArrayKey = key; - length = len; + Init (type, key); + } + + public void Init (ImapTokenType type, byte[] key, int length) + { + this.type = type; + this.byteArrayKey = key; + this.stringKey = null; + 
this.length = length; var hash = new HashCode (); + hash.Add ((int) type); for (int i = 0; i < length; i++) hash.Add ((char) key[i]); - hashCode = hash.ToHashCode (); + this.hashCode = hash.ToHashCode (); } - public ImapTokenKey (string key) + public void Init (ImapTokenType type, string key) { - stringKey = key; - length = key.Length; + this.type = type; + this.byteArrayKey = null; + this.stringKey = key; + this.length = key.Length; var hash = new HashCode (); + hash.Add ((int) type); for (int i = 0; i < length; i++) hash.Add (key[i]); - hashCode = hash.ToHashCode (); + this.hashCode = hash.ToHashCode (); } static bool Equals (string str, byte[] bytes) @@ -123,7 +144,7 @@ static bool Equals (string str, byte[] bytes) static bool Equals (ImapTokenKey self, ImapTokenKey other) { - if (self.length != other.length) + if (self.type != other.type || self.length != other.length) return false; if (self.stringKey != null) { @@ -153,17 +174,29 @@ public override int GetHashCode () { return hashCode; } + + public override string ToString () + { + return string.Format ("{0}: {1}", type, stringKey ?? Encoding.UTF8.GetString (byteArrayKey, 0, length)); + } } class ImapTokenItem { - public readonly ImapTokenKey Key; - public readonly ImapToken Token; + public ImapTokenKey Key; + public ImapToken Token; + public int Count; public ImapTokenItem (ImapTokenKey key, ImapToken token) { Key = key; Token = token; + Count = 1; + } + + public override string ToString () + { + return $"{Count}"; } } } From 3f1a5c74b16be5eb7021fb66873fdc5f05f3c7c8 Mon Sep 17 00:00:00 2001 From: Jeffrey Stedfast Date: Sun, 27 Aug 2023 09:58:00 -0400 Subject: [PATCH 3/8] Updated ImapStream, ImapEngine, and ImapCommand to reuse ByteArrayBuilders This drastically reduces the number of allocations made when tokenizing IMAP responses. ImapCommand's usage was not really a major issue, but since ImapEngine.ReadLine/Async() needed a reusable ByteArrayBuilder anyway, might as well share that with ImapCommand. 
--- MailKit/Net/Imap/ImapCommand.cs | 163 ++++++++++++++++---------------- MailKit/Net/Imap/ImapEngine.cs | 46 +++++---- MailKit/Net/Imap/ImapStream.cs | 159 ++++++++++++++++--------------- 3 files changed, 190 insertions(+), 178 deletions(-) diff --git a/MailKit/Net/Imap/ImapCommand.cs b/MailKit/Net/Imap/ImapCommand.cs index 20f4d2b477..eeea8d41ef 100644 --- a/MailKit/Net/Imap/ImapCommand.cs +++ b/MailKit/Net/Imap/ImapCommand.cs @@ -160,92 +160,91 @@ public ImapCommand (ImapEngine engine, CancellationToken cancellationToken, Imap Engine = engine; Folder = folder; - using (var builder = new ByteArrayBuilder (1024)) { - byte[] buf, utf8 = new byte[8]; - int argc = 0; - string str; - - for (int i = 0; i < format.Length; i++) { - if (format[i] == '%') { - switch (format[++i]) { - case '%': // a literal % - builder.Append ((byte) '%'); - break; - case 'd': // an integer - str = ((int) args[argc++]).ToString (CultureInfo.InvariantCulture); - buf = Encoding.ASCII.GetBytes (str); - builder.Append (buf, 0, buf.Length); - break; - case 'u': // an unsigned integer - str = ((uint) args[argc++]).ToString (CultureInfo.InvariantCulture); - buf = Encoding.ASCII.GetBytes (str); - builder.Append (buf, 0, buf.Length); - break; - case 's': - str = (string) args[argc++]; - buf = Encoding.ASCII.GetBytes (str); - builder.Append (buf, 0, buf.Length); - break; - case 'F': // an ImapFolder - var utf7 = ((ImapFolder) args[argc++]).EncodedName; - AppendString (options, true, builder, utf7); - break; - case 'L': // a MimeMessage or a byte[] - var arg = args[argc++]; - ImapLiteral literal; - byte[] prefix; - - if (arg is MimeMessage message) { - prefix = options.International ? 
UTF8LiteralTokenPrefix : LiteralTokenPrefix; - literal = new ImapLiteral (options, message, UpdateProgress); - } else { - literal = new ImapLiteral (options, (byte[]) arg); - prefix = LiteralTokenPrefix; - } - - var length = literal.Length; - bool wait = true; - - builder.Append (prefix, 0, prefix.Length); - buf = Encoding.ASCII.GetBytes (length.ToString (CultureInfo.InvariantCulture)); - builder.Append (buf, 0, buf.Length); - - if (CanUseNonSynchronizedLiteral (Engine, length)) { - builder.Append ((byte) '+'); - wait = false; - } - - builder.Append (LiteralTokenSuffix, 0, LiteralTokenSuffix.Length); - - totalSize += length; - - parts.Add (new ImapCommandPart (builder.ToArray (), literal, wait)); - builder.Clear (); - - if (prefix == UTF8LiteralTokenPrefix) - builder.Append ((byte) ')'); - break; - case 'S': // a string which may need to be quoted or made into a literal - AppendString (options, true, builder, (string) args[argc++]); - break; - case 'Q': // similar to %S but string must be quoted at a minimum - AppendString (options, false, builder, (string) args[argc++]); - break; - default: - throw new FormatException ($"The %{format[i]} format specifier is not supported."); + var builder = engine.GetCommandBuilder (); + byte[] buf, utf8 = new byte[8]; + int argc = 0; + string str; + + for (int i = 0; i < format.Length; i++) { + if (format[i] == '%') { + switch (format[++i]) { + case '%': // a literal % + builder.Append ((byte) '%'); + break; + case 'd': // an integer + str = ((int) args[argc++]).ToString (CultureInfo.InvariantCulture); + buf = Encoding.ASCII.GetBytes (str); + builder.Append (buf, 0, buf.Length); + break; + case 'u': // an unsigned integer + str = ((uint) args[argc++]).ToString (CultureInfo.InvariantCulture); + buf = Encoding.ASCII.GetBytes (str); + builder.Append (buf, 0, buf.Length); + break; + case 's': + str = (string) args[argc++]; + buf = Encoding.ASCII.GetBytes (str); + builder.Append (buf, 0, buf.Length); + break; + case 'F': // an 
ImapFolder + var utf7 = ((ImapFolder) args[argc++]).EncodedName; + AppendString (options, true, builder, utf7); + break; + case 'L': // a MimeMessage or a byte[] + var arg = args[argc++]; + ImapLiteral literal; + byte[] prefix; + + if (arg is MimeMessage message) { + prefix = options.International ? UTF8LiteralTokenPrefix : LiteralTokenPrefix; + literal = new ImapLiteral (options, message, UpdateProgress); + } else { + literal = new ImapLiteral (options, (byte[]) arg); + prefix = LiteralTokenPrefix; } - } else if (format[i] < 128) { - builder.Append ((byte) format[i]); - } else { - int nchars = char.IsSurrogate (format[i]) ? 2 : 1; - int nbytes = Encoding.UTF8.GetBytes (format, i, nchars, utf8, 0); - builder.Append (utf8, 0, nbytes); - i += nchars - 1; + + var length = literal.Length; + bool wait = true; + + builder.Append (prefix, 0, prefix.Length); + buf = Encoding.ASCII.GetBytes (length.ToString (CultureInfo.InvariantCulture)); + builder.Append (buf, 0, buf.Length); + + if (CanUseNonSynchronizedLiteral (Engine, length)) { + builder.Append ((byte) '+'); + wait = false; + } + + builder.Append (LiteralTokenSuffix, 0, LiteralTokenSuffix.Length); + + totalSize += length; + + parts.Add (new ImapCommandPart (builder.ToArray (), literal, wait)); + builder.Clear (); + + if (prefix == UTF8LiteralTokenPrefix) + builder.Append ((byte) ')'); + break; + case 'S': // a string which may need to be quoted or made into a literal + AppendString (options, true, builder, (string) args[argc++]); + break; + case 'Q': // similar to %S but string must be quoted at a minimum + AppendString (options, false, builder, (string) args[argc++]); + break; + default: + throw new FormatException ($"The %{format[i]} format specifier is not supported."); } + } else if (format[i] < 128) { + builder.Append ((byte) format[i]); + } else { + int nchars = char.IsSurrogate (format[i]) ? 
2 : 1; + int nbytes = Encoding.UTF8.GetBytes (format, i, nchars, utf8, 0); + builder.Append (utf8, 0, nbytes); + i += nchars - 1; } - - parts.Add (new ImapCommandPart (builder.ToArray (), null)); } + + parts.Add (new ImapCommandPart (builder.ToArray (), null)); } /// diff --git a/MailKit/Net/Imap/ImapEngine.cs b/MailKit/Net/Imap/ImapEngine.cs index d741bc0de2..c65caa5bf9 100644 --- a/MailKit/Net/Imap/ImapEngine.cs +++ b/MailKit/Net/Imap/ImapEngine.cs @@ -155,6 +155,7 @@ class ImapEngine : IDisposable readonly CreateImapFolderDelegate createImapFolder; readonly ImapFolderNameComparer cacheComparer; internal ImapQuirksMode QuirksMode; + readonly ByteArrayBuilder builder; readonly List queue; long clientConnectedTimestamp; internal char TagPrefix; @@ -170,6 +171,8 @@ public ImapEngine (CreateImapFolderDelegate createImapFolderDelegate) metrics = Telemetry.ImapClient.Metrics; #endif + // The builder is used as a buffer for line-reading as well as ImapCommand building, so 1K is probably realistic. + builder = new ByteArrayBuilder (1024); cacheComparer = new ImapFolderNameComparer ('.'); FolderCache = new Dictionary (cacheComparer); @@ -540,6 +543,13 @@ public ImapFolder Trash { #endregion + internal ByteArrayBuilder GetCommandBuilder () + { + builder.Clear (); + + return builder; + } + internal ImapFolder CreateImapFolder (string encodedName, FolderAttributes attributes, char delim) { var args = new ImapFolderConstructorArgs (this, encodedName, attributes, delim); @@ -888,18 +898,18 @@ public void Disconnect (Exception ex) /// public string ReadLine (CancellationToken cancellationToken) { - using (var builder = new ByteArrayBuilder (64)) { - bool complete; + builder.Clear (); - do { - complete = Stream.ReadLine (builder, cancellationToken); - } while (!complete); + bool complete; + + do { + complete = Stream.ReadLine (builder, cancellationToken); + } while (!complete); - // FIXME: All callers expect CRLF to be trimmed, but many also want all trailing whitespace trimmed. 
- builder.TrimNewLine (); + // FIXME: All callers expect CRLF to be trimmed, but many also want all trailing whitespace trimmed. + builder.TrimNewLine (); - return builder.ToString (); - } + return builder.ToString (); } /// @@ -921,18 +931,18 @@ public string ReadLine (CancellationToken cancellationToken) /// public async Task ReadLineAsync (CancellationToken cancellationToken) { - using (var builder = new ByteArrayBuilder (64)) { - bool complete; + builder.Clear (); - do { - complete = await Stream.ReadLineAsync (builder, cancellationToken).ConfigureAwait (false); - } while (!complete); + bool complete; - // FIXME: All callers expect CRLF to be trimmed, but many also want all trailing whitespace trimmed. - builder.TrimNewLine (); + do { + complete = await Stream.ReadLineAsync (builder, cancellationToken).ConfigureAwait (false); + } while (!complete); - return builder.ToString (); - } + // FIXME: All callers expect CRLF to be trimmed, but many also want all trailing whitespace trimmed. + builder.TrimNewLine (); + + return builder.ToString (); } /// diff --git a/MailKit/Net/Imap/ImapStream.cs b/MailKit/Net/Imap/ImapStream.cs index b9464aff5f..daf5325295 100644 --- a/MailKit/Net/Imap/ImapStream.cs +++ b/MailKit/Net/Imap/ImapStream.cs @@ -76,6 +76,7 @@ class ImapStream : Stream, ICancellableStream readonly byte[] output = new byte[BlockSize]; int outputIndex; + readonly ByteArrayBuilder tokenBuilder; readonly Stack tokens; readonly IProtocolLogger logger; int literalDataLeft; @@ -91,6 +92,7 @@ class ImapStream : Stream, ICancellableStream /// The protocol logger. 
public ImapStream (Stream source, IProtocolLogger protocolLogger) { + tokenBuilder = new ByteArrayBuilder (64); tokens = new Stack (); logger = protocolLogger; IsConnected = true; @@ -583,12 +585,12 @@ ImapToken ReadQuotedStringToken (CancellationToken cancellationToken) // skip over the opening '"' inputIndex++; - using (var builder = new ByteArrayBuilder (64)) { - while (!TryReadQuotedString (builder, ref escaped)) - ReadAhead (2, cancellationToken); + tokenBuilder.Clear (); - return ImapToken.Create (ImapTokenType.QString, builder); - } + while (!TryReadQuotedString (tokenBuilder, ref escaped)) + ReadAhead (2, cancellationToken); + + return ImapToken.Create (ImapTokenType.QString, tokenBuilder); } async ValueTask ReadQuotedStringTokenAsync (CancellationToken cancellationToken) @@ -598,12 +600,12 @@ async ValueTask ReadQuotedStringTokenAsync (CancellationToken cancell // skip over the opening '"' inputIndex++; - using (var builder = new ByteArrayBuilder (64)) { - while (!TryReadQuotedString (builder, ref escaped)) - await ReadAheadAsync (2, cancellationToken).ConfigureAwait (false); + tokenBuilder.Clear (); - return ImapToken.Create (ImapTokenType.QString, builder); - } + while (!TryReadQuotedString (tokenBuilder, ref escaped)) + await ReadAheadAsync (2, cancellationToken).ConfigureAwait (false); + + return ImapToken.Create (ImapTokenType.QString, tokenBuilder); } bool TryReadAtomString (ImapTokenType type, ByteArrayBuilder builder, string specials) @@ -623,28 +625,28 @@ bool TryReadAtomString (ImapTokenType type, ByteArrayBuilder builder, string spe ImapToken ReadAtomString (ImapTokenType type, string specials, CancellationToken cancellationToken) { - using (var builder = new ByteArrayBuilder (32)) { - if (type == ImapTokenType.Flag) - builder.Append ((byte) '\\'); + tokenBuilder.Clear (); - while (!TryReadAtomString (type, builder, specials)) - ReadAhead (1, cancellationToken); + if (type == ImapTokenType.Flag) + tokenBuilder.Append ((byte) '\\'); - return 
ImapToken.Create (type, builder); - } + while (!TryReadAtomString (type, tokenBuilder, specials)) + ReadAhead (1, cancellationToken); + + return ImapToken.Create (type, tokenBuilder); } async ValueTask ReadAtomStringAsync (ImapTokenType type, string specials, CancellationToken cancellationToken) { - using (var builder = new ByteArrayBuilder (32)) { - if (type == ImapTokenType.Flag) - builder.Append ((byte) '\\'); + tokenBuilder.Clear (); - while (!TryReadAtomString (type, builder, specials)) - await ReadAheadAsync (1, cancellationToken).ConfigureAwait (false); + if (type == ImapTokenType.Flag) + tokenBuilder.Append ((byte) '\\'); - return ImapToken.Create (type, builder); - } + while (!TryReadAtomString (type, tokenBuilder, specials)) + await ReadAheadAsync (1, cancellationToken).ConfigureAwait (false); + + return ImapToken.Create (type, tokenBuilder); } ImapToken ReadAtomToken (string specials, CancellationToken cancellationToken) @@ -703,86 +705,86 @@ bool TrySkipUntilNewLine () ImapToken ReadLiteralToken (CancellationToken cancellationToken) { - using (var builder = new ByteArrayBuilder (16)) { - // skip over the '{' - builder.Append (input[inputIndex++]); - - while (!TryReadLiteralTokenValue (builder)) - ReadAhead (1, cancellationToken); + tokenBuilder.Clear (); - int endIndex = builder.Length; + // skip over the '{' + tokenBuilder.Append (input[inputIndex++]); - if (input[inputIndex] == (byte) '+') - builder.Append (input[inputIndex++]); + while (!TryReadLiteralTokenValue (tokenBuilder)) + ReadAhead (1, cancellationToken); - // technically, we need "}\r\n", but in order to be more lenient, we'll accept "}\n" - ReadAhead (2, cancellationToken); + int endIndex = tokenBuilder.Length; - if (input[inputIndex] != (byte) '}') { - // PROTOCOL ERROR... but maybe we can work around it? 
- while (!TryReadUntilCloseCurlyBrace (builder)) - ReadAhead (1, cancellationToken); - } + if (input[inputIndex] == (byte) '+') + tokenBuilder.Append (input[inputIndex++]); - // skip over the '}' - builder.Append (input[inputIndex++]); + // technically, we need "}\r\n", but in order to be more lenient, we'll accept "}\n" + ReadAhead (2, cancellationToken); - // read until we get a new line... - while (!TrySkipUntilNewLine ()) + if (input[inputIndex] != (byte) '}') { + // PROTOCOL ERROR... but maybe we can work around it? + while (!TryReadUntilCloseCurlyBrace (tokenBuilder)) ReadAhead (1, cancellationToken); + } - // skip over the '\n' - inputIndex++; + // skip over the '}' + tokenBuilder.Append (input[inputIndex++]); + + // read until we get a new line... + while (!TrySkipUntilNewLine ()) + ReadAhead (1, cancellationToken); - if (!builder.TryParse (1, endIndex, out literalDataLeft)) - return ImapToken.CreateError (builder); + // skip over the '\n' + inputIndex++; - Mode = ImapStreamMode.Literal; + if (!tokenBuilder.TryParse (1, endIndex, out literalDataLeft)) + return ImapToken.CreateError (tokenBuilder); - return ImapToken.Create (ImapTokenType.Literal, literalDataLeft); - } + Mode = ImapStreamMode.Literal; + + return ImapToken.Create (ImapTokenType.Literal, literalDataLeft); } async ValueTask ReadLiteralTokenAsync (CancellationToken cancellationToken) { - using (var builder = new ByteArrayBuilder (16)) { - // skip over the '{' - builder.Append (input[inputIndex++]); + tokenBuilder.Clear (); - while (!TryReadLiteralTokenValue (builder)) - await ReadAheadAsync (1, cancellationToken).ConfigureAwait (false); + // skip over the '{' + tokenBuilder.Append (input[inputIndex++]); - int endIndex = builder.Length; + while (!TryReadLiteralTokenValue (tokenBuilder)) + await ReadAheadAsync (1, cancellationToken).ConfigureAwait (false); - if (input[inputIndex] == (byte) '+') - builder.Append (input[inputIndex++]); + int endIndex = tokenBuilder.Length; - // technically, we need 
"}\r\n", but in order to be more lenient, we'll accept "}\n" - await ReadAheadAsync (2, cancellationToken).ConfigureAwait (false); + if (input[inputIndex] == (byte) '+') + tokenBuilder.Append (input[inputIndex++]); - if (input[inputIndex] != (byte) '}') { - // PROTOCOL ERROR... but maybe we can work around it? - while (!TryReadUntilCloseCurlyBrace (builder)) - await ReadAheadAsync (1, cancellationToken).ConfigureAwait (false); - } - - // skip over the '}' - builder.Append (input[inputIndex++]); + // technically, we need "}\r\n", but in order to be more lenient, we'll accept "}\n" + await ReadAheadAsync (2, cancellationToken).ConfigureAwait (false); - // read until we get a new line... - while (!TrySkipUntilNewLine ()) + if (input[inputIndex] != (byte) '}') { + // PROTOCOL ERROR... but maybe we can work around it? + while (!TryReadUntilCloseCurlyBrace (tokenBuilder)) await ReadAheadAsync (1, cancellationToken).ConfigureAwait (false); + } - // skip over the '\n' - inputIndex++; + // skip over the '}' + tokenBuilder.Append (input[inputIndex++]); - if (!builder.TryParse (1, endIndex, out literalDataLeft) || literalDataLeft < 0) - return ImapToken.CreateError (builder); + // read until we get a new line... 
+ while (!TrySkipUntilNewLine ()) + await ReadAheadAsync (1, cancellationToken).ConfigureAwait (false); - Mode = ImapStreamMode.Literal; + // skip over the '\n' + inputIndex++; - return ImapToken.Create (ImapTokenType.Literal, literalDataLeft); - } + if (!tokenBuilder.TryParse (1, endIndex, out literalDataLeft) || literalDataLeft < 0) + return ImapToken.CreateError (tokenBuilder); + + Mode = ImapStreamMode.Literal; + + return ImapToken.Create (ImapTokenType.Literal, literalDataLeft); } bool TrySkipWhiteSpace () @@ -1390,6 +1392,7 @@ protected override void Dispose (bool disposing) { if (disposing && !disposed) { IsConnected = false; + tokenBuilder.Dispose (); Stream.Dispose (); } From 90139ed80561a0df89a15a7a16cdf424f440a537 Mon Sep 17 00:00:00 2001 From: Jeffrey Stedfast Date: Sun, 27 Aug 2023 10:01:56 -0400 Subject: [PATCH 4/8] Modified ByteArrayBuilder.Clear() to downsize its buffer if it has grown too large One potential problem with reusing ByteArrayBuilders is that, because they can grow for some abnormally long tokens/lines/commands/etc, those oversized buffers will remain referenced by the ImapStream/ImapEngine until they are disposed which could be the life of the program. If we opportunistically scale back the size of the buffers when they are Clear()'d, then we can periodically reduce memory usage and allow those larger buffers to be used elsewhere. 
--- MailKit/ByteArrayBuilder.cs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/MailKit/ByteArrayBuilder.cs b/MailKit/ByteArrayBuilder.cs index 44f78a73c9..3a4ed6d575 100644 --- a/MailKit/ByteArrayBuilder.cs +++ b/MailKit/ByteArrayBuilder.cs @@ -27,17 +27,20 @@ using System; using System.Text; using System.Buffers; +using System.Runtime.CompilerServices; namespace MailKit { class ByteArrayBuilder : IDisposable { + readonly int initialCapacity; byte[] buffer; int length; - public ByteArrayBuilder (int initialCapacity) + public ByteArrayBuilder (int capacity) { - buffer = ArrayPool.Shared.Rent (initialCapacity); + buffer = ArrayPool.Shared.Rent (capacity); + initialCapacity = capacity; length = 0; } @@ -54,6 +57,7 @@ public byte[] GetBuffer () return buffer; } + [MethodImpl (MethodImplOptions.AggressiveInlining)] void EnsureCapacity (int capacity) { if (capacity > buffer.Length) { @@ -79,6 +83,11 @@ public void Append (byte[] text, int startIndex, int count) public void Clear () { + if (buffer.Length > initialCapacity * 4) { + ArrayPool.Shared.Return (buffer); + buffer = ArrayPool.Shared.Rent (initialCapacity); + } + length = 0; } From a86e4fa8fcd3aaf4ccb18648f734326824eea052 Mon Sep 17 00:00:00 2001 From: Jeffrey Stedfast Date: Mon, 4 Sep 2023 10:56:00 -0400 Subject: [PATCH 5/8] Updated ImapTokenCache to be able to handle non-ASCII tokens --- MailKit/Net/Imap/ImapToken.cs | 19 +++----------- MailKit/Net/Imap/ImapTokenCache.cs | 42 +++++++++++++++++++++++++++--- 2 files changed, 41 insertions(+), 20 deletions(-) diff --git a/MailKit/Net/Imap/ImapToken.cs b/MailKit/Net/Imap/ImapToken.cs index ba7d774ba3..b169e6c91d 100644 --- a/MailKit/Net/Imap/ImapToken.cs +++ b/MailKit/Net/Imap/ImapToken.cs @@ -127,19 +127,6 @@ public static ImapToken Create (ImapTokenType type, int literalLength) return new ImapToken (type, literalLength); } - static bool IsAscii (ByteArrayBuilder builder) - { - for (int i = 0; i < builder.Length; i++) { - byte c = 
builder[i]; - - // Disregard any non-ASCII tokens. - if (c < 32 || c >= 127) - return false; - } - - return true; - } - static bool IsCacheable (ByteArrayBuilder builder) { if (builder.Length < 2 || builder.Length > 32) @@ -153,7 +140,7 @@ static bool IsCacheable (ByteArrayBuilder builder) if (builder[0] >= (byte) 'A' && builder[0] <= (byte) 'Z' && builder[1] >= (byte) '0' && builder[1] <= (byte) '9') return false; - return IsAscii (builder); + return true; } public static ImapToken Create (ImapTokenType type, ByteArrayBuilder builder) @@ -169,7 +156,7 @@ public static ImapToken Create (ImapTokenType type, ByteArrayBuilder builder) return token; } - cachable = IsAscii (builder); + cachable = true; } else if (type == ImapTokenType.Atom) { if (builder.Equals ("NIL", true)) { // Look for the cached NIL token that matches this capitalization. @@ -218,7 +205,7 @@ public static ImapToken Create (ImapTokenType type, ByteArrayBuilder builder) cachable = IsCacheable (builder); } else if (type == ImapTokenType.QString) { - cachable = IsAscii (builder); + cachable = true; } if (cachable) diff --git a/MailKit/Net/Imap/ImapTokenCache.cs b/MailKit/Net/Imap/ImapTokenCache.cs index 1f64946284..1150650b31 100644 --- a/MailKit/Net/Imap/ImapTokenCache.cs +++ b/MailKit/Net/Imap/ImapTokenCache.cs @@ -37,19 +37,29 @@ class ImapTokenCache readonly Dictionary> cache; readonly LinkedList list; readonly ImapTokenKey lookupKey; + readonly Decoder[] decoders; + readonly char[] chars; public ImapTokenCache () { cache = new Dictionary> (); list = new LinkedList (); lookupKey = new ImapTokenKey (); + + // Start with the assumption that token values will be valid UTF-8 and then fall back to iso-8859-1. 
+ decoders = new Decoder[2] { + TextEncodings.UTF8.GetDecoder (), + TextEncodings.Latin1.GetDecoder () + }; + + chars = new char[128]; } public ImapToken AddOrGet (ImapTokenType type, ByteArrayBuilder builder) { lock (cache) { // lookupKey is a pre-allocated key used for lookups - lookupKey.Init (type, builder.GetBuffer (), builder.Length); + lookupKey.Init (decoders, chars, type, builder.GetBuffer (), builder.Length); if (cache.TryGetValue (lookupKey, out var node)) { // move the node to the head of the list @@ -102,7 +112,7 @@ public ImapTokenKey (ImapTokenType type, string key) Init (type, key); } - public void Init (ImapTokenType type, byte[] key, int length) + public void Init (Decoder[] decoders, char[] chars, ImapTokenType type, byte[] key, int length) { this.type = type; this.byteArrayKey = key; @@ -111,8 +121,32 @@ public void Init (ImapTokenType type, byte[] key, int length) var hash = new HashCode (); hash.Add ((int) type); - for (int i = 0; i < length; i++) - hash.Add ((char) key[i]); + + foreach (var decoder in decoders) { + bool completed; + int index = 0; + + do { + try { + decoder.Convert (key, index, length - index, chars, 0, chars.Length, true, out var bytesUsed, out var charsUsed, out completed); + index += bytesUsed; + + for (int i = 0; i < charsUsed; i++) + hash.Add (chars[i]); + } catch (DecoderFallbackException) { + // Restart the hash... 
+ hash = new HashCode (); + hash.Add ((int) type); + completed = false; + break; + } + } while (!completed); + + decoder.Reset (); + + if (completed) + break; + } this.hashCode = hash.ToHashCode (); } From 6f9f873ce15e3f26e16e42719e64bec56d8f2348 Mon Sep 17 00:00:00 2001 From: Jeffrey Stedfast Date: Mon, 4 Sep 2023 12:17:33 -0400 Subject: [PATCH 6/8] Optimized ImapTokenCache.AddOrGet() by fast-pathing charset conversion --- MailKit/Net/Imap/ImapTokenCache.cs | 35 +++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/MailKit/Net/Imap/ImapTokenCache.cs b/MailKit/Net/Imap/ImapTokenCache.cs index 1150650b31..b6da1c6457 100644 --- a/MailKit/Net/Imap/ImapTokenCache.cs +++ b/MailKit/Net/Imap/ImapTokenCache.cs @@ -27,6 +27,8 @@ using System; using System.Text; using System.Collections.Generic; +using System.Buffers; +using System.Diagnostics; namespace MailKit.Net.Imap { @@ -59,7 +61,7 @@ public ImapToken AddOrGet (ImapTokenType type, ByteArrayBuilder builder) { lock (cache) { // lookupKey is a pre-allocated key used for lookups - lookupKey.Init (decoders, chars, type, builder.GetBuffer (), builder.Length); + lookupKey.Init (decoders, chars, type, builder.GetBuffer (), builder.Length, out var decoder, out int charsNeeded); if (cache.TryGetValue (lookupKey, out var node)) { // move the node to the head of the list @@ -70,7 +72,27 @@ public ImapToken AddOrGet (ImapTokenType type, ByteArrayBuilder builder) return node.Value.Token; } - var token = new ImapToken (type, builder.ToString ()); + string value; + + if (charsNeeded <= chars.Length) { + // If the number of needed chars is <= the length of our temp buffer, then it should all be contained. + value = new string (chars, 0, charsNeeded); + } else { + var buffer = ArrayPool.Shared.Rent (charsNeeded); + try { + // Note: This conversion should go flawlessly, so we'll just Debug.Assert() our expectations. 
+ decoder.Convert (builder.GetBuffer (), 0, builder.Length, buffer, 0, buffer.Length, true, out var bytesUsed, out var charsUsed, out var completed); + Debug.Assert (bytesUsed == builder.Length); + Debug.Assert (charsUsed == charsNeeded); + Debug.Assert (completed); + value = new string (buffer, 0, charsUsed); + } finally { + ArrayPool.Shared.Return (buffer); + decoder.Reset (); + } + } + + var token = new ImapToken (type, value); if (cache.Count >= capacity) { // remove the least recently used token @@ -112,7 +134,7 @@ public ImapTokenKey (ImapTokenType type, string key) Init (type, key); } - public void Init (Decoder[] decoders, char[] chars, ImapTokenType type, byte[] key, int length) + public void Init (Decoder[] decoders, char[] chars, ImapTokenType type, byte[] key, int length, out Decoder correctDecoder, out int charsNeeded) { this.type = type; this.byteArrayKey = key; @@ -122,13 +144,19 @@ public void Init (Decoder[] decoders, char[] chars, ImapTokenType type, byte[] k var hash = new HashCode (); hash.Add ((int) type); + correctDecoder = null; + charsNeeded = 0; + foreach (var decoder in decoders) { bool completed; int index = 0; + correctDecoder = decoder; + do { try { decoder.Convert (key, index, length - index, chars, 0, chars.Length, true, out var bytesUsed, out var charsUsed, out completed); + charsNeeded += charsUsed; index += bytesUsed; for (int i = 0; i < charsUsed; i++) @@ -138,6 +166,7 @@ public void Init (Decoder[] decoders, char[] chars, ImapTokenType type, byte[] k hash = new HashCode (); hash.Add ((int) type); completed = false; + charsNeeded = 0; break; } } while (!completed); From e0021724a9b8038f6e3d01d20d2aab8bf35b6fc2 Mon Sep 17 00:00:00 2001 From: Jeffrey Stedfast Date: Mon, 4 Sep 2023 12:29:14 -0400 Subject: [PATCH 7/8] Removed unnecessary cache locking --- MailKit/Net/Imap/ImapTokenCache.cs | 90 +++++++++++++++--------------- 1 file changed, 44 insertions(+), 46 deletions(-) diff --git a/MailKit/Net/Imap/ImapTokenCache.cs 
b/MailKit/Net/Imap/ImapTokenCache.cs index b6da1c6457..29b3a4c2b6 100644 --- a/MailKit/Net/Imap/ImapTokenCache.cs +++ b/MailKit/Net/Imap/ImapTokenCache.cs @@ -59,62 +59,60 @@ public ImapTokenCache () public ImapToken AddOrGet (ImapTokenType type, ByteArrayBuilder builder) { - lock (cache) { - // lookupKey is a pre-allocated key used for lookups - lookupKey.Init (decoders, chars, type, builder.GetBuffer (), builder.Length, out var decoder, out int charsNeeded); + // lookupKey is a pre-allocated key used for lookups + lookupKey.Init (decoders, chars, type, builder.GetBuffer (), builder.Length, out var decoder, out int charsNeeded); - if (cache.TryGetValue (lookupKey, out var node)) { - // move the node to the head of the list - list.Remove (node); - list.AddFirst (node); - node.Value.Count++; + if (cache.TryGetValue (lookupKey, out var node)) { + // move the node to the head of the list + list.Remove (node); + list.AddFirst (node); + node.Value.Count++; - return node.Value.Token; - } + return node.Value.Token; + } - string value; - - if (charsNeeded <= chars.Length) { - // If the number of needed chars is <= the length of our temp buffer, then it should all be contained. - value = new string (chars, 0, charsNeeded); - } else { - var buffer = ArrayPool.Shared.Rent (charsNeeded); - try { - // Note: This conversion should go flawlessly, so we'll just Debug.Assert() our expectations. - decoder.Convert (builder.GetBuffer (), 0, builder.Length, buffer, 0, buffer.Length, true, out var bytesUsed, out var charsUsed, out var completed); - Debug.Assert (bytesUsed == builder.Length); - Debug.Assert (charsUsed == charsNeeded); - Debug.Assert (completed); - value = new string (buffer, 0, charsUsed); - } finally { - ArrayPool.Shared.Return (buffer); - decoder.Reset (); - } + string value; + + if (charsNeeded <= chars.Length) { + // If the number of needed chars is <= the length of our temp buffer, then it should all be contained. 
+ value = new string (chars, 0, charsNeeded); + } else { + var buffer = ArrayPool.Shared.Rent (charsNeeded); + try { + // Note: This conversion should go flawlessly, so we'll just Debug.Assert() our expectations. + decoder.Convert (builder.GetBuffer (), 0, builder.Length, buffer, 0, buffer.Length, true, out var bytesUsed, out var charsUsed, out var completed); + Debug.Assert (bytesUsed == builder.Length); + Debug.Assert (charsUsed == charsNeeded); + Debug.Assert (completed); + value = new string (buffer, 0, charsUsed); + } finally { + ArrayPool.Shared.Return (buffer); + decoder.Reset (); } + } - var token = new ImapToken (type, value); + var token = new ImapToken (type, value); - if (cache.Count >= capacity) { - // remove the least recently used token - node = list.Last; - list.RemoveLast (); - cache.Remove (node.Value.Key); + if (cache.Count >= capacity) { + // remove the least recently used token + node = list.Last; + list.RemoveLast (); + cache.Remove (node.Value.Key); - // re-use the node, item and key to avoid allocations - node.Value.Key.Init (type, (string) token.Value); - node.Value.Token = token; - } else { - var key = new ImapTokenKey (type, (string) token.Value); - var item = new ImapTokenItem (key, token); + // re-use the node, item and key to avoid allocations + node.Value.Key.Init (type, (string) token.Value); + node.Value.Token = token; + } else { + var key = new ImapTokenKey (type, (string) token.Value); + var item = new ImapTokenItem (key, token); - node = new LinkedListNode (item); - } + node = new LinkedListNode (item); + } - cache.Add (node.Value.Key, node); - list.AddFirst (node); + cache.Add (node.Value.Key, node); + list.AddFirst (node); - return token; - } + return token; } class ImapTokenKey From ce66da966707e89bede29a82b53fe45b81ca3030 Mon Sep 17 00:00:00 2001 From: Jeffrey Stedfast Date: Sat, 2 Mar 2024 12:15:29 -0500 Subject: [PATCH 8/8] Fixed ImapTokenCache to properly equate non-ASCII tokens --- MailKit/Net/Imap/ImapTokenCache.cs | 111 
++++++++++++----------------- 1 file changed, 47 insertions(+), 64 deletions(-) diff --git a/MailKit/Net/Imap/ImapTokenCache.cs b/MailKit/Net/Imap/ImapTokenCache.cs index 29b3a4c2b6..6e03d125fe 100644 --- a/MailKit/Net/Imap/ImapTokenCache.cs +++ b/MailKit/Net/Imap/ImapTokenCache.cs @@ -26,9 +26,9 @@ using System; using System.Text; -using System.Collections.Generic; using System.Buffers; using System.Diagnostics; +using System.Collections.Generic; namespace MailKit.Net.Imap { @@ -40,7 +40,7 @@ class ImapTokenCache readonly LinkedList list; readonly ImapTokenKey lookupKey; readonly Decoder[] decoders; - readonly char[] chars; + char[] charBuffer; public ImapTokenCache () { @@ -54,13 +54,13 @@ public ImapTokenCache () TextEncodings.Latin1.GetDecoder () }; - chars = new char[128]; + charBuffer = ArrayPool.Shared.Rent (256); } public ImapToken AddOrGet (ImapTokenType type, ByteArrayBuilder builder) { // lookupKey is a pre-allocated key used for lookups - lookupKey.Init (decoders, chars, type, builder.GetBuffer (), builder.Length, out var decoder, out int charsNeeded); + lookupKey.Init (decoders, ref charBuffer, type, builder.GetBuffer (), builder.Length, out int charsNeeded); if (cache.TryGetValue (lookupKey, out var node)) { // move the node to the head of the list @@ -71,26 +71,7 @@ public ImapToken AddOrGet (ImapTokenType type, ByteArrayBuilder builder) return node.Value.Token; } - string value; - - if (charsNeeded <= chars.Length) { - // If the number of needed chars is <= the length of our temp buffer, then it should all be contained. - value = new string (chars, 0, charsNeeded); - } else { - var buffer = ArrayPool.Shared.Rent (charsNeeded); - try { - // Note: This conversion should go flawlessly, so we'll just Debug.Assert() our expectations. 
- decoder.Convert (builder.GetBuffer (), 0, builder.Length, buffer, 0, buffer.Length, true, out var bytesUsed, out var charsUsed, out var completed); - Debug.Assert (bytesUsed == builder.Length); - Debug.Assert (charsUsed == charsNeeded); - Debug.Assert (completed); - value = new string (buffer, 0, charsUsed); - } finally { - ArrayPool.Shared.Return (buffer); - decoder.Reset (); - } - } - + var value = new string (charBuffer, 0, charsNeeded); var token = new ImapToken (type, value); if (cache.Count >= capacity) { @@ -100,10 +81,10 @@ public ImapToken AddOrGet (ImapTokenType type, ByteArrayBuilder builder) cache.Remove (node.Value.Key); // re-use the node, item and key to avoid allocations - node.Value.Key.Init (type, (string) token.Value); + node.Value.Key.Init (type, value, lookupKey); node.Value.Token = token; } else { - var key = new ImapTokenKey (type, (string) token.Value); + var key = new ImapTokenKey (type, value, lookupKey); var item = new ImapTokenItem (key, token); node = new LinkedListNode (item); @@ -118,47 +99,50 @@ public ImapToken AddOrGet (ImapTokenType type, ByteArrayBuilder builder) class ImapTokenKey { ImapTokenType type; - byte[] byteArrayKey; + char[] charBuffer; string stringKey; - int length; int hashCode; + int length; public ImapTokenKey () { } - public ImapTokenKey (ImapTokenType type, string key) + public ImapTokenKey (ImapTokenType type, string value, ImapTokenKey key) { - Init (type, key); + Init (type, value, key); } - public void Init (Decoder[] decoders, char[] chars, ImapTokenType type, byte[] key, int length, out Decoder correctDecoder, out int charsNeeded) + public void Init (Decoder[] decoders, ref char[] charBuffer, ImapTokenType type, byte[] key, int length, out int charsNeeded) { this.type = type; - this.byteArrayKey = key; - this.stringKey = null; - this.length = length; var hash = new HashCode (); hash.Add ((int) type); - correctDecoder = null; charsNeeded = 0; + // Make sure the char buffer is at least as large as the key. 
+ if (charBuffer.Length < length) { + ArrayPool.Shared.Return (charBuffer); + charBuffer = ArrayPool.Shared.Rent (length); + } + foreach (var decoder in decoders) { bool completed; int index = 0; - correctDecoder = decoder; - do { try { - decoder.Convert (key, index, length - index, chars, 0, chars.Length, true, out var bytesUsed, out var charsUsed, out completed); + decoder.Convert (key, index, length - index, charBuffer, charsNeeded, charBuffer.Length - charsNeeded, true, out var bytesUsed, out var charsUsed, out completed); charsNeeded += charsUsed; index += bytesUsed; for (int i = 0; i < charsUsed; i++) - hash.Add (chars[i]); + hash.Add (charBuffer[i]); + + if (completed) + break; } catch (DecoderFallbackException) { // Restart the hash... hash = new HashCode (); @@ -167,7 +151,13 @@ public void Init (Decoder[] decoders, char[] chars, ImapTokenType type, byte[] k charsNeeded = 0; break; } - } while (!completed); + + // The char buffer was not large enough to contain the full token. Resize it and try again. 
+ var newBuffer = ArrayPool.Shared.Rent (charBuffer.Length + (length - index)); + charBuffer.AsSpan (0, charsNeeded).CopyTo (newBuffer); + ArrayPool.Shared.Return (charBuffer); + charBuffer = newBuffer; + } while (true); decoder.Reset (); @@ -175,28 +165,25 @@ public void Init (Decoder[] decoders, char[] chars, ImapTokenType type, byte[] k break; } + this.charBuffer = charBuffer; + this.length = charsNeeded; + this.hashCode = hash.ToHashCode (); } - public void Init (ImapTokenType type, string key) + public void Init (ImapTokenType type, string value, ImapTokenKey key) { this.type = type; - this.byteArrayKey = null; - this.stringKey = key; - this.length = key.Length; - - var hash = new HashCode (); - hash.Add ((int) type); - for (int i = 0; i < length; i++) - hash.Add (key[i]); - - this.hashCode = hash.ToHashCode (); + this.charBuffer = null; + this.stringKey = value; + this.length = value.Length; + this.hashCode = key.hashCode; } - static bool Equals (string str, byte[] bytes) + static bool Equals (string str, char[] chars) { for (int i = 0; i < str.Length; i++) { - if (str[i] != (char) bytes[i]) + if (str[i] != chars[i]) return false; } @@ -208,22 +195,18 @@ static bool Equals (ImapTokenKey self, ImapTokenKey other) if (self.type != other.type || self.length != other.length) return false; + // Note: At most, only one of the ImapTokenKeys will use a charBuffer and that ImapTokenKey will be the lookup key. if (self.stringKey != null) { if (other.stringKey != null) return self.stringKey.Equals (other.stringKey, StringComparison.Ordinal); - return Equals (self.stringKey, other.byteArrayKey); - } - - if (other.stringKey != null) - return Equals (other.stringKey, self.byteArrayKey); + return Equals (self.stringKey, other.charBuffer); + } else { + // Note: 'self' MUST be the lookup key. 
+ Debug.Assert (self.charBuffer != null); - for (int i = 0; i < self.length; i++) { - if (self.byteArrayKey[i] != other.byteArrayKey[i]) - return false; + return Equals (other.stringKey, self.charBuffer); } - - return true; } public override bool Equals (object obj) @@ -238,7 +221,7 @@ public override int GetHashCode () public override string ToString () { - return string.Format ("{0}: {1}", type, stringKey ?? Encoding.UTF8.GetString (byteArrayKey, 0, length)); + return string.Format ("{0}: {1}", type, stringKey ?? new string (charBuffer, 0, length)); } }