From cfbe809e59bba00a8dc81ef0dadc1b87481ff389 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20B=C3=A4hr?= Date: Mon, 14 Sep 2020 17:11:28 +0200 Subject: [PATCH] Fix #3906: missing url escape of square backets As System.Uri.GetComponents(...*UriFormat.SafeUnescaped*) is based on the outdated RFC 2396 it unescapes angle brackets, which usage is only declared "unwise" in the original URI RFC from 1998. The uptodate one, RFC 3986, forbidds them in path segments. This causes trouble with software based on the newer RFC, such as the OPC API in `System.IO.Packaing`. Instead of hooking a third layer of escape/unescape layer on top of the existing code, I decided to rewrite the `ensureValidName` method, hoping the intend of the code is clearer now. For more details see the comments in #3906. --- src/Paket.Core/Packaging/NupkgWriter.fs | 67 ++++++++++++------------- 1 file changed, 33 insertions(+), 34 deletions(-) diff --git a/src/Paket.Core/Packaging/NupkgWriter.fs b/src/Paket.Core/Packaging/NupkgWriter.fs index 24e30f0c90..7835878f90 100644 --- a/src/Paket.Core/Packaging/NupkgWriter.fs +++ b/src/Paket.Core/Packaging/NupkgWriter.fs @@ -293,43 +293,42 @@ module internal NupkgWriter = normalizePath path = outputFolder || (exclusions |> List.exists (fun f -> f path)) let ensureValidName (target: string) = - // Some characters that are considered reserved by RFC 2396 - // and thus escaped by Uri.EscapeDataString, are valid in folder names. - // Concrete problem solved here: + // Some characters that are considered reserved by (obsolete) RFC 2396 + // and thus escaped by Uri.EscapeDataString, are valid in folder names + // according to the current RFC 3986. + // In nuget packages this over-aggressive escaping does not hurt when + // unpacking using a nuget client. However, it makes the raw package content + // harder to read for humans and may confuse other tools. + // Concrete problem solved here (cf. #1348): // Creating deployable packages for javascript applications // that use javascript packages from NPM, where the @ char // is used in folder names to separate versions. - // - // Ref: https://msdn.microsoft.com/en-us/library/system.uri.escapedatastring(v=vs.110).aspx#Anchor_2 - // http://tools.ietf.org/html/rfc2396#section-2 - let problemChars = ["@","~~at~~"; "+","~~plus~~"; "%","~~percent~~"] - - let fakeEscapeProblemChars (source:string) = - problemChars - |> List.fold (fun (escaped:string) (problem, fakeEscape) -> - escaped.Replace(problem,fakeEscape)) source - - let unFakeEscapeProblemChars (source:string) = - problemChars - |> List.fold (fun (escaped:string) (problem, fakeEscape) -> - escaped.Replace(fakeEscape, problem)) source - - let escapeTarget (target:string) = - let escapedTargetParts = - target.Replace("\\", "/").Split('/') - |> Array.map Uri.EscapeDataString - String.Join("/" ,escapedTargetParts) - - let toUri (escapedTarget:string) = - let uri1 = Uri(escapedTarget, UriKind.Relative) - let uri2 = Uri(uri1.GetComponents(UriComponents.SerializationInfoString, UriFormat.SafeUnescaped), UriKind.Relative) - uri2.GetComponents(UriComponents.SerializationInfoString, UriFormat.UriEscaped) - - target - |> fakeEscapeProblemChars - |> escapeTarget - |> unFakeEscapeProblemChars - |> toUri + + // For a maximum of comfort and compatibility, we unescape everything + // that is allowed by RFC 3986 for path segments (cf. ยง3.3 in the RFC): + // pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + let sub_delims = ['!'; '$'; '&'; '\''; '('; ')'; '*'; '+'; ','; ';'; '='] + let allowedPathSegmentChars = sub_delims @ [ ':'; '@'] + let replacementMap = + allowedPathSegmentChars + |> List.map (fun c -> sprintf "%%%02X" ((int)c), (string)c) + + let unescapeAllowedPathSegmentChars(source: string) = + replacementMap + |> List.fold (fun (escaped: string) (encoded, plain) -> + escaped.Replace(encoded, plain)) source + + let escapePathSegment segment = + segment + |> Uri.UnescapeDataString // ensure we really work on unescaped data, cf. #1837. Still needed? + |> Uri.EscapeDataString + |> unescapeAllowedPathSegmentChars + + let escapedTargetParts = + target.Replace("\\", "/").Split('/') + |> Array.map escapePathSegment + + String.Join("/" , escapedTargetParts) let addEntry path writerF = if entries.Contains path then () else