use TextEncoder and TextDecoder when available

peaBerberian · peaBerberian · commit 2384d0605b15 · 2021-01-11T18:59:22.000+01:00
This commit allows the RxPlayer to use the `TextEncoder` and `TextDecoder` APIs, when available, respectively to encode JS strings into a UTF-8 byte sequence (`TextEncoder` doesn't seem to be able to encode into any other encoding) and to decode from either UTF-8, UTF-16BE or UTF-16LE into a JS string. Because `TextEncoder` and `TextDecoder` are not defined in old browser versions we claim to support, nor in IE11, we still fall back to a custom implementation if either API doesn't exist or if the operation fails. It is important to note a significant difference between using the `TextDecoder` interface and the previous implementation: when encountering invalid byte sequences in the corresponding encoding, the `TextDecoder` will replace those with a "REPLACEMENT CHARACTER" (�). This seems fine and even desirable, but the previous implementation just threw in that same situation. This means that we now have two different behaviors, depending on the current platform / browser. The functions using the `TextDecoder` APIs are even directly defined in the `StringUtils` tools, and thus that new behavior can be directly noticeable by applications using them. Thankfully, nothing is defined in our API documentation about invalid sequences. Even if we can consider that this does not break our API (though it is still unclear to me), it is something to keep in mind, as this might be unexpected for users relying on this API throwing. Also, I tried to add unit tests, but it appears that "jsdom", on which jest relies to perform unit tests while simulating a browser in node, does not include either API yet. Though it is under way: jsdom/whatwg-encoding#11
diff --git a/src/utils/string_parsing.ts b/src/utils/string_parsing.ts
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+import log from "../log";
 import assert from "./assert";
 
 /**
@@ -56,6 +57,17 @@ function strToBeUtf16(str: string): Uint8Array {
  * @returns {string}
  */
 function utf16LEToStr(bytes : Uint8Array) : string {
+  if (typeof window.TextDecoder === "function") {
+    try {
+      // instantiation throws if the encoding is unsupported
+      const decoder = new TextDecoder("utf-16le");
+      return decoder.decode(bytes);
+    } catch (e) {
+      log.warn("Utils: could not use TextDecoder to parse UTF-16LE, " +
+               "fallbacking to another implementation", e);
+    }
+  }
+
   let str = "";
   for (let i = 0; i < bytes.length; i += 2) {
     str += String.fromCharCode((bytes[i + 1] << 8) + bytes[i]);
@@ -69,6 +81,17 @@ function utf16LEToStr(bytes : Uint8Array) : string {
  * @returns {string}
  */
 function beUtf16ToStr(bytes : Uint8Array) : string {
+  if (typeof window.TextDecoder === "function") {
+    try {
+      // instantiation throws if the encoding is unsupported
+      const decoder = new TextDecoder("utf-16be");
+      return decoder.decode(bytes);
+    } catch (e) {
+      log.warn("Utils: could not use TextDecoder to parse UTF-16BE, " +
+               "fallbacking to another implementation", e);
+    }
+  }
+
   let str = "";
   for (let i = 0; i < bytes.length; i += 2) {
     str += String.fromCharCode((bytes[i] << 8) + bytes[i + 1]);
@@ -83,6 +106,17 @@ function beUtf16ToStr(bytes : Uint8Array) : string {
  * @returns {Uint8Array}
  */
 function strToUtf8(str : string) : Uint8Array {
+  if (typeof window.TextEncoder === "function") {
+    try {
+      // TextEncoder only encodes into UTF-8, no encoding has to be specified
+      const encoder = new TextEncoder();
+      return encoder.encode(str);
+    } catch (e) {
+      log.warn("Utils: could not use TextEncoder to encode string into UTF-8, " +
+               "fallbacking to another implementation", e);
+    }
+  }
+
   // http://stackoverflow.com/a/13691499 provides an ugly but functional solution.
   // (Note you have to dig deeper to understand it but I have more faith in
   // stackoverflow not going down in the future so I leave that link.)
@@ -209,6 +243,17 @@ function intToHex(num : number, size : number) : string {
  * @returns {string}
  */
 function utf8ToStr(data : Uint8Array) : string {
+  if (typeof window.TextDecoder === "function") {
+    try {
+      // TextDecoder uses UTF-8 by default
+      const decoder = new TextDecoder();
+      return decoder.decode(data);
+    } catch (e) {
+      log.warn("Utils: could not use TextDecoder to parse UTF-8, " +
+               "fallbacking to another implementation", e);
+    }
+  }
+
   let uint8 = data;
 
   // If present, strip off the UTF-8 BOM.