@@ -20,8 +20,8 @@ const { FastBuffer } = require('internal/buffer');
2020const {
2121 ERR_ENCODING_NOT_SUPPORTED ,
2222 ERR_INVALID_ARG_TYPE ,
23+ ERR_ENCODING_INVALID_ENCODED_DATA ,
2324 ERR_INVALID_THIS ,
24- ERR_NO_ICU ,
2525} = require ( 'internal/errors' ) . codes ;
2626const kSingleByte = Symbol ( 'single-byte' ) ;
2727const kHandle = Symbol ( 'handle' ) ;
@@ -30,11 +30,11 @@ const kEncoding = Symbol('encoding');
3030const kDecoder = Symbol ( 'decoder' ) ;
3131const kChunk = Symbol ( 'chunk' ) ;
3232const kFatal = Symbol ( 'kFatal' ) ;
33- const kUTF8FastPath = Symbol ( 'kUTF8FastPath ' ) ;
33+ const kUnicode = Symbol ( 'kUnicode ' ) ;
3434const kIgnoreBOM = Symbol ( 'kIgnoreBOM' ) ;
3535
3636const { isSinglebyteEncoding, createSinglebyteDecoder } = require ( 'internal/encoding/single-byte' ) ;
37- const { unfinishedBytesUtf8 , mergePrefixUtf8 } = require ( 'internal/encoding/util' ) ;
37+ const { unfinishedBytes , mergePrefix } = require ( 'internal/encoding/util' ) ;
3838
3939const {
4040 getConstructorOf,
@@ -419,11 +419,33 @@ if (hasIntl) {
419419
420420const kBOMSeen = Symbol ( 'BOM seen' ) ;
421421
422- let StringDecoder ;
423- function lazyStringDecoder ( ) {
424- if ( StringDecoder === undefined )
425- ( { StringDecoder } = require ( 'string_decoder' ) ) ;
426- return StringDecoder ;
/**
 * Decodes a buffer of little-endian UTF-16 bytes into a string.
 *
 * A trailing odd byte cannot form a code unit: in fatal mode it is an error,
 * otherwise it is dropped and replaced by a single U+FFFD at the end of the
 * result. Unless `ignoreBom` is truthy, a leading U+FEFF is stripped.
 * @param {Uint8Array} le Bytes to decode; must expose `ucs2Slice()`.
 * @param {boolean} ignoreBom When truthy, a leading U+FEFF is kept in the output.
 * @param {boolean} fatal When truthy, malformed input throws instead of being
 *   repaired with U+FFFD.
 * @param {string} encoding Encoding label reported in the thrown error.
 * @returns {string}
 * @throws {ERR_ENCODING_INVALID_ENCODED_DATA} In fatal mode, on an odd byte
 *   length or on a decoded string that is not well-formed.
 */
function decodeUTF16bufferLE(le, ignoreBom, fatal, encoding) {
  const hasDanglingByte = le.length % 2 !== 0;
  if (hasDanglingByte && fatal) {
    throw new ERR_ENCODING_INVALID_ENCODED_DATA(encoding, undefined);
  }

  // Only whole 16-bit units are decoded; the dangling byte becomes U+FFFD.
  const tail = hasDanglingByte ? '\ufffd' : '';
  const units = hasDanglingByte ? le.subarray(0, -1) : le;
  if (units.length === 0) return tail;

  const raw = units.ucs2Slice();
  const text = (!ignoreBom && raw[0] === '\ufeff') ? StringPrototypeSlice(raw, 1) : raw;

  if (fatal) {
    // Lone surrogates are only tolerated (and repaired) in non-fatal mode.
    if (text.isWellFormed()) return text;
    throw new ERR_ENCODING_INVALID_ENCODED_DATA(encoding, undefined);
  }
  return text.toWellFormed() + tail;
}
436+
/**
 * Decodes UTF-16LE input: normalizes `input` through `parseInput()` and hands
 * the resulting bytes to `decodeUTF16bufferLE()` under the 'utf-16le' label.
 * @param {*} input Anything accepted by `parseInput()`.
 * @param {boolean} ignoreBom When truthy, a leading U+FEFF is kept.
 * @param {boolean} fatal When truthy, malformed input throws.
 * @returns {string}
 */
function decodeUTF16le(input, ignoreBom, fatal) {
  return decodeUTF16bufferLE(parseInput(input), ignoreBom, fatal, 'utf-16le');
}
441+
/**
 * Decodes UTF-16BE input by byte-swapping a private copy of the bytes and
 * reusing the little-endian decoder under the 'utf-16be' label.
 * @param {*} input Anything accepted by `parseInput()`.
 * @param {boolean} ignoreBom When truthy, a leading U+FEFF is kept.
 * @param {boolean} fatal When truthy, malformed input throws.
 * @returns {string}
 */
function decodeUTF16be(input, ignoreBom, fatal) {
  const source = parseInput(input);

  // The swap below happens in place, so operate on a copy and leave the
  // caller's bytes untouched.
  const swapped = new FastBuffer(source.length);
  swapped.set(source);

  // Only the even-length prefix is swapped; a dangling last byte is left as
  // is and dealt with by decodeUTF16bufferLE().
  const evenPart = swapped.length % 2 !== 0 ? swapped.subarray(0, -1) : swapped;
  evenPart.swap16();

  return decodeUTF16bufferLE(swapped, ignoreBom, fatal, 'utf-16be');
}
428450
429451class TextDecoder {
@@ -446,33 +468,29 @@ class TextDecoder {
446468 this [ kEncoding ] = enc ;
447469 this [ kIgnoreBOM ] = Boolean ( options ?. ignoreBOM ) ;
448470 this [ kFatal ] = Boolean ( options ?. fatal ) ;
449- this [ kUTF8FastPath ] = false ;
471+ this [ kUnicode ] = undefined ;
450472 this [ kHandle ] = undefined ;
451473 this [ kSingleByte ] = undefined ; // Does not care about streaming or BOM
452474 this [ kChunk ] = null ; // A copy of previous streaming tail or null
453475
// Select exactly one decoding backend for the resolved encoding. The
// branches must be mutually exclusive: the ICU branch is an `else if`, so
// encodings handled by the JS decoders (UTF-8, UTF-16LE/BE, single-byte)
// never reach the ICU converter lookup, and never hit the
// ERR_ENCODING_NOT_SUPPORTED fallback when Intl is unavailable.
if (enc === 'utf-8') {
  this[kUnicode] = decodeUTF8;
  this[kBOMSeen] = false;
} else if (enc === 'utf-16le') {
  this[kUnicode] = decodeUTF16le;
  this[kBOMSeen] = false;
} else if (enc === 'utf-16be') {
  this[kUnicode] = decodeUTF16be;
  this[kBOMSeen] = false;
} else if (isSinglebyteEncoding(enc)) {
  this[kSingleByte] = createSinglebyteDecoder(enc, this[kFatal]);
} else if (hasIntl) {
  // Need to throw early if we don't support the encoding
  let icuEncoding = this[kEncoding];
  if (icuEncoding === 'gbk') icuEncoding = 'gb18030'; // 10.1.1. GBK's decoder is gb18030's decoder
  const handle = icuGetConverter(icuEncoding, this[kFlags]);
  if (handle === undefined)
    throw new ERR_ENCODING_NOT_SUPPORTED(this[kEncoding]);
  this[kHandle] = handle;
} else {
  throw new ERR_ENCODING_NOT_SUPPORTED(this[kEncoding]);
}
@@ -485,19 +503,19 @@ class TextDecoder {
485503 if ( this [ kSingleByte ] ) return this [ kSingleByte ] ( parseInput ( input ) ) ;
486504
487505 const stream = options ?. stream ;
488- if ( this [ kUTF8FastPath ] ) {
506+ if ( this [ kUnicode ] ) {
489507 const chunk = this [ kChunk ] ;
490508 const ignoreBom = this [ kIgnoreBOM ] || this [ kBOMSeen ] ;
491509 if ( ! stream ) {
492510 this [ kBOMSeen ] = false ;
493- if ( ! chunk ) return decodeUTF8 ( input , ignoreBom , this [ kFatal ] ) ;
511+ if ( ! chunk ) return this [ kUnicode ] ( input , ignoreBom , this [ kFatal ] ) ;
494512 }
495513
496514 let u = parseInput ( input ) ;
497515 if ( u . length === 0 && stream ) return '' ; // no state change
498516 let prefix ;
499517 if ( chunk ) {
500- const merged = mergePrefixUtf8 ( u , this [ kChunk ] ) ;
518+ const merged = mergePrefix ( u , this [ kChunk ] , this [ kEncoding ] ) ;
501519 if ( u . length < 3 ) {
502520 u = merged ; // Might be unfinished, but fully consumed old u
503521 } else {
@@ -510,7 +528,7 @@ class TextDecoder {
510528 }
511529
512530 if ( stream ) {
513- const trail = unfinishedBytesUtf8 ( u , u . length ) ;
531+ const trail = unfinishedBytes ( u , u . length , this [ kEncoding ] ) ;
514532 if ( trail > 0 ) {
515533 this [ kChunk ] = new FastBuffer ( u . subarray ( - trail ) ) ; // copy
516534 if ( ! prefix && trail === u . length ) return '' ; // No further state change
@@ -519,8 +537,8 @@ class TextDecoder {
519537 }
520538
521539 try {
522- const res = ( prefix ? decodeUTF8 ( prefix , ignoreBom , this [ kFatal ] ) : '' ) +
523- decodeUTF8 ( u , ignoreBom || prefix , this [ kFatal ] ) ;
540+ const res = ( prefix ? this [ kUnicode ] ( prefix , ignoreBom , this [ kFatal ] ) : '' ) +
541+ this [ kUnicode ] ( u , ignoreBom || prefix , this [ kFatal ] ) ;
524542
525543 // "BOM seen" is set on the current decode call only if it did not error,
526544 // in "serialize I/O queue" after decoding
@@ -541,22 +559,7 @@ class TextDecoder {
541559 return icuDecode ( this [ kHandle ] , input , flags , this [ kEncoding ] ) ;
542560 }
543561
544- input = parseInput ( input ) ;
545-
546- let result = stream ? this [ kHandle ] . write ( input ) : this [ kHandle ] . end ( input ) ;
547-
548- if ( result . length > 0 && ! this [ kBOMSeen ] && ! this [ kIgnoreBOM ] ) {
549- // If the very first result in the stream is a BOM, and we are not
550- // explicitly told to ignore it, then we discard it.
551- if ( result [ 0 ] === '\ufeff' ) {
552- result = StringPrototypeSlice ( result , 1 ) ;
553- }
554- this [ kBOMSeen ] = true ;
555- }
556-
557- if ( ! stream ) this [ kBOMSeen ] = false ;
558-
559- return result ;
562+ // Unreachable
560563 }
561564}
562565
0 commit comments