|
11 | 11 | #ifdef LTC_DER |
12 | 12 |
|
13 | 13 | /** |
14 | | - Store a UTF8 STRING |
| 14 | + Decode a UTF8 STRING and recover an array of unicode characters. |
15 | 15 | @param in The DER encoded UTF8 STRING |
16 | 16 | @param inlen The size of the DER UTF8 STRING |
17 | | - @param out [out] The array of utf8s stored (one per char) |
18 | | - @param outlen [in/out] The number of utf8s stored |
| 17 | + @param out [out] The array of unicode characters (wchar_t*) |
| 18 | + @param outlen [in/out] The number of unicode characters in the array |
19 | 19 | @return CRYPT_OK if successful |
20 | 20 | */ |
21 | 21 | int der_decode_utf8_string(const unsigned char *in, unsigned long inlen, |
@@ -51,23 +51,47 @@ int der_decode_utf8_string(const unsigned char *in, unsigned long inlen, |
51 | 51 | return CRYPT_INVALID_PACKET; |
52 | 52 | } |
53 | 53 |
|
54 | | - /* proceed to decode */ |
| 54 | + /* proceed to recover unicode characters from utf8 data. |
| 55 | + for reference see Section 3 of RFC 3629: |
| 56 | +
|
| 57 | + https://tools.ietf.org/html/rfc3629#section-3 |
| 58 | + */ |
55 | 59 | for (y = 0; x < inlen; ) { |
56 | | - /* get first byte */ |
| 60 | + /* read first byte */ |
57 | 61 | tmp = in[x++]; |
58 | 62 |
|
59 | | - /* count number of bytes */ |
| 63 | + /* a unicode character is recovered from a sequence of 1 to 4 utf8 bytes. |
| 64 | + the form of those bytes must match a row in the following table: |
| 65 | +
|
| 66 | + 0xxxxxxx |
| 67 | + 110xxxxx 10xxxxxx |
| 68 | + 1110xxxx 10xxxxxx 10xxxxxx |
| 69 | + 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx |
| 70 | +
|
| 71 | + the number of leading ones in the first byte (0,2,3,4) determines the |
| 72 | + number of remaining bytes to read (0,1,2,3) |
| 73 | + */ |
| 74 | + |
| 75 | + /* determine z, the number of leading ones in the first byte. |
| 76 | + each pass shifts tmp left by one bit (masked to 8 bits), dropping one leading one */ |
60 | 77 | for (z = 0; (tmp & 0x80) && (z <= 4); z++, tmp = (tmp << 1) & 0xFF); |
61 | 78 |
|
62 | | - if (z == 1 || z > 4 || (x + (z - 1) > inlen)) { |
| 79 | + /* z should be in {0,2,3,4} */ |
| 80 | + if (z == 1 || z > 4) { |
63 | 81 | return CRYPT_INVALID_PACKET; |
64 | 82 | } |
65 | 83 |
|
66 | | - /* decode, grab upper bits */ |
| 84 | + /* right-shift tmp by z so the remaining bits return to their original positions, with the leading ones now cleared */ |
67 | 85 | tmp >>= z; |
68 | 86 |
|
69 | | - /* grab remaining bytes */ |
70 | | - if (z > 1) { --z; } |
| 87 | + /* now update z so it equals the number of additional bytes to read */ |
| 88 | + if (z > 0) { --z; } |
| 89 | + |
| 90 | + if (x + z > inlen) { |
| 91 | + return CRYPT_INVALID_PACKET; |
| 92 | + } |
| 93 | + |
| 94 | + /* read remaining bytes */ |
71 | 95 | while (z-- != 0) { |
72 | 96 | if ((in[x] & 0xC0) != 0x80) { |
73 | 97 | return CRYPT_INVALID_PACKET; |
|
0 commit comments