Skip to content

Commit 910d625

Browse files
authored
Merge pull request #558 from jamuir/develop
Add new utf8 test-vector, update comments explaining utf8 decoding
2 parents 954ab9b + 2092250 commit 910d625

File tree

2 files changed

+54
-10
lines changed

2 files changed

+54
-10
lines changed

src/pk/asn1/der/utf8/der_decode_utf8_string.c

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,11 @@
1111
#ifdef LTC_DER
1212

1313
/**
14-
Store a UTF8 STRING
14+
Decode a UTF8 STRING and recover an array of unicode characters.
1515
@param in The DER encoded UTF8 STRING
1616
@param inlen The size of the DER UTF8 STRING
17-
@param out [out] The array of utf8s stored (one per char)
18-
@param outlen [in/out] The number of utf8s stored
17+
@param out [out] The array of unicode characters (wchar_t*)
18+
@param outlen [in/out] The number of unicode characters in the array
1919
@return CRYPT_OK if successful
2020
*/
2121
int der_decode_utf8_string(const unsigned char *in, unsigned long inlen,
@@ -51,23 +51,47 @@ int der_decode_utf8_string(const unsigned char *in, unsigned long inlen,
5151
return CRYPT_INVALID_PACKET;
5252
}
5353

54-
/* proceed to decode */
54+
/* proceed to recover unicode characters from utf8 data.
55+
for reference see Section 3 of RFC 3629:
56+
57+
https://tools.ietf.org/html/rfc3629#section-3
58+
*/
5559
for (y = 0; x < inlen; ) {
56-
/* get first byte */
60+
/* read first byte */
5761
tmp = in[x++];
5862

59-
/* count number of bytes */
63+
/* a unicode character is recovered from a sequence of 1 to 4 utf8 bytes.
64+
the form of those bytes must match a row in the following table:
65+
66+
0xxxxxxx
67+
110xxxxx 10xxxxxx
68+
1110xxxx 10xxxxxx 10xxxxxx
69+
11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
70+
71+
the number of leading ones in the first byte (0,2,3,4) determines the
72+
number of remaining bytes to read (0,1,2,3)
73+
*/
74+
75+
/* determine z, the number of leading ones.
76+
this is done by left-shifting tmp one bit at a time, which shifts out the leading one bits */
6077
for (z = 0; (tmp & 0x80) && (z <= 4); z++, tmp = (tmp << 1) & 0xFF);
6178

62-
if (z == 1 || z > 4 || (x + (z - 1) > inlen)) {
79+
/* z should be in {0,2,3,4} */
80+
if (z == 1 || z > 4) {
6381
return CRYPT_INVALID_PACKET;
6482
}
6583

66-
/* decode, grab upper bits */
84+
/* right-shift tmp by z to move the remaining bits back to their original least-significant positions */
6785
tmp >>= z;
6886

69-
/* grab remaining bytes */
70-
if (z > 1) { --z; }
87+
/* now update z so it equals the number of additional bytes to read */
88+
if (z > 0) { --z; }
89+
90+
if (x + z > inlen) {
91+
return CRYPT_INVALID_PACKET;
92+
}
93+
94+
/* read remaining bytes */
7195
while (z-- != 0) {
7296
if ((in[x] & 0xC0) != 0x80) {
7397
return CRYPT_INVALID_PACKET;

tests/der_test.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1603,6 +1603,8 @@ int der_test(void)
16031603
static const unsigned char utf8_1_der[] = { 0x0C, 0x07, 0x41, 0xE2, 0x89, 0xA2, 0xCE, 0x91, 0x2E };
16041604
static const wchar_t utf8_2[] = { 0xD55C, 0xAD6D, 0xC5B4 };
16051605
static const unsigned char utf8_2_der[] = { 0x0C, 0x09, 0xED, 0x95, 0x9C, 0xEA, 0xB5, 0xAD, 0xEC, 0x96, 0xB4 };
1606+
static const wchar_t utf8_3[] = { 0x05E9, 0x05DC, 0x05D5, 0x05DD };
1607+
static const unsigned char utf8_3_der[] = { 0x0C, 0x08, 0xD7, 0xA9, 0xD7, 0x9C, 0xD7, 0x95, 0xD7, 0x9D };
16061608

16071609
unsigned char utf8_buf[32];
16081610
wchar_t utf8_out[32];
@@ -1961,6 +1963,24 @@ tmp_time.off_hh);
19611963
return 1;
19621964
}
19631965

1966+
/* encode it */
1967+
x = sizeof(utf8_buf);
1968+
DO(der_encode_utf8_string(utf8_3, sizeof(utf8_3) / sizeof(utf8_3[0]), utf8_buf, &x));
1969+
if (x != sizeof(utf8_3_der) || memcmp(utf8_buf, utf8_3_der, x)) {
1970+
fprintf(stderr, "DER UTF8_3 encoded to %lu bytes\n", x);
1971+
for (y = 0; y < x; y++) fprintf(stderr, "%02x ", (unsigned)utf8_buf[y]);
1972+
fprintf(stderr, "\n");
1973+
return 1;
1974+
}
1975+
/* decode it */
1976+
y = sizeof(utf8_out) / sizeof(utf8_out[0]);
1977+
DO(der_decode_utf8_string(utf8_buf, x, utf8_out, &y));
1978+
if (y != (sizeof(utf8_3) / sizeof(utf8_3[0])) || memcmp(utf8_3, utf8_out, y * sizeof(wchar_t))) {
1979+
fprintf(stderr, "DER UTF8_3 decoded to %lu wchar_t\n", y);
1980+
for (x = 0; x < y; x++) fprintf(stderr, "%04lx ", (unsigned long)utf8_out[x]);
1981+
fprintf(stderr, "\n");
1982+
return 1;
1983+
}
19641984

19651985
der_set_test();
19661986
der_flexi_test();

0 commit comments

Comments
 (0)