-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathiconv.c
More file actions
191 lines (162 loc) · 3.39 KB
/
iconv.c
File metadata and controls
191 lines (162 loc) · 3.39 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
#include <stdlib.h>
#include <string.h>
/* Replace the following line with "#include <ctype.h>" if your
* base character set isn't ASCII or a superset thereof; be aware
* though that tolower() can do basically whatever it wants if
* your OS supports locales and your locale isn't set to C
*/
#define tolower clower
#define _POSIX_SOURCE
#include <errno.h>
#include "iconv.h"
#define nelem(x) (sizeof(x)/sizeof((x)[0]))
#define UTFmax 4
/* sjconv - iconv-compatible Windows-31J (cp932) → UTF-8 converter */
typedef unsigned long Rune;
#include "_cp932tab.c"
/* Conversion state holding a scratch buffer of UTFmax bytes.
 * iconv() encodes one character into buf (via s->buf) and then copies it
 * to the caller's output buffer.
 * NOTE(review): presumably iconv_t is a pointer to this struct; the typedef
 * lives in iconv.h, which is not visible here -- confirm. */
struct sjconv_state {
char buf[UTFmax];
};
/* Encoding identifiers stored in enctab and returned by encget(). */
enum {
EncCP932, /* every CP932 / Windows-31J alias in enctab */
EncUTF8, /* every UTF-8 alias in enctab */
EncInvalid = -1, /* returned by encget() when no table entry matches */
};
/* Accepted encoding names and the encoding identifier each one selects.
 * encget() scans the entries in order and compares names with strcaseeq().
 * The table is read-only lookup data, hence const. */
static const struct {
	const char *name;	/* encoding name as passed to iconv_open() */
	int enc;		/* one of the Enc* constants */
} enctab[] = {
	{"CP932", EncCP932},
	{"WINDOWS-31J", EncCP932},
	{"MS932", EncCP932},
	{"SJIS-WIN", EncCP932},
	{"SJIS-OPEN", EncCP932},
	{"UTF-8", EncUTF8},
	{"UTF8", EncUTF8},
};
/* Lowercase a single byte: values below 0x80 that fall in 'A'..'Z' are
 * shifted by 0x20; every other value is returned unchanged.
 * Used in place of tolower() through the #define near the top of the file. */
inline static char
clower(unsigned char c)
{
	if(c >= 0x80)
		return c;
	if(c < 'A' || c > 'Z')
		return c;
	return c + 0x20;
}
/* Case-insensitive string equality.
 * Returns 1 when s1 and s2 have the same length and every pair of
 * characters is equal after tolower(); returns 0 otherwise.
 * Two empty strings compare equal. */
static int
strcaseeq(const char *s1, const char *s2)
{
	while(*s1 && *s2) {
		if(tolower((unsigned char)*s1) != tolower((unsigned char)*s2))
			return 0;
		s1++;
		s2++;
	}
	/* At least one string has ended; they are equal only if both have,
	 * so a mere prefix of the other string is not a match. */
	return *s1 == *s2;
}
/* Look up an encoding name in enctab.
 * Returns the enc value of the first entry whose name matches according to
 * strcaseeq(), or EncInvalid when no entry matches. */
static int
encget(const char *name)
{
	int idx = 0;

	while(idx < nelem(enctab)) {
		if(strcaseeq(enctab[idx].name, name))
			return enctab[idx].enc;
		idx++;
	}
	return EncInvalid;
}
/* Decode one CP932 character starting at s.
 * A lead byte in 0x81-0x9f, 0xe0-0xef or 0xfa-0xfc (the latter being IBM
 * extensions, not present in standard SJIS) is combined with the following
 * byte into a 16-bit index; any other byte is used as the index by itself.
 * The index is looked up in cp932tab and the result stored in *r.
 * Returns the number of input bytes consumed (1 or 2).
 * NOTE(review): the trail byte is not range-checked here, so this assumes
 * cp932tab has an entry for every index that can be formed -- confirm
 * against _cp932tab.c. */
static int
cp932torune(Rune *r, unsigned char *s)
{
	unsigned char lead = s[0];
	int twobyte = (lead >= 0x81 && lead <= 0x9f)
		|| (lead >= 0xe0 && lead <= 0xef)
		|| (lead >= 0xfa && lead <= 0xfc);
	Rune idx;
	int used;

	if(twobyte) {
		idx = lead << 8;
		idx |= s[1];
		used = 2;
	} else {
		idx = lead;
		used = 1;
	}
	*r = cp932tab[idx];
	return used;
}
/* Encode the code point *r as UTF-8 into s.
 * s must have room for up to 4 bytes; no terminator is written.
 * Returns the number of bytes written (1-4), or -1 when *r is greater
 * than 0x10ffff and therefore cannot be encoded. */
static int
runetochar(char *s, Rune *r)
{
	Rune c = *r;

	if(c <= 0x7f) {
		s[0] = (char)c;
		return 1;
	}
	if(c <= 0x7ff) {
		s[0] = (char)(0xc0 | (c >> 6));
		s[1] = (char)(0x80 | (c & 0x3f));
		return 2;
	}
	if(c <= 0xffff) {
		s[0] = (char)(0xe0 | (c >> 12));
		s[1] = (char)(0x80 | ((c >> 6) & 0x3f));
		s[2] = (char)(0x80 | (c & 0x3f));
		return 3;
	}
	/* Compare against the upper bound instead of masking: 0x10ffff is not
	 * a contiguous bit mask, so a mask test would wrongly reject every
	 * code point that has any of bits 16-19 set (e.g. 0x20000). */
	if(c <= 0x10ffff) {
		s[0] = (char)(0xf0 | (c >> 18));
		s[1] = (char)(0x80 | ((c >> 12) & 0x3f));
		s[2] = (char)(0x80 | ((c >> 6) & 0x3f));
		s[3] = (char)(0x80 | (c & 0x3f));
		return 4;
	}
	return -1;
}
/* Report whether n bytes are enough to hold the character that starts at s,
 * using the standard SJIS lead-byte ranges: a first byte in 0x81-0x9f or
 * 0xe0-0xef needs 2 bytes, anything else needs 1.
 * Returns 1 when n is sufficient, 0 otherwise. The first byte is always read. */
inline static int
fullsjis(unsigned char *s, int n)
{
	int lead = *s;
	int need = 1;

	if((lead >= 0x81 && lead <= 0x9f) || (lead >= 0xe0 && lead <= 0xef))
		need = 2;
	return n >= need;
}
/* Report whether n bytes are enough to hold the CP932 character that starts
 * at s. A first byte in 0xfa-0xfc (IBM extensions) needs 2 bytes; every
 * other first byte is judged by fullsjis().
 * Returns 1 when n is sufficient, 0 otherwise. */
inline static int
fullcp932(unsigned char *s, int n)
{
	int ibmlead = *s >= 0xfa && *s <= 0xfc;

	if(!ibmlead)
		return fullsjis(s, n);
	return n >= 2;
}
/* Convert CP932 input to UTF-8, one character at a time.
 *
 * For every converted character *inbuf/*outbuf are advanced and
 * *inleft/*outleft are decreased, so on failure they describe exactly
 * what has been consumed and produced so far.
 *
 * A null inbuf or *inbuf is the iconv() "reset" request; nothing is carried
 * over between characters here (s->buf is only per-character scratch
 * space), so that call returns 0 immediately.
 *
 * Returns 0 once all input is consumed, or (size_t)-1 with errno set to:
 *   EINVAL  the input ends in the middle of a two-byte character,
 *   EILSEQ  the decoded value cannot be encoded as UTF-8,
 *   E2BIG   the output buffer has no room for the next character. */
size_t
iconv(iconv_t s, char **restrict inbuf, size_t *restrict inleft, char **restrict outbuf, size_t *restrict outleft)
{
	Rune r;

	if(inbuf == NULL || *inbuf == NULL)
		return (size_t)0;

	while(*inleft) {
		/* fullcp932() takes an int and only cares whether 1 or 2 bytes
		 * are available, so clamp instead of truncating a large size_t. */
		int avail = *inleft < 2 ? (int)*inleft : 2;

		if(!fullcp932((unsigned char *)*inbuf, avail)) {
			errno = EINVAL;
			return (size_t)-1;
		}
		int slen = cp932torune(&r, (unsigned char *)*inbuf);
		int ulen = runetochar(s->buf, &r);
		/* Check for failure before the unsigned comparison below;
		 * otherwise -1 converts to a huge size and is reported as E2BIG. */
		if(ulen < 0) {
			errno = EILSEQ;
			return (size_t)-1;
		}
		if(*outleft < (size_t)ulen) {
			errno = E2BIG;
			return (size_t)-1;
		}
		*inbuf += slen;
		*inleft -= slen;
		memcpy(*outbuf, s->buf, ulen);
		*outbuf += ulen;
		*outleft -= ulen;
	}
	return (size_t)0;
}
/* Create a conversion descriptor.
 * Only conversion from a CP932 alias to a UTF-8 alias (as listed in enctab)
 * is supported.
 * Returns the new descriptor, to be released with iconv_close(), or
 * (iconv_t)-1 with errno set to:
 *   EINVAL  either name is NULL or the pair is not CP932 -> UTF-8,
 *   ENOMEM  the descriptor could not be allocated. */
iconv_t
iconv_open(const char *to, const char *from)
{
	iconv_t cd;

	/* Reject NULL names before encget() would dereference them. */
	if(to == NULL || from == NULL
	|| encget(to) != EncUTF8 || encget(from) != EncCP932) {
		errno = EINVAL;
		return (iconv_t)-1;
	}
	cd = malloc(sizeof *cd);
	if(cd == NULL) {
		/* ISO C does not require malloc() to set errno. */
		errno = ENOMEM;
		return (iconv_t)-1;
	}
	return cd;
}
/* Release a descriptor obtained from iconv_open().
 * Returns 0 on success. Returns -1 with errno set to EBADF when cd is NULL
 * or (iconv_t)-1, the value iconv_open() returns on failure; that value
 * must never reach free(). */
int
iconv_close(iconv_t cd)
{
	if(cd == NULL || cd == (iconv_t)-1) {
		errno = EBADF;
		return -1;
	}
	free(cd);
	return 0;
}