1 | #include <newlib.h> |
---|
2 | #include <stdlib.h> |
---|
3 | #include <locale.h> |
---|
4 | #include "mbctype.h" |
---|
5 | #include <wchar.h> |
---|
6 | #include <string.h> |
---|
7 | #include <errno.h> |
---|
8 | #include "local.h" |
---|
9 | |
---|
/* Reentrant multibyte-to-wide conversion entry point.

   Every argument is handed unchanged to the converter selected by the
   __MBTOWC macro, and that converter's result is returned as-is.  */
int
_mbtowc_r (struct _reent *r,
           wchar_t *__restrict pwc,
           const char *__restrict s,
           size_t n,
           mbstate_t *state)
{
  int converted;

  converted = __MBTOWC (r, pwc, s, n, state);
  return converted;
}
---|
19 | |
---|
/* Single-byte converter: widen the first byte of S into *PWC.

   r      reentrancy structure; only touched on Cygwin to record EILSEQ.
   pwc    destination for the wide character, may be NULL (result dropped).
   s      input bytes; NULL yields 0.
   n      number of bytes available at S; 0 yields -2.
   state  unused by this converter.

   Returns 0 if S is NULL or the byte is the null character, 1 for any
   other byte, -2 if N is 0.  On Cygwin only, bytes >= 0x80 are rejected
   with -1 and r->_errno set to EILSEQ.  */
int
__ascii_mbtowc (struct _reent *r,
                wchar_t *pwc,
                const char *s,
                size_t n,
                mbstate_t *state)
{
  const unsigned char *src = (const unsigned char *) s;
  wchar_t sink;
  wchar_t *out = (pwc != NULL) ? pwc : &sink;

  if (s == NULL)
    return 0;
  if (n == 0)
    return -2;

#ifdef __CYGWIN__
  if ((wchar_t) *src >= 0x80)
    {
      r->_errno = EILSEQ;
      return -1;
    }
#endif

  *out = (wchar_t) *src;
  return (*src != '\0') ? 1 : 0;
}
---|
54 | |
---|
55 | #ifdef _MB_CAPABLE |
---|
56 | typedef enum __packed { ESCAPE, DOLLAR, BRACKET, AT, B, J, |
---|
57 | NUL, JIS_CHAR, OTHER, JIS_C_NUM } JIS_CHAR_TYPE; |
---|
58 | typedef enum __packed { ASCII, JIS, A_ESC, A_ESC_DL, JIS_1, J_ESC, J_ESC_BR, |
---|
59 | INV, JIS_S_NUM } JIS_STATE; |
---|
60 | typedef enum __packed { COPY_A, COPY_J1, COPY_J2, MAKE_A, NOOP, EMPTY, ERROR } JIS_ACTION; |
---|
61 | |
---|
62 | /************************************************************************************** |
---|
63 | * state/action tables for processing JIS encoding |
---|
64 | * Where possible, switches to JIS are grouped with following JIS characters and switches |
---|
65 | * to ASCII are grouped with preceding JIS characters. Thus, maximum returned length |
---|
66 | * is 2 (switch to JIS) + 2 (JIS characters) + 2 (switch back to ASCII) = 6. |
---|
67 | *************************************************************************************/ |
---|
68 | |
---|
69 | #ifndef __CYGWIN__ |
---|
70 | static JIS_STATE JIS_state_table[JIS_S_NUM][JIS_C_NUM] = { |
---|
71 | /* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER */ |
---|
72 | /* ASCII */ { A_ESC, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII }, |
---|
73 | /* JIS */ { J_ESC, JIS_1, JIS_1, JIS_1, JIS_1, JIS_1, INV, JIS_1, INV }, |
---|
74 | /* A_ESC */ { ASCII, A_ESC_DL, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII }, |
---|
75 | /* A_ESC_DL */{ ASCII, ASCII, ASCII, JIS, JIS, ASCII, ASCII, ASCII, ASCII }, |
---|
76 | /* JIS_1 */ { INV, JIS, JIS, JIS, JIS, JIS, INV, JIS, INV }, |
---|
77 | /* J_ESC */ { INV, INV, J_ESC_BR, INV, INV, INV, INV, INV, INV }, |
---|
78 | /* J_ESC_BR */{ INV, INV, INV, INV, ASCII, ASCII, INV, INV, INV }, |
---|
79 | }; |
---|
80 | |
---|
81 | static JIS_ACTION JIS_action_table[JIS_S_NUM][JIS_C_NUM] = { |
---|
82 | /* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER */ |
---|
83 | /* ASCII */ { NOOP, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A, EMPTY, COPY_A, COPY_A}, |
---|
84 | /* JIS */ { NOOP, COPY_J1, COPY_J1, COPY_J1, COPY_J1, COPY_J1, ERROR, COPY_J1, ERROR }, |
---|
85 | /* A_ESC */ { COPY_A, NOOP, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A, COPY_A}, |
---|
86 | /* A_ESC_DL */{ COPY_A, COPY_A, COPY_A, NOOP, NOOP, COPY_A, COPY_A, COPY_A, COPY_A}, |
---|
87 | /* JIS_1 */ { ERROR, COPY_J2, COPY_J2, COPY_J2, COPY_J2, COPY_J2, ERROR, COPY_J2, ERROR }, |
---|
88 | /* J_ESC */ { ERROR, ERROR, NOOP, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR }, |
---|
89 | /* J_ESC_BR */{ ERROR, ERROR, ERROR, ERROR, MAKE_A, MAKE_A, ERROR, ERROR, ERROR }, |
---|
90 | }; |
---|
91 | #endif /* !__CYGWIN__ */ |
---|
92 | |
---|
93 | /* we override the mbstate_t __count field for more complex encodings and use it to store a state value */ |
---|
94 | #define __state __count |
---|
95 | |
---|
96 | #ifdef _MB_EXTENDED_CHARSETS_ISO |
---|
97 | static int |
---|
98 | ___iso_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
99 | int iso_idx, mbstate_t *state) |
---|
100 | { |
---|
101 | wchar_t dummy; |
---|
102 | unsigned char *t = (unsigned char *)s; |
---|
103 | |
---|
104 | if (pwc == NULL) |
---|
105 | pwc = &dummy; |
---|
106 | |
---|
107 | if (s == NULL) |
---|
108 | return 0; |
---|
109 | |
---|
110 | if (n == 0) |
---|
111 | return -2; |
---|
112 | |
---|
113 | if (*t >= 0xa0) |
---|
114 | { |
---|
115 | if (iso_idx >= 0) |
---|
116 | { |
---|
117 | *pwc = __iso_8859_conv[iso_idx][*t - 0xa0]; |
---|
118 | if (*pwc == 0) /* Invalid character */ |
---|
119 | { |
---|
120 | r->_errno = EILSEQ; |
---|
121 | return -1; |
---|
122 | } |
---|
123 | return 1; |
---|
124 | } |
---|
125 | } |
---|
126 | |
---|
127 | *pwc = (wchar_t) *t; |
---|
128 | |
---|
129 | if (*t == '\0') |
---|
130 | return 0; |
---|
131 | |
---|
132 | return 1; |
---|
133 | } |
---|
134 | |
---|
135 | static int |
---|
136 | __iso_8859_1_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
137 | mbstate_t *state) |
---|
138 | { |
---|
139 | return ___iso_mbtowc (r, pwc, s, n, -1, state); |
---|
140 | } |
---|
141 | |
---|
142 | static int |
---|
143 | __iso_8859_2_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
144 | mbstate_t *state) |
---|
145 | { |
---|
146 | return ___iso_mbtowc (r, pwc, s, n, 0, state); |
---|
147 | } |
---|
148 | |
---|
149 | static int |
---|
150 | __iso_8859_3_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
151 | mbstate_t *state) |
---|
152 | { |
---|
153 | return ___iso_mbtowc (r, pwc, s, n, 1, state); |
---|
154 | } |
---|
155 | |
---|
156 | static int |
---|
157 | __iso_8859_4_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
158 | mbstate_t *state) |
---|
159 | { |
---|
160 | return ___iso_mbtowc (r, pwc, s, n, 2, state); |
---|
161 | } |
---|
162 | |
---|
163 | static int |
---|
164 | __iso_8859_5_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
165 | mbstate_t *state) |
---|
166 | { |
---|
167 | return ___iso_mbtowc (r, pwc, s, n, 3, state); |
---|
168 | } |
---|
169 | |
---|
170 | static int |
---|
171 | __iso_8859_6_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
172 | mbstate_t *state) |
---|
173 | { |
---|
174 | return ___iso_mbtowc (r, pwc, s, n, 4, state); |
---|
175 | } |
---|
176 | |
---|
177 | static int |
---|
178 | __iso_8859_7_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
179 | mbstate_t *state) |
---|
180 | { |
---|
181 | return ___iso_mbtowc (r, pwc, s, n, 5, state); |
---|
182 | } |
---|
183 | |
---|
184 | static int |
---|
185 | __iso_8859_8_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
186 | mbstate_t *state) |
---|
187 | { |
---|
188 | return ___iso_mbtowc (r, pwc, s, n, 6, state); |
---|
189 | } |
---|
190 | |
---|
191 | static int |
---|
192 | __iso_8859_9_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
193 | mbstate_t *state) |
---|
194 | { |
---|
195 | return ___iso_mbtowc (r, pwc, s, n, 7, state); |
---|
196 | } |
---|
197 | |
---|
198 | static int |
---|
199 | __iso_8859_10_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
200 | mbstate_t *state) |
---|
201 | { |
---|
202 | return ___iso_mbtowc (r, pwc, s, n, 8, state); |
---|
203 | } |
---|
204 | |
---|
205 | static int |
---|
206 | __iso_8859_11_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
207 | mbstate_t *state) |
---|
208 | { |
---|
209 | return ___iso_mbtowc (r, pwc, s, n, 9, state); |
---|
210 | } |
---|
211 | |
---|
212 | static int |
---|
213 | __iso_8859_13_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
214 | mbstate_t *state) |
---|
215 | { |
---|
216 | return ___iso_mbtowc (r, pwc, s, n, 10, state); |
---|
217 | } |
---|
218 | |
---|
219 | static int |
---|
220 | __iso_8859_14_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
221 | mbstate_t *state) |
---|
222 | { |
---|
223 | return ___iso_mbtowc (r, pwc, s, n, 11, state); |
---|
224 | } |
---|
225 | |
---|
226 | static int |
---|
227 | __iso_8859_15_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
228 | mbstate_t *state) |
---|
229 | { |
---|
230 | return ___iso_mbtowc (r, pwc, s, n, 12, state); |
---|
231 | } |
---|
232 | |
---|
233 | static int |
---|
234 | __iso_8859_16_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
235 | mbstate_t *state) |
---|
236 | { |
---|
237 | return ___iso_mbtowc (r, pwc, s, n, 13, state); |
---|
238 | } |
---|
239 | |
---|
240 | static mbtowc_p __iso_8859_mbtowc[17] = { |
---|
241 | NULL, |
---|
242 | __iso_8859_1_mbtowc, |
---|
243 | __iso_8859_2_mbtowc, |
---|
244 | __iso_8859_3_mbtowc, |
---|
245 | __iso_8859_4_mbtowc, |
---|
246 | __iso_8859_5_mbtowc, |
---|
247 | __iso_8859_6_mbtowc, |
---|
248 | __iso_8859_7_mbtowc, |
---|
249 | __iso_8859_8_mbtowc, |
---|
250 | __iso_8859_9_mbtowc, |
---|
251 | __iso_8859_10_mbtowc, |
---|
252 | __iso_8859_11_mbtowc, |
---|
253 | NULL, /* No ISO 8859-12 */ |
---|
254 | __iso_8859_13_mbtowc, |
---|
255 | __iso_8859_14_mbtowc, |
---|
256 | __iso_8859_15_mbtowc, |
---|
257 | __iso_8859_16_mbtowc |
---|
258 | }; |
---|
259 | |
---|
260 | /* val *MUST* be valid! All checks for validity are supposed to be |
---|
261 | performed before calling this function. */ |
---|
262 | mbtowc_p |
---|
263 | __iso_mbtowc (int val) |
---|
264 | { |
---|
265 | return __iso_8859_mbtowc[val]; |
---|
266 | } |
---|
267 | #endif /* _MB_EXTENDED_CHARSETS_ISO */ |
---|
268 | |
---|
269 | #ifdef _MB_EXTENDED_CHARSETS_WINDOWS |
---|
270 | static int |
---|
271 | ___cp_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
272 | int cp_idx, mbstate_t *state) |
---|
273 | { |
---|
274 | wchar_t dummy; |
---|
275 | unsigned char *t = (unsigned char *)s; |
---|
276 | |
---|
277 | if (pwc == NULL) |
---|
278 | pwc = &dummy; |
---|
279 | |
---|
280 | if (s == NULL) |
---|
281 | return 0; |
---|
282 | |
---|
283 | if (n == 0) |
---|
284 | return -2; |
---|
285 | |
---|
286 | if (*t >= 0x80) |
---|
287 | { |
---|
288 | if (cp_idx >= 0) |
---|
289 | { |
---|
290 | *pwc = __cp_conv[cp_idx][*t - 0x80]; |
---|
291 | if (*pwc == 0) /* Invalid character */ |
---|
292 | { |
---|
293 | r->_errno = EILSEQ; |
---|
294 | return -1; |
---|
295 | } |
---|
296 | return 1; |
---|
297 | } |
---|
298 | } |
---|
299 | |
---|
300 | *pwc = (wchar_t)*t; |
---|
301 | |
---|
302 | if (*t == '\0') |
---|
303 | return 0; |
---|
304 | |
---|
305 | return 1; |
---|
306 | } |
---|
307 | |
---|
308 | static int |
---|
309 | __cp_437_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
310 | mbstate_t *state) |
---|
311 | { |
---|
312 | return ___cp_mbtowc (r, pwc, s, n, 0, state); |
---|
313 | } |
---|
314 | |
---|
315 | static int |
---|
316 | __cp_720_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
317 | mbstate_t *state) |
---|
318 | { |
---|
319 | return ___cp_mbtowc (r, pwc, s, n, 1, state); |
---|
320 | } |
---|
321 | |
---|
322 | static int |
---|
323 | __cp_737_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
324 | mbstate_t *state) |
---|
325 | { |
---|
326 | return ___cp_mbtowc (r, pwc, s, n, 2, state); |
---|
327 | } |
---|
328 | |
---|
329 | static int |
---|
330 | __cp_775_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
331 | mbstate_t *state) |
---|
332 | { |
---|
333 | return ___cp_mbtowc (r, pwc, s, n, 3, state); |
---|
334 | } |
---|
335 | |
---|
336 | static int |
---|
337 | __cp_850_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
338 | mbstate_t *state) |
---|
339 | { |
---|
340 | return ___cp_mbtowc (r, pwc, s, n, 4, state); |
---|
341 | } |
---|
342 | |
---|
343 | static int |
---|
344 | __cp_852_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
345 | mbstate_t *state) |
---|
346 | { |
---|
347 | return ___cp_mbtowc (r, pwc, s, n, 5, state); |
---|
348 | } |
---|
349 | |
---|
350 | static int |
---|
351 | __cp_855_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
352 | mbstate_t *state) |
---|
353 | { |
---|
354 | return ___cp_mbtowc (r, pwc, s, n, 6, state); |
---|
355 | } |
---|
356 | |
---|
357 | static int |
---|
358 | __cp_857_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
359 | mbstate_t *state) |
---|
360 | { |
---|
361 | return ___cp_mbtowc (r, pwc, s, n, 7, state); |
---|
362 | } |
---|
363 | |
---|
364 | static int |
---|
365 | __cp_858_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
366 | mbstate_t *state) |
---|
367 | { |
---|
368 | return ___cp_mbtowc (r, pwc, s, n, 8, state); |
---|
369 | } |
---|
370 | |
---|
371 | static int |
---|
372 | __cp_862_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
373 | mbstate_t *state) |
---|
374 | { |
---|
375 | return ___cp_mbtowc (r, pwc, s, n, 9, state); |
---|
376 | } |
---|
377 | |
---|
378 | static int |
---|
379 | __cp_866_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
380 | mbstate_t *state) |
---|
381 | { |
---|
382 | return ___cp_mbtowc (r, pwc, s, n, 10, state); |
---|
383 | } |
---|
384 | |
---|
385 | static int |
---|
386 | __cp_874_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
387 | mbstate_t *state) |
---|
388 | { |
---|
389 | return ___cp_mbtowc (r, pwc, s, n, 11, state); |
---|
390 | } |
---|
391 | |
---|
392 | static int |
---|
393 | __cp_1125_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
394 | mbstate_t *state) |
---|
395 | { |
---|
396 | return ___cp_mbtowc (r, pwc, s, n, 12, state); |
---|
397 | } |
---|
398 | |
---|
399 | static int |
---|
400 | __cp_1250_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
401 | mbstate_t *state) |
---|
402 | { |
---|
403 | return ___cp_mbtowc (r, pwc, s, n, 13, state); |
---|
404 | } |
---|
405 | |
---|
406 | static int |
---|
407 | __cp_1251_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
408 | mbstate_t *state) |
---|
409 | { |
---|
410 | return ___cp_mbtowc (r, pwc, s, n, 14, state); |
---|
411 | } |
---|
412 | |
---|
413 | static int |
---|
414 | __cp_1252_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
415 | mbstate_t *state) |
---|
416 | { |
---|
417 | return ___cp_mbtowc (r, pwc, s, n, 15, state); |
---|
418 | } |
---|
419 | |
---|
420 | static int |
---|
421 | __cp_1253_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
422 | mbstate_t *state) |
---|
423 | { |
---|
424 | return ___cp_mbtowc (r, pwc, s, n, 16, state); |
---|
425 | } |
---|
426 | |
---|
427 | static int |
---|
428 | __cp_1254_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
429 | mbstate_t *state) |
---|
430 | { |
---|
431 | return ___cp_mbtowc (r, pwc, s, n, 17, state); |
---|
432 | } |
---|
433 | |
---|
434 | static int |
---|
435 | __cp_1255_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
436 | mbstate_t *state) |
---|
437 | { |
---|
438 | return ___cp_mbtowc (r, pwc, s, n, 18, state); |
---|
439 | } |
---|
440 | |
---|
441 | static int |
---|
442 | __cp_1256_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
443 | mbstate_t *state) |
---|
444 | { |
---|
445 | return ___cp_mbtowc (r, pwc, s, n, 19, state); |
---|
446 | } |
---|
447 | |
---|
448 | static int |
---|
449 | __cp_1257_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
450 | mbstate_t *state) |
---|
451 | { |
---|
452 | return ___cp_mbtowc (r, pwc, s, n, 20, state); |
---|
453 | } |
---|
454 | |
---|
455 | static int |
---|
456 | __cp_1258_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
457 | mbstate_t *state) |
---|
458 | { |
---|
459 | return ___cp_mbtowc (r, pwc, s, n, 21, state); |
---|
460 | } |
---|
461 | |
---|
462 | static int |
---|
463 | __cp_20866_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
464 | mbstate_t *state) |
---|
465 | { |
---|
466 | return ___cp_mbtowc (r, pwc, s, n, 22, state); |
---|
467 | } |
---|
468 | |
---|
469 | static int |
---|
470 | __cp_21866_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
471 | mbstate_t *state) |
---|
472 | { |
---|
473 | return ___cp_mbtowc (r, pwc, s, n, 23, state); |
---|
474 | } |
---|
475 | |
---|
476 | static int |
---|
477 | __cp_101_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
478 | mbstate_t *state) |
---|
479 | { |
---|
480 | return ___cp_mbtowc (r, pwc, s, n, 24, state); |
---|
481 | } |
---|
482 | |
---|
483 | static int |
---|
484 | __cp_102_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, |
---|
485 | mbstate_t *state) |
---|
486 | { |
---|
487 | return ___cp_mbtowc (r, pwc, s, n, 25, state); |
---|
488 | } |
---|
489 | |
---|
490 | static mbtowc_p __cp_xxx_mbtowc[26] = { |
---|
491 | __cp_437_mbtowc, |
---|
492 | __cp_720_mbtowc, |
---|
493 | __cp_737_mbtowc, |
---|
494 | __cp_775_mbtowc, |
---|
495 | __cp_850_mbtowc, |
---|
496 | __cp_852_mbtowc, |
---|
497 | __cp_855_mbtowc, |
---|
498 | __cp_857_mbtowc, |
---|
499 | __cp_858_mbtowc, |
---|
500 | __cp_862_mbtowc, |
---|
501 | __cp_866_mbtowc, |
---|
502 | __cp_874_mbtowc, |
---|
503 | __cp_1125_mbtowc, |
---|
504 | __cp_1250_mbtowc, |
---|
505 | __cp_1251_mbtowc, |
---|
506 | __cp_1252_mbtowc, |
---|
507 | __cp_1253_mbtowc, |
---|
508 | __cp_1254_mbtowc, |
---|
509 | __cp_1255_mbtowc, |
---|
510 | __cp_1256_mbtowc, |
---|
511 | __cp_1257_mbtowc, |
---|
512 | __cp_1258_mbtowc, |
---|
513 | __cp_20866_mbtowc, |
---|
514 | __cp_21866_mbtowc, |
---|
515 | __cp_101_mbtowc, |
---|
516 | __cp_102_mbtowc |
---|
517 | }; |
---|
518 | |
---|
519 | /* val *MUST* be valid! All checks for validity are supposed to be |
---|
520 | performed before calling this function. */ |
---|
521 | mbtowc_p |
---|
522 | __cp_mbtowc (int val) |
---|
523 | { |
---|
524 | return __cp_xxx_mbtowc[__cp_val_index (val)]; |
---|
525 | } |
---|
526 | #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */ |
---|
527 | |
---|
528 | int |
---|
529 | __utf8_mbtowc (struct _reent *r, |
---|
530 | wchar_t *pwc, |
---|
531 | const char *s, |
---|
532 | size_t n, |
---|
533 | mbstate_t *state) |
---|
534 | { |
---|
535 | wchar_t dummy; |
---|
536 | unsigned char *t = (unsigned char *)s; |
---|
537 | int ch; |
---|
538 | int i = 0; |
---|
539 | |
---|
540 | if (pwc == NULL) |
---|
541 | pwc = &dummy; |
---|
542 | |
---|
543 | if (s == NULL) |
---|
544 | return 0; |
---|
545 | |
---|
546 | if (n == 0) |
---|
547 | return -2; |
---|
548 | |
---|
549 | if (state->__count == 0) |
---|
550 | ch = t[i++]; |
---|
551 | else |
---|
552 | ch = state->__value.__wchb[0]; |
---|
553 | |
---|
554 | if (ch == '\0') |
---|
555 | { |
---|
556 | *pwc = 0; |
---|
557 | state->__count = 0; |
---|
558 | return 0; /* s points to the null character */ |
---|
559 | } |
---|
560 | |
---|
561 | if (ch <= 0x7f) |
---|
562 | { |
---|
563 | /* single-byte sequence */ |
---|
564 | state->__count = 0; |
---|
565 | *pwc = ch; |
---|
566 | return 1; |
---|
567 | } |
---|
568 | if (ch >= 0xc0 && ch <= 0xdf) |
---|
569 | { |
---|
570 | /* two-byte sequence */ |
---|
571 | state->__value.__wchb[0] = ch; |
---|
572 | if (state->__count == 0) |
---|
573 | state->__count = 1; |
---|
574 | else if (n < (size_t)-1) |
---|
575 | ++n; |
---|
576 | if (n < 2) |
---|
577 | return -2; |
---|
578 | ch = t[i++]; |
---|
579 | if (ch < 0x80 || ch > 0xbf) |
---|
580 | { |
---|
581 | r->_errno = EILSEQ; |
---|
582 | return -1; |
---|
583 | } |
---|
584 | if (state->__value.__wchb[0] < 0xc2) |
---|
585 | { |
---|
586 | /* overlong UTF-8 sequence */ |
---|
587 | r->_errno = EILSEQ; |
---|
588 | return -1; |
---|
589 | } |
---|
590 | state->__count = 0; |
---|
591 | *pwc = (wchar_t)((state->__value.__wchb[0] & 0x1f) << 6) |
---|
592 | | (wchar_t)(ch & 0x3f); |
---|
593 | return i; |
---|
594 | } |
---|
595 | if (ch >= 0xe0 && ch <= 0xef) |
---|
596 | { |
---|
597 | /* three-byte sequence */ |
---|
598 | wchar_t tmp; |
---|
599 | state->__value.__wchb[0] = ch; |
---|
600 | if (state->__count == 0) |
---|
601 | state->__count = 1; |
---|
602 | else if (n < (size_t)-1) |
---|
603 | ++n; |
---|
604 | if (n < 2) |
---|
605 | return -2; |
---|
606 | ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1]; |
---|
607 | if (state->__value.__wchb[0] == 0xe0 && ch < 0xa0) |
---|
608 | { |
---|
609 | /* overlong UTF-8 sequence */ |
---|
610 | r->_errno = EILSEQ; |
---|
611 | return -1; |
---|
612 | } |
---|
613 | if (ch < 0x80 || ch > 0xbf) |
---|
614 | { |
---|
615 | r->_errno = EILSEQ; |
---|
616 | return -1; |
---|
617 | } |
---|
618 | state->__value.__wchb[1] = ch; |
---|
619 | if (state->__count == 1) |
---|
620 | state->__count = 2; |
---|
621 | else if (n < (size_t)-1) |
---|
622 | ++n; |
---|
623 | if (n < 3) |
---|
624 | return -2; |
---|
625 | ch = t[i++]; |
---|
626 | if (ch < 0x80 || ch > 0xbf) |
---|
627 | { |
---|
628 | r->_errno = EILSEQ; |
---|
629 | return -1; |
---|
630 | } |
---|
631 | state->__count = 0; |
---|
632 | tmp = (wchar_t)((state->__value.__wchb[0] & 0x0f) << 12) |
---|
633 | | (wchar_t)((state->__value.__wchb[1] & 0x3f) << 6) |
---|
634 | | (wchar_t)(ch & 0x3f); |
---|
635 | *pwc = tmp; |
---|
636 | return i; |
---|
637 | } |
---|
638 | if (ch >= 0xf0 && ch <= 0xf4) |
---|
639 | { |
---|
640 | /* four-byte sequence */ |
---|
641 | wint_t tmp; |
---|
642 | state->__value.__wchb[0] = ch; |
---|
643 | if (state->__count == 0) |
---|
644 | state->__count = 1; |
---|
645 | else if (n < (size_t)-1) |
---|
646 | ++n; |
---|
647 | if (n < 2) |
---|
648 | return -2; |
---|
649 | ch = (state->__count == 1) ? t[i++] : state->__value.__wchb[1]; |
---|
650 | if ((state->__value.__wchb[0] == 0xf0 && ch < 0x90) |
---|
651 | || (state->__value.__wchb[0] == 0xf4 && ch >= 0x90)) |
---|
652 | { |
---|
653 | /* overlong UTF-8 sequence or result is > 0x10ffff */ |
---|
654 | r->_errno = EILSEQ; |
---|
655 | return -1; |
---|
656 | } |
---|
657 | if (ch < 0x80 || ch > 0xbf) |
---|
658 | { |
---|
659 | r->_errno = EILSEQ; |
---|
660 | return -1; |
---|
661 | } |
---|
662 | state->__value.__wchb[1] = ch; |
---|
663 | if (state->__count == 1) |
---|
664 | state->__count = 2; |
---|
665 | else if (n < (size_t)-1) |
---|
666 | ++n; |
---|
667 | if (n < 3) |
---|
668 | return -2; |
---|
669 | ch = (state->__count == 2) ? t[i++] : state->__value.__wchb[2]; |
---|
670 | if (ch < 0x80 || ch > 0xbf) |
---|
671 | { |
---|
672 | r->_errno = EILSEQ; |
---|
673 | return -1; |
---|
674 | } |
---|
675 | state->__value.__wchb[2] = ch; |
---|
676 | if (state->__count == 2) |
---|
677 | state->__count = 3; |
---|
678 | else if (n < (size_t)-1) |
---|
679 | ++n; |
---|
680 | if (state->__count == 3 && sizeof(wchar_t) == 2) |
---|
681 | { |
---|
682 | /* On systems which have wchar_t being UTF-16 values, the value |
---|
683 | doesn't fit into a single wchar_t in this case. So what we |
---|
684 | do here is to store the state with a special value of __count |
---|
685 | and return the first half of a surrogate pair. The first |
---|
686 | three bytes of a UTF-8 sequence are enough to generate the |
---|
687 | first half of a UTF-16 surrogate pair. As return value we |
---|
688 | choose to return the number of bytes actually read up to |
---|
689 | here. |
---|
690 | The second half of the surrogate pair is returned in case we |
---|
691 | recognize the special __count value of four, and the next |
---|
692 | byte is actually a valid value. See below. */ |
---|
693 | tmp = (wint_t)((state->__value.__wchb[0] & 0x07) << 18) |
---|
694 | | (wint_t)((state->__value.__wchb[1] & 0x3f) << 12) |
---|
695 | | (wint_t)((state->__value.__wchb[2] & 0x3f) << 6); |
---|
696 | state->__count = 4; |
---|
697 | *pwc = 0xd800 | ((tmp - 0x10000) >> 10); |
---|
698 | return i; |
---|
699 | } |
---|
700 | if (n < 4) |
---|
701 | return -2; |
---|
702 | ch = t[i++]; |
---|
703 | if (ch < 0x80 || ch > 0xbf) |
---|
704 | { |
---|
705 | r->_errno = EILSEQ; |
---|
706 | return -1; |
---|
707 | } |
---|
708 | tmp = (wint_t)((state->__value.__wchb[0] & 0x07) << 18) |
---|
709 | | (wint_t)((state->__value.__wchb[1] & 0x3f) << 12) |
---|
710 | | (wint_t)((state->__value.__wchb[2] & 0x3f) << 6) |
---|
711 | | (wint_t)(ch & 0x3f); |
---|
712 | if (state->__count == 4 && sizeof(wchar_t) == 2) |
---|
713 | /* Create the second half of the surrogate pair for systems with |
---|
714 | wchar_t == UTF-16 . */ |
---|
715 | *pwc = 0xdc00 | (tmp & 0x3ff); |
---|
716 | else |
---|
717 | *pwc = tmp; |
---|
718 | state->__count = 0; |
---|
719 | return i; |
---|
720 | } |
---|
721 | |
---|
722 | r->_errno = EILSEQ; |
---|
723 | return -1; |
---|
724 | } |
---|
725 | |
---|
726 | /* Cygwin defines its own doublebyte charset conversion functions |
---|
727 | because the underlying OS requires wchar_t == UTF-16. */ |
---|
728 | #ifndef __CYGWIN__ |
---|
729 | int |
---|
730 | __sjis_mbtowc (struct _reent *r, |
---|
731 | wchar_t *pwc, |
---|
732 | const char *s, |
---|
733 | size_t n, |
---|
734 | mbstate_t *state) |
---|
735 | { |
---|
736 | wchar_t dummy; |
---|
737 | unsigned char *t = (unsigned char *)s; |
---|
738 | int ch; |
---|
739 | int i = 0; |
---|
740 | |
---|
741 | if (pwc == NULL) |
---|
742 | pwc = &dummy; |
---|
743 | |
---|
744 | if (s == NULL) |
---|
745 | return 0; /* not state-dependent */ |
---|
746 | |
---|
747 | if (n == 0) |
---|
748 | return -2; |
---|
749 | |
---|
750 | ch = t[i++]; |
---|
751 | if (state->__count == 0) |
---|
752 | { |
---|
753 | if (_issjis1 (ch)) |
---|
754 | { |
---|
755 | state->__value.__wchb[0] = ch; |
---|
756 | state->__count = 1; |
---|
757 | if (n <= 1) |
---|
758 | return -2; |
---|
759 | ch = t[i++]; |
---|
760 | } |
---|
761 | } |
---|
762 | if (state->__count == 1) |
---|
763 | { |
---|
764 | if (_issjis2 (ch)) |
---|
765 | { |
---|
766 | *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)ch; |
---|
767 | state->__count = 0; |
---|
768 | return i; |
---|
769 | } |
---|
770 | else |
---|
771 | { |
---|
772 | r->_errno = EILSEQ; |
---|
773 | return -1; |
---|
774 | } |
---|
775 | } |
---|
776 | |
---|
777 | *pwc = (wchar_t)*t; |
---|
778 | |
---|
779 | if (*t == '\0') |
---|
780 | return 0; |
---|
781 | |
---|
782 | return 1; |
---|
783 | } |
---|
784 | |
---|
785 | int |
---|
786 | __eucjp_mbtowc (struct _reent *r, |
---|
787 | wchar_t *pwc, |
---|
788 | const char *s, |
---|
789 | size_t n, |
---|
790 | mbstate_t *state) |
---|
791 | { |
---|
792 | wchar_t dummy; |
---|
793 | unsigned char *t = (unsigned char *)s; |
---|
794 | int ch; |
---|
795 | int i = 0; |
---|
796 | |
---|
797 | if (pwc == NULL) |
---|
798 | pwc = &dummy; |
---|
799 | |
---|
800 | if (s == NULL) |
---|
801 | return 0; |
---|
802 | |
---|
803 | if (n == 0) |
---|
804 | return -2; |
---|
805 | |
---|
806 | ch = t[i++]; |
---|
807 | if (state->__count == 0) |
---|
808 | { |
---|
809 | if (_iseucjp1 (ch)) |
---|
810 | { |
---|
811 | state->__value.__wchb[0] = ch; |
---|
812 | state->__count = 1; |
---|
813 | if (n <= 1) |
---|
814 | return -2; |
---|
815 | ch = t[i++]; |
---|
816 | } |
---|
817 | } |
---|
818 | if (state->__count == 1) |
---|
819 | { |
---|
820 | if (_iseucjp2 (ch)) |
---|
821 | { |
---|
822 | if (state->__value.__wchb[0] == 0x8f) |
---|
823 | { |
---|
824 | state->__value.__wchb[1] = ch; |
---|
825 | state->__count = 2; |
---|
826 | if (n <= i) |
---|
827 | return -2; |
---|
828 | ch = t[i++]; |
---|
829 | } |
---|
830 | else |
---|
831 | { |
---|
832 | *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)ch; |
---|
833 | state->__count = 0; |
---|
834 | return i; |
---|
835 | } |
---|
836 | } |
---|
837 | else |
---|
838 | { |
---|
839 | r->_errno = EILSEQ; |
---|
840 | return -1; |
---|
841 | } |
---|
842 | } |
---|
843 | if (state->__count == 2) |
---|
844 | { |
---|
845 | if (_iseucjp2 (ch)) |
---|
846 | { |
---|
847 | *pwc = (((wchar_t)state->__value.__wchb[1]) << 8) |
---|
848 | + (wchar_t)(ch & 0x7f); |
---|
849 | state->__count = 0; |
---|
850 | return i; |
---|
851 | } |
---|
852 | else |
---|
853 | { |
---|
854 | r->_errno = EILSEQ; |
---|
855 | return -1; |
---|
856 | } |
---|
857 | } |
---|
858 | |
---|
859 | *pwc = (wchar_t)*t; |
---|
860 | |
---|
861 | if (*t == '\0') |
---|
862 | return 0; |
---|
863 | |
---|
864 | return 1; |
---|
865 | } |
---|
866 | |
---|
867 | int |
---|
868 | __jis_mbtowc (struct _reent *r, |
---|
869 | wchar_t *pwc, |
---|
870 | const char *s, |
---|
871 | size_t n, |
---|
872 | mbstate_t *state) |
---|
873 | { |
---|
874 | wchar_t dummy; |
---|
875 | unsigned char *t = (unsigned char *)s; |
---|
876 | JIS_STATE curr_state; |
---|
877 | JIS_ACTION action; |
---|
878 | JIS_CHAR_TYPE ch; |
---|
879 | unsigned char *ptr; |
---|
880 | unsigned int i; |
---|
881 | int curr_ch; |
---|
882 | |
---|
883 | if (pwc == NULL) |
---|
884 | pwc = &dummy; |
---|
885 | |
---|
886 | if (s == NULL) |
---|
887 | { |
---|
888 | state->__state = ASCII; |
---|
889 | return 1; /* state-dependent */ |
---|
890 | } |
---|
891 | |
---|
892 | if (n == 0) |
---|
893 | return -2; |
---|
894 | |
---|
895 | curr_state = state->__state; |
---|
896 | ptr = t; |
---|
897 | |
---|
898 | for (i = 0; i < n; ++i) |
---|
899 | { |
---|
900 | curr_ch = t[i]; |
---|
901 | switch (curr_ch) |
---|
902 | { |
---|
903 | case ESC_CHAR: |
---|
904 | ch = ESCAPE; |
---|
905 | break; |
---|
906 | case '$': |
---|
907 | ch = DOLLAR; |
---|
908 | break; |
---|
909 | case '@': |
---|
910 | ch = AT; |
---|
911 | break; |
---|
912 | case '(': |
---|
913 | ch = BRACKET; |
---|
914 | break; |
---|
915 | case 'B': |
---|
916 | ch = B; |
---|
917 | break; |
---|
918 | case 'J': |
---|
919 | ch = J; |
---|
920 | break; |
---|
921 | case '\0': |
---|
922 | ch = NUL; |
---|
923 | break; |
---|
924 | default: |
---|
925 | if (_isjis (curr_ch)) |
---|
926 | ch = JIS_CHAR; |
---|
927 | else |
---|
928 | ch = OTHER; |
---|
929 | } |
---|
930 | |
---|
931 | action = JIS_action_table[curr_state][ch]; |
---|
932 | curr_state = JIS_state_table[curr_state][ch]; |
---|
933 | |
---|
934 | switch (action) |
---|
935 | { |
---|
936 | case NOOP: |
---|
937 | break; |
---|
938 | case EMPTY: |
---|
939 | state->__state = ASCII; |
---|
940 | *pwc = (wchar_t)0; |
---|
941 | return 0; |
---|
942 | case COPY_A: |
---|
943 | state->__state = ASCII; |
---|
944 | *pwc = (wchar_t)*ptr; |
---|
945 | return (i + 1); |
---|
946 | case COPY_J1: |
---|
947 | state->__value.__wchb[0] = t[i]; |
---|
948 | break; |
---|
949 | case COPY_J2: |
---|
950 | state->__state = JIS; |
---|
951 | *pwc = (((wchar_t)state->__value.__wchb[0]) << 8) + (wchar_t)(t[i]); |
---|
952 | return (i + 1); |
---|
953 | case MAKE_A: |
---|
954 | ptr = (unsigned char *)(t + i + 1); |
---|
955 | break; |
---|
956 | case ERROR: |
---|
957 | default: |
---|
958 | r->_errno = EILSEQ; |
---|
959 | return -1; |
---|
960 | } |
---|
961 | |
---|
962 | } |
---|
963 | |
---|
964 | state->__state = curr_state; |
---|
965 | return -2; /* n < bytes needed */ |
---|
966 | } |
---|
967 | #endif /* !__CYGWIN__*/ |
---|
968 | #endif /* _MB_CAPABLE */ |
---|