Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

locale.c @ 690

Last change on this file since 690 was 444, checked in by satin@…, 6 years ago
add newlib,libalmos-mkh, restructure shared_syscalls.h and mini-libc
File size: 27.3 KB

Line
1	/*
2	FUNCTION
3	<<setlocale>>, <<localeconv>>---select or query locale
4
5	INDEX
6	setlocale
7	INDEX
8	localeconv
9	INDEX
10	_setlocale_r
11	INDEX
12	_localeconv_r
13
14	SYNOPSIS
15	#include <locale.h>
16	char *setlocale(int <[category]>, const char *<[locale]>);
17	lconv *localeconv(void);
18
19	char *_setlocale_r(void *<[reent]>,
20	int <[category]>, const char *<[locale]>);
21	lconv *_localeconv_r(void *<[reent]>);
22
23	DESCRIPTION
24	<<setlocale>> is the facility defined by ANSI C to condition the
25	execution environment for international collating and formatting
26	information; <<localeconv>> reports on the settings of the current
27	locale.
28
29	This is a minimal implementation, supporting only the required <<"POSIX">>
30	and <<"C">> values for <[locale]>; strings representing other locales are not
31	honored unless _MB_CAPABLE is defined.
32
33	If _MB_CAPABLE is defined, POSIX locale strings are allowed, following
34	the form
35
36	language[_TERRITORY][.charset][@@modifier]
37
38	<<"language">> is a two character string per ISO 639, or, if not available
39	for a given language, a three character string per ISO 639-3.
40	<<"TERRITORY">> is a country code per ISO 3166. For <<"charset">> and
41	<<"modifier">> see below.
42
43	Additionally to the POSIX specifier, the following extension is supported
44	for backward compatibility with older implementations using newlib:
45	<<"C-charset">>.
46	Instead of <<"C-">>, you can also specify <<"C.">>. Both variations allow
47	to specify language neutral locales while using other charsets than ASCII,
48	for instance <<"C.UTF-8">>, which keeps all settings as in the C locale,
49	but uses the UTF-8 charset.
50
51	The following charsets are recognized:
52	<<"UTF-8">>, <<"JIS">>, <<"EUCJP">>, <<"SJIS">>, <<"KOI8-R">>, <<"KOI8-U">>,
53	<<"GEORGIAN-PS">>, <<"PT154">>, <<"TIS-620">>, <<"ISO-8859-x">> with
54	1 <= x <= 16, or <<"CPxxx">> with xxx in [437, 720, 737, 775, 850, 852, 855,
55	857, 858, 862, 866, 874, 932, 1125, 1250, 1251, 1252, 1253, 1254, 1255, 1256,
56	1257, 1258].
57
58	Charsets are case insensitive. For instance, <<"EUCJP">> and <<"eucJP">>
59	are equivalent. Charset names with dashes can also be written without
60	dashes, as in <<"UTF8">>, <<"iso88591">> or <<"koi8r">>. <<"EUCJP">> and
61	<<"EUCKR">> are also recognized with dash, <<"EUC-JP">> and <<"EUC-KR">>.
62
63	Full support for all of the above charsets requires that newlib has been
64	built with multibyte support and support for all ISO and Windows Codepage.
65	Otherwise all singlebyte charsets are simply mapped to ASCII. Right now,
66	only newlib for Cygwin is built with full charset support by default.
67	Under Cygwin, this implementation additionally supports the charsets
68	<<"GBK">>, <<"GB2312">>, <<"eucCN">>, <<"eucKR">>, and <<"Big5">>. Cygwin
69	does not support <<"JIS">>.
70
71	Cygwin additionally supports locales from the file
72	/usr/share/locale/locale.alias.
73
74	(<<"">> is also accepted; if given, the settings are read from the
75	corresponding LC_* environment variables and $LANG according to POSIX rules.)
76
77	This implementation also supports the modifier <<"cjknarrow">>, which
78	affects how the functions <<wcwidth>> and <<wcswidth>> handle characters
79	from the "CJK Ambiguous Width" category of characters described at
80	http://www.unicode.org/reports/tr11/#Ambiguous. These characters have a width
81	of 1 for singlebyte charsets and a width of 2 for multibyte charsets
82	other than UTF-8. For UTF-8, their width depends on the language specifier:
83	it is 2 for <<"zh">> (Chinese), <<"ja">> (Japanese), and <<"ko">> (Korean),
84	and 1 for everything else. Specifying <<"cjknarrow">> forces a width of 1,
85	independent of charset and language.
86
87	If you use <<NULL>> as the <[locale]> argument, <<setlocale>> returns a
88	pointer to the string representing the current locale. The acceptable
89	values for <[category]> are defined in `<<locale.h>>' as macros
90	beginning with <<"LC_">>.
91
92	<<localeconv>> returns a pointer to a structure (also defined in
93	`<<locale.h>>') describing the locale-specific conventions currently
94	in effect.
95
96	<<_localeconv_r>> and <<_setlocale_r>> are reentrant versions of
97	<<localeconv>> and <<setlocale>> respectively. The extra argument
98	<[reent]> is a pointer to a reentrancy structure.
99
100	RETURNS
101	A successful call to <<setlocale>> returns a pointer to a string
102	associated with the specified category for the new locale. The string
103	returned by <<setlocale>> is such that a subsequent call using that
104	string will restore that category (or all categories in case of LC_ALL),
105	to that state. The application shall not modify the string returned
106	which may be overwritten by a subsequent call to <<setlocale>>.
107	On error, <<setlocale>> returns <<NULL>>.
108
109	<<localeconv>> returns a pointer to a structure of type <<lconv>>,
110	which describes the formatting and collating conventions in effect (in
111	this implementation, always those of the C locale).
112
113	PORTABILITY
114	ANSI C requires <<setlocale>>, but the only locale required across all
115	implementations is the C locale.
116
117	NOTES
118	There is no ISO-8859-12 codepage. It's also refused by this implementation.
119
120	No supporting OS subroutines are required.
121	*/
122
123	/* Parts of this code are originally taken from FreeBSD. */
124	/*
125	* Copyright (c) 1996 - 2002 FreeBSD Project
126	* Copyright (c) 1991, 1993
127	* The Regents of the University of California. All rights reserved.
128	*
129	* This code is derived from software contributed to Berkeley by
130	* Paul Borman at Krystal Technologies.
131	*
132	* Redistribution and use in source and binary forms, with or without
133	* modification, are permitted provided that the following conditions
134	* are met:
135	* 1. Redistributions of source code must retain the above copyright
136	* notice, this list of conditions and the following disclaimer.
137	* 2. Redistributions in binary form must reproduce the above copyright
138	* notice, this list of conditions and the following disclaimer in the
139	* documentation and/or other materials provided with the distribution.
140	* 4. Neither the name of the University nor the names of its contributors
141	* may be used to endorse or promote products derived from this software
142	* without specific prior written permission.
143	*
144	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
145	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
146	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
147	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
148	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
149	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
150	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
151	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
152	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
153	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
154	* SUCH DAMAGE.
155	*/
156
157	#include <newlib.h>
158	#include <errno.h>
159	#include <string.h>
160	#include <limits.h>
161	#include <reent.h>
162	#include <stdlib.h>
163	#include <wchar.h>
164	#include "setlocale.h"
165	#include "../ctype/ctype_.h"
166	#include "../stdlib/local.h"
167
#ifdef __CYGWIN__
/* Legacy exported symbol, retained purely for backward compatibility.
   __loadlocale still stores the LC_CTYPE value into it, but nothing else
   in this file reads it.  Applications compiled after 2010 don't use it
   anymore.  */
int __EXPORT __mb_cur_max = 6;
#endif

/* Locale path pointer; it starts out unset.  */
char *_PathLocale = NULL;
176
#ifdef _MB_CAPABLE
/*
 * Environment variable names handed to _getenv_r() by __get_locale_env(),
 * indexed by locale category (slot 0 is "LC_ALL").  The table is never
 * modified, so both the pointers and the strings are const-qualified.
 */
static const char *const categories[_LC_LAST] = {
  "LC_ALL",
  "LC_COLLATE",
  "LC_CTYPE",
  "LC_MONETARY",
  "LC_NUMERIC",
  "LC_TIME",
  "LC_MESSAGES",
};
#endif /* _MB_CAPABLE */
191
/*
 * Name of the default locale as required by POSIX.  A target may
 * pre-define DEFAULT_LOCALE to substitute its own.
 */
#if !defined (DEFAULT_LOCALE)
# define DEFAULT_LOCALE "C"
#endif
198
#ifdef _MB_CAPABLE
/*
 * Fallback locale name returned by __get_locale_env() when none of the
 * environment variables yields a value.  It is deliberately writable so
 * that an outside mechanism can replace it, for instance with a default
 * locale loaded from a file.
 */
char __default_locale[ENCODING_LEN + 1] = DEFAULT_LOCALE;

/*
 * Read-only locale object describing the plain "C" locale.
 * NOTE(review): the per-member remarks below assume the member order of
 * struct __locale_t, which is declared outside this file -- confirm
 * against setlocale.h.
 */
const struct __locale_t __C_locale =
{
  /* One name per category slot.  */
  { "C", "C", "C", "C", "C", "C", "C", },
  /* Wide <-> multibyte converters.  */
  __ascii_wctomb,
  __ascii_mbtowc,
  /* Presumably cjk_lang: no double-width treatment.  */
  0,
  /* Character classification table.  */
  DEFAULT_CTYPE_PTR,
  /* Presumably the lconv data: ten strings followed by fourteen
     CHAR_MAX entries.  */
  {
    ".",
    "", "", "", "", "", "", "", "", "",
    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
    CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX
  },
#ifndef __HAVE_LOCALE_INFO__
  /* Presumably mb_cur_max, ctype_codeset and message_codeset.  */
  "\1",
  "ASCII",
  "ASCII",
#else /* __HAVE_LOCALE_INFO__ */
  /* Per-category locale data pointers.  */
  {
    { NULL, NULL },                     /* LC_ALL */
#ifdef __CYGWIN__
    { &_C_collate_locale, NULL },       /* LC_COLLATE */
#else
    { NULL, NULL },                     /* LC_COLLATE */
#endif
    { &_C_ctype_locale, NULL },         /* LC_CTYPE */
    { &_C_monetary_locale, NULL },      /* LC_MONETARY */
    { &_C_numeric_locale, NULL },       /* LC_NUMERIC */
    { &_C_time_locale, NULL },          /* LC_TIME */
    { &_C_messages_locale, NULL },      /* LC_MESSAGES */
  },
#endif /* __HAVE_LOCALE_INFO__ */
};
#endif /* _MB_CAPABLE */
241
242	struct __locale_t __global_locale =
243	{
244	{ "C", "C", DEFAULT_LOCALE, "C", "C", "C", "C", },
245	#ifdef __CYGWIN__
246	__utf8_wctomb,
247	__utf8_mbtowc,
248	#else
249	__ascii_wctomb,
250	__ascii_mbtowc,
251	#endif
252	0,
253	DEFAULT_CTYPE_PTR,
254	{
255	".", "", "", "", "", "", "", "", "", "",
256	CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
257	CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
258	CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
259	CHAR_MAX, CHAR_MAX
260	},
261	#ifndef __HAVE_LOCALE_INFO__
262	"\1",
263	"ASCII",
264	"ASCII",
265	#else /* __HAVE_LOCALE_INFO__ */
266	{
267	{ NULL, NULL }, /* LC_ALL */
268	#ifdef __CYGWIN__
269	{ &_C_collate_locale, NULL }, /* LC_COLLATE */
270	#else
271	{ NULL, NULL }, /* LC_COLLATE */
272	#endif
273	{ &_C_ctype_locale, NULL }, /* LC_CTYPE */
274	{ &_C_monetary_locale, NULL }, /* LC_MONETARY */
275	{ &_C_numeric_locale, NULL }, /* LC_NUMERIC */
276	{ &_C_time_locale, NULL }, /* LC_TIME */
277	{ &_C_messages_locale, NULL }, /* LC_MESSAGES */
278	},
279	#endif /* __HAVE_LOCALE_INFO__ */
280	};
281
#ifdef _MB_CAPABLE
/* Renamed from current_locale_string to make clear this is only the
   global string for setlocale (LC_ALL, NULL).  There's no equivalent
   functionality for uselocale.

   Sized for one entry per category slot, each consisting of a locale
   name of up to ENCODING_LEN characters, one "/" separator and one more
   byte.  */
static char global_locale_string[_LC_LAST * (ENCODING_LEN + 1/*"/"*/ + 1)];

/* Builds the LC_ALL description of the global locale in
   global_locale_string and returns it.  */
static char *currentlocale (void);

#endif /* _MB_CAPABLE */
290
/*
 * Reentrant worker behind setlocale().
 *
 * P is the reentrancy structure that receives the error code, CATEGORY
 * one of the LC_* values and LOCALE the requested locale:
 *   - NULL queries the current setting without changing anything;
 *   - ""   takes the setting from the environment (__get_locale_env);
 *   - for LC_ALL a string containing '/' supplies one name per category,
 *     separated by slashes; missing trailing names repeat the last one.
 *
 * Returns the resulting locale string, or NULL on failure.  Argument
 * errors (category out of range, name longer than ENCODING_LEN, a
 * composite string with nothing after its first slash run) set
 * P->_errno to EINVAL.  When an LC_ALL change fails part way, the
 * categories already switched are put back and P->_errno keeps the value
 * it had at the point of failure.
 *
 * Without _MB_CAPABLE only "POSIX", "C" and "" are accepted and the
 * answer is always "C".
 */
char *
_setlocale_r (struct _reent *p,
              int category,
              const char *locale)
{
#ifndef _MB_CAPABLE
  if (locale)
    {
      if (strcmp (locale, "POSIX") && strcmp (locale, "C")
          && strcmp (locale, ""))
        return NULL;
    }
  return "C";
#else /* _MB_CAPABLE */
  /* Static scratch tables: the candidate names and, for LC_ALL, the
     names that were in effect before the change.  */
  static char new_categories[_LC_LAST][ENCODING_LEN + 1];
  static char saved_categories[_LC_LAST][ENCODING_LEN + 1];
  int i, j, len, saverr;
  const char *env, *r;

  if (category < LC_ALL || category >= _LC_LAST)
    {
      p->_errno = EINVAL;
      return NULL;
    }

  if (locale == NULL)
    return category != LC_ALL ? __get_global_locale ()->categories[category]
                              : currentlocale ();

  /*
   * Default to the current locale for everything.
   */
  for (i = 1; i < _LC_LAST; ++i)
    strcpy (new_categories[i], __get_global_locale ()->categories[i]);

  /*
   * Now go fill up new_categories from the locale argument
   */
  if (!*locale)
    {
      if (category == LC_ALL)
        {
          for (i = 1; i < _LC_LAST; ++i)
            {
              env = __get_locale_env (p, i);
              if (strlen (env) > ENCODING_LEN)
                {
                  p->_errno = EINVAL;
                  return NULL;
                }
              strcpy (new_categories[i], env);
            }
        }
      else
        {
          env = __get_locale_env (p, category);
          if (strlen (env) > ENCODING_LEN)
            {
              p->_errno = EINVAL;
              return NULL;
            }
          strcpy (new_categories[category], env);
        }
    }
  else if (category != LC_ALL)
    {
      if (strlen (locale) > ENCODING_LEN)
        {
          p->_errno = EINVAL;
          return NULL;
        }
      strcpy (new_categories[category], locale);
    }
  else
    {
      if ((r = strchr (locale, '/')) == NULL)
        {
          /* One name for all categories.  */
          if (strlen (locale) > ENCODING_LEN)
            {
              p->_errno = EINVAL;
              return NULL;
            }
          for (i = 1; i < _LC_LAST; ++i)
            strcpy (new_categories[i], locale);
        }
      else
        {
          /* Composite "name/name/..." string.  Advance R to the last
             slash of the first run of slashes.  */
          for (i = 1; r[1] == '/'; ++r)
            ;
          if (!r[1])
            {
              p->_errno = EINVAL;
              return NULL;  /* Hmm, just slashes... */
            }
          do
            {
              if (i == _LC_LAST)
                break;  /* Too many slashes... */
              /* [locale, r) is the current component.  */
              if ((len = r - locale) > ENCODING_LEN)
                {
                  p->_errno = EINVAL;
                  return NULL;
                }
              strlcpy (new_categories[i], locale, len + 1);
              i++;
              /* Skip the separator(s), then find the end of the next
                 component.  */
              while (*r == '/')
                r++;
              locale = r;
              while (*r && *r != '/')
                r++;
            }
          while (*locale);
          /* Fewer components than categories: repeat the last one.  */
          while (i < _LC_LAST)
            {
              strcpy (new_categories[i], new_categories[i-1]);
              i++;
            }
        }
    }

  if (category != LC_ALL)
    return __loadlocale (__get_global_locale (), category,
                         new_categories[category]);

  for (i = 1; i < _LC_LAST; ++i)
    {
      strcpy (saved_categories[i], __get_global_locale ()->categories[i]);
      if (__loadlocale (__get_global_locale (), i, new_categories[i]) == NULL)
        {
          /* Roll back the categories switched so far; fall back to "C"
             if even the old value cannot be reloaded.  */
          saverr = p->_errno;
          for (j = 1; j < i; j++)
            {
              strcpy (new_categories[j], saved_categories[j]);
              if (__loadlocale (__get_global_locale (), j, new_categories[j])
                  == NULL)
                {
                  strcpy (new_categories[j], "C");
                  __loadlocale (__get_global_locale (), j, new_categories[j]);
                }
            }
          p->_errno = saverr;
          return NULL;
        }
    }
  return currentlocale ();
#endif /* _MB_CAPABLE */
}
438
439	#ifdef _MB_CAPABLE
440	static char *
441	currentlocale ()
442	{
443	int i;
444
445	strcpy (global_locale_string, __get_global_locale ()->categories[1]);
446
447	for (i = 2; i < _LC_LAST; ++i)
448	if (strcmp (__get_global_locale ()->categories[1],
449	__get_global_locale ()->categories[i]))
450	{
451	for (i = 2; i < _LC_LAST; ++i)
452	{
453	(void)strcat(global_locale_string, "/");
454	(void)strcat(global_locale_string,
455	__get_global_locale ()->categories[i]);
456	}
457	break;
458	}
459	return global_locale_string;
460	}
461
462	extern void __set_ctype (struct __locale_t , const char charset);
463
464	char *
465	__loadlocale (struct __locale_t loc, int category, const char new_locale)
466	{
467	/* At this point a full-featured system would just load the locale
468	specific data from the locale files.
469	What we do here for now is to check the incoming string for correctness.
470	The string must be in one of the allowed locale strings, either
471	one in POSIX-style, or one in the old newlib style to maintain
472	backward compatibility. If the local string is correct, the charset
473	is extracted and stored in ctype_codeset or message_charset
474	dependent on the cateogry. */
475	char *locale = NULL;
476	char charset[ENCODING_LEN + 1];
477	long val = 0;
478	char end, c = NULL;
479	int mbc_max;
480	wctomb_p l_wctomb;
481	mbtowc_p l_mbtowc;
482	int cjknarrow = 0;
483
484	/* Avoid doing everything twice if nothing has changed.
485
486	duplocale relies on this test to go wrong so the locale is actually
487	duplicated when required. Any change here has to be synced with a
488	matching change in duplocale. */
489	if (!strcmp (new_locale, loc->categories[category]))
490	return loc->categories[category];
491
492	#ifdef __CYGWIN__
493	/* This additional code handles the case that the incoming locale string
494	is not valid. If so, it calls the function __set_locale_from_locale_alias,
495	which is only available on Cygwin right now. The function reads the
496	file /usr/share/locale/locale.alias. The file contains locale aliases
497	and their replacement locale. For instance, the alias "french" is
498	translated to "fr_FR.ISO-8859-1", the alias "thai" is translated to
499	"th_TH.TIS-620". If successful, the function returns with a pointer
500	to the second argument, which is a buffer in which the replacement locale
501	gets stored. Otherwise the function returns NULL. */
502	char tmp_locale[ENCODING_LEN + 1];
503	int ret = 0;
504
505	restart:
506	if (!locale)
507	locale = (char *) new_locale;
508	else if (locale != tmp_locale)
509	{
510	locale = __set_locale_from_locale_alias (locale, tmp_locale);
511	if (!locale)
512	return NULL;
513	}
514	# define FAIL goto restart
515	#else
516	locale = new_locale;
517	# define FAIL return NULL
518	#endif
519
520	/* "POSIX" is translated to "C", as on Linux. */
521	if (!strcmp (locale, "POSIX"))
522	strcpy (locale, "C");
523	if (!strcmp (locale, "C")) /* Default "C" locale */
524	strcpy (charset, "ASCII");
525	else if (locale[0] == 'C'
526	&& (locale[1] == '-' /* Old newlib style */
527	\|\| locale[1] == '.')) /* Extension for the C locale to allow
528	specifying different charsets while
529	sticking to the C locale in terms
530	of sort order, etc. Proposed in
531	the Debian project. */
532	{
533	char *chp;
534
535	c = locale + 2;
536	strcpy (charset, c);
537	if ((chp = strchr (charset, '@')))
538	/* Strip off modifier */
539	*chp = '\0';
540	c += strlen (charset);
541	}
542	else /* POSIX style */
543	{
544	c = locale;
545
546	/* Don't use ctype macros here, they might be localized. */
547	/* Language */
548	if (c[0] < 'a' \|\| c[0] > 'z'
549	\|\| c[1] < 'a' \|\| c[1] > 'z')
550	FAIL;
551	c += 2;
552	/* Allow three character Language per ISO 639-3 */
553	if (c[0] >= 'a' && c[0] <= 'z')
554	++c;
555	if (c[0] == '_')
556	{
557	/* Territory */
558	++c;
559	if (c[0] < 'A' \|\| c[0] > 'Z'
560	\|\| c[1] < 'A' \|\| c[1] > 'Z')
561	FAIL;
562	c += 2;
563	}
564	if (c[0] == '.')
565	{
566	/* Charset */
567	char *chp;
568
569	++c;
570	strcpy (charset, c);
571	if ((chp = strchr (charset, '@')))
572	/* Strip off modifier */
573	*chp = '\0';
574	c += strlen (charset);
575	}
576	else if (c[0] == '\0' \|\| c[0] == '@')
577	/* End of string or just a modifier */
578	#ifdef __CYGWIN__
579	/* The Cygwin-only function __set_charset_from_locale checks
580	for the default charset which is connected to the given locale.
581	The function uses Windows functions in turn so it can't be easily
582	adapted to other targets. However, if any other target provides
583	equivalent functionality, preferrably using the same function name
584	it would be sufficient to change the guarding #ifdef. */
585	__set_charset_from_locale (locale, charset);
586	#else
587	strcpy (charset, "ISO-8859-1");
588	#endif
589	else
590	/* Invalid string */
591	FAIL;
592	}
593	if (c && c[0] == '@')
594	{
595	/* Modifier */
596	/* Only one modifier is recognized right now. "cjknarrow" is used
597	to modify the behaviour of wcwidth() for East Asian languages.
598	For details see the comment at the end of this function. */
599	if (!strcmp (c + 1, "cjknarrow"))
600	cjknarrow = 1;
601	}
602	/* We only support this subset of charsets. */
603	switch (charset[0])
604	{
605	case 'U':
606	case 'u':
607	if (strcasecmp (charset, "UTF-8") && strcasecmp (charset, "UTF8"))
608	FAIL;
609	strcpy (charset, "UTF-8");
610	mbc_max = 6;
611	l_wctomb = __utf8_wctomb;
612	l_mbtowc = __utf8_mbtowc;
613	break;
614	#ifndef __CYGWIN__
615	/* Cygwin does not support JIS at all. */
616	case 'J':
617	case 'j':
618	if (strcasecmp (charset, "JIS"))
619	FAIL;
620	strcpy (charset, "JIS");
621	mbc_max = 8;
622	l_wctomb = __jis_wctomb;
623	l_mbtowc = __jis_mbtowc;
624	break;
625	#endif /* !__CYGWIN__ */
626	case 'E':
627	case 'e':
628	if (strncasecmp (charset, "EUC", 3))
629	FAIL;
630	c = charset + 3;
631	if (*c == '-')
632	++c;
633	if (!strcasecmp (c, "JP"))
634	{
635	strcpy (charset, "EUCJP");
636	mbc_max = 3;
637	l_wctomb = __eucjp_wctomb;
638	l_mbtowc = __eucjp_mbtowc;
639	}
640	#ifdef __CYGWIN__
641	/* Newlib does neither provide EUC-KR nor EUC-CN, and Cygwin's
642	implementation requires Windows support. */
643	else if (!strcasecmp (c, "KR"))
644	{
645	strcpy (charset, "EUCKR");
646	mbc_max = 2;
647	l_wctomb = __kr_wctomb;
648	l_mbtowc = __kr_mbtowc;
649	}
650	else if (!strcasecmp (c, "CN"))
651	{
652	strcpy (charset, "EUCCN");
653	mbc_max = 2;
654	l_wctomb = __gbk_wctomb;
655	l_mbtowc = __gbk_mbtowc;
656	}
657	#endif /* __CYGWIN__ */
658	else
659	FAIL;
660	break;
661	case 'S':
662	case 's':
663	if (strcasecmp (charset, "SJIS"))
664	FAIL;
665	strcpy (charset, "SJIS");
666	mbc_max = 2;
667	l_wctomb = __sjis_wctomb;
668	l_mbtowc = __sjis_mbtowc;
669	break;
670	case 'I':
671	case 'i':
672	/* Must be exactly one of ISO-8859-1, [...] ISO-8859-16, except for
673	ISO-8859-12. This code also recognizes the aliases without dashes. */
674	if (strncasecmp (charset, "ISO", 3))
675	FAIL;
676	c = charset + 3;
677	if (*c == '-')
678	++c;
679	if (strncasecmp (c, "8859", 4))
680	FAIL;
681	c += 4;
682	if (*c == '-')
683	++c;
684	val = strtol (c, &end, 10);
685	if (val < 1 \|\| val > 16 \|\| val == 12 \|\| *end)
686	FAIL;
687	strcpy (charset, "ISO-8859-");
688	c = charset + 9;
689	if (val > 10)
690	*c++ = '1';
691	*c++ = val % 10 + '0';
692	*c = '\0';
693	mbc_max = 1;
694	#ifdef _MB_EXTENDED_CHARSETS_ISO
695	l_wctomb = __iso_wctomb (val);
696	l_mbtowc = __iso_mbtowc (val);
697	#else /* !_MB_EXTENDED_CHARSETS_ISO */
698	l_wctomb = __ascii_wctomb;
699	l_mbtowc = __ascii_mbtowc;
700	#endif /* _MB_EXTENDED_CHARSETS_ISO */
701	break;
702	case 'C':
703	case 'c':
704	if (charset[1] != 'P' && charset[1] != 'p')
705	FAIL;
706	strncpy (charset, "CP", 2);
707	val = strtol (charset + 2, &end, 10);
708	if (*end)
709	FAIL;
710	switch (val)
711	{
712	case 437:
713	case 720:
714	case 737:
715	case 775:
716	case 850:
717	case 852:
718	case 855:
719	case 857:
720	case 858:
721	case 862:
722	case 866:
723	case 874:
724	case 1125:
725	case 1250:
726	case 1251:
727	case 1252:
728	case 1253:
729	case 1254:
730	case 1255:
731	case 1256:
732	case 1257:
733	case 1258:
734	mbc_max = 1;
735	#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
736	l_wctomb = __cp_wctomb (val);
737	l_mbtowc = __cp_mbtowc (val);
738	#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
739	l_wctomb = __ascii_wctomb;
740	l_mbtowc = __ascii_mbtowc;
741	#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
742	break;
743	case 932:
744	mbc_max = 2;
745	l_wctomb = __sjis_wctomb;
746	l_mbtowc = __sjis_mbtowc;
747	break;
748	default:
749	FAIL;
750	}
751	break;
752	case 'K':
753	case 'k':
754	/* KOI8-R, KOI8-U and the aliases without dash */
755	if (strncasecmp (charset, "KOI8", 4))
756	FAIL;
757	c = charset + 4;
758	if (*c == '-')
759	++c;
760	if (c == 'R' \|\| c == 'r')
761	{
762	val = 20866;
763	strcpy (charset, "CP20866");
764	}
765	else if (c == 'U' \|\| c == 'u')
766	{
767	val = 21866;
768	strcpy (charset, "CP21866");
769	}
770	else
771	FAIL;
772	mbc_max = 1;
773	#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
774	l_wctomb = __cp_wctomb (val);
775	l_mbtowc = __cp_mbtowc (val);
776	#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
777	l_wctomb = __ascii_wctomb;
778	l_mbtowc = __ascii_mbtowc;
779	#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
780	break;
781	case 'A':
782	case 'a':
783	if (strcasecmp (charset, "ASCII"))
784	FAIL;
785	strcpy (charset, "ASCII");
786	mbc_max = 1;
787	l_wctomb = __ascii_wctomb;
788	l_mbtowc = __ascii_mbtowc;
789	break;
790	case 'G':
791	case 'g':
792	#ifdef __CYGWIN__
793	/* Newlib does not provide GBK/GB2312 and Cygwin's implementation
794	requires Windows support. */
795	if (!strcasecmp (charset, "GBK")
796	\|\| !strcasecmp (charset, "GB2312"))
797	{
798	strcpy (charset, charset[2] == '2' ? "GB2312" : "GBK");
799	mbc_max = 2;
800	l_wctomb = __gbk_wctomb;
801	l_mbtowc = __gbk_mbtowc;
802	}
803	else
804	#endif /* __CYGWIN__ */
805	/* GEORGIAN-PS and the alias without dash */
806	if (!strncasecmp (charset, "GEORGIAN", 8))
807	{
808	c = charset + 8;
809	if (*c == '-')
810	++c;
811	if (strcasecmp (c, "PS"))
812	FAIL;
813	val = 101;
814	strcpy (charset, "CP101");
815	mbc_max = 1;
816	#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
817	l_wctomb = __cp_wctomb (val);
818	l_mbtowc = __cp_mbtowc (val);
819	#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
820	l_wctomb = __ascii_wctomb;
821	l_mbtowc = __ascii_mbtowc;
822	#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
823	}
824	else
825	FAIL;
826	break;
827	case 'P':
828	case 'p':
829	/* PT154 */
830	if (strcasecmp (charset, "PT154"))
831	FAIL;
832	val = 102;
833	strcpy (charset, "CP102");
834	mbc_max = 1;
835	#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
836	l_wctomb = __cp_wctomb (val);
837	l_mbtowc = __cp_mbtowc (val);
838	#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
839	l_wctomb = __ascii_wctomb;
840	l_mbtowc = __ascii_mbtowc;
841	#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
842	break;
843	case 'T':
844	case 't':
845	if (strncasecmp (charset, "TIS", 3))
846	FAIL;
847	c = charset + 3;
848	if (*c == '-')
849	++c;
850	if (strcasecmp (c, "620"))
851	FAIL;
852	val = 874;
853	strcpy (charset, "CP874");
854	mbc_max = 1;
855	#ifdef _MB_EXTENDED_CHARSETS_WINDOWS
856	l_wctomb = __cp_wctomb (val);
857	l_mbtowc = __cp_mbtowc (val);
858	#else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
859	l_wctomb = __ascii_wctomb;
860	l_mbtowc = __ascii_mbtowc;
861	#endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
862	break;
863	#ifdef __CYGWIN__
864	/* Newlib does not provide Big5 and Cygwin's implementation
865	requires Windows support. */
866	case 'B':
867	case 'b':
868	if (strcasecmp (charset, "BIG5"))
869	FAIL;
870	strcpy (charset, "BIG5");
871	mbc_max = 2;
872	l_wctomb = __big5_wctomb;
873	l_mbtowc = __big5_mbtowc;
874	break;
875	#endif /* __CYGWIN__ */
876	default:
877	FAIL;
878	}
879	switch (category)
880	{
881	case LC_CTYPE:
882	#ifndef __HAVE_LOCALE_INFO__
883	strcpy (loc->ctype_codeset, charset);
884	loc->mb_cur_max[0] = mbc_max;
885	#endif
886	#ifdef __CYGWIN__
887	__mb_cur_max = mbc_max; /* Only for backward compat */
888	#endif
889	loc->wctomb = l_wctomb;
890	loc->mbtowc = l_mbtowc;
891	__set_ctype (loc, charset);
892	/* Determine the width for the "CJK Ambiguous Width" category of
893	characters. This is used in wcwidth(). Assume single width for
894	single-byte charsets, and double width for multi-byte charsets
895	other than UTF-8. For UTF-8, use double width for the East Asian
896	languages ("ja", "ko", "zh"), and single width for everything else.
897	Single width can also be forced with the "@cjknarrow" modifier. */
898	loc->cjk_lang = !cjknarrow && mbc_max > 1
899	&& (charset[0] != 'U'
900	\|\| strncmp (locale, "ja", 2) == 0
901	\|\| strncmp (locale, "ko", 2) == 0
902	\|\| strncmp (locale, "zh", 2) == 0);
903	#ifdef __HAVE_LOCALE_INFO__
904	ret = __ctype_load_locale (loc, locale, (void *) l_wctomb, charset,
905	mbc_max);
906	#endif /* __HAVE_LOCALE_INFO__ */
907	break;
908	case LC_MESSAGES:
909	#ifdef __HAVE_LOCALE_INFO__
910	ret = __messages_load_locale (loc, locale, (void *) l_wctomb, charset);
911	if (!ret)
912	#else
913	strcpy (loc->message_codeset, charset);
914	#endif /* __HAVE_LOCALE_INFO__ */
915	break;
916	#ifdef __HAVE_LOCALE_INFO__
917	#ifdef __CYGWIN__
918	/* Right now only Cygwin supports a __collate_load_locale function at all. */
919	case LC_COLLATE:
920	ret = __collate_load_locale (loc, locale, (void *) l_mbtowc, charset);
921	break;
922	#endif
923	case LC_MONETARY:
924	ret = __monetary_load_locale (loc, locale, (void *) l_wctomb, charset);
925	break;
926	case LC_NUMERIC:
927	ret = __numeric_load_locale (loc, locale, (void *) l_wctomb, charset);
928	break;
929	case LC_TIME:
930	ret = __time_load_locale (loc, locale, (void *) l_wctomb, charset);
931	break;
932	#endif /* __HAVE_LOCALE_INFO__ */
933	default:
934	break;
935	}
936	#ifdef __HAVE_LOCALE_INFO__
937	if (ret)
938	FAIL;
939	#endif /* __HAVE_LOCALE_INFO__ */
940	return strcpy(loc->categories[category], new_locale);
941	}
942
943	const char *
944	__get_locale_env (struct _reent *p, int category)
945	{
946	const char *env;
947
948	/* 1. check LC_ALL. */
949	env = _getenv_r (p, categories[0]);
950
951	/* 2. check LC_* */
952	if (env == NULL \|\| !*env)
953	env = _getenv_r (p, categories[category]);
954
955	/* 3. check LANG */
956	if (env == NULL \|\| !*env)
957	env = _getenv_r (p, "LANG");
958
959	/* 4. if none is set, fall to default locale */
960	if (env == NULL \|\| !*env)
961	env = __default_locale;
962
963	return env;
964	}
965	#endif /* _MB_CAPABLE */
966
967	int
968	__locale_mb_cur_max (void)
969	{
970	#ifdef __HAVE_LOCALE_INFO__
971	return __get_current_ctype_locale ()->mb_cur_max[0];
972	#else
973	return __get_current_locale ()->mb_cur_max[0];
974	#endif
975	}
976
977	const char *
978	__locale_ctype_ptr_l (struct __locale_t *locale)
979	{
980	return locale->ctype_ptr;
981	}
982
983	const char *
984	__locale_ctype_ptr (void)
985	{
986	return __get_current_locale ()->ctype_ptr;
987	}
988
989	#ifndef _REENT_ONLY
990
991	char *
992	setlocale (int category,
993	const char *locale)
994	{
995	return _setlocale_r (_REENT, category, locale);
996	}
997
998	#endif

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/libs/newlib/src/newlib/libc/locale/locale.c @ 690

Download in other formats: