Ruby
2.0.0p247(2013-06-27revision41674)
|
00001 /* -*- c-file-style: "gnu" -*- */ 00002 /* 00003 * This is a quick-and-dirty emulator of the nl_langinfo(CODESET) 00004 * function defined in the Single Unix Specification for those systems 00005 * (FreeBSD, etc.) that don't have one yet. It behaves as if it had 00006 * been called after setlocale(LC_CTYPE, ""), that is it looks at 00007 * the locale environment variables. 00008 * 00009 * http://www.opengroup.org/onlinepubs/7908799/xsh/langinfo.h.html 00010 * 00011 * Please extend it as needed and suggest improvements to the author. 00012 * This emulator will hopefully become redundant soon as 00013 * nl_langinfo(CODESET) becomes more widely implemented. 00014 * 00015 * Since the proposed Li18nux encoding name registry is still not mature, 00016 * the output follows the MIME registry where possible: 00017 * 00018 * http://www.iana.org/assignments/character-sets 00019 * 00020 * A possible autoconf test for the availability of nl_langinfo(CODESET) 00021 * can be found in 00022 * 00023 * http://www.cl.cam.ac.uk/~mgk25/unicode.html#activate 00024 * 00025 * Markus.Kuhn@cl.cam.ac.uk -- 2002-03-11 00026 * Permission to use, copy, modify, and distribute this software 00027 * for any purpose and without fee is hereby granted. The author 00028 * disclaims all warranties with regard to this software. 00029 * 00030 * Latest version: 00031 * 00032 * http://www.cl.cam.ac.uk/~mgk25/ucs/langinfo.c 00033 */ 00034 00035 #include "ruby/missing.h" 00036 #include <stdlib.h> 00037 #include <string.h> 00038 #if defined _WIN32 || defined __CYGWIN__ 00039 #include <windows.h> 00040 #if defined _WIN32 && !defined strncasecmp 00041 #define strncasecmp strnicmp 00042 #endif 00043 #endif 00044 #ifdef HAVE_LANGINFO_H 00045 #include "langinfo.h" 00046 #endif 00047 00048 #define C_CODESET "US-ASCII" /* Return this as the encoding of the 00049 * C/POSIX locale. Could as well one day 00050 * become "UTF-8". */ 00051 00052 #if defined _WIN32 || defined __CYGWIN__ 00053 #define JA_CODESET "Windows-31J" 00054 #else 00055 #define JA_CODESET "EUC-JP" 00056 #endif 00057 00058 #define digit(x) ((x) >= '0' && (x) <= '9') 00059 #define strstart(s, n) (strncasecmp((s), (n), strlen(n)) == 0) 00060 00061 static char buf[16]; 00062 00063 const char * 00064 nl_langinfo_codeset(void) 00065 { 00066 const char *l, *p; 00067 int n; 00068 00069 if (((l = getenv("LC_ALL")) && *l) || 00070 ((l = getenv("LC_CTYPE")) && *l) || 00071 ((l = getenv("LANG")) && *l)) { 00072 /* check standardized locales */ 00073 if (!strcmp(l, "C") || !strcmp(l, "POSIX")) 00074 return C_CODESET; 00075 /* check for encoding name fragment */ 00076 p = strchr(l, '.'); 00077 if (!p++) p = l; 00078 if (strstart(p, "UTF")) 00079 return "UTF-8"; 00080 if ((n = 5, strstart(p, "8859-")) || (n = 9, strstart(p, "ISO-8859-"))) { 00081 if (digit(p[n])) { 00082 p += n; 00083 memcpy(buf, "ISO-8859-\0\0", 12); 00084 buf[9] = *p++; 00085 if (digit(*p)) buf[10] = *p++; 00086 return buf; 00087 } 00088 } 00089 if (strstart(p, "KOI8-R")) return "KOI8-R"; 00090 if (strstart(p, "KOI8-U")) return "KOI8-U"; 00091 if (strstart(p, "620")) return "TIS-620"; 00092 if (strstart(p, "2312")) return "GB2312"; 00093 if (strstart(p, "HKSCS")) return "Big5HKSCS"; /* no MIME charset */ 00094 if (strstart(p, "BIG5")) return "Big5"; 00095 if (strstart(p, "GBK")) return "GBK"; /* no MIME charset */ 00096 if (strstart(p, "18030")) return "GB18030"; /* no MIME charset */ 00097 if (strstart(p, "Shift_JIS") || strstart(p, "SJIS")) return "Windows-31J"; 00098 /* check for conclusive modifier */ 00099 if (strstart(p, "euro")) return "ISO-8859-15"; 00100 /* check for language (and perhaps country) codes */ 00101 if (strstart(l, "zh_TW")) return "Big5"; 00102 if (strstart(l, "zh_HK")) return "Big5HKSCS"; /* no MIME charset */ 00103 if (strstart(l, "zh")) return "GB2312"; 00104 if (strstart(l, "ja")) return JA_CODESET; 00105 if (strstart(l, "ko")) return "EUC-KR"; 00106 if (strstart(l, "ru")) return "KOI8-R"; 00107 if (strstart(l, "uk")) return "KOI8-U"; 00108 if (strstart(l, "pl") || strstart(l, "hr") || 00109 strstart(l, "hu") || strstart(l, "cs") || 00110 strstart(l, "sk") || strstart(l, "sl")) return "ISO-8859-2"; 00111 if (strstart(l, "eo") || strstart(l, "mt")) return "ISO-8859-3"; 00112 if (strstart(l, "el")) return "ISO-8859-7"; 00113 if (strstart(l, "he")) return "ISO-8859-8"; 00114 if (strstart(l, "tr")) return "ISO-8859-9"; 00115 if (strstart(l, "th")) return "TIS-620"; /* or ISO-8859-11 */ 00116 if (strstart(l, "lt")) return "ISO-8859-13"; 00117 if (strstart(l, "cy")) return "ISO-8859-14"; 00118 if (strstart(l, "ro")) return "ISO-8859-2"; /* or ISO-8859-16 */ 00119 if (strstart(l, "am") || strstart(l, "vi")) return "UTF-8"; 00120 /* Send me further rules if you like, but don't forget that we are 00121 * *only* interested in locale naming conventions on platforms 00122 * that do not already provide an nl_langinfo(CODESET) implementation. */ 00123 } 00124 return NULL; 00125 } 00126 00127 #ifdef HAVE_LANGINFO_H 00128 char *nl_langinfo(nl_item item) 00129 { 00130 const char *codeset; 00131 if (item != CODESET) 00132 return NULL; 00133 codeset = nl_langinfo_codeset(); 00134 if (!codeset) codeset = C_CODESET; 00135 return (char *)codeset; 00136 } 00137 #endif 00138 00139 /* For a demo, compile with "gcc -W -Wall -o langinfo -D TEST langinfo.c" */ 00140 00141 #ifdef TEST 00142 #include <stdio.h> 00143 int main() 00144 { 00145 printf("%s\n", nl_langinfo(CODESET)); 00146 return 0; 00147 } 00148 #endif 00149