Ruby  2.0.0p247(2013-06-27revision41674)
ext/nkf/nkf-utf8/nkf.c
Go to the documentation of this file.
00001 /*
00002  * Copyright (c) 1987, Fujitsu LTD. (Itaru ICHIKAWA).
00003  * Copyright (c) 1996-2010, The nkf Project.
00004  *
00005  * This software is provided 'as-is', without any express or implied
00006  * warranty. In no event will the authors be held liable for any damages
00007  * arising from the use of this software.
00008  *
00009  * Permission is granted to anyone to use this software for any purpose,
00010  * including commercial applications, and to alter it and redistribute it
00011  * freely, subject to the following restrictions:
00012  *
00013  * 1. The origin of this software must not be misrepresented; you must not
00014  * claim that you wrote the original software. If you use this software
00015  * in a product, an acknowledgment in the product documentation would be
00016  * appreciated but is not required.
00017  *
00018  * 2. Altered source versions must be plainly marked as such, and must not be
00019  * misrepresented as being the original software.
00020  *
00021  * 3. This notice may not be removed or altered from any source distribution.
00022  */
00023 #define NKF_VERSION "2.1.3"
00024 #define NKF_RELEASE_DATE "2012-11-22"
00025 #define COPY_RIGHT \
00026     "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa).\n" \
00027     "Copyright (C) 1996-2012, The nkf Project."
00028 
00029 #include "config.h"
00030 #include "nkf.h"
00031 #include "utf8tbl.h"
00032 #ifdef __WIN32__
00033 #include <windows.h>
00034 #include <locale.h>
00035 #endif
00036 #if defined(__OS2__)
00037 # define INCL_DOS
00038 # define INCL_DOSERRORS
00039 # include <os2.h>
00040 #endif
00041 #include <assert.h>
00042 
00043 
00044 /* state of output_mode and input_mode
00045 
00046    c2           0 means ASCII
00047    JIS_X_0201_1976_K
00048    ISO_8859_1
00049    JIS_X_0208
00050    EOF      all termination
00051    c1           32bit data
00052 
00053  */
00054 
00055 /* MIME ENCODE */
00056 
00057 #define         FIXED_MIME      7
00058 #define         STRICT_MIME     8
00059 
00060 /* byte order */
00061 enum byte_order {
00062     ENDIAN_BIG    = 1,
00063     ENDIAN_LITTLE = 2,
00064     ENDIAN_2143   = 3,
00065     ENDIAN_3412   = 4
00066 };
00067 
00068 /* ASCII CODE */
00069 
00070 #define         BS      0x08
00071 #define         TAB     0x09
00072 #define         LF      0x0a
00073 #define         CR      0x0d
00074 #define         ESC     0x1b
00075 #define         SP      0x20
00076 #define         DEL     0x7f
00077 #define         SI      0x0f
00078 #define         SO      0x0e
00079 #define         SS2     0x8e
00080 #define         SS3     0x8f
00081 #define         CRLF    0x0D0A
00082 
00083 
00084 /* encodings */
00085 
00086 enum nkf_encodings {
00087     ASCII,
00088     ISO_8859_1,
00089     ISO_2022_JP,
00090     CP50220,
00091     CP50221,
00092     CP50222,
00093     ISO_2022_JP_1,
00094     ISO_2022_JP_3,
00095     ISO_2022_JP_2004,
00096     SHIFT_JIS,
00097     WINDOWS_31J,
00098     CP10001,
00099     EUC_JP,
00100     EUCJP_NKF,
00101     CP51932,
00102     EUCJP_MS,
00103     EUCJP_ASCII,
00104     SHIFT_JISX0213,
00105     SHIFT_JIS_2004,
00106     EUC_JISX0213,
00107     EUC_JIS_2004,
00108     UTF_8,
00109     UTF_8N,
00110     UTF_8_BOM,
00111     UTF8_MAC,
00112     UTF_16,
00113     UTF_16BE,
00114     UTF_16BE_BOM,
00115     UTF_16LE,
00116     UTF_16LE_BOM,
00117     UTF_32,
00118     UTF_32BE,
00119     UTF_32BE_BOM,
00120     UTF_32LE,
00121     UTF_32LE_BOM,
00122     BINARY,
00123     NKF_ENCODING_TABLE_SIZE,
00124     JIS_X_0201_1976_K = 0x1013, /* I */ /* JIS C 6220-1969 */
00125     /* JIS_X_0201_1976_R = 0x1014, */ /* J */ /* JIS C 6220-1969 */
00126     /* JIS_X_0208_1978   = 0x1040, */ /* @ */ /* JIS C 6226-1978 */
00127     /* JIS_X_0208_1983   = 0x1087, */ /* B */ /* JIS C 6226-1983 */
00128     JIS_X_0208        = 0x1168, /* @B */
00129     JIS_X_0212        = 0x1159, /* D */
00130     /* JIS_X_0213_2000_1 = 0x1228, */ /* O */
00131     JIS_X_0213_2 = 0x1229, /* P */
00132     JIS_X_0213_1 = 0x1233 /* Q */
00133 };
00134 
00135 static nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
00136 static nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
00137 static nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
00138 static nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0);
00139 static nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0);
00140 static void j_oconv(nkf_char c2, nkf_char c1);
00141 static void s_oconv(nkf_char c2, nkf_char c1);
00142 static void e_oconv(nkf_char c2, nkf_char c1);
00143 static void w_oconv(nkf_char c2, nkf_char c1);
00144 static void w_oconv16(nkf_char c2, nkf_char c1);
00145 static void w_oconv32(nkf_char c2, nkf_char c1);
00146 
00147 typedef struct {
00148     const char *name;
00149     nkf_char (*iconv)(nkf_char c2, nkf_char c1, nkf_char c0);
00150     void (*oconv)(nkf_char c2, nkf_char c1);
00151 } nkf_native_encoding;
00152 
00153 nkf_native_encoding NkfEncodingASCII =          { "ASCII", e_iconv, e_oconv };
00154 nkf_native_encoding NkfEncodingISO_2022_JP =    { "ISO-2022-JP", e_iconv, j_oconv };
00155 nkf_native_encoding NkfEncodingShift_JIS =      { "Shift_JIS", s_iconv, s_oconv };
00156 nkf_native_encoding NkfEncodingEUC_JP =         { "EUC-JP", e_iconv, e_oconv };
00157 nkf_native_encoding NkfEncodingUTF_8 =          { "UTF-8", w_iconv, w_oconv };
00158 nkf_native_encoding NkfEncodingUTF_16 =         { "UTF-16", w_iconv16, w_oconv16 };
00159 nkf_native_encoding NkfEncodingUTF_32 =         { "UTF-32", w_iconv32, w_oconv32 };
00160 
00161 typedef struct {
00162     const int id;
00163     const char *name;
00164     const nkf_native_encoding *base_encoding;
00165 } nkf_encoding;
00166 
00167 nkf_encoding nkf_encoding_table[] = {
00168     {ASCII,             "US-ASCII",             &NkfEncodingASCII},
00169     {ISO_8859_1,        "ISO-8859-1",           &NkfEncodingASCII},
00170     {ISO_2022_JP,       "ISO-2022-JP",          &NkfEncodingISO_2022_JP},
00171     {CP50220,           "CP50220",              &NkfEncodingISO_2022_JP},
00172     {CP50221,           "CP50221",              &NkfEncodingISO_2022_JP},
00173     {CP50222,           "CP50222",              &NkfEncodingISO_2022_JP},
00174     {ISO_2022_JP_1,     "ISO-2022-JP-1",        &NkfEncodingISO_2022_JP},
00175     {ISO_2022_JP_3,     "ISO-2022-JP-3",        &NkfEncodingISO_2022_JP},
00176     {ISO_2022_JP_2004,  "ISO-2022-JP-2004",     &NkfEncodingISO_2022_JP},
00177     {SHIFT_JIS,         "Shift_JIS",            &NkfEncodingShift_JIS},
00178     {WINDOWS_31J,       "Windows-31J",          &NkfEncodingShift_JIS},
00179     {CP10001,           "CP10001",              &NkfEncodingShift_JIS},
00180     {EUC_JP,            "EUC-JP",               &NkfEncodingEUC_JP},
00181     {EUCJP_NKF,         "eucJP-nkf",            &NkfEncodingEUC_JP},
00182     {CP51932,           "CP51932",              &NkfEncodingEUC_JP},
00183     {EUCJP_MS,          "eucJP-MS",             &NkfEncodingEUC_JP},
00184     {EUCJP_ASCII,       "eucJP-ASCII",          &NkfEncodingEUC_JP},
00185     {SHIFT_JISX0213,    "Shift_JISX0213",       &NkfEncodingShift_JIS},
00186     {SHIFT_JIS_2004,    "Shift_JIS-2004",       &NkfEncodingShift_JIS},
00187     {EUC_JISX0213,      "EUC-JISX0213",         &NkfEncodingEUC_JP},
00188     {EUC_JIS_2004,      "EUC-JIS-2004",         &NkfEncodingEUC_JP},
00189     {UTF_8,             "UTF-8",                &NkfEncodingUTF_8},
00190     {UTF_8N,            "UTF-8N",               &NkfEncodingUTF_8},
00191     {UTF_8_BOM,         "UTF-8-BOM",            &NkfEncodingUTF_8},
00192     {UTF8_MAC,          "UTF8-MAC",             &NkfEncodingUTF_8},
00193     {UTF_16,            "UTF-16",               &NkfEncodingUTF_16},
00194     {UTF_16BE,          "UTF-16BE",             &NkfEncodingUTF_16},
00195     {UTF_16BE_BOM,      "UTF-16BE-BOM",         &NkfEncodingUTF_16},
00196     {UTF_16LE,          "UTF-16LE",             &NkfEncodingUTF_16},
00197     {UTF_16LE_BOM,      "UTF-16LE-BOM",         &NkfEncodingUTF_16},
00198     {UTF_32,            "UTF-32",               &NkfEncodingUTF_32},
00199     {UTF_32BE,          "UTF-32BE",             &NkfEncodingUTF_32},
00200     {UTF_32BE_BOM,      "UTF-32BE-BOM",         &NkfEncodingUTF_32},
00201     {UTF_32LE,          "UTF-32LE",             &NkfEncodingUTF_32},
00202     {UTF_32LE_BOM,      "UTF-32LE-BOM",         &NkfEncodingUTF_32},
00203     {BINARY,            "BINARY",               &NkfEncodingASCII},
00204     {-1,                NULL,                   NULL}
00205 };
00206 
00207 struct {
00208     const char *name;
00209     const int id;
00210 } encoding_name_to_id_table[] = {
00211     {"US-ASCII",                ASCII},
00212     {"ASCII",                   ASCII},
00213     {"646",                     ASCII},
00214     {"ROMAN8",                  ASCII},
00215     {"ISO-2022-JP",             ISO_2022_JP},
00216     {"ISO2022JP-CP932",         CP50220},
00217     {"CP50220",                 CP50220},
00218     {"CP50221",                 CP50221},
00219     {"CSISO2022JP",             CP50221},
00220     {"CP50222",                 CP50222},
00221     {"ISO-2022-JP-1",           ISO_2022_JP_1},
00222     {"ISO-2022-JP-3",           ISO_2022_JP_3},
00223     {"ISO-2022-JP-2004",        ISO_2022_JP_2004},
00224     {"SHIFT_JIS",               SHIFT_JIS},
00225     {"SJIS",                    SHIFT_JIS},
00226     {"MS_Kanji",                SHIFT_JIS},
00227     {"PCK",                     SHIFT_JIS},
00228     {"WINDOWS-31J",             WINDOWS_31J},
00229     {"CSWINDOWS31J",            WINDOWS_31J},
00230     {"CP932",                   WINDOWS_31J},
00231     {"MS932",                   WINDOWS_31J},
00232     {"CP10001",                 CP10001},
00233     {"EUCJP",                   EUC_JP},
00234     {"EUC-JP",                  EUC_JP},
00235     {"EUCJP-NKF",               EUCJP_NKF},
00236     {"CP51932",                 CP51932},
00237     {"EUC-JP-MS",               EUCJP_MS},
00238     {"EUCJP-MS",                EUCJP_MS},
00239     {"EUCJPMS",                 EUCJP_MS},
00240     {"EUC-JP-ASCII",            EUCJP_ASCII},
00241     {"EUCJP-ASCII",             EUCJP_ASCII},
00242     {"SHIFT_JISX0213",          SHIFT_JISX0213},
00243     {"SHIFT_JIS-2004",          SHIFT_JIS_2004},
00244     {"EUC-JISX0213",            EUC_JISX0213},
00245     {"EUC-JIS-2004",            EUC_JIS_2004},
00246     {"UTF-8",                   UTF_8},
00247     {"UTF-8N",                  UTF_8N},
00248     {"UTF-8-BOM",               UTF_8_BOM},
00249     {"UTF8-MAC",                UTF8_MAC},
00250     {"UTF-8-MAC",               UTF8_MAC},
00251     {"UTF-16",                  UTF_16},
00252     {"UTF-16BE",                UTF_16BE},
00253     {"UTF-16BE-BOM",            UTF_16BE_BOM},
00254     {"UTF-16LE",                UTF_16LE},
00255     {"UTF-16LE-BOM",            UTF_16LE_BOM},
00256     {"UTF-32",                  UTF_32},
00257     {"UTF-32BE",                UTF_32BE},
00258     {"UTF-32BE-BOM",            UTF_32BE_BOM},
00259     {"UTF-32LE",                UTF_32LE},
00260     {"UTF-32LE-BOM",            UTF_32LE_BOM},
00261     {"BINARY",                  BINARY},
00262     {NULL,                      -1}
00263 };
00264 
00265 #if defined(DEFAULT_CODE_JIS)
00266 #define     DEFAULT_ENCIDX ISO_2022_JP
00267 #elif defined(DEFAULT_CODE_SJIS)
00268 #define     DEFAULT_ENCIDX SHIFT_JIS
00269 #elif defined(DEFAULT_CODE_WINDOWS_31J)
00270 #define     DEFAULT_ENCIDX WINDOWS_31J
00271 #elif defined(DEFAULT_CODE_EUC)
00272 #define     DEFAULT_ENCIDX EUC_JP
00273 #elif defined(DEFAULT_CODE_UTF8)
00274 #define     DEFAULT_ENCIDX UTF_8
00275 #endif
00276 
00277 
/*
 * ASCII-only character classification and conversion helpers.
 *
 * Every argument is fully parenthesized so that an expression argument
 * (e.g. `c | 0x20`) is classified as a whole.  The macros still evaluate
 * their argument more than once: do not pass expressions with side effects
 * such as `*p++`.
 */
#define         is_alnum(c)  \
    (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c)  ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
#define nkf_isblank(c) ((c) == SP || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == LF)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c) (SP<=(c) && (c)<='~')
#define nkf_isgraph(c) ('!'<=(c) && (c)<='~')
/* Value of a hexadecimal digit character; yields 0 for any non-hex character. */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
                    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
                    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0)
/* Upper-case hexadecimal digit for the low four bits of c. */
#define bin2hex(c) ("0123456789ABCDEF"[(c)&15])
/* True when the second-lowest byte of c2 (after truncation to unsigned short) is SS3. */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
/* True for CR, LF, and for characters strictly between SP and DEL other than
 * '?', '=', '_', '(', ')', '.' and '"' (0x22). */
#define nkf_noescape_mime(c) (((c) == CR) || ((c) == LF) || \
                              (((c) > SP) && ((c) < DEL) && ((c) != '?') && ((c) != '=') && ((c) != '_') \
                               && ((c) != '(') && ((c) != ')') && ((c) != '.') && ((c) != 0x22)))

#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
#define nkf_byte_jisx0201_katakana_p(c) (SP <= (c) && (c) <= 0x5F)
00303 
00304 #define         HOLD_SIZE       1024
00305 #if defined(INT_IS_SHORT)
00306 #define         IOBUF_SIZE      2048
00307 #else
00308 #define         IOBUF_SIZE      16384
00309 #endif
00310 
00311 #define         DEFAULT_J       'B'
00312 #define         DEFAULT_R       'B'
00313 
00314 
00315 #define         GETA1   0x22
00316 #define         GETA2   0x2e
00317 
00318 
00319 /* MIME preprocessor */
00320 
00321 #ifdef EASYWIN /*Easy Win */
00322 extern POINT _BufferSize;
00323 #endif
00324 
00325 struct input_code{
00326     const char *name;
00327     nkf_char stat;
00328     nkf_char score;
00329     nkf_char index;
00330     nkf_char buf[3];
00331     void (*status_func)(struct input_code *, nkf_char);
00332     nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);
00333     int _file_stat;
00334 };
00335 
00336 static const char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
00337 static nkf_encoding *input_encoding = NULL;
00338 static nkf_encoding *output_encoding = NULL;
00339 
00340 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
00341 /* UCS Mapping
00342  * 0: Shift_JIS, eucJP-ascii
00343  * 1: eucJP-ms
00344  * 2: CP932, CP51932
00345  * 3: CP10001
00346  */
00347 #define UCS_MAP_ASCII   0
00348 #define UCS_MAP_MS      1
00349 #define UCS_MAP_CP932   2
00350 #define UCS_MAP_CP10001 3
00351 static int ms_ucs_map_f = UCS_MAP_ASCII;
00352 #endif
00353 #ifdef UTF8_INPUT_ENABLE
00354 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
00355 static  int     no_cp932ext_f = FALSE;
00356 /* ignore ZERO WIDTH NO-BREAK SPACE */
00357 static  int     no_best_fit_chars_f = FALSE;
00358 static  int     input_endian = ENDIAN_BIG;
00359 static  int     input_bom_f = FALSE;
00360 static  nkf_char     unicode_subchar = '?'; /* the regular substitution character */
00361 static  void    (*encode_fallback)(nkf_char c) = NULL;
00362 static  void    w_status(struct input_code *, nkf_char);
00363 #endif
00364 #ifdef UTF8_OUTPUT_ENABLE
00365 static  int     output_bom_f = FALSE;
00366 static  int     output_endian = ENDIAN_BIG;
00367 #endif
00368 
00369 static  void    std_putc(nkf_char c);
00370 static  nkf_char     std_getc(FILE *f);
00371 static  nkf_char     std_ungetc(nkf_char c,FILE *f);
00372 
00373 static  nkf_char     broken_getc(FILE *f);
00374 static  nkf_char     broken_ungetc(nkf_char c,FILE *f);
00375 
00376 static  nkf_char     mime_getc(FILE *f);
00377 
00378 static void mime_putc(nkf_char c);
00379 
00380 /* buffers */
00381 
00382 #if !defined(PERL_XS) && !defined(WIN32DLL)
00383 static unsigned char   stdibuf[IOBUF_SIZE];
00384 static unsigned char   stdobuf[IOBUF_SIZE];
00385 #endif
00386 
00387 #define NKF_UNSPECIFIED (-TRUE)
00388 
00389 /* flags */
00390 static int             unbuf_f = FALSE;
00391 static int             estab_f = FALSE;
00392 static int             nop_f = FALSE;
00393 static int             binmode_f = TRUE;       /* binary mode */
00394 static int             rot_f = FALSE;          /* rot14/43 mode */
00395 static int             hira_f = FALSE;          /* hira/kata henkan */
00396 static int             alpha_f = FALSE;        /* convert JIS X 0208 alphabet to ASCII */
00397 static int             mime_f = MIME_DECODE_DEFAULT;   /* convert MIME B base64 or Q */
00398 static int             mime_decode_f = FALSE;  /* mime decode is explicitly on */
00399 static int             mimebuf_f = FALSE;      /* MIME buffered input */
00400 static int             broken_f = FALSE;       /* convert ESC-less broken JIS */
00401 static int             iso8859_f = FALSE;      /* ISO8859 through */
00402 static int             mimeout_f = FALSE;       /* base64 mode */
00403 static int             x0201_f = NKF_UNSPECIFIED;   /* convert JIS X 0201 */
00404 static int             iso2022jp_f = FALSE;    /* replace non ISO-2022-JP with GETA */
00405 
00406 #ifdef UNICODE_NORMALIZATION
00407 static int nfc_f = FALSE;
00408 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of ugetc */
00409 static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
00410 #endif
00411 
00412 #ifdef INPUT_OPTION
00413 static int cap_f = FALSE;
00414 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
00415 static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
00416 
00417 static int url_f = FALSE;
00418 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
00419 static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
00420 #endif
00421 
/*
 * Tagging scheme for values stored in an nkf_char.
 *
 * CLASS_MASK selects the top byte, VALUE_MASK the low 24 bits.  A value whose
 * top byte equals CLASS_UNICODE is treated as a Unicode value held in the low
 * 24 bits.  PREFIX_EUCG3 is OR-ed into a value by nkf_char_euc3_new().
 *
 * All macro arguments are parenthesized so that expression arguments such as
 * `hi | lo` are masked as a whole.
 */
#define PREFIX_EUCG3    NKF_INT32_C(0x8F00)
#define CLASS_MASK      NKF_INT32_C(0xFF000000)
#define CLASS_UNICODE   NKF_INT32_C(0x01000000)
#define VALUE_MASK      NKF_INT32_C(0x00FFFFFF)
#define UNICODE_BMP_MAX NKF_INT32_C(0x0000FFFF)
#define UNICODE_MAX     NKF_INT32_C(0x0010FFFF)
#define nkf_char_euc3_new(c) ((c) | PREFIX_EUCG3)
#define nkf_char_unicode_new(c) ((c) | CLASS_UNICODE)
#define nkf_char_unicode_p(c) (((c) & CLASS_MASK) == CLASS_UNICODE)
#define nkf_char_unicode_bmp_p(c) (((c) & VALUE_MASK) <= UNICODE_BMP_MAX)
#define nkf_char_unicode_value_p(c) (((c) & VALUE_MASK) <= UNICODE_MAX)

/* Combine a UTF-16 surrogate pair into a code point.
 * 0x35FDC00 == (0xD800 << 10) + 0xDC00 - 0x10000. */
#define UTF16_TO_UTF32(lead, trail) (((lead) << 10) + (trail) - NKF_INT32_C(0x35FDC00))
00435 
00436 #ifdef NUMCHAR_OPTION
00437 static int numchar_f = FALSE;
00438 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ugetc */
00439 static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
00440 #endif
00441 
00442 #ifdef CHECK_OPTION
00443 static int noout_f = FALSE;
00444 static void no_putc(nkf_char c);
00445 static int debug_f = FALSE;
00446 static void debug(const char *str);
00447 static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
00448 #endif
00449 
00450 static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */
00451 static  void    set_input_codename(const char *codename);
00452 
00453 #ifdef EXEC_IO
00454 static int exec_f = 0;
00455 #endif
00456 
00457 #ifdef SHIFTJIS_CP932
00458 /* invert IBM extended characters to others */
00459 static int cp51932_f = FALSE;
00460 
00461 /* invert NEC-selected IBM extended characters to IBM extended characters */
00462 static int cp932inv_f = TRUE;
00463 
00464 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
00465 #endif /* SHIFTJIS_CP932 */
00466 
00467 static int x0212_f = FALSE;
00468 static int x0213_f = FALSE;
00469 
00470 static unsigned char prefix_table[256];
00471 
00472 static void e_status(struct input_code *, nkf_char);
00473 static void s_status(struct input_code *, nkf_char);
00474 
00475 struct input_code input_code_list[] = {
00476     {"EUC-JP",    0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
00477     {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
00478 #ifdef UTF8_INPUT_ENABLE
00479     {"UTF-8",     0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
00480     {"UTF-16",     0, 0, 0, {0, 0, 0}, NULL, w_iconv16, 0},
00481     {"UTF-32",     0, 0, 0, {0, 0, 0}, NULL, w_iconv32, 0},
00482 #endif
00483     {NULL,        0, 0, 0, {0, 0, 0}, NULL, NULL, 0}
00484 };
00485 
00486 static int              mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */
00487 static int              base64_count = 0;
00488 
00489 /* X0208 -> ASCII converter */
00490 
00491 /* fold parameter */
00492 static int             f_line = 0;    /* chars in line */
00493 static int             f_prev = 0;
00494 static int             fold_preserve_f = FALSE; /* preserve new lines */
00495 static int             fold_f  = FALSE;
00496 static int             fold_len  = 0;
00497 
00498 /* options */
00499 static unsigned char   kanji_intro = DEFAULT_J;
00500 static unsigned char   ascii_intro = DEFAULT_R;
00501 
00502 /* Folding */
00503 
00504 #define FOLD_MARGIN  10
00505 #define DEFAULT_FOLD 60
00506 
00507 static int             fold_margin  = FOLD_MARGIN;
00508 
00509 /* process default */
00510 
00511 static nkf_char
00512 no_connection2(ARG_UNUSED nkf_char c2, ARG_UNUSED nkf_char c1, ARG_UNUSED nkf_char c0)
00513 {
00514     fprintf(stderr,"nkf internal module connection failure.\n");
00515     exit(EXIT_FAILURE);
00516     return 0; /* LINT */
00517 }
00518 
00519 static void
00520 no_connection(nkf_char c2, nkf_char c1)
00521 {
00522     no_connection2(c2,c1,0);
00523 }
00524 
00525 static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
00526 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
00527 
00528 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
00529 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
00530 static void (*o_eol_conv)(nkf_char c2,nkf_char c1) = no_connection;
00531 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
00532 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
00533 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
00534 static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
00535 
00536 /* static redirections */
00537 
00538 static  void   (*o_putc)(nkf_char c) = std_putc;
00539 
00540 static  nkf_char    (*i_getc)(FILE *f) = std_getc; /* general input */
00541 static  nkf_char    (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
00542 
00543 static  nkf_char    (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */
00544 static  nkf_char    (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
00545 
00546 static  void   (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
00547 
00548 static  nkf_char    (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
00549 static  nkf_char    (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
00550 
00551 /* for strict mime */
00552 static  nkf_char    (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
00553 static  nkf_char    (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
00554 
00555 /* Global states */
00556 static int output_mode = ASCII;    /* output kanji mode */
00557 static int input_mode =  ASCII;    /* input kanji mode */
00558 static int mime_decode_mode =   FALSE;    /* MIME mode B base64, Q hex */
00559 
00560 /* X0201 / X0208 conversion tables */
00561 
00562 /* X0201 kana conversion table */
00563 /* 90-9F A0-DF */
00564 static const unsigned char cv[]= {
00565     0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
00566     0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
00567     0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
00568     0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
00569     0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
00570     0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
00571     0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
00572     0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
00573     0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
00574     0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
00575     0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
00576     0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
00577     0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
00578     0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
00579     0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
00580     0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
00581     0x00,0x00};
00582 
00583 
00584 /* X0201 kana conversion table for dakuten */
00585 /* 90-9F A0-DF */
00586 static const unsigned char dv[]= {
00587     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00588     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00589     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00590     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00591     0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
00592     0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
00593     0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
00594     0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
00595     0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
00596     0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
00597     0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
00598     0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
00599     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00600     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00601     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00602     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00603     0x00,0x00};
00604 
00605 /* X0201 kana conversion table for han-dakuten */
00606 /* 90-9F A0-DF */
00607 static const unsigned char ev[]= {
00608     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00609     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00610     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00611     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00612     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00613     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00614     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00615     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00616     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00617     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00618     0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
00619     0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
00620     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00621     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00622     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00623     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00624     0x00,0x00};
00625 
00626 /* X0201 kana to X0213 conversion table for han-dakuten */
00627 /* 90-9F A0-DF */
00628 static const unsigned char ev_x0213[]= {
00629     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00630     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00631     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00632     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00633     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00634     0x00,0x00,0x00,0x00,0x25,0x77,0x25,0x78,
00635     0x25,0x79,0x25,0x7a,0x25,0x7b,0x00,0x00,
00636     0x00,0x00,0x00,0x00,0x25,0x7c,0x00,0x00,
00637     0x00,0x00,0x00,0x00,0x25,0x7d,0x00,0x00,
00638     0x25,0x7e,0x00,0x00,0x00,0x00,0x00,0x00,
00639     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00640     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00641     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00642     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00643     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00644     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00645     0x00,0x00};
00646 
00647 
00648 /* X0208 kigou conversion table */
00649 /* 0x8140 - 0x819e */
00650 static const unsigned char fv[] = {
00651 
00652     0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
00653     0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
00654     0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
00655     0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
00656     0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
00657     0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
00658     0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
00659     0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
00660     0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
00661     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
00662     0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
00663     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
00664 } ;
00665 
00666 
00667 
00668 static int option_mode = 0;
00669 static int             file_out_f = FALSE;
00670 #ifdef OVERWRITE
00671 static int             overwrite_f = FALSE;
00672 static int             preserve_time_f = FALSE;
00673 static int             backup_f = FALSE;
00674 static char            *backup_suffix = "";
00675 #endif
00676 
00677 static int eolmode_f = 0;   /* CR, LF, CRLF */
00678 static int input_eol = 0; /* 0: unestablished, EOF: MIXED */
00679 static nkf_char prev_cr = 0; /* CR or 0 */
00680 #ifdef EASYWIN /*Easy Win */
00681 static int             end_check;
00682 #endif /*Easy Win */
00683 
/*
 * malloc() wrapper that never returns NULL.
 *
 * A request for zero bytes is rounded up to one byte.  If the allocation
 * fails, a message is printed with perror() and the process exits with
 * EXIT_FAILURE.
 */
static void *
nkf_xmalloc(size_t size)
{
    void *mem = malloc(size != 0 ? size : 1);

    if (!mem) {
        perror("can't malloc");
        exit(EXIT_FAILURE);
    }
    return mem;
}
00699 
/*
 * realloc() wrapper that never returns NULL.
 *
 * A requested size of zero is rounded up to one byte.  If the reallocation
 * fails, a message is printed with perror() and the process exits with
 * EXIT_FAILURE.
 */
static void *
nkf_xrealloc(void *ptr, size_t size)
{
    void *resized = realloc(ptr, size != 0 ? size : 1);

    if (!resized) {
        perror("can't realloc");
        exit(EXIT_FAILURE);
    }
    return resized;
}
00713 
00714 #define nkf_xfree(ptr) free(ptr)
00715 
00716 static int
00717 nkf_str_caseeql(const char *src, const char *target)
00718 {
00719     int i;
00720     for (i = 0; src[i] && target[i]; i++) {
00721         if (nkf_toupper(src[i]) != nkf_toupper(target[i])) return FALSE;
00722     }
00723     if (src[i] || target[i]) return FALSE;
00724     else return TRUE;
00725 }
00726 
00727 static nkf_encoding*
00728 nkf_enc_from_index(int idx)
00729 {
00730     if (idx < 0 || NKF_ENCODING_TABLE_SIZE <= idx) {
00731         return 0;
00732     }
00733     return &nkf_encoding_table[idx];
00734 }
00735 
00736 static int
00737 nkf_enc_find_index(const char *name)
00738 {
00739     int i;
00740     if (name[0] == 'X' && *(name+1) == '-') name += 2;
00741     for (i = 0; encoding_name_to_id_table[i].id >= 0; i++) {
00742         if (nkf_str_caseeql(encoding_name_to_id_table[i].name, name)) {
00743             return encoding_name_to_id_table[i].id;
00744         }
00745     }
00746     return -1;
00747 }
00748 
00749 static nkf_encoding*
00750 nkf_enc_find(const char *name)
00751 {
00752     int idx = -1;
00753     idx = nkf_enc_find_index(name);
00754     if (idx < 0) return 0;
00755     return nkf_enc_from_index(idx);
00756 }
00757 
00758 #define nkf_enc_name(enc) (enc)->name
00759 #define nkf_enc_to_index(enc) (enc)->id
00760 #define nkf_enc_to_base_encoding(enc) (enc)->base_encoding
00761 #define nkf_enc_to_iconv(enc) nkf_enc_to_base_encoding(enc)->iconv
00762 #define nkf_enc_to_oconv(enc) nkf_enc_to_base_encoding(enc)->oconv
00763 #define nkf_enc_asciicompat(enc) (\
00764                                   nkf_enc_to_base_encoding(enc) == &NkfEncodingASCII ||\
00765                                   nkf_enc_to_base_encoding(enc) == &NkfEncodingISO_2022_JP)
00766 #define nkf_enc_unicode_p(enc) (\
00767                                 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_8 ||\
00768                                 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_16 ||\
00769                                 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_32)
00770 #define nkf_enc_cp5022x_p(enc) (\
00771                                 nkf_enc_to_index(enc) == CP50220 ||\
00772                                 nkf_enc_to_index(enc) == CP50221 ||\
00773                                 nkf_enc_to_index(enc) == CP50222)
00774 
00775 #ifdef DEFAULT_CODE_LOCALE
/*
 * Return the name of the character encoding of the current environment.
 *
 *  - With HAVE_LANGINFO_H: the result of nl_langinfo(CODESET).
 *  - On __WIN32__: "CP<n>" where <n> is the value returned by GetACP().
 *  - On 32-bit __OS2__: "Shift_JIS" for code pages 932 and 943, otherwise
 *    "CP<n>" for the first code page reported by DosQueryCp().
 *  - Otherwise (including OS/2 with INT_IS_SHORT): NULL.
 *
 * On Windows and OS/2 the returned pointer refers to a static buffer that is
 * overwritten by the next call.
 */
static const char*
nkf_locale_charmap(void)
{
#ifdef HAVE_LANGINFO_H
    return nl_langinfo(CODESET);
#elif defined(__WIN32__)
    static char buf[16];
    /* GetACP() returns an unsigned value, so format it with %u. */
    sprintf(buf, "CP%u", GetACP());
    return buf;
#elif defined(__OS2__)
# if defined(INT_IS_SHORT)
    /* OS/2 1.x */
    return NULL;
# else
    /* OS/2 32bit */
    static char buf[16];
    /* Zero-initialized so that a failed query yields "CP0" instead of
     * formatting an indeterminate value. */
    ULONG ulCP[1] = {0}, ulncp = 0;
    DosQueryCp(sizeof(ulCP), ulCP, &ulncp);
    if (ulCP[0] == 932 || ulCP[0] == 943)
        strcpy(buf, "Shift_JIS");
    else
        sprintf(buf, "CP%lu", ulCP[0]);
    return buf;
# endif
#endif
    return NULL;
}
00803 
00804 static nkf_encoding*
00805 nkf_locale_encoding()
00806 {
00807     nkf_encoding *enc = 0;
00808     const char *encname = nkf_locale_charmap();
00809     if (encname)
00810         enc = nkf_enc_find(encname);
00811     return enc;
00812 }
00813 #endif /* DEFAULT_CODE_LOCALE */
00814 
00815 static nkf_encoding*
00816 nkf_utf8_encoding()
00817 {
00818     return &nkf_encoding_table[UTF_8];
00819 }
00820 
00821 static nkf_encoding*
00822 nkf_default_encoding()
00823 {
00824     nkf_encoding *enc = 0;
00825 #ifdef DEFAULT_CODE_LOCALE
00826     enc = nkf_locale_encoding();
00827 #elif defined(DEFAULT_ENCIDX)
00828     enc = nkf_enc_from_index(DEFAULT_ENCIDX);
00829 #endif
00830     if (!enc) enc = nkf_utf8_encoding();
00831     return enc;
00832 }
00833 
00834 typedef struct {
00835     long capa;
00836     long len;
00837     nkf_char *ptr;
00838 } nkf_buf_t;
00839 
00840 static nkf_buf_t *
00841 nkf_buf_new(int length)
00842 {
00843     nkf_buf_t *buf = nkf_xmalloc(sizeof(nkf_buf_t));
00844     buf->ptr = nkf_xmalloc(sizeof(nkf_char) * length);
00845     buf->capa = length;
00846     buf->len = 0;
00847     return buf;
00848 }
00849 
#if 0
/* Currently compiled out: release the storage, then the descriptor. */
static void
nkf_buf_dispose(nkf_buf_t *buf)
{
    nkf_char *storage = buf->ptr;

    nkf_xfree(storage);
    nkf_xfree(buf);
}
#endif
00858 
/* Number of elements stored in buf, and whether that number is zero. */
#define nkf_buf_length(buf)  ((buf)->len)
#define nkf_buf_empty_p(buf) (nkf_buf_length(buf) == 0)
00861 
00862 static nkf_char
00863 nkf_buf_at(nkf_buf_t *buf, int index)
00864 {
00865     assert(index <= buf->len);
00866     return buf->ptr[index];
00867 }
00868 
00869 static void
00870 nkf_buf_clear(nkf_buf_t *buf)
00871 {
00872     buf->len = 0;
00873 }
00874 
00875 static void
00876 nkf_buf_push(nkf_buf_t *buf, nkf_char c)
00877 {
00878     if (buf->capa <= buf->len) {
00879         exit(EXIT_FAILURE);
00880     }
00881     buf->ptr[buf->len++] = c;
00882 }
00883 
00884 static nkf_char
00885 nkf_buf_pop(nkf_buf_t *buf)
00886 {
00887     assert(!nkf_buf_empty_p(buf));
00888     return buf->ptr[--buf->len];
00889 }
00890 
00891 /* Normalization Form C */
00892 #ifndef PERL_XS
00893 #ifdef WIN32DLL
00894 #define fprintf dllprintf
00895 #endif
00896 
00897 static void
00898 version(void)
00899 {
00900     fprintf(HELP_OUTPUT,"Network Kanji Filter Version " NKF_VERSION " (" NKF_RELEASE_DATE ") \n" COPY_RIGHT "\n");
00901 }
00902 
00903 static void
00904 usage(void)
00905 {
00906     fprintf(HELP_OUTPUT,
00907             "Usage:  nkf -[flags] [--] [in file] .. [out file for -O flag]\n"
00908 #ifdef UTF8_OUTPUT_ENABLE
00909             " j/s/e/w  Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
00910             "          UTF options is -w[8[0],{16,32}[{B,L}[0]]]\n"
00911 #else
00912 #endif
00913 #ifdef UTF8_INPUT_ENABLE
00914             " J/S/E/W  Specify input encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
00915             "          UTF option is -W[8,[16,32][B,L]]\n"
00916 #else
00917             " J/S/E    Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
00918 #endif
00919             );
00920     fprintf(HELP_OUTPUT,
00921             " m[BQSN0] MIME decode [B:base64,Q:quoted,S:strict,N:nonstrict,0:no decode]\n"
00922             " M[BQ]    MIME encode [B:base64 Q:quoted]\n"
00923             " f/F      Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n"
00924             );
00925     fprintf(HELP_OUTPUT,
00926             " Z[0-4]   Default/0: Convert JISX0208 Alphabet to ASCII\n"
00927             "          1: Kankaku to one space  2: to two spaces  3: HTML Entity\n"
00928             "          4: JISX0208 Katakana to JISX0201 Katakana\n"
00929             " X,x      Convert Halfwidth Katakana to Fullwidth or preserve it\n"
00930             );
00931     fprintf(HELP_OUTPUT,
00932             " O        Output to File (DEFAULT 'nkf.out')\n"
00933             " L[uwm]   Line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n"
00934             );
00935     fprintf(HELP_OUTPUT,
00936             " --ic=<encoding>        Specify the input encoding\n"
00937             " --oc=<encoding>        Specify the output encoding\n"
00938             " --hiragana --katakana  Hiragana/Katakana Conversion\n"
00939             " --katakana-hiragana    Converts each other\n"
00940             );
00941     fprintf(HELP_OUTPUT,
00942 #ifdef INPUT_OPTION
00943             " --{cap, url}-input     Convert hex after ':' or '%%'\n"
00944 #endif
00945 #ifdef NUMCHAR_OPTION
00946             " --numchar-input        Convert Unicode Character Reference\n"
00947 #endif
00948 #ifdef UTF8_INPUT_ENABLE
00949             " --fb-{skip, html, xml, perl, java, subchar}\n"
00950             "                        Specify unassigned character's replacement\n"
00951 #endif
00952             );
00953     fprintf(HELP_OUTPUT,
00954 #ifdef OVERWRITE
00955             " --in-place[=SUF]       Overwrite original files\n"
00956             " --overwrite[=SUF]      Preserve timestamp of original files\n"
00957 #endif
00958             " -g --guess             Guess the input code\n"
00959             " -v --version           Print the version\n"
00960             " --help/-V              Print this help / configuration\n"
00961             );
00962     version();
00963 }
00964 
00965 static void
00966 show_configuration(void)
00967 {
00968     fprintf(HELP_OUTPUT,
00969             "Summary of my nkf " NKF_VERSION " (" NKF_RELEASE_DATE ") configuration:\n"
00970             "  Compile-time options:\n"
00971             "    Compiled at:                 " __DATE__ " " __TIME__ "\n"
00972            );
00973     fprintf(HELP_OUTPUT,
00974             "    Default output encoding:     "
00975 #ifdef DEFAULT_CODE_LOCALE
00976             "LOCALE (%s)\n", nkf_enc_name(nkf_default_encoding())
00977 #elif defined(DEFAULT_ENCIDX)
00978             "CONFIG (%s)\n", nkf_enc_name(nkf_default_encoding())
00979 #else
00980             "NONE\n"
00981 #endif
00982            );
00983     fprintf(HELP_OUTPUT,
00984             "    Default output end of line:  "
00985 #if DEFAULT_NEWLINE == CR
00986             "CR"
00987 #elif DEFAULT_NEWLINE == CRLF
00988             "CRLF"
00989 #else
00990             "LF"
00991 #endif
00992             "\n"
00993             "    Decode MIME encoded string:  "
00994 #if MIME_DECODE_DEFAULT
00995             "ON"
00996 #else
00997             "OFF"
00998 #endif
00999             "\n"
01000             "    Convert JIS X 0201 Katakana: "
01001 #if X0201_DEFAULT
01002             "ON"
01003 #else
01004             "OFF"
01005 #endif
01006             "\n"
01007             "    --help, --version output:    "
01008 #if HELP_OUTPUT_HELP_OUTPUT
01009             "HELP_OUTPUT"
01010 #else
01011             "STDOUT"
01012 #endif
01013             "\n");
01014 }
01015 #endif /*PERL_XS*/
01016 
01017 #ifdef OVERWRITE
/*
 * Build the name of the backup file for `filename`.
 *
 * If `suffix` contains no '*', the result is filename followed by suffix.
 * Otherwise the result is suffix with every '*' replaced by filename.
 *
 * The result is allocated with nkf_xmalloc(); this function does not
 * release it.
 */
static char*
get_backup_filename(const char *suffix, const char *filename)
{
    /* keep lengths in size_t so long names are not truncated to int */
    const size_t filename_length = strlen(filename);
    const size_t suffix_length = strlen(suffix);
    size_t asterisk_count = 0;
    size_t i, j;
    char *backup_filename;

    for (i = 0; i < suffix_length; i++) {
        if (suffix[i] == '*') asterisk_count++;
    }

    if (asterisk_count == 0) {
        backup_filename = nkf_xmalloc(filename_length + suffix_length + 1);
        memcpy(backup_filename, filename, filename_length);
        /* +1 copies the terminating NUL of suffix as well */
        memcpy(backup_filename + filename_length, suffix, suffix_length + 1);
        return backup_filename;
    }

    /* every '*' (one byte) is replaced by filename_length bytes */
    backup_filename = nkf_xmalloc(suffix_length - asterisk_count
                                  + asterisk_count * filename_length + 1);
    for (i = 0, j = 0; i < suffix_length; i++) {
        if (suffix[i] == '*') {
            memcpy(backup_filename + j, filename, filename_length);
            j += filename_length;
        } else {
            backup_filename[j++] = suffix[i];
        }
    }
    backup_filename[j] = '\0';
    return backup_filename;
}
01052 #endif
01053 
01054 #ifdef UTF8_INPUT_ENABLE
01055 static void
01056 nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
01057 {
01058     int shift = 20;
01059     c &= VALUE_MASK;
01060     while(shift >= 0){
01061         if(c >= NKF_INT32_C(1)<<shift){
01062             while(shift >= 0){
01063                 (*f)(0, bin2hex(c>>shift));
01064                 shift -= 4;
01065             }
01066         }else{
01067             shift -= 4;
01068         }
01069     }
01070     return;
01071 }
01072 
01073 static void
01074 encode_fallback_html(nkf_char c)
01075 {
01076     (*oconv)(0, '&');
01077     (*oconv)(0, '#');
01078     c &= VALUE_MASK;
01079     if(c >= NKF_INT32_C(1000000))
01080         (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
01081     if(c >= NKF_INT32_C(100000))
01082         (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
01083     if(c >= 10000)
01084         (*oconv)(0, 0x30+(c/10000  )%10);
01085     if(c >= 1000)
01086         (*oconv)(0, 0x30+(c/1000   )%10);
01087     if(c >= 100)
01088         (*oconv)(0, 0x30+(c/100    )%10);
01089     if(c >= 10)
01090         (*oconv)(0, 0x30+(c/10     )%10);
01091     if(c >= 0)
01092         (*oconv)(0, 0x30+ c         %10);
01093     (*oconv)(0, ';');
01094     return;
01095 }
01096 
01097 static void
01098 encode_fallback_xml(nkf_char c)
01099 {
01100     (*oconv)(0, '&');
01101     (*oconv)(0, '#');
01102     (*oconv)(0, 'x');
01103     nkf_each_char_to_hex(oconv, c);
01104     (*oconv)(0, ';');
01105     return;
01106 }
01107 
01108 static void
01109 encode_fallback_java(nkf_char c)
01110 {
01111     (*oconv)(0, '\\');
01112     c &= VALUE_MASK;
01113     if(!nkf_char_unicode_bmp_p(c)){
01114         (*oconv)(0, 'U');
01115         (*oconv)(0, '0');
01116         (*oconv)(0, '0');
01117         (*oconv)(0, bin2hex(c>>20));
01118         (*oconv)(0, bin2hex(c>>16));
01119     }else{
01120         (*oconv)(0, 'u');
01121     }
01122     (*oconv)(0, bin2hex(c>>12));
01123     (*oconv)(0, bin2hex(c>> 8));
01124     (*oconv)(0, bin2hex(c>> 4));
01125     (*oconv)(0, bin2hex(c    ));
01126     return;
01127 }
01128 
01129 static void
01130 encode_fallback_perl(nkf_char c)
01131 {
01132     (*oconv)(0, '\\');
01133     (*oconv)(0, 'x');
01134     (*oconv)(0, '{');
01135     nkf_each_char_to_hex(oconv, c);
01136     (*oconv)(0, '}');
01137     return;
01138 }
01139 
01140 static void
01141 encode_fallback_subchar(nkf_char c)
01142 {
01143     c = unicode_subchar;
01144     (*oconv)((c>>8)&0xFF, c&0xFF);
01145     return;
01146 }
01147 #endif
01148 
/*
 * Long option table: option name and its alias string.  Many entries have
 * an empty alias; names ending in '=' take an argument.
 *
 * The order of the entries is kept exactly as it was (e.g. "guess="
 * precedes "guess").
 */
static const struct {
    const char *name;
    const char *alias;
} long_option[] = {
    /* encoding selection */
    {"ic=",               ""},
    {"oc=",               ""},
    {"base64",            "jMB"},
    {"euc",               "e"},
    {"euc-input",         "E"},
    {"fj",                "jm"},
    {"help",              ""},
    {"jis",               "j"},
    {"jis-input",         "J"},
    {"mac",               "sLm"},
    {"mime",              "jM"},
    {"mime-input",        "m"},
    {"msdos",             "sLw"},
    {"sjis",              "s"},
    {"sjis-input",        "S"},
    {"unix",              "eLu"},
    {"version",           "v"},
    {"windows",           "sLw"},
    /* kana conversion */
    {"hiragana",          "h1"},
    {"katakana",          "h2"},
    {"katakana-hiragana", "h3"},
    {"guess=",            ""},
    {"guess",             "g2"},
    {"cp932",             ""},
    {"no-cp932",          ""},
#ifdef X0212_ENABLE
    {"x0212",             ""},
#endif
#ifdef UTF8_OUTPUT_ENABLE
    {"utf8",              "w"},
    {"utf16",             "w16"},
    {"ms-ucs-map",        ""},
    {"fb-skip",           ""},
    {"fb-html",           ""},
    {"fb-xml",            ""},
    {"fb-perl",           ""},
    {"fb-java",           ""},
    {"fb-subchar",        ""},
    {"fb-subchar=",       ""},
#endif
#ifdef UTF8_INPUT_ENABLE
    {"utf8-input",        "W"},
    {"utf16-input",       "W16"},
    {"no-cp932ext",       ""},
    {"no-best-fit-chars", ""},
#endif
#ifdef UNICODE_NORMALIZATION
    {"utf8mac-input",     ""},
#endif
#ifdef OVERWRITE
    {"overwrite",         ""},
    {"overwrite=",        ""},
    {"in-place",          ""},
    {"in-place=",         ""},
#endif
#ifdef INPUT_OPTION
    {"cap-input",         ""},
    {"url-input",         ""},
#endif
#ifdef NUMCHAR_OPTION
    {"numchar-input",     ""},
#endif
#ifdef CHECK_OPTION
    {"no-output",         ""},
    {"debug",             ""},
#endif
#ifdef SHIFTJIS_CP932
    {"cp932inv",          ""},
#endif
#ifdef EXEC_IO
    {"exec-in",           ""},
    {"exec-out",          ""},
#endif
    {"prefix=",           ""},
};
01228 
01229 static void
01230 set_input_encoding(nkf_encoding *enc)
01231 {
01232     switch (nkf_enc_to_index(enc)) {
01233     case ISO_8859_1:
01234         iso8859_f = TRUE;
01235         break;
01236     case CP50221:
01237     case CP50222:
01238         if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;        /* -x specified implicitly */
01239     case CP50220:
01240 #ifdef SHIFTJIS_CP932
01241         cp51932_f = TRUE;
01242 #endif
01243 #ifdef UTF8_OUTPUT_ENABLE
01244         ms_ucs_map_f = UCS_MAP_CP932;
01245 #endif
01246         break;
01247     case ISO_2022_JP_1:
01248         x0212_f = TRUE;
01249         break;
01250     case ISO_2022_JP_3:
01251         x0212_f = TRUE;
01252         x0213_f = TRUE;
01253         break;
01254     case ISO_2022_JP_2004:
01255         x0212_f = TRUE;
01256         x0213_f = TRUE;
01257         break;
01258     case SHIFT_JIS:
01259         break;
01260     case WINDOWS_31J:
01261         if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;        /* -x specified implicitly */
01262 #ifdef SHIFTJIS_CP932
01263         cp51932_f = TRUE;
01264 #endif
01265 #ifdef UTF8_OUTPUT_ENABLE
01266         ms_ucs_map_f = UCS_MAP_CP932;
01267 #endif
01268         break;
01269         break;
01270     case CP10001:
01271 #ifdef SHIFTJIS_CP932
01272         cp51932_f = TRUE;
01273 #endif
01274 #ifdef UTF8_OUTPUT_ENABLE
01275         ms_ucs_map_f = UCS_MAP_CP10001;
01276 #endif
01277         break;
01278     case EUC_JP:
01279         break;
01280     case EUCJP_NKF:
01281         break;
01282     case CP51932:
01283         if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;        /* -x specified implicitly */
01284 #ifdef SHIFTJIS_CP932
01285         cp51932_f = TRUE;
01286 #endif
01287 #ifdef UTF8_OUTPUT_ENABLE
01288         ms_ucs_map_f = UCS_MAP_CP932;
01289 #endif
01290         break;
01291     case EUCJP_MS:
01292         if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;        /* -x specified implicitly */
01293 #ifdef SHIFTJIS_CP932
01294         cp51932_f = FALSE;
01295 #endif
01296 #ifdef UTF8_OUTPUT_ENABLE
01297         ms_ucs_map_f = UCS_MAP_MS;
01298 #endif
01299         break;
01300     case EUCJP_ASCII:
01301         if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;        /* -x specified implicitly */
01302 #ifdef SHIFTJIS_CP932
01303         cp51932_f = FALSE;
01304 #endif
01305 #ifdef UTF8_OUTPUT_ENABLE
01306         ms_ucs_map_f = UCS_MAP_ASCII;
01307 #endif
01308         break;
01309     case SHIFT_JISX0213:
01310     case SHIFT_JIS_2004:
01311         x0213_f = TRUE;
01312 #ifdef SHIFTJIS_CP932
01313         cp51932_f = FALSE;
01314         if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01315 #endif
01316         break;
01317     case EUC_JISX0213:
01318     case EUC_JIS_2004:
01319         x0213_f = TRUE;
01320 #ifdef SHIFTJIS_CP932
01321         cp51932_f = FALSE;
01322 #endif
01323         break;
01324 #ifdef UTF8_INPUT_ENABLE
01325 #ifdef UNICODE_NORMALIZATION
01326     case UTF8_MAC:
01327         nfc_f = TRUE;
01328         break;
01329 #endif
01330     case UTF_16:
01331     case UTF_16BE:
01332     case UTF_16BE_BOM:
01333         input_endian = ENDIAN_BIG;
01334         break;
01335     case UTF_16LE:
01336     case UTF_16LE_BOM:
01337         input_endian = ENDIAN_LITTLE;
01338         break;
01339     case UTF_32:
01340     case UTF_32BE:
01341     case UTF_32BE_BOM:
01342         input_endian = ENDIAN_BIG;
01343         break;
01344     case UTF_32LE:
01345     case UTF_32LE_BOM:
01346         input_endian = ENDIAN_LITTLE;
01347         break;
01348 #endif
01349     }
01350 }
01351 
01352 static void
01353 set_output_encoding(nkf_encoding *enc)
01354 {
01355     switch (nkf_enc_to_index(enc)) {
01356     case CP50220:
01357 #ifdef SHIFTJIS_CP932
01358         if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01359 #endif
01360 #ifdef UTF8_OUTPUT_ENABLE
01361         ms_ucs_map_f = UCS_MAP_CP932;
01362 #endif
01363         break;
01364     case CP50221:
01365         if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;        /* -x specified implicitly */
01366 #ifdef SHIFTJIS_CP932
01367         if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01368 #endif
01369 #ifdef UTF8_OUTPUT_ENABLE
01370         ms_ucs_map_f = UCS_MAP_CP932;
01371 #endif
01372         break;
01373     case ISO_2022_JP:
01374 #ifdef SHIFTJIS_CP932
01375         if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01376 #endif
01377         break;
01378     case ISO_2022_JP_1:
01379         x0212_f = TRUE;
01380 #ifdef SHIFTJIS_CP932
01381         if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01382 #endif
01383         break;
01384     case ISO_2022_JP_3:
01385     case ISO_2022_JP_2004:
01386         x0212_f = TRUE;
01387         x0213_f = TRUE;
01388 #ifdef SHIFTJIS_CP932
01389         if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01390 #endif
01391         break;
01392     case SHIFT_JIS:
01393         break;
01394     case WINDOWS_31J:
01395         if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;        /* -x specified implicitly */
01396 #ifdef UTF8_OUTPUT_ENABLE
01397         ms_ucs_map_f = UCS_MAP_CP932;
01398 #endif
01399         break;
01400     case CP10001:
01401 #ifdef UTF8_OUTPUT_ENABLE
01402         ms_ucs_map_f = UCS_MAP_CP10001;
01403 #endif
01404         break;
01405     case EUC_JP:
01406         x0212_f = TRUE;
01407 #ifdef SHIFTJIS_CP932
01408         if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01409 #endif
01410 #ifdef UTF8_OUTPUT_ENABLE
01411         ms_ucs_map_f = UCS_MAP_ASCII;
01412 #endif
01413         break;
01414     case EUCJP_NKF:
01415         x0212_f = FALSE;
01416 #ifdef SHIFTJIS_CP932
01417         if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01418 #endif
01419 #ifdef UTF8_OUTPUT_ENABLE
01420         ms_ucs_map_f = UCS_MAP_ASCII;
01421 #endif
01422         break;
01423     case CP51932:
01424         if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;        /* -x specified implicitly */
01425 #ifdef SHIFTJIS_CP932
01426         if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01427 #endif
01428 #ifdef UTF8_OUTPUT_ENABLE
01429         ms_ucs_map_f = UCS_MAP_CP932;
01430 #endif
01431         break;
01432     case EUCJP_MS:
01433         if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;        /* -x specified implicitly */
01434         x0212_f = TRUE;
01435 #ifdef UTF8_OUTPUT_ENABLE
01436         ms_ucs_map_f = UCS_MAP_MS;
01437 #endif
01438         break;
01439     case EUCJP_ASCII:
01440         if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;        /* -x specified implicitly */
01441         x0212_f = TRUE;
01442 #ifdef UTF8_OUTPUT_ENABLE
01443         ms_ucs_map_f = UCS_MAP_ASCII;
01444 #endif
01445         break;
01446     case SHIFT_JISX0213:
01447     case SHIFT_JIS_2004:
01448         x0213_f = TRUE;
01449 #ifdef SHIFTJIS_CP932
01450         if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01451 #endif
01452         break;
01453     case EUC_JISX0213:
01454     case EUC_JIS_2004:
01455         x0212_f = TRUE;
01456         x0213_f = TRUE;
01457 #ifdef SHIFTJIS_CP932
01458         if (cp932inv_f == TRUE) cp932inv_f = FALSE;
01459 #endif
01460         break;
01461 #ifdef UTF8_OUTPUT_ENABLE
01462     case UTF_8_BOM:
01463         output_bom_f = TRUE;
01464         break;
01465     case UTF_16:
01466     case UTF_16BE_BOM:
01467         output_bom_f = TRUE;
01468         break;
01469     case UTF_16LE:
01470         output_endian = ENDIAN_LITTLE;
01471         output_bom_f = FALSE;
01472         break;
01473     case UTF_16LE_BOM:
01474         output_endian = ENDIAN_LITTLE;
01475         output_bom_f = TRUE;
01476         break;
01477     case UTF_32:
01478     case UTF_32BE_BOM:
01479         output_bom_f = TRUE;
01480         break;
01481     case UTF_32LE:
01482         output_endian = ENDIAN_LITTLE;
01483         output_bom_f = FALSE;
01484         break;
01485     case UTF_32LE_BOM:
01486         output_endian = ENDIAN_LITTLE;
01487         output_bom_f = TRUE;
01488         break;
01489 #endif
01490     }
01491 }
01492 
01493 static struct input_code*
01494 find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
01495 {
01496     if (iconv_func){
01497         struct input_code *p = input_code_list;
01498         while (p->name){
01499             if (iconv_func == p->iconv_func){
01500                 return p;
01501             }
01502             p++;
01503         }
01504     }
01505     return 0;
01506 }
01507 
01508 static void
01509 set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
01510 {
01511 #ifdef INPUT_CODE_FIX
01512     if (f || !input_encoding)
01513 #endif
01514         if (estab_f != f){
01515             estab_f = f;
01516         }
01517 
01518     if (iconv_func
01519 #ifdef INPUT_CODE_FIX
01520         && (f == -TRUE || !input_encoding) /* -TRUE means "FORCE" */
01521 #endif
01522        ){
01523         iconv = iconv_func;
01524     }
01525 #ifdef CHECK_OPTION
01526     if (estab_f && iconv_for_check != iconv){
01527         struct input_code *p = find_inputcode_byfunc(iconv);
01528         if (p){
01529             set_input_codename(p->name);
01530             debug(p->name);
01531         }
01532         iconv_for_check = iconv;
01533     }
01534 #endif
01535 }
01536 
01537 #ifdef X0212_ENABLE
01538 static nkf_char
01539 x0212_shift(nkf_char c)
01540 {
01541     nkf_char ret = c;
01542     c &= 0x7f;
01543     if (is_eucg3(ret)){
01544         if (0x75 <= c && c <= 0x7f){
01545             ret = c + (0x109 - 0x75);
01546         }
01547     }else{
01548         if (0x75 <= c && c <= 0x7f){
01549             ret = c + (0x113 - 0x75);
01550         }
01551     }
01552     return ret;
01553 }
01554 
01555 
01556 static nkf_char
01557 x0212_unshift(nkf_char c)
01558 {
01559     nkf_char ret = c;
01560     if (0x7f <= c && c <= 0x88){
01561         ret = c + (0x75 - 0x7f);
01562     }else if (0x89 <= c && c <= 0x92){
01563         ret = PREFIX_EUCG3 | 0x80 | (c + (0x75 - 0x89));
01564     }
01565     return ret;
01566 }
01567 #endif /* X0212_ENABLE */
01568 
01569 static int
01570 is_x0213_2_in_x0212(nkf_char c1)
01571 {
01572     static const char x0213_2_table[] =
01573         {0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1};
01574     int ku = c1 - 0x20;
01575     if (ku <= 15)
01576         return x0213_2_table[ku]; /* 1, 3-5, 8, 12-15 */
01577     if (78 <= ku && ku <= 94)
01578         return 1;
01579     return 0;
01580 }
01581 
01582 static nkf_char
01583 e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
01584 {
01585     nkf_char ndx;
01586     if (is_eucg3(c2)){
01587         ndx = c2 & 0x7f;
01588         if (x0213_f && is_x0213_2_in_x0212(ndx)){
01589             if((0x21 <= ndx && ndx <= 0x2F)){
01590                 if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
01591                 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
01592                 return 0;
01593             }else if(0x6E <= ndx && ndx <= 0x7E){
01594                 if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
01595                 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
01596                 return 0;
01597             }
01598             return 1;
01599         }
01600 #ifdef X0212_ENABLE
01601         else if(nkf_isgraph(ndx)){
01602             nkf_char val = 0;
01603             const unsigned short *ptr;
01604             ptr = x0212_shiftjis[ndx - 0x21];
01605             if (ptr){
01606                 val = ptr[(c1 & 0x7f) - 0x21];
01607             }
01608             if (val){
01609                 c2 = val >> 8;
01610                 c1 = val & 0xff;
01611                 if (p2) *p2 = c2;
01612                 if (p1) *p1 = c1;
01613                 return 0;
01614             }
01615             c2 = x0212_shift(c2);
01616         }
01617 #endif /* X0212_ENABLE */
01618     }
01619     if(0x7F < c2) return 1;
01620     if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
01621     if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
01622     return 0;
01623 }
01624 
01625 static nkf_char
01626 s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
01627 {
01628 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
01629     nkf_char val;
01630 #endif
01631     static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
01632     if (0xFC < c1) return 1;
01633 #ifdef SHIFTJIS_CP932
01634     if (!cp932inv_f && !x0213_f && is_ibmext_in_sjis(c2)){
01635         val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
01636         if (val){
01637             c2 = val >> 8;
01638             c1 = val & 0xff;
01639         }
01640     }
01641     if (cp932inv_f
01642         && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
01643         val = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
01644         if (val){
01645             c2 = val >> 8;
01646             c1 = val & 0xff;
01647         }
01648     }
01649 #endif /* SHIFTJIS_CP932 */
01650 #ifdef X0212_ENABLE
01651     if (!x0213_f && is_ibmext_in_sjis(c2)){
01652         val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
01653         if (val){
01654             if (val > 0x7FFF){
01655                 c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
01656                 c1 = val & 0xff;
01657             }else{
01658                 c2 = val >> 8;
01659                 c1 = val & 0xff;
01660             }
01661             if (p2) *p2 = c2;
01662             if (p1) *p1 = c1;
01663             return 0;
01664         }
01665     }
01666 #endif
01667     if(c2 >= 0x80){
01668         if(x0213_f && c2 >= 0xF0){
01669             if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
01670                 c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
01671             }else{ /* 78<=k<=94 */
01672                 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
01673                 if (0x9E < c1) c2++;
01674             }
01675         }else{
01676 #define         SJ0162  0x00e1          /* 01 - 62 ku offset */
01677 #define         SJ6394  0x0161          /* 63 - 94 ku offset */
01678             c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
01679             if (0x9E < c1) c2++;
01680         }
01681         if (c1 < 0x9F)
01682             c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
01683         else {
01684             c1 = c1 - 0x7E;
01685         }
01686     }
01687 
01688 #ifdef X0212_ENABLE
01689     c2 = x0212_unshift(c2);
01690 #endif
01691     if (p2) *p2 = c2;
01692     if (p1) *p1 = c1;
01693     return 0;
01694 }
01695 
01696 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
01697 static void
01698 nkf_unicode_to_utf8(nkf_char val, nkf_char *p1, nkf_char *p2, nkf_char *p3, nkf_char *p4)
01699 {
01700     val &= VALUE_MASK;
01701     if (val < 0x80){
01702         *p1 = val;
01703         *p2 = 0;
01704         *p3 = 0;
01705         *p4 = 0;
01706     }else if (val < 0x800){
01707         *p1 = 0xc0 | (val >> 6);
01708         *p2 = 0x80 | (val & 0x3f);
01709         *p3 = 0;
01710         *p4 = 0;
01711     } else if (nkf_char_unicode_bmp_p(val)) {
01712         *p1 = 0xe0 |  (val >> 12);
01713         *p2 = 0x80 | ((val >>  6) & 0x3f);
01714         *p3 = 0x80 | ( val        & 0x3f);
01715         *p4 = 0;
01716     } else if (nkf_char_unicode_value_p(val)) {
01717         *p1 = 0xf0 |  (val >> 18);
01718         *p2 = 0x80 | ((val >> 12) & 0x3f);
01719         *p3 = 0x80 | ((val >>  6) & 0x3f);
01720         *p4 = 0x80 | ( val        & 0x3f);
01721     } else {
01722         *p1 = 0;
01723         *p2 = 0;
01724         *p3 = 0;
01725         *p4 = 0;
01726     }
01727 }
01728 
01729 static nkf_char
01730 nkf_utf8_to_unicode(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
01731 {
01732     nkf_char wc;
01733     if (c1 <= 0x7F) {
01734         /* single byte */
01735         wc = c1;
01736     }
01737     else if (c1 <= 0xC1) {
01738         /* trail byte or invalid */
01739         return -1;
01740     }
01741     else if (c1 <= 0xDF) {
01742         /* 2 bytes */
01743         wc  = (c1 & 0x1F) << 6;
01744         wc |= (c2 & 0x3F);
01745     }
01746     else if (c1 <= 0xEF) {
01747         /* 3 bytes */
01748         wc  = (c1 & 0x0F) << 12;
01749         wc |= (c2 & 0x3F) << 6;
01750         wc |= (c3 & 0x3F);
01751     }
01752     else if (c2 <= 0xF4) {
01753         /* 4 bytes */
01754         wc  = (c1 & 0x0F) << 18;
01755         wc |= (c2 & 0x3F) << 12;
01756         wc |= (c3 & 0x3F) << 6;
01757         wc |= (c4 & 0x3F);
01758     }
01759     else {
01760         return -1;
01761     }
01762     return wc;
01763 }
01764 #endif
01765 
01766 #ifdef UTF8_INPUT_ENABLE
01767 static int
01768 unicode_to_jis_common2(nkf_char c1, nkf_char c0,
01769                        const unsigned short *const *pp, nkf_char psize,
01770                        nkf_char *p2, nkf_char *p1)
01771 {
01772     nkf_char c2;
01773     const unsigned short *p;
01774     unsigned short val;
01775 
01776     if (pp == 0) return 1;
01777 
01778     c1 -= 0x80;
01779     if (c1 < 0 || psize <= c1) return 1;
01780     p = pp[c1];
01781     if (p == 0)  return 1;
01782 
01783     c0 -= 0x80;
01784     if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
01785     val = p[c0];
01786     if (val == 0) return 1;
01787     if (no_cp932ext_f && (
01788                           (val>>8) == 0x2D || /* NEC special characters */
01789                           val > NKF_INT32_C(0xF300) /* IBM extended characters */
01790                          )) return 1;
01791 
01792     c2 = val >> 8;
01793     if (val > 0x7FFF){
01794         c2 &= 0x7f;
01795         c2 |= PREFIX_EUCG3;
01796     }
01797     if (c2 == SO) c2 = JIS_X_0201_1976_K;
01798     c1 = val & 0xFF;
01799     if (p2) *p2 = c2;
01800     if (p1) *p1 = c1;
01801     return 0;
01802 }
01803 
01804 static int
01805 unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
01806 {
01807     const unsigned short *const *pp;
01808     const unsigned short *const *const *ppp;
01809     static const char no_best_fit_chars_table_C2[] =
01810     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
01811         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
01812         1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
01813         0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
01814     static const char no_best_fit_chars_table_C2_ms[] =
01815     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
01816         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
01817         1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
01818         0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
01819     static const char no_best_fit_chars_table_932_C2[] =
01820     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
01821         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
01822         1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
01823         0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
01824     static const char no_best_fit_chars_table_932_C3[] =
01825     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
01826         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
01827         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
01828         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
01829     nkf_char ret = 0;
01830 
01831     if(c2 < 0x80){
01832         *p2 = 0;
01833         *p1 = c2;
01834     }else if(c2 < 0xe0){
01835         if(no_best_fit_chars_f){
01836             if(ms_ucs_map_f == UCS_MAP_CP932){
01837                 switch(c2){
01838                 case 0xC2:
01839                     if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
01840                     break;
01841                 case 0xC3:
01842                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
01843                     break;
01844                 }
01845             }else if(!cp932inv_f){
01846                 switch(c2){
01847                 case 0xC2:
01848                     if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
01849                     break;
01850                 case 0xC3:
01851                     if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
01852                     break;
01853                 }
01854             }else if(ms_ucs_map_f == UCS_MAP_MS){
01855                 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
01856             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
01857                 switch(c2){
01858                 case 0xC2:
01859                     switch(c1){
01860                     case 0xA2:
01861                     case 0xA3:
01862                     case 0xA5:
01863                     case 0xA6:
01864                     case 0xAC:
01865                     case 0xAF:
01866                     case 0xB8:
01867                         return 1;
01868                     }
01869                     break;
01870                 }
01871             }
01872         }
01873         pp =
01874             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
01875             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
01876             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
01877             x0213_f ? utf8_to_euc_2bytes_x0213 :
01878             utf8_to_euc_2bytes;
01879         ret =  unicode_to_jis_common2(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
01880     }else if(c0 < 0xF0){
01881         if(no_best_fit_chars_f){
01882             if(ms_ucs_map_f == UCS_MAP_CP932){
01883                 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
01884             }else if(ms_ucs_map_f == UCS_MAP_MS){
01885                 switch(c2){
01886                 case 0xE2:
01887                     switch(c1){
01888                     case 0x80:
01889                         if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
01890                         break;
01891                     case 0x88:
01892                         if(c0 == 0x92) return 1;
01893                         break;
01894                     }
01895                     break;
01896                 case 0xE3:
01897                     if(c1 == 0x80 || c0 == 0x9C) return 1;
01898                     break;
01899                 }
01900             }else if(ms_ucs_map_f == UCS_MAP_CP10001){
01901                 switch(c2){
01902                 case 0xE3:
01903                     switch(c1){
01904                     case 0x82:
01905                         if(c0 == 0x94) return 1;
01906                         break;
01907                     case 0x83:
01908                         if(c0 == 0xBB) return 1;
01909                         break;
01910                     }
01911                     break;
01912                 }
01913             }else{
01914                 switch(c2){
01915                 case 0xE2:
01916                     switch(c1){
01917                     case 0x80:
01918                         if(c0 == 0x95) return 1;
01919                         break;
01920                     case 0x88:
01921                         if(c0 == 0xA5) return 1;
01922                         break;
01923                     }
01924                     break;
01925                 case 0xEF:
01926                     switch(c1){
01927                     case 0xBC:
01928                         if(c0 == 0x8D) return 1;
01929                         break;
01930                     case 0xBD:
01931                         if(c0 == 0x9E && !cp932inv_f) return 1;
01932                         break;
01933                     case 0xBF:
01934                         if(0xA0 <= c0 && c0 <= 0xA5) return 1;
01935                         break;
01936                     }
01937                     break;
01938                 }
01939             }
01940         }
01941         ppp =
01942             ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
01943             ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
01944             ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
01945             x0213_f ? utf8_to_euc_3bytes_x0213 :
01946             utf8_to_euc_3bytes;
01947         ret = unicode_to_jis_common2(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
01948     }else return -1;
01949 #ifdef SHIFTJIS_CP932
01950     if (!ret && !cp932inv_f && is_eucg3(*p2)) {
01951         nkf_char s2, s1;
01952         if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
01953             s2e_conv(s2, s1, p2, p1);
01954         }else{
01955             ret = 1;
01956         }
01957     }
01958 #endif
01959     return ret;
01960 }
01961 
01962 #ifdef UTF8_OUTPUT_ENABLE
/*
 * Scan the first `size` rows of `tbl` for a row whose column 0 equals `euc`
 * and, on a hit, store that row's column 2 into `low`.
 * The caller must have an assignable variable named `low` in scope; it is
 * left untouched when no row matches.
 * Every argument is parenthesized so that expression arguments expand with
 * the intended precedence.
 */
#define X0213_SURROGATE_FIND(tbl, size, euc) do { \
        int i; \
        for (i = 0; i < (size); i++) \
            if ((tbl)[i][0] == (euc)) { \
                low = (tbl)[i][2]; \
                break; \
            } \
    } while (0)
01971 
01972 static nkf_char
01973 e2w_conv(nkf_char c2, nkf_char c1)
01974 {
01975     const unsigned short *p;
01976 
01977     if (c2 == JIS_X_0201_1976_K) {
01978         if (ms_ucs_map_f == UCS_MAP_CP10001) {
01979             switch (c1) {
01980             case 0x20:
01981                 return 0xA0;
01982             case 0x7D:
01983                 return 0xA9;
01984             }
01985         }
01986         p = euc_to_utf8_1byte;
01987 #ifdef X0212_ENABLE
01988     } else if (is_eucg3(c2)){
01989         if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
01990             return 0xA6;
01991         }
01992         c2 = (c2&0x7f) - 0x21;
01993         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
01994             p =
01995                 x0213_f ? x0212_to_utf8_2bytes_x0213[c2] :
01996                 x0212_to_utf8_2bytes[c2];
01997         else
01998             return 0;
01999 #endif
02000     } else {
02001         c2 &= 0x7f;
02002         c2 = (c2&0x7f) - 0x21;
02003         if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
02004             p =
02005                 x0213_f ? euc_to_utf8_2bytes_x0213[c2] :
02006                 ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
02007                 ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
02008                 euc_to_utf8_2bytes_ms[c2];
02009         else
02010             return 0;
02011     }
02012     if (!p) return 0;
02013     c1 = (c1 & 0x7f) - 0x21;
02014     if (0<=c1 && c1<sizeof_euc_to_utf8_1byte) {
02015         nkf_char val = p[c1];
02016         if (x0213_f && 0xD800<=val && val<=0xDBFF) {
02017             nkf_char euc = (c2+0x21)<<8 | (c1+0x21);
02018             nkf_char low = 0;
02019             if (p==x0212_to_utf8_2bytes_x0213[c2]) {
02020                 X0213_SURROGATE_FIND(x0213_2_surrogate_table, sizeof_x0213_2_surrogate_table, euc);
02021             } else {
02022                 X0213_SURROGATE_FIND(x0213_1_surrogate_table, sizeof_x0213_1_surrogate_table, euc);
02023             }
02024             if (!low) return 0;
02025             return UTF16_TO_UTF32(val, low);
02026         } else {
02027             return val;
02028         }
02029     }
02030     return 0;
02031 }
02032 
02033 static nkf_char
02034 e2w_combining(nkf_char comb, nkf_char c2, nkf_char c1)
02035 {
02036     nkf_char euc;
02037     int i;
02038     for (i = 0; i < sizeof_x0213_combining_chars; i++)
02039         if (x0213_combining_chars[i] == comb)
02040             break;
02041     if (i >= sizeof_x0213_combining_chars)
02042         return 0;
02043     euc = (c2&0x7f)<<8 | (c1&0x7f);
02044     for (i = 0; i < sizeof_x0213_combining_table; i++)
02045         if (x0213_combining_table[i][0] == euc)
02046             return x0213_combining_table[i][1];
02047     return 0;
02048 }
02049 #endif
02050 
02051 static nkf_char
02052 w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
02053 {
02054     nkf_char ret = 0;
02055 
02056     if (!c1){
02057         *p2 = 0;
02058         *p1 = c2;
02059     }else if (0xc0 <= c2 && c2 <= 0xef) {
02060         ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
02061 #ifdef NUMCHAR_OPTION
02062         if (ret > 0){
02063             if (p2) *p2 = 0;
02064             if (p1) *p1 = nkf_char_unicode_new(nkf_utf8_to_unicode(c2, c1, c0, 0));
02065             ret = 0;
02066         }
02067 #endif
02068     }
02069     return ret;
02070 }
02071 
02072 #ifdef UTF8_INPUT_ENABLE
02073 static nkf_char
02074 w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
02075 {
02076     nkf_char c1, c2, c3, c4;
02077     nkf_char ret = 0;
02078     val &= VALUE_MASK;
02079     if (val < 0x80) {
02080         *p2 = 0;
02081         *p1 = val;
02082     }
02083     else if (nkf_char_unicode_bmp_p(val)){
02084         nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
02085         ret =  unicode_to_jis_common(c1, c2, c3, p2, p1);
02086         if (ret > 0){
02087             *p2 = 0;
02088             *p1 = nkf_char_unicode_new(val);
02089             ret = 0;
02090         }
02091     }
02092     else {
02093         int i;
02094         if (x0213_f) {
02095             c1 = (val >> 10) + NKF_INT32_C(0xD7C0);   /* high surrogate */
02096             c2 = (val & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
02097             for (i = 0; i < sizeof_x0213_1_surrogate_table; i++)
02098                 if (x0213_1_surrogate_table[i][1] == c1 && x0213_1_surrogate_table[i][2] == c2) {
02099                     val = x0213_1_surrogate_table[i][0];
02100                     *p2 = val >> 8;
02101                     *p1 = val & 0xFF;
02102                     return 0;
02103                 }
02104             for (i = 0; i < sizeof_x0213_2_surrogate_table; i++)
02105                 if (x0213_2_surrogate_table[i][1] == c1 && x0213_2_surrogate_table[i][2] == c2) {
02106                     val = x0213_2_surrogate_table[i][0];
02107                     *p2 = PREFIX_EUCG3 | (val >> 8);
02108                     *p1 = val & 0xFF;
02109                     return 0;
02110                 }
02111         }
02112         *p2 = 0;
02113         *p1 = nkf_char_unicode_new(val);
02114     }
02115     return ret;
02116 }
02117 #endif
02118 
02119 static nkf_char
02120 e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
02121 {
02122     if (c2 == JIS_X_0201_1976_K || c2 == SS2){
02123         if (iso2022jp_f && !x0201_f) {
02124             c2 = GETA1; c1 = GETA2;
02125         } else {
02126             c2 = JIS_X_0201_1976_K;
02127             c1 &= 0x7f;
02128         }
02129 #ifdef X0212_ENABLE
02130     }else if (c2 == 0x8f){
02131         if (c0 == 0){
02132             return -1;
02133         }
02134         if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
02135             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
02136             c1 = nkf_char_unicode_new((c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC);
02137             c2 = 0;
02138         } else {
02139             c2 = (c2 << 8) | (c1 & 0x7f);
02140             c1 = c0 & 0x7f;
02141 #ifdef SHIFTJIS_CP932
02142             if (cp51932_f){
02143                 nkf_char s2, s1;
02144                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
02145                     s2e_conv(s2, s1, &c2, &c1);
02146                     if (c2 < 0x100){
02147                         c1 &= 0x7f;
02148                         c2 &= 0x7f;
02149                     }
02150                 }
02151             }
02152 #endif /* SHIFTJIS_CP932 */
02153         }
02154 #endif /* X0212_ENABLE */
02155     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP || c2 == ISO_8859_1) {
02156         /* NOP */
02157     } else {
02158         if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
02159             /* encoding is eucJP-ms, so invert to Unicode Private User Area */
02160             c1 = nkf_char_unicode_new((c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000);
02161             c2 = 0;
02162         } else {
02163             c1 &= 0x7f;
02164             c2 &= 0x7f;
02165 #ifdef SHIFTJIS_CP932
02166             if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
02167                 nkf_char s2, s1;
02168                 if (e2s_conv(c2, c1, &s2, &s1) == 0){
02169                     s2e_conv(s2, s1, &c2, &c1);
02170                     if (c2 < 0x100){
02171                         c1 &= 0x7f;
02172                         c2 &= 0x7f;
02173                     }
02174                 }
02175             }
02176 #endif /* SHIFTJIS_CP932 */
02177         }
02178     }
02179     (*oconv)(c2, c1);
02180     return 0;
02181 }
02182 
02183 static nkf_char
02184 s_iconv(ARG_UNUSED nkf_char c2, nkf_char c1, ARG_UNUSED nkf_char c0)
02185 {
02186     if (c2 == JIS_X_0201_1976_K || (0xA1 <= c2 && c2 <= 0xDF)) {
02187         if (iso2022jp_f && !x0201_f) {
02188             c2 = GETA1; c1 = GETA2;
02189         } else {
02190             c1 &= 0x7f;
02191         }
02192     } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
02193         /* NOP */
02194     } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
02195         /* CP932 UDC */
02196         if(c1 == 0x7F) return 0;
02197         c1 = nkf_char_unicode_new((c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000);
02198         c2 = 0;
02199     } else {
02200         nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
02201         if (ret) return ret;
02202     }
02203     (*oconv)(c2, c1);
02204     return 0;
02205 }
02206 
02207 static int
02208 x0213_wait_combining_p(nkf_char wc)
02209 {
02210     int i;
02211     for (i = 0; i < sizeof_x0213_combining_table; i++) {
02212         if (x0213_combining_table[i][1] == wc) {
02213             return TRUE;
02214         }
02215     }
02216     return FALSE;
02217 }
02218 
02219 static int
02220 x0213_combining_p(nkf_char wc)
02221 {
02222     int i;
02223     for (i = 0; i < sizeof_x0213_combining_chars; i++) {
02224         if (x0213_combining_chars[i] == wc) {
02225             return TRUE;
02226         }
02227     }
02228     return FALSE;
02229 }
02230 
02231 static nkf_char
02232 w_iconv(nkf_char c1, nkf_char c2, nkf_char c3)
02233 {
02234     nkf_char ret = 0, c4 = 0;
02235     static const char w_iconv_utf8_1st_byte[] =
02236     { /* 0xC0 - 0xFF */
02237         20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
02238         21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
02239         30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
02240         40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
02241 
02242     if (c3 > 0xFF) {
02243         c4 = c3 & 0xFF;
02244         c3 >>= 8;
02245     }
02246 
02247     if (c1 < 0 || 0xff < c1) {
02248     }else if (c1 == 0) { /* 0 : 1 byte*/
02249         c3 = 0;
02250     } else if ((c1 & 0xC0) == 0x80) { /* 0x80-0xbf : trail byte */
02251         return 0;
02252     } else{
02253         switch (w_iconv_utf8_1st_byte[c1 - 0xC0]) {
02254         case 21:
02255             if (c2 < 0x80 || 0xBF < c2) return 0;
02256             break;
02257         case 30:
02258             if (c3 == 0) return -1;
02259             if (c2 < 0xA0 || 0xBF < c2 || (c3 & 0xC0) != 0x80)
02260                 return 0;
02261             break;
02262         case 31:
02263         case 33:
02264             if (c3 == 0) return -1;
02265             if ((c2 & 0xC0) != 0x80 || (c3 & 0xC0) != 0x80)
02266                 return 0;
02267             break;
02268         case 32:
02269             if (c3 == 0) return -1;
02270             if (c2 < 0x80 || 0x9F < c2 || (c3 & 0xC0) != 0x80)
02271                 return 0;
02272             break;
02273         case 40:
02274             if (c3 == 0) return -2;
02275             if (c2 < 0x90 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
02276                 return 0;
02277             break;
02278         case 41:
02279             if (c3 == 0) return -2;
02280             if (c2 < 0x80 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
02281                 return 0;
02282             break;
02283         case 42:
02284             if (c3 == 0) return -2;
02285             if (c2 < 0x80 || 0x8F < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
02286                 return 0;
02287             break;
02288         default:
02289             return 0;
02290             break;
02291         }
02292     }
02293     if (c1 == 0 || c1 == EOF){
02294     } else if ((c1 & 0xf8) == 0xf0) { /* 4 bytes */
02295         c2 = nkf_char_unicode_new(nkf_utf8_to_unicode(c1, c2, c3, c4));
02296         c1 = 0;
02297     } else {
02298         if (x0213_f && x0213_wait_combining_p(nkf_utf8_to_unicode(c1, c2, c3, c4)))
02299             return -3;
02300         ret = w2e_conv(c1, c2, c3, &c1, &c2);
02301     }
02302     if (ret == 0){
02303         (*oconv)(c1, c2);
02304     }
02305     return ret;
02306 }
02307 
02308 static nkf_char
02309 w_iconv_nocombine(nkf_char c1, nkf_char c2, nkf_char c3)
02310 {
02311     /* continue from the line below 'return -3;' in w_iconv() */
02312     nkf_char ret = w2e_conv(c1, c2, c3, &c1, &c2);
02313     if (ret == 0){
02314         (*oconv)(c1, c2);
02315     }
02316     return ret;
02317 }
02318 
02319 #define NKF_ICONV_INVALID_CODE_RANGE -13
02320 #define NKF_ICONV_WAIT_COMBINING_CHAR -14
02321 #define NKF_ICONV_NOT_COMBINED -15
02322 static size_t
02323 unicode_iconv(nkf_char wc, int nocombine)
02324 {
02325     nkf_char c1, c2;
02326     int ret = 0;
02327 
02328     if (wc < 0x80) {
02329         c2 = 0;
02330         c1 = wc;
02331     }else if ((wc>>11) == 27) {
02332         /* unpaired surrogate */
02333         return NKF_ICONV_INVALID_CODE_RANGE;
02334     }else if (wc < 0xFFFF) {
02335         if (!nocombine && x0213_f && x0213_wait_combining_p(wc))
02336             return NKF_ICONV_WAIT_COMBINING_CHAR;
02337         ret = w16e_conv(wc, &c2, &c1);
02338         if (ret) return ret;
02339     }else if (wc < 0x10FFFF) {
02340         c2 = 0;
02341         c1 = nkf_char_unicode_new(wc);
02342     } else {
02343         return NKF_ICONV_INVALID_CODE_RANGE;
02344     }
02345     (*oconv)(c2, c1);
02346     return 0;
02347 }
02348 
02349 static nkf_char
02350 unicode_iconv_combine(nkf_char wc, nkf_char wc2)
02351 {
02352     nkf_char c1, c2;
02353     int i;
02354 
02355     if (wc2 < 0x80) {
02356         return NKF_ICONV_NOT_COMBINED;
02357     }else if ((wc2>>11) == 27) {
02358         /* unpaired surrogate */
02359         return NKF_ICONV_INVALID_CODE_RANGE;
02360     }else if (wc2 < 0xFFFF) {
02361         if (!x0213_combining_p(wc2))
02362             return NKF_ICONV_NOT_COMBINED;
02363         for (i = 0; i < sizeof_x0213_combining_table; i++) {
02364             if (x0213_combining_table[i][1] == wc &&
02365                 x0213_combining_table[i][2] == wc2) {
02366                 c2 = x0213_combining_table[i][0] >> 8;
02367                 c1 = x0213_combining_table[i][0] & 0x7f;
02368                 (*oconv)(c2, c1);
02369                 return 0;
02370             }
02371         }
02372     }else if (wc2 < 0x10FFFF) {
02373         return NKF_ICONV_NOT_COMBINED;
02374     } else {
02375         return NKF_ICONV_INVALID_CODE_RANGE;
02376     }
02377     return NKF_ICONV_NOT_COMBINED;
02378 }
02379 
02380 static nkf_char
02381 w_iconv_combine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4, nkf_char c5, nkf_char c6)
02382 {
02383     nkf_char wc, wc2;
02384     wc = nkf_utf8_to_unicode(c1, c2, c3, 0);
02385     wc2 = nkf_utf8_to_unicode(c4, c5, c6, 0);
02386     if (wc2 < 0)
02387         return wc2;
02388     return unicode_iconv_combine(wc, wc2);
02389 }
02390 
02391 #define NKF_ICONV_NEED_ONE_MORE_BYTE (size_t)-1
02392 #define NKF_ICONV_NEED_TWO_MORE_BYTES (size_t)-2
02393 static size_t
02394 nkf_iconv_utf_16(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
02395 {
02396     nkf_char wc;
02397 
02398     if (c1 == EOF) {
02399         (*oconv)(EOF, 0);
02400         return 0;
02401     }
02402 
02403     if (input_endian == ENDIAN_BIG) {
02404         if (0xD8 <= c1 && c1 <= 0xDB) {
02405             if (0xDC <= c3 && c3 <= 0xDF) {
02406                 wc = UTF16_TO_UTF32(c1 << 8 | c2, c3 << 8 | c4);
02407             } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
02408         } else {
02409             wc = c1 << 8 | c2;
02410         }
02411     } else {
02412         if (0xD8 <= c2 && c2 <= 0xDB) {
02413             if (0xDC <= c4 && c4 <= 0xDF) {
02414                 wc = UTF16_TO_UTF32(c2 << 8 | c1, c4 << 8 | c3);
02415             } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
02416         } else {
02417             wc = c2 << 8 | c1;
02418         }
02419     }
02420 
02421     return (*unicode_iconv)(wc, FALSE);
02422 }
02423 
02424 static size_t
02425 nkf_iconv_utf_16_combine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
02426 {
02427     nkf_char wc, wc2;
02428 
02429     if (input_endian == ENDIAN_BIG) {
02430         if (0xD8 <= c3 && c3 <= 0xDB) {
02431             return NKF_ICONV_NOT_COMBINED;
02432         } else {
02433             wc = c1 << 8 | c2;
02434             wc2 = c3 << 8 | c4;
02435         }
02436     } else {
02437         if (0xD8 <= c2 && c2 <= 0xDB) {
02438             return NKF_ICONV_NOT_COMBINED;
02439         } else {
02440             wc = c2 << 8 | c1;
02441             wc2 = c4 << 8 | c3;
02442         }
02443     }
02444 
02445     return unicode_iconv_combine(wc, wc2);
02446 }
02447 
02448 static size_t
02449 nkf_iconv_utf_16_nocombine(nkf_char c1, nkf_char c2)
02450 {
02451     nkf_char wc;
02452     if (input_endian == ENDIAN_BIG)
02453         wc = c1 << 8 | c2;
02454     else
02455         wc = c2 << 8 | c1;
02456     return (*unicode_iconv)(wc, TRUE);
02457 }
02458 
02459 static nkf_char
02460 w_iconv16(nkf_char c2, nkf_char c1, ARG_UNUSED nkf_char c0)
02461 {
02462     (*oconv)(c2, c1);
02463     return 16; /* different from w_iconv32 */
02464 }
02465 
02466 static nkf_char
02467 w_iconv32(nkf_char c2, nkf_char c1, ARG_UNUSED nkf_char c0)
02468 {
02469     (*oconv)(c2, c1);
02470     return 32; /* different from w_iconv16 */
02471 }
02472 
02473 static nkf_char
02474 utf32_to_nkf_char(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
02475 {
02476     nkf_char wc;
02477 
02478     switch(input_endian){
02479     case ENDIAN_BIG:
02480         wc = c2 << 16 | c3 << 8 | c4;
02481         break;
02482     case ENDIAN_LITTLE:
02483         wc = c3 << 16 | c2 << 8 | c1;
02484         break;
02485     case ENDIAN_2143:
02486         wc = c1 << 16 | c4 << 8 | c3;
02487         break;
02488     case ENDIAN_3412:
02489         wc = c4 << 16 | c1 << 8 | c2;
02490         break;
02491     default:
02492         return NKF_ICONV_INVALID_CODE_RANGE;
02493     }
02494     return wc;
02495 }
02496 
02497 static size_t
02498 nkf_iconv_utf_32(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
02499 {
02500     nkf_char wc;
02501 
02502     if (c1 == EOF) {
02503         (*oconv)(EOF, 0);
02504         return 0;
02505     }
02506 
02507     wc = utf32_to_nkf_char(c1, c2, c3, c4);
02508     if (wc < 0)
02509         return wc;
02510 
02511     return (*unicode_iconv)(wc, FALSE);
02512 }
02513 
02514 static nkf_char
02515 nkf_iconv_utf_32_combine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4, nkf_char c5, nkf_char c6, nkf_char c7, nkf_char c8)
02516 {
02517     nkf_char wc, wc2;
02518 
02519     wc = utf32_to_nkf_char(c1, c2, c3, c4);
02520     if (wc < 0)
02521         return wc;
02522     wc2 = utf32_to_nkf_char(c5, c6, c7, c8);
02523     if (wc2 < 0)
02524         return wc2;
02525 
02526     return unicode_iconv_combine(wc, wc2);
02527 }
02528 
02529 static size_t
02530 nkf_iconv_utf_32_nocombine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
02531 {
02532     nkf_char wc;
02533 
02534     wc = utf32_to_nkf_char(c1, c2, c3, c4);
02535     return (*unicode_iconv)(wc, TRUE);
02536 }
02537 #endif
02538 
/*
 * Unless output_mode is already ASCII or ISO_8859_1, emit
 * ESC ( <ascii_intro> through o_putc and set output_mode to `mode`.
 */
#define output_ascii_escape_sequence(mode) do { \
            if (!(output_mode == ASCII || output_mode == ISO_8859_1)) { \
                    (*o_putc)(ESC); \
                    (*o_putc)('('); \
                    (*o_putc)(ascii_intro); \
                    output_mode = mode; \
            } \
    } while (0)
02547 
02548 static void
02549 output_escape_sequence(int mode)
02550 {
02551     if (output_mode == mode)
02552         return;
02553     switch(mode) {
02554     case ISO_8859_1:
02555         (*o_putc)(ESC);
02556         (*o_putc)('.');
02557         (*o_putc)('A');
02558         break;
02559     case JIS_X_0201_1976_K:
02560         (*o_putc)(ESC);
02561         (*o_putc)('(');
02562         (*o_putc)('I');
02563         break;
02564     case JIS_X_0208:
02565         (*o_putc)(ESC);
02566         (*o_putc)('$');
02567         (*o_putc)(kanji_intro);
02568         break;
02569     case JIS_X_0212:
02570         (*o_putc)(ESC);
02571         (*o_putc)('$');
02572         (*o_putc)('(');
02573         (*o_putc)('D');
02574         break;
02575     case JIS_X_0213_1:
02576         (*o_putc)(ESC);
02577         (*o_putc)('$');
02578         (*o_putc)('(');
02579         (*o_putc)('Q');
02580         break;
02581     case JIS_X_0213_2:
02582         (*o_putc)(ESC);
02583         (*o_putc)('$');
02584         (*o_putc)('(');
02585         (*o_putc)('P');
02586         break;
02587     }
02588     output_mode = mode;
02589 }
02590 
02591 static void
02592 j_oconv(nkf_char c2, nkf_char c1)
02593 {
02594 #ifdef NUMCHAR_OPTION
02595     if (c2 == 0 && nkf_char_unicode_p(c1)){
02596         w16e_conv(c1, &c2, &c1);
02597         if (c2 == 0 && nkf_char_unicode_p(c1)){
02598             c2 = c1 & VALUE_MASK;
02599             if (ms_ucs_map_f && 0xE000 <= c2 && c2 <= 0xE757) {
02600                 /* CP5022x UDC */
02601                 c1 &= 0xFFF;
02602                 c2 = 0x7F + c1 / 94;
02603                 c1 = 0x21 + c1 % 94;
02604             } else {
02605                 if (encode_fallback) (*encode_fallback)(c1);
02606                 return;
02607             }
02608         }
02609     }
02610 #endif
02611     if (c2 == 0) {
02612         output_ascii_escape_sequence(ASCII);
02613         (*o_putc)(c1);
02614     }
02615     else if (c2 == EOF) {
02616         output_ascii_escape_sequence(ASCII);
02617         (*o_putc)(EOF);
02618     }
02619     else if (c2 == ISO_8859_1) {
02620         output_ascii_escape_sequence(ISO_8859_1);
02621         (*o_putc)(c1|0x80);
02622     }
02623     else if (c2 == JIS_X_0201_1976_K) {
02624         output_escape_sequence(JIS_X_0201_1976_K);
02625         (*o_putc)(c1);
02626 #ifdef X0212_ENABLE
02627     } else if (is_eucg3(c2)){
02628         output_escape_sequence(x0213_f ? JIS_X_0213_2 : JIS_X_0212);
02629         (*o_putc)(c2 & 0x7f);
02630         (*o_putc)(c1);
02631 #endif
02632     } else {
02633         if(ms_ucs_map_f
02634            ? c2<0x20 || 0x92<c2 || c1<0x20 || 0x7e<c1
02635            : c2<0x20 || 0x7e<c2 || c1<0x20 || 0x7e<c1) return;
02636         output_escape_sequence(x0213_f ? JIS_X_0213_1 : JIS_X_0208);
02637         (*o_putc)(c2);
02638         (*o_putc)(c1);
02639     }
02640 }
02641 
02642 static void
02643 e_oconv(nkf_char c2, nkf_char c1)
02644 {
02645     if (c2 == 0 && nkf_char_unicode_p(c1)){
02646         w16e_conv(c1, &c2, &c1);
02647         if (c2 == 0 && nkf_char_unicode_p(c1)){
02648             c2 = c1 & VALUE_MASK;
02649             if (x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
02650                 /* eucJP-ms UDC */
02651                 c1 &= 0xFFF;
02652                 c2 = c1 / 94;
02653                 c2 += c2 < 10 ? 0x75 : 0x8FEB;
02654                 c1 = 0x21 + c1 % 94;
02655                 if (is_eucg3(c2)){
02656                     (*o_putc)(0x8f);
02657                     (*o_putc)((c2 & 0x7f) | 0x080);
02658                     (*o_putc)(c1 | 0x080);
02659                 }else{
02660                     (*o_putc)((c2 & 0x7f) | 0x080);
02661                     (*o_putc)(c1 | 0x080);
02662                 }
02663                 return;
02664             } else {
02665                 if (encode_fallback) (*encode_fallback)(c1);
02666                 return;
02667             }
02668         }
02669     }
02670 
02671     if (c2 == EOF) {
02672         (*o_putc)(EOF);
02673     } else if (c2 == 0) {
02674         output_mode = ASCII;
02675         (*o_putc)(c1);
02676     } else if (c2 == JIS_X_0201_1976_K) {
02677         output_mode = EUC_JP;
02678         (*o_putc)(SS2); (*o_putc)(c1|0x80);
02679     } else if (c2 == ISO_8859_1) {
02680         output_mode = ISO_8859_1;
02681         (*o_putc)(c1 | 0x080);
02682 #ifdef X0212_ENABLE
02683     } else if (is_eucg3(c2)){
02684         output_mode = EUC_JP;
02685 #ifdef SHIFTJIS_CP932
02686         if (!cp932inv_f){
02687             nkf_char s2, s1;
02688             if (e2s_conv(c2, c1, &s2, &s1) == 0){
02689                 s2e_conv(s2, s1, &c2, &c1);
02690             }
02691         }
02692 #endif
02693         if (c2 == 0) {
02694             output_mode = ASCII;
02695             (*o_putc)(c1);
02696         }else if (is_eucg3(c2)){
02697             if (x0212_f){
02698                 (*o_putc)(0x8f);
02699                 (*o_putc)((c2 & 0x7f) | 0x080);
02700                 (*o_putc)(c1 | 0x080);
02701             }
02702         }else{
02703             (*o_putc)((c2 & 0x7f) | 0x080);
02704             (*o_putc)(c1 | 0x080);
02705         }
02706 #endif
02707     } else {
02708         if (!nkf_isgraph(c1) || !nkf_isgraph(c2)) {
02709             set_iconv(FALSE, 0);
02710             return; /* too late to rescue this char */
02711         }
02712         output_mode = EUC_JP;
02713         (*o_putc)(c2 | 0x080);
02714         (*o_putc)(c1 | 0x080);
02715     }
02716 }
02717 
02718 static void
02719 s_oconv(nkf_char c2, nkf_char c1)
02720 {
02721 #ifdef NUMCHAR_OPTION
02722     if (c2 == 0 && nkf_char_unicode_p(c1)){
02723         w16e_conv(c1, &c2, &c1);
02724         if (c2 == 0 && nkf_char_unicode_p(c1)){
02725             c2 = c1 & VALUE_MASK;
02726             if (!x0213_f && 0xE000 <= c2 && c2 <= 0xE757) {
02727                 /* CP932 UDC */
02728                 c1 &= 0xFFF;
02729                 c2 = c1 / 188 + (cp932inv_f ? 0xF0 : 0xEB);
02730                 c1 = c1 % 188;
02731                 c1 += 0x40 + (c1 > 0x3e);
02732                 (*o_putc)(c2);
02733                 (*o_putc)(c1);
02734                 return;
02735             } else {
02736                 if(encode_fallback)(*encode_fallback)(c1);
02737                 return;
02738             }
02739         }
02740     }
02741 #endif
02742     if (c2 == EOF) {
02743         (*o_putc)(EOF);
02744         return;
02745     } else if (c2 == 0) {
02746         output_mode = ASCII;
02747         (*o_putc)(c1);
02748     } else if (c2 == JIS_X_0201_1976_K) {
02749         output_mode = SHIFT_JIS;
02750         (*o_putc)(c1|0x80);
02751     } else if (c2 == ISO_8859_1) {
02752         output_mode = ISO_8859_1;
02753         (*o_putc)(c1 | 0x080);
02754 #ifdef X0212_ENABLE
02755     } else if (is_eucg3(c2)){
02756         output_mode = SHIFT_JIS;
02757         if (e2s_conv(c2, c1, &c2, &c1) == 0){
02758             (*o_putc)(c2);
02759             (*o_putc)(c1);
02760         }
02761 #endif
02762     } else {
02763         if (!nkf_isprint(c1) || !nkf_isprint(c2)) {
02764             set_iconv(FALSE, 0);
02765             return; /* too late to rescue this char */
02766         }
02767         output_mode = SHIFT_JIS;
02768         e2s_conv(c2, c1, &c2, &c1);
02769 
02770 #ifdef SHIFTJIS_CP932
02771         if (cp932inv_f
02772             && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
02773             nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
02774             if (c){
02775                 c2 = c >> 8;
02776                 c1 = c & 0xff;
02777             }
02778         }
02779 #endif /* SHIFTJIS_CP932 */
02780 
02781         (*o_putc)(c2);
02782         if (prefix_table[(unsigned char)c1]){
02783             (*o_putc)(prefix_table[(unsigned char)c1]);
02784         }
02785         (*o_putc)(c1);
02786     }
02787 }
02788 
02789 #ifdef UTF8_OUTPUT_ENABLE
/*
 * Write `val` as UTF-8 through o_putc.  Requires nkf_char variables c1, c2,
 * c3 and c4 in the expanding scope; they are handed to nkf_unicode_to_utf8
 * as outputs.  c1 is always written, c2..c4 only when non-zero.
 */
#define OUTPUT_UTF8(val) do { \
        nkf_unicode_to_utf8((val), &c1, &c2, &c3, &c4); \
        (*o_putc)(c1); \
        if (c2) { (*o_putc)(c2); } \
        if (c3) { (*o_putc)(c3); } \
        if (c4) { (*o_putc)(c4); } \
    } while (0)
02797 
02798 static void
02799 w_oconv(nkf_char c2, nkf_char c1)
02800 {
02801     nkf_char c3, c4;
02802     nkf_char val, val2;
02803 
02804     if (output_bom_f) {
02805         output_bom_f = FALSE;
02806         (*o_putc)('\357');
02807         (*o_putc)('\273');
02808         (*o_putc)('\277');
02809     }
02810 
02811     if (c2 == EOF) {
02812         (*o_putc)(EOF);
02813         return;
02814     }
02815 
02816     if (c2 == 0 && nkf_char_unicode_p(c1)){
02817         val = c1 & VALUE_MASK;
02818         OUTPUT_UTF8(val);
02819         return;
02820     }
02821 
02822     if (c2 == 0) {
02823         (*o_putc)(c1);
02824     } else {
02825         val = e2w_conv(c2, c1);
02826         if (val){
02827             val2 = e2w_combining(val, c2, c1);
02828             if (val2)
02829                 OUTPUT_UTF8(val2);
02830             OUTPUT_UTF8(val);
02831         }
02832     }
02833 }
02834 
/*
 * Write one 16-bit unit given as (low byte, high byte).  The low byte goes
 * first when output_endian is ENDIAN_LITTLE, the high byte first otherwise.
 */
#define OUTPUT_UTF16_BYTES(lo, hi) do { \
        if (output_endian != ENDIAN_LITTLE){ \
            (*o_putc)(hi); \
            (*o_putc)(lo); \
        }else{ \
            (*o_putc)(lo); \
            (*o_putc)(hi); \
        } \
    } while (0)
02844 
/*
 * Write `val` as UTF-16.  `val` must be a modifiable lvalue (it is masked
 * with VALUE_MASK on the non-BMP path) and nkf_char variables c1 and c2 must
 * be in scope; both are overwritten.  Non-BMP values above UNICODE_MAX
 * produce no output.
 */
#define OUTPUT_UTF16(val) do { \
        if (!nkf_char_unicode_bmp_p(val)) { \
            (val) &= VALUE_MASK; \
            if ((val) <= UNICODE_MAX) { \
                c2 = ((val) >> 10) + NKF_INT32_C(0xD7C0);   /* high surrogate */ \
                c1 = ((val) & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */ \
                OUTPUT_UTF16_BYTES(c2 & 0xff, (c2 >> 8) & 0xff); \
                OUTPUT_UTF16_BYTES(c1 & 0xff, (c1 >> 8) & 0xff); \
            } \
        } else { \
            c2 = ((val) >> 8) & 0xff; \
            c1 = (val) & 0xff; \
            OUTPUT_UTF16_BYTES(c1, c2); \
        } \
    } while (0)
02860 
02861 static void
02862 w_oconv16(nkf_char c2, nkf_char c1)
02863 {
02864     if (output_bom_f) {
02865         output_bom_f = FALSE;
02866         OUTPUT_UTF16_BYTES(0xFF, 0xFE);
02867     }
02868 
02869     if (c2 == EOF) {
02870         (*o_putc)(EOF);
02871         return;
02872     }
02873 
02874     if (c2 == 0 && nkf_char_unicode_p(c1)) {
02875         OUTPUT_UTF16(c1);
02876     } else if (c2) {
02877         nkf_char val, val2;
02878         val = e2w_conv(c2, c1);
02879         if (!val) return;
02880         val2 = e2w_combining(val, c2, c1);
02881         if (val2)
02882             OUTPUT_UTF16(val2);
02883         OUTPUT_UTF16(val);
02884     } else {
02885         OUTPUT_UTF16_BYTES(c1, c2);
02886     }
02887 }
02888 
/*
 * Write `c` as one 32-bit unit: the low three bytes of `c` plus a zero top
 * byte, least significant byte first when output_endian is ENDIAN_LITTLE
 * and most significant byte first otherwise.
 */
#define OUTPUT_UTF32(c) do { \
        if (output_endian != ENDIAN_LITTLE){ \
            (*o_putc)(0); \
            (*o_putc)(((c) >> 16) & 0xFF); \
            (*o_putc)(((c) >>  8) & 0xFF); \
            (*o_putc)( (c)        & 0xFF); \
        }else{ \
            (*o_putc)( (c)        & 0xFF); \
            (*o_putc)(((c) >>  8) & 0xFF); \
            (*o_putc)(((c) >> 16) & 0xFF); \
            (*o_putc)(0); \
        } \
    } while (0)
02902 
02903 static void
02904 w_oconv32(nkf_char c2, nkf_char c1)
02905 {
02906     if (output_bom_f) {
02907         output_bom_f = FALSE;
02908         if (output_endian == ENDIAN_LITTLE){
02909             (*o_putc)(0xFF);
02910             (*o_putc)(0xFE);
02911             (*o_putc)(0);
02912             (*o_putc)(0);
02913         }else{
02914             (*o_putc)(0);
02915             (*o_putc)(0);
02916             (*o_putc)(0xFE);
02917             (*o_putc)(0xFF);
02918         }
02919     }
02920 
02921     if (c2 == EOF) {
02922         (*o_putc)(EOF);
02923         return;
02924     }
02925 
02926     if (c2 == ISO_8859_1) {
02927         c1 |= 0x80;
02928     } else if (c2 == 0 && nkf_char_unicode_p(c1)) {
02929         c1 &= VALUE_MASK;
02930     } else if (c2) {
02931         nkf_char val, val2;
02932         val = e2w_conv(c2, c1);
02933         if (!val) return;
02934         val2 = e2w_combining(val, c2, c1);
02935         if (val2)
02936             OUTPUT_UTF32(val2);
02937         c1 = val;
02938     }
02939     OUTPUT_UTF32(c1);
02940 }
02941 #endif
02942 
/*
 * Bit flags OR-ed into input_code.score by set_code_score().  Each flag
 * occupies its own bit, in ascending order.
 */
#define SCORE_L2       (1 << 0)   /* Kanji Level 2 */
#define SCORE_KANA     (1 << 1)   /* Halfwidth Katakana */
#define SCORE_DEPEND   (1 << 2)   /* MD Characters */
#define SCORE_CP932    (1 << 3)   /* IBM extended characters */
#define SCORE_X0212    (1 << 4)   /* JIS X 0212 */
#define SCORE_X0213    (1 << 5)   /* JIS X 0213 */
#define SCORE_NO_EXIST (1 << 6)   /* Undefined Characters */
#define SCORE_iMIME    (1 << 7)   /* MIME selected */
#define SCORE_ERROR    (1 << 8)   /* Error */

/* Score assigned by status_reset(). */
#define SCORE_INIT (SCORE_iMIME)
02954 
02955 static const nkf_char score_table_A0[] = {
02956     0, 0, 0, 0,
02957     0, 0, 0, 0,
02958     0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
02959     SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_X0213,
02960 };
02961 
02962 static const nkf_char score_table_F0[] = {
02963     SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
02964     SCORE_L2, SCORE_DEPEND, SCORE_X0213, SCORE_X0213,
02965     SCORE_DEPEND, SCORE_DEPEND, SCORE_CP932, SCORE_CP932,
02966     SCORE_CP932, SCORE_X0213, SCORE_X0213, SCORE_ERROR,
02967 };
02968 
02969 static const nkf_char score_table_8FA0[] = {
02970     0, SCORE_X0213, SCORE_X0212, SCORE_X0213,
02971     SCORE_X0213, SCORE_X0213, SCORE_X0212, SCORE_X0212,
02972     SCORE_X0213, SCORE_X0212, SCORE_X0212, SCORE_X0212,
02973     SCORE_X0213, SCORE_X0213, SCORE_X0213, SCORE_X0213,
02974 };
02975 
02976 static const nkf_char score_table_8FE0[] = {
02977     SCORE_X0212, SCORE_X0212, SCORE_X0212, SCORE_X0212,
02978     SCORE_X0212, SCORE_X0212, SCORE_X0212, SCORE_X0212,
02979     SCORE_X0212, SCORE_X0212, SCORE_X0212, SCORE_X0212,
02980     SCORE_X0212, SCORE_X0212, SCORE_X0213, SCORE_X0213,
02981 };
02982 
02983 static const nkf_char score_table_8FF0[] = {
02984     SCORE_X0213, SCORE_X0213, SCORE_X0213, SCORE_X0212,
02985     SCORE_X0212, SCORE_X0213, SCORE_X0213, SCORE_X0213,
02986     SCORE_X0213, SCORE_X0213, SCORE_X0213, SCORE_X0213,
02987     SCORE_X0213, SCORE_X0213, SCORE_X0213, SCORE_X0213,
02988 };
02989 
02990 static void
02991 set_code_score(struct input_code *ptr, nkf_char score)
02992 {
02993     if (ptr){
02994         ptr->score |= score;
02995     }
02996 }
02997 
02998 static void
02999 clr_code_score(struct input_code *ptr, nkf_char score)
03000 {
03001     if (ptr){
03002         ptr->score &= ~score;
03003     }
03004 }
03005 
03006 static void
03007 code_score(struct input_code *ptr)
03008 {
03009     nkf_char c2 = ptr->buf[0];
03010     nkf_char c1 = ptr->buf[1];
03011     if (c2 < 0){
03012         set_code_score(ptr, SCORE_ERROR);
03013     }else if (c2 == SS2){
03014         set_code_score(ptr, SCORE_KANA);
03015     }else if (c2 == 0x8f){
03016         if ((c1 & 0x70) == 0x20){
03017             set_code_score(ptr, score_table_8FA0[c1 & 0x0f]);
03018         }else if ((c1 & 0x70) == 0x60){
03019             set_code_score(ptr, score_table_8FE0[c1 & 0x0f]);
03020         }else if ((c1 & 0x70) == 0x70){
03021             set_code_score(ptr, score_table_8FF0[c1 & 0x0f]);
03022         }else{
03023             set_code_score(ptr, SCORE_X0212);
03024         }
03025 #ifdef UTF8_OUTPUT_ENABLE
03026     }else if (!e2w_conv(c2, c1)){
03027         set_code_score(ptr, SCORE_NO_EXIST);
03028 #endif
03029     }else if ((c2 & 0x70) == 0x20){
03030         set_code_score(ptr, score_table_A0[c2 & 0x0f]);
03031     }else if ((c2 & 0x70) == 0x70){
03032         set_code_score(ptr, score_table_F0[c2 & 0x0f]);
03033     }else if ((c2 & 0x70) >= 0x50){
03034         set_code_score(ptr, SCORE_L2);
03035     }
03036 }
03037 
03038 static void
03039 status_disable(struct input_code *ptr)
03040 {
03041     ptr->stat = -1;
03042     ptr->buf[0] = -1;
03043     code_score(ptr);
03044     if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
03045 }
03046 
03047 static void
03048 status_push_ch(struct input_code *ptr, nkf_char c)
03049 {
03050     ptr->buf[ptr->index++] = c;
03051 }
03052 
03053 static void
03054 status_clear(struct input_code *ptr)
03055 {
03056     ptr->stat = 0;
03057     ptr->index = 0;
03058 }
03059 
03060 static void
03061 status_reset(struct input_code *ptr)
03062 {
03063     status_clear(ptr);
03064     ptr->score = SCORE_INIT;
03065 }
03066 
03067 static void
03068 status_reinit(struct input_code *ptr)
03069 {
03070     status_reset(ptr);
03071     ptr->_file_stat = 0;
03072 }
03073 
03074 static void
03075 status_check(struct input_code *ptr, nkf_char c)
03076 {
03077     if (c <= DEL && estab_f){
03078         status_reset(ptr);
03079     }
03080 }
03081 
03082 static void
03083 s_status(struct input_code *ptr, nkf_char c)
03084 {
03085     switch(ptr->stat){
03086     case -1:
03087         status_check(ptr, c);
03088         break;
03089     case 0:
03090         if (c <= DEL){
03091             break;
03092         }else if (nkf_char_unicode_p(c)){
03093             break;
03094         }else if (0xa1 <= c && c <= 0xdf){
03095             status_push_ch(ptr, SS2);
03096             status_push_ch(ptr, c);
03097             code_score(ptr);
03098             status_clear(ptr);
03099         }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
03100             ptr->stat = 1;
03101             status_push_ch(ptr, c);
03102         }else if (0xed <= c && c <= 0xee){
03103             ptr->stat = 3;
03104             status_push_ch(ptr, c);
03105 #ifdef SHIFTJIS_CP932
03106         }else if (is_ibmext_in_sjis(c)){
03107             ptr->stat = 2;
03108             status_push_ch(ptr, c);
03109 #endif /* SHIFTJIS_CP932 */
03110 #ifdef X0212_ENABLE
03111         }else if (0xf0 <= c && c <= 0xfc){
03112             ptr->stat = 1;
03113             status_push_ch(ptr, c);
03114 #endif /* X0212_ENABLE */
03115         }else{
03116             status_disable(ptr);
03117         }
03118         break;
03119     case 1:
03120         if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
03121             status_push_ch(ptr, c);
03122             s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
03123             code_score(ptr);
03124             status_clear(ptr);
03125         }else{
03126             status_disable(ptr);
03127         }
03128         break;
03129     case 2:
03130 #ifdef SHIFTJIS_CP932
03131         if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
03132             status_push_ch(ptr, c);
03133             if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) {
03134                 set_code_score(ptr, SCORE_CP932);
03135                 status_clear(ptr);
03136                 break;
03137             }
03138         }
03139 #endif /* SHIFTJIS_CP932 */
03140         status_disable(ptr);
03141         break;
03142     case 3:
03143         if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
03144             status_push_ch(ptr, c);
03145             s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
03146             set_code_score(ptr, SCORE_CP932);
03147             status_clear(ptr);
03148         }else{
03149             status_disable(ptr);
03150         }
03151         break;
03152     }
03153 }
03154 
03155 static void
03156 e_status(struct input_code *ptr, nkf_char c)
03157 {
03158     switch (ptr->stat){
03159     case -1:
03160         status_check(ptr, c);
03161         break;
03162     case 0:
03163         if (c <= DEL){
03164             break;
03165         }else if (nkf_char_unicode_p(c)){
03166             break;
03167         }else if (SS2 == c || (0xa1 <= c && c <= 0xfe)){
03168             ptr->stat = 1;
03169             status_push_ch(ptr, c);
03170 #ifdef X0212_ENABLE
03171         }else if (0x8f == c){
03172             ptr->stat = 2;
03173             status_push_ch(ptr, c);
03174 #endif /* X0212_ENABLE */
03175         }else{
03176             status_disable(ptr);
03177         }
03178         break;
03179     case 1:
03180         if (0xa1 <= c && c <= 0xfe){
03181             status_push_ch(ptr, c);
03182             code_score(ptr);
03183             status_clear(ptr);
03184         }else{
03185             status_disable(ptr);
03186         }
03187         break;
03188 #ifdef X0212_ENABLE
03189     case 2:
03190         if (0xa1 <= c && c <= 0xfe){
03191             ptr->stat = 1;
03192             status_push_ch(ptr, c);
03193         }else{
03194             status_disable(ptr);
03195         }
03196 #endif /* X0212_ENABLE */
03197     }
03198 }
03199 
03200 #ifdef UTF8_INPUT_ENABLE
03201 static void
03202 w_status(struct input_code *ptr, nkf_char c)
03203 {
03204     switch (ptr->stat){
03205     case -1:
03206         status_check(ptr, c);
03207         break;
03208     case 0:
03209         if (c <= DEL){
03210             break;
03211         }else if (nkf_char_unicode_p(c)){
03212             break;
03213         }else if (0xc0 <= c && c <= 0xdf){
03214             ptr->stat = 1;
03215             status_push_ch(ptr, c);
03216         }else if (0xe0 <= c && c <= 0xef){
03217             ptr->stat = 2;
03218             status_push_ch(ptr, c);
03219         }else if (0xf0 <= c && c <= 0xf4){
03220             ptr->stat = 3;
03221             status_push_ch(ptr, c);
03222         }else{
03223             status_disable(ptr);
03224         }
03225         break;
03226     case 1:
03227     case 2:
03228         if (0x80 <= c && c <= 0xbf){
03229             status_push_ch(ptr, c);
03230             if (ptr->index > ptr->stat){
03231                 int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
03232                            && ptr->buf[2] == 0xbf);
03233                 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
03234                          &ptr->buf[0], &ptr->buf[1]);
03235                 if (!bom){
03236                     code_score(ptr);
03237                 }
03238                 status_clear(ptr);
03239             }
03240         }else{
03241             status_disable(ptr);
03242         }
03243         break;
03244     case 3:
03245         if (0x80 <= c && c <= 0xbf){
03246             if (ptr->index < ptr->stat){
03247                 status_push_ch(ptr, c);
03248             } else {
03249                 status_clear(ptr);
03250             }
03251         }else{
03252             status_disable(ptr);
03253         }
03254         break;
03255     }
03256 }
03257 #endif
03258 
03259 static void
03260 code_status(nkf_char c)
03261 {
03262     int action_flag = 1;
03263     struct input_code *result = 0;
03264     struct input_code *p = input_code_list;
03265     while (p->name){
03266         if (!p->status_func) {
03267             ++p;
03268             continue;
03269         }
03270         if (!p->status_func)
03271             continue;
03272         (p->status_func)(p, c);
03273         if (p->stat > 0){
03274             action_flag = 0;
03275         }else if(p->stat == 0){
03276             if (result){
03277                 action_flag = 0;
03278             }else{
03279                 result = p;
03280             }
03281         }
03282         ++p;
03283     }
03284 
03285     if (action_flag){
03286         if (result && !estab_f){
03287             set_iconv(TRUE, result->iconv_func);
03288         }else if (c <= DEL){
03289             struct input_code *ptr = input_code_list;
03290             while (ptr->name){
03291                 status_reset(ptr);
03292                 ++ptr;
03293             }
03294         }
03295     }
03296 }
03297 
03298 typedef struct {
03299     nkf_buf_t *std_gc_buf;
03300     nkf_char broken_state;
03301     nkf_buf_t *broken_buf;
03302     nkf_char mimeout_state;
03303     nkf_buf_t *nfc_buf;
03304 } nkf_state_t;
03305 
03306 static nkf_state_t *nkf_state = NULL;
03307 
03308 #define STD_GC_BUFSIZE (256)
03309 
03310 static void
03311 nkf_state_init(void)
03312 {
03313     if (nkf_state) {
03314         nkf_buf_clear(nkf_state->std_gc_buf);
03315         nkf_buf_clear(nkf_state->broken_buf);
03316         nkf_buf_clear(nkf_state->nfc_buf);
03317     }
03318     else {
03319         nkf_state = nkf_xmalloc(sizeof(nkf_state_t));
03320         nkf_state->std_gc_buf = nkf_buf_new(STD_GC_BUFSIZE);
03321         nkf_state->broken_buf = nkf_buf_new(3);
03322         nkf_state->nfc_buf = nkf_buf_new(9);
03323     }
03324     nkf_state->broken_state = 0;
03325     nkf_state->mimeout_state = 0;
03326 }
03327 
03328 #ifndef WIN32DLL
03329 static nkf_char
03330 std_getc(FILE *f)
03331 {
03332     if (!nkf_buf_empty_p(nkf_state->std_gc_buf)){
03333         return nkf_buf_pop(nkf_state->std_gc_buf);
03334     }
03335     return getc(f);
03336 }
03337 #endif /*WIN32DLL*/
03338 
03339 static nkf_char
03340 std_ungetc(nkf_char c, ARG_UNUSED FILE *f)
03341 {
03342     nkf_buf_push(nkf_state->std_gc_buf, c);
03343     return c;
03344 }
03345 
03346 #ifndef WIN32DLL
03347 static void
03348 std_putc(nkf_char c)
03349 {
03350     if(c!=EOF)
03351         putchar(c);
03352 }
03353 #endif /*WIN32DLL*/
03354 
03355 static nkf_char   hold_buf[HOLD_SIZE*2];
03356 static int             hold_count = 0;
03357 static nkf_char
03358 push_hold_buf(nkf_char c2)
03359 {
03360     if (hold_count >= HOLD_SIZE*2)
03361         return (EOF);
03362     hold_buf[hold_count++] = c2;
03363     return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
03364 }
03365 
/*
 * Hold-and-replay conversion used while the input encoding is undecided.
 *
 * c1 and c2 are stored in hold_buf; further bytes are then read with
 * i_getc, fed to code_status() and held, until EOF, an ESC (which is pushed
 * back with i_ungetc), push_hold_buf() reporting EOF, or estab_f becoming
 * set.  If estab_f is still unset, a converter is chosen from
 * input_code_list: starting from the first entry, any entry that has a
 * status_func and a strictly lower score replaces the current choice.
 * The held bytes are then replayed through iconv (values satisfying
 * nkf_char_unicode_p go to oconv instead), reading more bytes from i_getc
 * when a multibyte sequence extends beyond the held data.
 *
 * Returns the last value read by the first loop (EOF if the input ended
 * there), or EOF when the input ends inside a sequence in the -2 / -1
 * cases below.
 */
03366 static int
03367 h_conv(FILE *f, nkf_char c1, nkf_char c2)
03368 {
03369     int ret;
03370     int hold_index;
          /* Number of bytes of the current sequence taken from hold_buf; used
           * to decide whether look-ahead bytes are returned by rewinding
           * hold_index or by i_ungetc. */
03371     int fromhold_count;
03372     nkf_char c3, c4;
03373 
          /* Start a fresh hold buffer seeded with the two bytes already read. */
03378     hold_count = 0;
03379     push_hold_buf(c1);
03380     push_hold_buf(c2);
03381 
          /* Keep reading until the encoding is established or we must stop. */
03382     while ((c2 = (*i_getc)(f)) != EOF) {
03383         if (c2 == ESC){
03384             (*i_ungetc)(c2,f);
03385             break;
03386         }
03387         code_status(c2);
03388         if (push_hold_buf(c2) == EOF || estab_f) {
03389             break;
03390         }
03391     }
03392 
          /* Still undecided: pick the candidate with the lowest score. */
03393     if (!estab_f) {
03394         struct input_code *p = input_code_list;
03395         struct input_code *result = p;
03396         if (c2 == EOF) {
03397             code_status(c2);
03398         }
03399         while (p->name) {
03400             if (p->status_func && p->score < result->score) {
03401                 result = p;
03402             }
03403             p++;
03404         }
03405         set_iconv(TRUE, result->iconv_func);
03406     }
03407 
03408 
          /* Replay the held bytes through the selected input converter. */
03418     ret = c2;
03419     hold_index = 0;
03420     while (hold_index < hold_count){
03421         c1 = hold_buf[hold_index++];
03422         if (nkf_char_unicode_p(c1)) {
03423             (*oconv)(0, c1);
03424             continue;
03425         }
03426         else if (c1 <= DEL){
03427             (*iconv)(0, c1, 0);
03428             continue;
03429         }else if (iconv == s_iconv && 0xa1 <= c1 && c1 <= 0xdf){
03430             (*iconv)(JIS_X_0201_1976_K, c1, 0);
03431             continue;
03432         }
              /* c1 starts a multibyte sequence; fetch its second byte. */
03433         fromhold_count = 1;
03434         if (hold_index < hold_count){
03435             c2 = hold_buf[hold_index++];
03436             fromhold_count++;
03437         }else{
03438             c2 = (*i_getc)(f);
03439             if (c2 == EOF){
                      /* NOTE(review): c4 is not read after this break and ret
                       * is left unchanged here - confirm this is intended. */
03440                 c4 = EOF;
03441                 break;
03442             }
03443             code_status(c2);
03444         }
03445         c3 = 0;
              /* A negative result from iconv requests more bytes (see cases). */
03446         switch ((*iconv)(c1, c2, 0)) {  /* can be EUC/SJIS/UTF-8 */
03447         case -2:
03448             /* 4 bytes UTF-8 */
03449             if (hold_index < hold_count){
03450                 c3 = hold_buf[hold_index++];
03451             } else if ((c3 = (*i_getc)(f)) == EOF) {
03452                 ret = EOF;
03453                 break;
03454             }
03455             code_status(c3);
03456             if (hold_index < hold_count){
03457                 c4 = hold_buf[hold_index++];
03458             } else if ((c4 = (*i_getc)(f)) == EOF) {
03459                 c3 = ret = EOF;
03460                 break;
03461             }
03462             code_status(c4);
                  /* The third and fourth bytes are packed into one argument. */
03463             (*iconv)(c1, c2, (c3<<8)|c4);
03464             break;
03465         case -3:
03466             /* 4 bytes UTF-8 (check combining character) */
03467             if (hold_index < hold_count){
03468                 c3 = hold_buf[hold_index++];
03469                 fromhold_count++;
03470             } else if ((c3 = (*i_getc)(f)) == EOF) {
03471                 w_iconv_nocombine(c1, c2, 0);
03472                 break;
03473             }
03474             if (hold_index < hold_count){
03475                 c4 = hold_buf[hold_index++];
03476                 fromhold_count++;
03477             } else if ((c4 = (*i_getc)(f)) == EOF) {
03478                 w_iconv_nocombine(c1, c2, 0);
                      /* Give c3 back to wherever it came from. */
03479                 if (fromhold_count <= 2)
03480                     (*i_ungetc)(c3,f);
03481                 else
03482                     hold_index--;
03483                 continue;
03484             }
                  /* Non-zero from w_iconv_combine: emit c1/c2 on their own
                   * and return the two look-ahead bytes to their source. */
03485             if (w_iconv_combine(c1, c2, 0, c3, c4, 0)) {
03486                 w_iconv_nocombine(c1, c2, 0);
03487                 if (fromhold_count <= 2) {
03488                     (*i_ungetc)(c4,f);
03489                     (*i_ungetc)(c3,f);
03490                 } else if (fromhold_count == 3) {
03491                     (*i_ungetc)(c4,f);
03492                     hold_index--;
03493                 } else {
03494                     hold_index -= 2;
03495                 }
03496             }
03497             break;
03498         case -1:
03499             /* 3 bytes EUC or UTF-8 */
03500             if (hold_index < hold_count){
03501                 c3 = hold_buf[hold_index++];
03502                 fromhold_count++;
03503             } else if ((c3 = (*i_getc)(f)) == EOF) {
03504                 ret = EOF;
03505                 break;
03506             } else {
03507                 code_status(c3);
03508             }
03509             if ((*iconv)(c1, c2, c3) == -3) {
03510                 /* 6 bytes UTF-8 (check combining character) */
03511                 nkf_char c5, c6;
03512                 if (hold_index < hold_count){
03513                     c4 = hold_buf[hold_index++];
03514                     fromhold_count++;
03515                 } else if ((c4 = (*i_getc)(f)) == EOF) {
03516                     w_iconv_nocombine(c1, c2, c3);
03517                     continue;
03518                 }
03519                 if (hold_index < hold_count){
03520                     c5 = hold_buf[hold_index++];
03521                     fromhold_count++;
03522                 } else if ((c5 = (*i_getc)(f)) == EOF) {
03523                     w_iconv_nocombine(c1, c2, c3);
03524                     if (fromhold_count == 4)
03525                         hold_index--;
03526                     else
03527                         (*i_ungetc)(c4,f);
03528                     continue;
03529                 }
03530                 if (hold_index < hold_count){
03531                     c6 = hold_buf[hold_index++];
03532                     fromhold_count++;
03533                 } else if ((c6 = (*i_getc)(f)) == EOF) {
03534                     w_iconv_nocombine(c1, c2, c3);
03535                     if (fromhold_count == 5) {
03536                         hold_index -= 2;
03537                     } else if (fromhold_count == 4) {
03538                         hold_index--;
03539                         (*i_ungetc)(c5,f);
03540                     } else {
03541                         (*i_ungetc)(c5,f);
03542                         (*i_ungetc)(c4,f);
03543                     }
03544                     continue;
03545                 }
                      /* Non-zero from w_iconv_combine: emit the first three
                       * bytes alone and return c4..c6 to their source. */
03546                 if (w_iconv_combine(c1, c2, c3, c4, c5, c6)) {
03547                     w_iconv_nocombine(c1, c2, c3);
03548                     if (fromhold_count == 6) {
03549                         hold_index -= 3;
03550                     } else if (fromhold_count == 5) {
03551                         hold_index -= 2;
03552                         (*i_ungetc)(c6,f);
03553                     } else if (fromhold_count == 4) {
03554                         hold_index--;
03555                         (*i_ungetc)(c6,f);
03556                         (*i_ungetc)(c5,f);
03557                     } else {
03558                         (*i_ungetc)(c6,f);
03559                         (*i_ungetc)(c5,f);
03560                         (*i_ungetc)(c4,f);
03561                     }
03562                 }
03563             }
03564             break;
03565         }
              /* The input ended inside a multibyte sequence. */
03566         if (c3 == EOF) break;
03567     }
03568     return ret;
03569 }
03570 
03571 /*
03572  * Check and Ignore BOM
03573  */
03574 static void
03575 check_bom(FILE *f)
03576 {
03577     int c2;
03578     switch(c2 = (*i_getc)(f)){
03579     case 0x00:
03580         if((c2 = (*i_getc)(f)) == 0x00){
03581             if((c2 = (*i_getc)(f)) == 0xFE){
03582                 if((c2 = (*i_getc)(f)) == 0xFF){
03583                     if(!input_encoding){
03584                         set_iconv(TRUE, w_iconv32);
03585                     }
03586                     if (iconv == w_iconv32) {
03587                         input_bom_f = TRUE;
03588                         input_endian = ENDIAN_BIG;
03589                         return;
03590                     }
03591                     (*i_ungetc)(0xFF,f);
03592                 }else (*i_ungetc)(c2,f);
03593                 (*i_ungetc)(0xFE,f);
03594             }else if(c2 == 0xFF){
03595                 if((c2 = (*i_getc)(f)) == 0xFE){
03596                     if(!input_encoding){
03597                         set_iconv(TRUE, w_iconv32);
03598                     }
03599                     if (iconv == w_iconv32) {
03600                         input_endian = ENDIAN_2143;
03601                         return;
03602                     }
03603                     (*i_ungetc)(0xFF,f);
03604                 }else (*i_ungetc)(c2,f);
03605                 (*i_ungetc)(0xFF,f);
03606             }else (*i_ungetc)(c2,f);
03607             (*i_ungetc)(0x00,f);
03608         }else (*i_ungetc)(c2,f);
03609         (*i_ungetc)(0x00,f);
03610         break;
03611     case 0xEF:
03612         if((c2 = (*i_getc)(f)) == 0xBB){
03613             if((c2 = (*i_getc)(f)) == 0xBF){
03614                 if(!input_encoding){
03615                     set_iconv(TRUE, w_iconv);
03616                 }
03617                 if (iconv == w_iconv) {
03618                     input_bom_f = TRUE;
03619                     return;
03620                 }
03621                 (*i_ungetc)(0xBF,f);
03622             }else (*i_ungetc)(c2,f);
03623             (*i_ungetc)(0xBB,f);
03624         }else (*i_ungetc)(c2,f);
03625         (*i_ungetc)(0xEF,f);
03626         break;
03627     case 0xFE:
03628         if((c2 = (*i_getc)(f)) == 0xFF){
03629             if((c2 = (*i_getc)(f)) == 0x00){
03630                 if((c2 = (*i_getc)(f)) == 0x00){
03631                     if(!input_encoding){
03632                         set_iconv(TRUE, w_iconv32);
03633                     }
03634                     if (iconv == w_iconv32) {
03635                         input_endian = ENDIAN_3412;
03636                         return;
03637                     }
03638                     (*i_ungetc)(0x00,f);
03639                 }else (*i_ungetc)(c2,f);
03640                 (*i_ungetc)(0x00,f);
03641             }else (*i_ungetc)(c2,f);
03642             if(!input_encoding){
03643                 set_iconv(TRUE, w_iconv16);
03644             }
03645             if (iconv == w_iconv16) {
03646                 input_endian = ENDIAN_BIG;
03647                 input_bom_f = TRUE;
03648                 return;
03649             }
03650             (*i_ungetc)(0xFF,f);
03651         }else (*i_ungetc)(c2,f);
03652         (*i_ungetc)(0xFE,f);
03653         break;
03654     case 0xFF:
03655         if((c2 = (*i_getc)(f)) == 0xFE){
03656             if((c2 = (*i_getc)(f)) == 0x00){
03657                 if((c2 = (*i_getc)(f)) == 0x00){
03658                     if(!input_encoding){
03659                         set_iconv(TRUE, w_iconv32);
03660                     }
03661                     if (iconv == w_iconv32) {
03662                         input_endian = ENDIAN_LITTLE;
03663                         input_bom_f = TRUE;
03664                         return;
03665                     }
03666                     (*i_ungetc)(0x00,f);
03667                 }else (*i_ungetc)(c2,f);
03668                 (*i_ungetc)(0x00,f);
03669             }else (*i_ungetc)(c2,f);
03670             if(!input_encoding){
03671                 set_iconv(TRUE, w_iconv16);
03672             }
03673             if (iconv == w_iconv16) {
03674                 input_endian = ENDIAN_LITTLE;
03675                 input_bom_f = TRUE;
03676                 return;
03677             }
03678             (*i_ungetc)(0xFE,f);
03679         }else (*i_ungetc)(c2,f);
03680         (*i_ungetc)(0xFF,f);
03681         break;
03682     default:
03683         (*i_ungetc)(c2,f);
03684         break;
03685     }
03686 }
03687 
03688 static nkf_char
03689 broken_getc(FILE *f)
03690 {
03691     nkf_char c, c1;
03692 
03693     if (!nkf_buf_empty_p(nkf_state->broken_buf)) {
03694         return nkf_buf_pop(nkf_state->broken_buf);
03695     }
03696     c = (*i_bgetc)(f);
03697     if (c=='$' && nkf_state->broken_state != ESC
03698         && (input_mode == ASCII || input_mode == JIS_X_0201_1976_K)) {
03699         c1= (*i_bgetc)(f);
03700         nkf_state->broken_state = 0;
03701         if (c1=='@'|| c1=='B') {
03702             nkf_buf_push(nkf_state->broken_buf, c1);
03703             nkf_buf_push(nkf_state->broken_buf, c);
03704             return ESC;
03705         } else {
03706             (*i_bungetc)(c1,f);
03707             return c;
03708         }
03709     } else if (c=='(' && nkf_state->broken_state != ESC
03710                && (input_mode == JIS_X_0208 || input_mode == JIS_X_0201_1976_K)) {
03711         c1= (*i_bgetc)(f);
03712         nkf_state->broken_state = 0;
03713         if (c1=='J'|| c1=='B') {
03714             nkf_buf_push(nkf_state->broken_buf, c1);
03715             nkf_buf_push(nkf_state->broken_buf, c);
03716             return ESC;
03717         } else {
03718             (*i_bungetc)(c1,f);
03719             return c;
03720         }
03721     } else {
03722         nkf_state->broken_state = c;
03723         return c;
03724     }
03725 }
03726 
03727 static nkf_char
03728 broken_ungetc(nkf_char c, ARG_UNUSED FILE *f)
03729 {
03730     if (nkf_buf_length(nkf_state->broken_buf) < 2)
03731         nkf_buf_push(nkf_state->broken_buf, c);
03732     return c;
03733 }
03734 
03735 static void
03736 eol_conv(nkf_char c2, nkf_char c1)
03737 {
03738     if (guess_f && input_eol != EOF) {
03739         if (c2 == 0 && c1 == LF) {
03740             if (!input_eol) input_eol = prev_cr ? CRLF : LF;
03741             else if (input_eol != (prev_cr ? CRLF : LF)) input_eol = EOF;
03742         } else if (c2 == 0 && c1 == CR && input_eol == LF) input_eol = EOF;
03743         else if (!prev_cr);
03744         else if (!input_eol) input_eol = CR;
03745         else if (input_eol != CR) input_eol = EOF;
03746     }
03747     if (prev_cr || (c2 == 0 && c1 == LF)) {
03748         prev_cr = 0;
03749         if (eolmode_f != LF) (*o_eol_conv)(0, CR);
03750         if (eolmode_f != CR) (*o_eol_conv)(0, LF);
03751     }
03752     if (c2 == 0 && c1 == CR) prev_cr = CR;
03753     else if (c2 != 0 || c1 != LF) (*o_eol_conv)(c2, c1);
03754 }
03755 
03756 static void
03757 put_newline(void (*func)(nkf_char))
03758 {
03759     switch (eolmode_f ? eolmode_f : DEFAULT_NEWLINE) {
03760       case CRLF:
03761         (*func)(0x0D);
03762         (*func)(0x0A);
03763         break;
03764       case CR:
03765         (*func)(0x0D);
03766         break;
03767       case LF:
03768         (*func)(0x0A);
03769         break;
03770     }
03771 }
03772 
03773 static void
03774 oconv_newline(void (*func)(nkf_char, nkf_char))
03775 {
03776     switch (eolmode_f ? eolmode_f : DEFAULT_NEWLINE) {
03777       case CRLF:
03778         (*func)(0, 0x0D);
03779         (*func)(0, 0x0A);
03780         break;
03781       case CR:
03782         (*func)(0, 0x0D);
03783         break;
03784       case LF:
03785         (*func)(0, 0x0A);
03786         break;
03787     }
03788 }
03789 
03790 /*
03791    Return value of fold_conv()
03792 
03793    LF  add newline  and output char
03794    CR  add newline  and output nothing
03795    SP  space
03796    0   skip
03797    1   (or else) normal output
03798 
03799    fold state in prev (previous character)
03800 
03801    >0x80 Japanese (X0208/X0201)
03802    <0x80 ASCII
03803    LF    new line
03804    SP    space
03805 
03806    This fold algorithm does not preserve leading spaces in a line.
03807    This is the main difference from fmt.
03808  */
03809 
/* Column width used by fold_conv(): 2 when c2 is non-zero, otherwise 1.
   c1 is accepted for symmetry with the (c2, c1) call sites but is not used.
   The parameter is parenthesized so an expression argument is tested as a
   whole. */
#define char_size(c2,c1) ((c2) ? 2 : 1)
03811 
/*
 * Line-folding output filter.
 *
 * Takes one character (c2, c1), updates the fold state kept in f_prev
 * (class of the previous character) and f_line (columns used on the current
 * line), and chooses a fold_state that decides what is sent to o_fconv:
 *   LF       a newline followed by the character
 *   CR       a newline only
 *   SP, TAB  a single ASCII space
 *   0        nothing
 *   other    the character unchanged
 * fold_len is the column limit.  Up to fold_margin further columns are
 * tolerated for characters that must not start a line (kinsoku); beyond
 * that the line is folded unconditionally.  fold_preserve_f selects the
 * mode in which input line breaks are kept as line breaks.
 */
03812 static void
03813 fold_conv(nkf_char c2, nkf_char c1)
03814 {
03815     nkf_char prev0;
03816     nkf_char fold_state;
03817 
03818     if (c1== CR && !fold_preserve_f) {
03819         fold_state=0;  /* ignore cr */
03820     }else if (c1== LF&&f_prev==CR && fold_preserve_f) {
03821         f_prev = LF;
03822         fold_state=0;  /* LF right after CR: the CR already produced the newline */
03823     } else if (c1== BS) {
03824         if (f_line>0) f_line--;
03825         fold_state =  1;
03826     } else if (c2==EOF && f_line != 0) {    /* close open last line */
03827         fold_state = LF;
03828     } else if ((c1==LF && !fold_preserve_f)
03829                || ((c1==CR||(c1==LF&&f_prev!=CR))
03830                    && fold_preserve_f)) {
03831         /* new line */
03832         if (fold_preserve_f) {
03833             f_prev = c1;
03834             f_line = 0;
03835             fold_state =  CR;
03836         } else if ((f_prev == c1 && !fold_preserve_f)
03837                    || (f_prev == LF && fold_preserve_f)
03838                   ) {        /* duplicate newline */
03839             if (f_line) {
03840                 f_line = 0;
03841                 fold_state =  LF;    /* output two newline */
03842             } else {
03843                 f_line = 0;
03844                 fold_state =  1;
03845             }
03846         } else  {
03847             if (f_prev&0x80) {     /* Japanese? */
03848                 f_prev = c1;
03849                 fold_state =  0;       /* ignore given single newline */
03850             } else if (f_prev==SP) {
03851                 fold_state =  0;
03852             } else {
03853                 f_prev = c1;
03854                 if (++f_line<=fold_len)
03855                     fold_state =  SP;
03856                 else {
03857                     f_line = 0;
03858                     fold_state =  CR;        /* fold and output nothing */
03859                 }
03860             }
03861         }
03862     } else if (c1=='\f') {
03863         f_prev = LF;
03864         f_line = 0;
03865         fold_state =  LF;            /* output newline and clear */
03866     } else if ((c2==0 && nkf_isblank(c1)) || (c2 == '!' && c1 == '!')) {
03867         /* JIS X 0208 space (0x2121) or ASCII blank */
03868         if (f_prev == SP) {
03869             fold_state = 0;         /* remove duplicate spaces */
03870         } else {
03871             f_prev = SP;
03872             if (++f_line<=fold_len)
03873                 fold_state = SP;         /* output ASCII space only */
03874             else {
03875                 f_prev = SP; f_line = 0;
03876                 fold_state = CR;        /* fold and output nothing */
03877             }
03878         }
03879     } else {
03880         prev0 = f_prev; /* we still need this one... , but almost done */
03881         f_prev = c1;
03882         if (c2 || c2 == JIS_X_0201_1976_K)
03883             f_prev |= 0x80;  /* this is Japanese */
03884         f_line += c2 == JIS_X_0201_1976_K ? 1: char_size(c2,c1);
03885         if (f_line<=fold_len) {   /* normal case */
03886             fold_state = 1;
03887         } else {
03888             if (f_line>fold_len+fold_margin) { /* too many kinsoku suspension */
03889                 f_line = char_size(c2,c1);
03890                 fold_state =  LF;       /* We can't wait, do fold now */
03891             } else if (c2 == JIS_X_0201_1976_K) {
03892                 /* simple kinsoku rules  return 1 means no folding  */
03893                 if (c1==(0xde&0x7f)) fold_state = 1; /* 0xDE halfwidth voiced sound mark */
03894                 else if (c1==(0xdf&0x7f)) fold_state = 1; /* 0xDF halfwidth semi-voiced sound mark */
03895                 else if (c1==(0xa4&0x7f)) fold_state = 1; /* 0xA4 halfwidth ideographic comma */
03896                 else if (c1==(0xa3&0x7f)) fold_state = 1; /* 0xA3 halfwidth right corner bracket */
03897                 else if (c1==(0xa1&0x7f)) fold_state = 1; /* 0xA1 halfwidth ideographic full stop */
03898                 else if (c1==(0xb0&0x7f)) fold_state = 1; /* 0xB0 halfwidth prolonged sound mark */
03899                 else if (SP<=c1 && c1<=(0xdf&0x7f)) {      /* X0201 */
03900                     f_line = 1;
03901                     fold_state = LF;/* add one new f_line before this character */
03902                 } else {
03903                     /* NOTE(review): identical to the branch above */
03903                     f_line = 1;
03904                     fold_state = LF;/* add one new f_line before this character */
03905                 }
03906             } else if (c2==0) {
03907                 /* kinsoku point in ASCII */
03908                 if (  c1==')'||    /* closing counterparts of { [ ( */
03909                     c1==']'||
03910                     c1=='}'||
03911                     c1=='.'||
03912                     c1==','||
03913                     c1=='!'||
03914                     c1=='?'||
03915                     c1=='/'||
03916                     c1==':'||
03917                     c1==';') {
03918                     fold_state = 1;
03919                     /* just after special */
03920                 } else if (!is_alnum(prev0)) {
03921                     f_line = char_size(c2,c1);
03922                     fold_state = LF;
03923                 } else if ((prev0==SP) ||   /* ignored new f_line */
03924                            (prev0==LF)||        /* ignored new f_line */
03925                            (prev0&0x80)) {        /* X0208 - ASCII */
03926                     f_line = char_size(c2,c1);
03927                     fold_state = LF;/* add one new f_line before this character */
03928                 } else {
03929                     fold_state = 1;  /* default no fold in ASCII */
03930                 }
03931             } else {
03932                 if (c2=='!') {
03933                     if (c1=='"')  fold_state = 1; /* 0x2122 ideographic comma */
03934                     else if (c1=='#')  fold_state = 1; /* 0x2123 ideographic full stop */
03935                     else if (c1=='W')  fold_state = 1; /* 0x2157 right corner bracket */
03936                     else if (c1=='K')  fold_state = 1; /* 0x214B fullwidth right parenthesis */
03937                     else if (c1=='$')  fold_state = 1; /* 0x2124 fullwidth comma */
03938                     else if (c1=='%')  fold_state = 1; /* 0x2125 fullwidth full stop */
03939                     else if (c1=='\'') fold_state = 1; /* 0x2127 fullwidth colon */
03940                     else if (c1=='(')  fold_state = 1; /* 0x2128 fullwidth semicolon */
03941                     else if (c1==')')  fold_state = 1; /* 0x2129 fullwidth question mark */
03942                     else if (c1=='*')  fold_state = 1; /* 0x212A fullwidth exclamation mark */
03943                     else if (c1=='+')  fold_state = 1; /* 0x212B voiced sound mark */
03944                     else if (c1==',')  fold_state = 1; /* 0x212C semi-voiced sound mark */
03945                     /* default no fold in kinsoku */
03946                     else {
03947                         fold_state = LF;
03948                         f_line = char_size(c2,c1);
03949                         /* add one new f_line before this character */
03950                     }
03951                 } else {
03952                     f_line = char_size(c2,c1);
03953                     fold_state = LF;
03954                     /* add one new f_line before this character */
03955                 }
03956             }
03957         }
03958     }
03959     /* terminator process */
03960     switch(fold_state) {
03961     case LF:
03962         oconv_newline(o_fconv);
03963         (*o_fconv)(c2,c1);
03964         break;
03965     case 0:
03966         return;
03967     case CR:
03968         oconv_newline(o_fconv);
03969         break;
03970     case TAB:
03971     case SP:
03972         (*o_fconv)(0,SP);
03973         break;
03974     default:
03975         (*o_fconv)(c2,c1);
03976     }
03977 }
03978 
03979 static nkf_char z_prev2=0,z_prev1=0;
03980 
03981 static void
03982 z_conv(nkf_char c2, nkf_char c1)
03983 {
03984 
03985     /* if (c2) c1 &= 0x7f; assertion */
03986 
03987     if (c2 == JIS_X_0201_1976_K && (c1 == 0x20 || c1 == 0x7D || c1 == 0x7E)) {
03988         (*o_zconv)(c2,c1);
03989         return;
03990     }
03991 
03992     if (x0201_f) {
03993         if (z_prev2 == JIS_X_0201_1976_K) {
03994             if (c2 == JIS_X_0201_1976_K) {
03995                 if (c1 == (0xde&0x7f)) { /* $BByE@(B */
03996                     z_prev2 = 0;
03997                     (*o_zconv)(dv[(z_prev1-SP)*2], dv[(z_prev1-SP)*2+1]);
03998                     return;
03999                 } else if (c1 == (0xdf&0x7f) && ev[(z_prev1-SP)*2]) {  /* $BH>ByE@(B */
04000                     z_prev2 = 0;
04001                     (*o_zconv)(ev[(z_prev1-SP)*2], ev[(z_prev1-SP)*2+1]);
04002                     return;
04003                 } else if (x0213_f && c1 == (0xdf&0x7f) && ev_x0213[(z_prev1-SP)*2]) {  /* $BH>ByE@(B */
04004                     z_prev2 = 0;
04005                     (*o_zconv)(ev_x0213[(z_prev1-SP)*2], ev_x0213[(z_prev1-SP)*2+1]);
04006                     return;
04007                 }
04008             }
04009             z_prev2 = 0;
04010             (*o_zconv)(cv[(z_prev1-SP)*2], cv[(z_prev1-SP)*2+1]);
04011         }
04012         if (c2 == JIS_X_0201_1976_K) {
04013             if (dv[(c1-SP)*2] || ev[(c1-SP)*2] || (x0213_f && ev_x0213[(c1-SP)*2])) {
04014                 /* wait for $BByE@(B or $BH>ByE@(B */
04015                 z_prev1 = c1;
04016                 z_prev2 = c2;
04017                 return;
04018             } else {
04019                 (*o_zconv)(cv[(c1-SP)*2], cv[(c1-SP)*2+1]);
04020                 return;
04021             }
04022         }
04023     }
04024 
04025     if (c2 == EOF) {
04026         (*o_zconv)(c2, c1);
04027         return;
04028     }
04029 
04030     if (alpha_f&1 && c2 == 0x23) {
04031         /* JISX0208 Alphabet */
04032         c2 = 0;
04033     } else if (c2 == 0x21) {
04034         /* JISX0208 Kigou */
04035         if (0x21==c1) {
04036             if (alpha_f&2) {
04037                 c2 = 0;
04038                 c1 = SP;
04039             } else if (alpha_f&4) {
04040                 (*o_zconv)(0, SP);
04041                 (*o_zconv)(0, SP);
04042                 return;
04043             }
04044         } else if (alpha_f&1 && 0x20<c1 && c1<0x7f && fv[c1-0x20]) {
04045             c2 =  0;
04046             c1 = fv[c1-0x20];
04047         }
04048     }
04049 
04050     if (alpha_f&8 && c2 == 0) {
04051         /* HTML Entity */
04052         const char *entity = 0;
04053         switch (c1){
04054         case '>': entity = "&gt;"; break;
04055         case '<': entity = "&lt;"; break;
04056         case '\"': entity = "&quot;"; break;
04057         case '&': entity = "&amp;"; break;
04058         }
04059         if (entity){
04060             while (*entity) (*o_zconv)(0, *entity++);
04061             return;
04062         }
04063     }
04064 
04065     if (alpha_f & 16) {
04066         /* JIS X 0208 Katakana to JIS X 0201 Katakana */
04067         if (c2 == 0x21) {
04068             nkf_char c = 0;
04069             switch (c1) {
04070             case 0x23:
04071                 /* U+3002 (0x8142) Ideographic Full Stop -> U+FF61 (0xA1) Halfwidth Ideographic Full Stop */
04072                 c = 0xA1;
04073                 break;
04074             case 0x56:
04075                 /* U+300C (0x8175) Left Corner Bracket -> U+FF62 (0xA2) Halfwidth Left Corner Bracket */
04076                 c = 0xA2;
04077                 break;
04078             case 0x57:
04079                 /* U+300D (0x8176) Right Corner Bracket -> U+FF63 (0xA3) Halfwidth Right Corner Bracket */
04080                 c = 0xA3;
04081                 break;
04082             case 0x22:
04083                 /* U+3001 (0x8141) Ideographic Comma -> U+FF64 (0xA4) Halfwidth Ideographic Comma */
04084                 c = 0xA4;
04085                 break;
04086             case 0x26:
04087                 /* U+30FB (0x8145) Katakana Middle Dot -> U+FF65 (0xA5) Halfwidth Katakana Middle Dot */
04088                 c = 0xA5;
04089                 break;
04090             case 0x3C:
04091                 /* U+30FC (0x815B) Katakana-Hiragana Prolonged Sound Mark -> U+FF70 (0xB0) Halfwidth Katakana-Hiragana Prolonged Sound Mark */
04092                 c = 0xB0;
04093                 break;
04094             case 0x2B:
04095                 /* U+309B (0x814A) Katakana-Hiragana Voiced Sound Mark -> U+FF9E (0xDE) Halfwidth Katakana Voiced Sound Mark */
04096                 c = 0xDE;
04097                 break;
04098             case 0x2C:
04099                 /* U+309C (0x814B) Katakana-Hiragana Semi-Voiced Sound Mark -> U+FF9F (0xDF) Halfwidth Katakana Semi-Voiced Sound Mark */
04100                 c = 0xDF;
04101                 break;
04102             }
04103             if (c) {
04104                 (*o_zconv)(JIS_X_0201_1976_K, c);
04105                 return;
04106             }
04107         } else if (c2 == 0x25) {
04108             /* JISX0208 Katakana */
04109             static const int fullwidth_to_halfwidth[] =
04110             {
04111                 0x0000, 0x2700, 0x3100, 0x2800, 0x3200, 0x2900, 0x3300, 0x2A00,
04112                 0x3400, 0x2B00, 0x3500, 0x3600, 0x365E, 0x3700, 0x375E, 0x3800,
04113                 0x385E, 0x3900, 0x395E, 0x3A00, 0x3A5E, 0x3B00, 0x3B5E, 0x3C00,
04114                 0x3C5E, 0x3D00, 0x3D5E, 0x3E00, 0x3E5E, 0x3F00, 0x3F5E, 0x4000,
04115                 0x405E, 0x4100, 0x415E, 0x2F00, 0x4200, 0x425E, 0x4300, 0x435E,
04116                 0x4400, 0x445E, 0x4500, 0x4600, 0x4700, 0x4800, 0x4900, 0x4A00,
04117                 0x4A5E, 0x4A5F, 0x4B00, 0x4B5E, 0x4B5F, 0x4C00, 0x4C5E, 0x4C5F,
04118                 0x4D00, 0x4D5E, 0x4D5F, 0x4E00, 0x4E5E, 0x4E5F, 0x4F00, 0x5000,
04119                 0x5100, 0x5200, 0x5300, 0x2C00, 0x5400, 0x2D00, 0x5500, 0x2E00,
04120                 0x5600, 0x5700, 0x5800, 0x5900, 0x5A00, 0x5B00, 0x0000, 0x5C00,
04121                 0x0000, 0x0000, 0x2600, 0x5D00, 0x335E, 0x0000, 0x0000, 0x365F,
04122                 0x375F, 0x385F, 0x395F, 0x3A5F, 0x3E5F, 0x425F, 0x445F, 0x0000
04123             };
04124             if (fullwidth_to_halfwidth[c1-0x20]){
04125                 c2 = fullwidth_to_halfwidth[c1-0x20];
04126                 (*o_zconv)(JIS_X_0201_1976_K, c2>>8);
04127                 if (c2 & 0xFF) {
04128                     (*o_zconv)(JIS_X_0201_1976_K, c2&0xFF);
04129                 }
04130                 return;
04131             }
04132         } else if (c2 == 0 && nkf_char_unicode_p(c1) &&
04133             ((c1&VALUE_MASK) == 0x3099 || (c1&VALUE_MASK) == 0x309A)) { /* $B9g@.MQByE@!&H>ByE@(B */
04134             (*o_zconv)(JIS_X_0201_1976_K, 0x5E + (c1&VALUE_MASK) - 0x3099);
04135             return;
04136         }
04137     }
04138     (*o_zconv)(c2,c1);
04139 }
04140 
04141 
/* ROT13: rotate an ASCII letter by 13 places within its own case; any
   other value is returned unchanged.  Every use of the parameter is
   parenthesized so an expression argument is handled as a whole.
   The argument is evaluated more than once: pass no side effects. */
#define rot13(c)  ( \
                   ((c) <  'A') ? (c) : \
                   ((c) <= 'M') ? ((c) + 13) : \
                   ((c) <= 'Z') ? ((c) - 13) : \
                   ((c) <  'a') ? (c) : \
                   ((c) <= 'm') ? ((c) + 13) : \
                   ((c) <= 'z') ? ((c) - 13) : \
                   (c) \
                  )

/* ROT47: rotate a value in '!'..'~' by 47 places within that range; any
   other value is returned unchanged.  The argument is evaluated more than
   once: pass no side effects. */
#define  rot47(c) ( \
                   ((c) <  '!') ? (c) : \
                   ((c) <= 'O') ? ((c) + 47) : \
                   ((c) <= '~') ? ((c) - 47) : \
                   (c) \
                  )
04158 
04159 static void
04160 rot_conv(nkf_char c2, nkf_char c1)
04161 {
04162     if (c2 == 0 || c2 == JIS_X_0201_1976_K || c2 == ISO_8859_1) {
04163         c1 = rot13(c1);
04164     } else if (c2) {
04165         c1 = rot47(c1);
04166         c2 = rot47(c2);
04167     }
04168     (*o_rot_conv)(c2,c1);
04169 }
04170 
04171 static void
04172 hira_conv(nkf_char c2, nkf_char c1)
04173 {
04174     if (hira_f & 1) {
04175         if (c2 == 0x25) {
04176             if (0x20 < c1 && c1 < 0x74) {
04177                 c2 = 0x24;
04178                 (*o_hira_conv)(c2,c1);
04179                 return;
04180             } else if (c1 == 0x74 && nkf_enc_unicode_p(output_encoding)) {
04181                 c2 = 0;
04182                 c1 = nkf_char_unicode_new(0x3094);
04183                 (*o_hira_conv)(c2,c1);
04184                 return;
04185             }
04186         } else if (c2 == 0x21 && (c1 == 0x33 || c1 == 0x34)) {
04187             c1 += 2;
04188             (*o_hira_conv)(c2,c1);
04189             return;
04190         }
04191     }
04192     if (hira_f & 2) {
04193         if (c2 == 0 && c1 == nkf_char_unicode_new(0x3094)) {
04194             c2 = 0x25;
04195             c1 = 0x74;
04196         } else if (c2 == 0x24 && 0x20 < c1 && c1 < 0x74) {
04197             c2 = 0x25;
04198         } else if (c2 == 0x21 && (c1 == 0x35 || c1 == 0x36)) {
04199             c1 -= 2;
04200         }
04201     }
04202     (*o_hira_conv)(c2,c1);
04203 }
04204 
04205 
04206 static void
04207 iso2022jp_check_conv(nkf_char c2, nkf_char c1)
04208 {
04209 #define RANGE_NUM_MAX 18
04210     static const nkf_char range[RANGE_NUM_MAX][2] = {
04211         {0x222f, 0x2239,},
04212         {0x2242, 0x2249,},
04213         {0x2251, 0x225b,},
04214         {0x226b, 0x2271,},
04215         {0x227a, 0x227d,},
04216         {0x2321, 0x232f,},
04217         {0x233a, 0x2340,},
04218         {0x235b, 0x2360,},
04219         {0x237b, 0x237e,},
04220         {0x2474, 0x247e,},
04221         {0x2577, 0x257e,},
04222         {0x2639, 0x2640,},
04223         {0x2659, 0x267e,},
04224         {0x2742, 0x2750,},
04225         {0x2772, 0x277e,},
04226         {0x2841, 0x287e,},
04227         {0x4f54, 0x4f7e,},
04228         {0x7425, 0x747e},
04229     };
04230     nkf_char i;
04231     nkf_char start, end, c;
04232 
04233     if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
04234         c2 = GETA1;
04235         c1 = GETA2;
04236     }
04237     if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
04238         c2 = GETA1;
04239         c1 = GETA2;
04240     }
04241 
04242     for (i = 0; i < RANGE_NUM_MAX; i++) {
04243         start = range[i][0];
04244         end   = range[i][1];
04245         c     = (c2 << 8) + c1;
04246         if (c >= start && c <= end) {
04247             c2 = GETA1;
04248             c1 = GETA2;
04249         }
04250     }
04251     (*o_iso2022jp_check_conv)(c2,c1);
04252 }
04253 
04254 
04255 /* MIME encoded-word decoding, e.g.  =?ISO-2022-JP?B?HOGE HOGE?= */
04256 
/* Encoded-word prefixes recognized by mime_begin_strict(); "\075" is '='.
   The list is NULL-terminated.  The arrays below have one entry per
   pattern, in the same order, and mime_begin_strict() indexes
   mime_priority_func with the pattern index. */
04257 static const unsigned char *mime_pattern[] = {
04258     (const unsigned char *)"\075?EUC-JP?B?",
04259     (const unsigned char *)"\075?SHIFT_JIS?B?",
04260     (const unsigned char *)"\075?ISO-8859-1?Q?",
04261     (const unsigned char *)"\075?ISO-8859-1?B?",
04262     (const unsigned char *)"\075?ISO-2022-JP?B?",
04263     (const unsigned char *)"\075?ISO-2022-JP?B?",
04264     (const unsigned char *)"\075?ISO-2022-JP?Q?",
04265 #if defined(UTF8_INPUT_ENABLE)
04266     (const unsigned char *)"\075?UTF-8?B?",
04267     (const unsigned char *)"\075?UTF-8?Q?",
04268 #endif
04269     (const unsigned char *)"\075?US-ASCII?Q?",
04270     NULL
04271 };
04272 
04273 
04274 /* Marker used to raise the priority of the matching input code: the
         converter handed to set_iconv() for each pattern (0 = none) */
04275 nkf_char (*mime_priority_func[])(nkf_char c2, nkf_char c1, nkf_char c0) = {
04276     e_iconv, s_iconv, 0, 0, 0, 0, 0,
04277 #if defined(UTF8_INPUT_ENABLE)
04278     w_iconv, w_iconv,
04279 #endif
04280     0,
04281 };
04282 
/* Character set named by each pattern (presumably consumed by the MIME
   encoder elsewhere in the file -- TODO confirm); 0-terminated. */
04283 static const nkf_char mime_encode[] = {
04284     EUC_JP, SHIFT_JIS, ISO_8859_1, ISO_8859_1, JIS_X_0208, JIS_X_0201_1976_K, JIS_X_0201_1976_K,
04285 #if defined(UTF8_INPUT_ENABLE)
04286     UTF_8, UTF_8,
04287 #endif
04288     ASCII,
04289     0
04290 };
04291 
/* Transfer encoding letter of each pattern: 'B' or 'Q'; 0-terminated. */
04292 static const nkf_char mime_encode_method[] = {
04293     'B', 'B','Q', 'B', 'B', 'B', 'Q',
04294 #if defined(UTF8_INPUT_ENABLE)
04295     'B', 'Q',
04296 #endif
04297     'Q',
04298     0
04299 };
04300 
04301 
04302 /* MIME preprocessor fifo */
04303 
04304 #define MIME_BUF_SIZE   (1024)    /* ring buffer size; must be a power of two */
04305 #define MIME_BUF_MASK   (MIME_BUF_SIZE-1)
/* Slot n of the ring buffer; the index wraps by masking, so the counters
   in mime_input_state may run freely. */
04306 #define mime_input_buf(n)        mime_input_state.buf[(n)&MIME_BUF_MASK]
04307 static struct {
04308     unsigned char buf[MIME_BUF_SIZE];
04309     unsigned int  top;   /* moved down by mime_ungetc() when a character is pushed back */
04310     unsigned int  last;  /* decoded */
04311     unsigned int  input; /* undecoded */
04312 } mime_input_state;
/* iconv in effect before mime_begin_strict() replaced it; restored and
   cleared by unswitch_mime_getc(). */
04313 static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;
04314 
/* Size of the recovery buffer in mime_begin_strict() and the maximum
   number of preamble characters mime_begin() examines. */
04315 #define MAXRECOVER 20
04316 
04317 static void
04318 mime_input_buf_unshift(nkf_char c)
04319 {
04320     mime_input_buf(--mime_input_state.top) = (unsigned char)c;
04321 }
04322 
04323 static nkf_char
04324 mime_ungetc(nkf_char c, ARG_UNUSED FILE *f)
04325 {
04326     mime_input_buf_unshift(c);
04327     return c;
04328 }
04329 
04330 static nkf_char
04331 mime_ungetc_buf(nkf_char c, FILE *f)
04332 {
04333     if (mimebuf_f)
04334         (*i_mungetc_buf)(c,f);
04335     else
04336         mime_input_buf(--mime_input_state.input) = (unsigned char)c;
04337     return c;
04338 }
04339 
04340 static nkf_char
04341 mime_getc_buf(FILE *f)
04342 {
04343     /* we don't keep eof of mime_input_buf, becase it contains ?= as
04344        a terminator. It was checked in mime_integrity. */
04345     return ((mimebuf_f)?
04346             (*i_mgetc_buf)(f):mime_input_buf(mime_input_state.input++));
04347 }
04348 
04349 static void
04350 switch_mime_getc(void)
04351 {
04352     if (i_getc!=mime_getc) {
04353         i_mgetc = i_getc; i_getc = mime_getc;
04354         i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
04355         if(mime_f==STRICT_MIME) {
04356             i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
04357             i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
04358         }
04359     }
04360 }
04361 
04362 static void
04363 unswitch_mime_getc(void)
04364 {
04365     if(mime_f==STRICT_MIME) {
04366         i_mgetc = i_mgetc_buf;
04367         i_mungetc = i_mungetc_buf;
04368     }
04369     i_getc = i_mgetc;
04370     i_ungetc = i_mungetc;
04371     if(mime_iconv_back)set_iconv(FALSE, mime_iconv_back);
04372     mime_iconv_back = NULL;
04373 }
04374 
04375 static nkf_char
04376 mime_integrity(FILE *f, const unsigned char *p)
04377 {
04378     nkf_char c,d;
04379     unsigned int q;
04380     /* In buffered mode, read until =? or NL or buffer full
04381      */
04382     mime_input_state.input = mime_input_state.top;
04383     mime_input_state.last = mime_input_state.top;
04384 
04385     while(*p) mime_input_buf(mime_input_state.input++) = *p++;
04386     d = 0;
04387     q = mime_input_state.input;
04388     while((c=(*i_getc)(f))!=EOF) {
04389         if (((mime_input_state.input-mime_input_state.top)&MIME_BUF_MASK)==0) {
04390             break;   /* buffer full */
04391         }
04392         if (c=='=' && d=='?') {
04393             /* checked. skip header, start decode */
04394             mime_input_buf(mime_input_state.input++) = (unsigned char)c;
04395             /* mime_last_input = mime_input_state.input; */
04396             mime_input_state.input = q;
04397             switch_mime_getc();
04398             return 1;
04399         }
04400         if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
04401             break;
04402         /* Should we check length mod 4? */
04403         mime_input_buf(mime_input_state.input++) = (unsigned char)c;
04404         d=c;
04405     }
04406     /* In case of Incomplete MIME, no MIME decode  */
04407     mime_input_buf(mime_input_state.input++) = (unsigned char)c;
04408     mime_input_state.last = mime_input_state.input;     /* point undecoded buffer */
04409     mime_decode_mode = 1;              /* no decode on mime_input_buf last in mime_getc */
04410     switch_mime_getc();         /* anyway we need buffered getc */
04411     return 1;
04412 }
04413 
/*
 * Strict recognition of an encoded-word preamble; "=?" has already been
 * consumed by the caller.
 *
 * The following input is compared, ignoring case, against mime_pattern[].
 * When the current pattern stops matching, a later pattern sharing the
 * characters matched so far is tried.  If none fits, the last character is
 * pushed back with i_ungetc, the characters consumed so far are sent to
 * oconv from the recovery buffer, and that last character is returned.
 *
 * On a match, mime_decode_mode is set to the encoding letter of the
 * pattern, the current iconv is saved in mime_iconv_back and replaced by
 * the pattern's mime_priority_func entry.  For 'B' without unbuf_f the
 * result of mime_integrity() is returned; otherwise input is switched to
 * the MIME reader and the last character read is returned.
 */
04414 static nkf_char
04415 mime_begin_strict(FILE *f)
04416 {
04417     nkf_char c1 = 0;
04418     int i,j,k;
04419     const unsigned char *p,*q;
04420     nkf_char r[MAXRECOVER];    /* recovery buffer, max mime pattern length */
04421 
04422     mime_decode_mode = FALSE;
04423     /* =? has been checked */
04424     j = 0;
04425     p = mime_pattern[j];
04426     r[0]='='; r[1]='?';
04427 
04428     for(i=2;p[i]>SP;i++) {                   /* start after "=?"; stops at the pattern's NUL */
04429         if (((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i]) {
04430             /* pattern fails, try next one */
04431             q = p;
04432             while (mime_pattern[++j]) {
04433                 p = mime_pattern[j];
04434                 for(k=2;k<i;k++)              /* assume length(p) > i */
04435                     if (p[k]!=q[k]) break;
04436                 if (k==i && nkf_toupper(c1)==p[k]) break;
04437             }
04438             p = mime_pattern[j];
04439             if (p) continue;  /* found next one, continue */
04440             /* all fails, output from recovery buffer */
04441             (*i_ungetc)(c1,f);
04442             for(j=0;j<i;j++) {
04443                 (*oconv)(0,r[j]);
04444             }
04445             return c1;
04446         }
04447     }
04448     mime_decode_mode = p[i-2];    /* the letter before the final '?': 'B' or 'Q' */
04449 
04450     mime_iconv_back = iconv;
04451     set_iconv(FALSE, mime_priority_func[j]);
04452     clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME);
04453 
04454     if (mime_decode_mode=='B') {
04455         mimebuf_f = unbuf_f;
04456         if (!unbuf_f) {
04457             /* do MIME integrity check */
04458             return mime_integrity(f,mime_pattern[j]);
04459         }
04460     }
04461     switch_mime_getc();
04462     mimebuf_f = TRUE;
04463     return c1;
04464 }
04465 
/*
 * Non-strict recognition of an encoded-word preamble; "=?" has already been
 * consumed by the caller.
 *
 * Every character read is also appended to the ring buffer at
 * mime_input_state.last.  A charset name made of alphanumerics, '-', '_',
 * SP, CR or LF is skipped, then "?B?" or "?Q?" (either case) is expected.
 * At most MAXRECOVER characters are examined.
 *
 * If mime_decode_mode is still unset afterwards, it is set to 1 so that the
 * buffered characters are re-read without decoding.  Otherwise
 * mime_input_state.last is reset to its starting value, discarding the
 * preamble.  Input is switched to the MIME reader in both cases.  The
 * return value is the last character read.
 */
04466 static nkf_char
04467 mime_begin(FILE *f)
04468 {
04469     nkf_char c1 = 0;
04470     int i,k;
04471 
04472     /* In NONSTRICT mode, only =? is checked. In case of failure, we  */
04473     /* re-read and convert again from mime_buffer.  */
04474 
04475     /* =? has been checked */
04476     k = mime_input_state.last;
04477     mime_input_buf(mime_input_state.last++)='='; mime_input_buf(mime_input_state.last++)='?';
04478     for(i=2;i<MAXRECOVER;i++) {                   /* start at =? */
04479         /* We accept any character type even if it is broken by new lines */
04480         c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
04481         if (c1==LF||c1==SP||c1==CR||
04482             c1=='-'||c1=='_'||is_alnum(c1)) continue;
04483         if (c1=='=') {
04484             /* Failed. But this could be another MIME preamble */
04485             (*i_ungetc)(c1,f);
04486             mime_input_state.last--;
04487             break;
04488         }
04489         if (c1!='?') break;
04490         else {
04491             /* c1=='?' */
04492             c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
04493             if (!(++i<MAXRECOVER) || c1==EOF) break;
04494             if (c1=='b'||c1=='B') {
04495                 mime_decode_mode = 'B';
04496             } else if (c1=='q'||c1=='Q') {
04497                 mime_decode_mode = 'Q';
04498             } else {
04499                 break;
04500             }
04501             c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
04502             if (!(++i<MAXRECOVER) || c1==EOF) break;
04503             if (c1!='?') {
04504                 mime_decode_mode = FALSE;
04505             }
04506             break;
04507         }
04508     }
04509     switch_mime_getc();
04510     if (!mime_decode_mode) {
04511         /* false MIME preamble, restart from mime_buffer */
04512         mime_decode_mode = 1;  /* no decode, but read from the mime_buffer */
04513         /* Since we are in MIME mode until buffer becomes empty,    */
04514         /* we never go into mime_begin again for a while.           */
04515         return c1;
04516     }
04517     /* discard mime preamble, and goto MIME mode */
04518     mime_input_state.last = k;
04519     /* do no MIME integrity check */
04520     return c1;   /* used only for checking EOF */
04521 }
04522 
#ifdef CHECK_OPTION
/* Output function that discards its argument. */
static void
no_putc(ARG_UNUSED nkf_char c)
{
    /* intentionally empty */
}

/* When debug_f is set, write str and a newline to stderr; a null pointer
   is printed as the text "NULL". */
static void
debug(const char *str)
{
    if (!debug_f) {
        return;
    }
    fprintf(stderr, "%s\n", str != NULL ? str : "NULL");
}
#endif
04538 
04539 static void
04540 set_input_codename(const char *codename)
04541 {
04542     if (!input_codename) {
04543         input_codename = codename;
04544     } else if (strcmp(codename, input_codename) != 0) {
04545         input_codename = "";
04546     }
04547 }
04548 
04549 static const char*
04550 get_guessed_code(void)
04551 {
04552     if (input_codename && !*input_codename) {
04553         input_codename = "BINARY";
04554     } else {
04555         struct input_code *p = find_inputcode_byfunc(iconv);
04556         if (!input_codename) {
04557             input_codename = "ASCII";
04558         } else if (strcmp(input_codename, "Shift_JIS") == 0) {
04559             if (p->score & (SCORE_DEPEND|SCORE_CP932))
04560                 input_codename = "CP932";
04561         } else if (strcmp(input_codename, "EUC-JP") == 0) {
04562             if (p->score & SCORE_X0213)
04563                 input_codename = "EUC-JIS-2004";
04564             else if (p->score & (SCORE_X0212))
04565                 input_codename = "EUCJP-MS";
04566             else if (p->score & (SCORE_DEPEND|SCORE_CP932))
04567                 input_codename = "CP51932";
04568         } else if (strcmp(input_codename, "ISO-2022-JP") == 0) {
04569             if (p->score & (SCORE_KANA))
04570                 input_codename = "CP50221";
04571             else if (p->score & (SCORE_DEPEND|SCORE_CP932))
04572                 input_codename = "CP50220";
04573         }
04574     }
04575     return input_codename;
04576 }
04577 
04578 #if !defined(PERL_XS) && !defined(WIN32DLL)
04579 static void
04580 print_guessed_code(char *filename)
04581 {
04582     if (filename != NULL) printf("%s: ", filename);
04583     if (input_codename && !*input_codename) {
04584         printf("BINARY\n");
04585     } else {
04586         input_codename = get_guessed_code();
04587         if (guess_f == 1) {
04588             printf("%s\n", input_codename);
04589         } else {
04590             printf("%s%s%s%s\n",
04591                    input_codename,
04592                    iconv != w_iconv16 && iconv != w_iconv32 ? "" :
04593                    input_endian == ENDIAN_LITTLE ? " LE" :
04594                    input_endian == ENDIAN_BIG ? " BE" :
04595                    "[BUG]",
04596                    input_bom_f ? " (BOM)" : "",
04597                    input_eol == CR   ? " (CR)" :
04598                    input_eol == LF   ? " (LF)" :
04599                    input_eol == CRLF ? " (CRLF)" :
04600                    input_eol == EOF  ? " (MIXED NL)" :
04601                    "");
04602         }
04603     }
04604 }
04605 #endif /*WIN32DLL*/
04606 
04607 #ifdef INPUT_OPTION
04608 
04609 static nkf_char
04610 hex_getc(nkf_char ch, FILE *f, nkf_char (*g)(FILE *f), nkf_char (*u)(nkf_char c, FILE *f))
04611 {
04612     nkf_char c1, c2, c3;
04613     c1 = (*g)(f);
04614     if (c1 != ch){
04615         return c1;
04616     }
04617     c2 = (*g)(f);
04618     if (!nkf_isxdigit(c2)){
04619         (*u)(c2, f);
04620         return c1;
04621     }
04622     c3 = (*g)(f);
04623     if (!nkf_isxdigit(c3)){
04624         (*u)(c2, f);
04625         (*u)(c3, f);
04626         return c1;
04627     }
04628     return (hex2bin(c2) << 4) | hex2bin(c3);
04629 }
04630 
04631 static nkf_char
04632 cap_getc(FILE *f)
04633 {
04634     return hex_getc(':', f, i_cgetc, i_cungetc);
04635 }
04636 
04637 static nkf_char
04638 cap_ungetc(nkf_char c, FILE *f)
04639 {
04640     return (*i_cungetc)(c, f);
04641 }
04642 
04643 static nkf_char
04644 url_getc(FILE *f)
04645 {
04646     return hex_getc('%', f, i_ugetc, i_uungetc);
04647 }
04648 
04649 static nkf_char
04650 url_ungetc(nkf_char c, FILE *f)
04651 {
04652     return (*i_uungetc)(c, f);
04653 }
04654 #endif
04655 
04656 #ifdef NUMCHAR_OPTION
04657 static nkf_char
04658 numchar_getc(FILE *f)
04659 {
04660     nkf_char (*g)(FILE *) = i_ngetc;
04661     nkf_char (*u)(nkf_char c ,FILE *f) = i_nungetc;
04662     int i = 0, j;
04663     nkf_char buf[12];
04664     nkf_char c = -1;
04665 
04666     buf[i] = (*g)(f);
04667     if (buf[i] == '&'){
04668         buf[++i] = (*g)(f);
04669         if (buf[i] == '#'){
04670             c = 0;
04671             buf[++i] = (*g)(f);
04672             if (buf[i] == 'x' || buf[i] == 'X'){
04673                 for (j = 0; j < 7; j++){
04674                     buf[++i] = (*g)(f);
04675                     if (!nkf_isxdigit(buf[i])){
04676                         if (buf[i] != ';'){
04677                             c = -1;
04678                         }
04679                         break;
04680                     }
04681                     c <<= 4;
04682                     c |= hex2bin(buf[i]);
04683                 }
04684             }else{
04685                 for (j = 0; j < 8; j++){
04686                     if (j){
04687                         buf[++i] = (*g)(f);
04688                     }
04689                     if (!nkf_isdigit(buf[i])){
04690                         if (buf[i] != ';'){
04691                             c = -1;
04692                         }
04693                         break;
04694                     }
04695                     c *= 10;
04696                     c += hex2bin(buf[i]);
04697                 }
04698             }
04699         }
04700     }
04701     if (c != -1){
04702         return nkf_char_unicode_new(c);
04703     }
04704     while (i > 0){
04705         (*u)(buf[i], f);
04706         --i;
04707     }
04708     return buf[0];
04709 }
04710 
04711 static nkf_char
04712 numchar_ungetc(nkf_char c, FILE *f)
04713 {
04714     return (*i_nungetc)(c, f);
04715 }
04716 #endif
04717 
04718 #ifdef UNICODE_NORMALIZATION
04719 
/*
 * Read one character through i_nfc_getc.  Bytes are collected in
 * nkf_state->nfc_buf and compared against the .nfd byte sequences of
 * normalization_table[] by binary search; when an entry matches, the
 * buffer contents are replaced by that entry's .nfc bytes.
 *
 * Returns the first byte left in the buffer; all other buffered bytes are
 * pushed back through i_nfc_ungetc (last one first).  EOF, values above
 * 0xFF and bytes of the form 10xxxxxx are returned immediately without
 * any lookup.
 */
04720 static nkf_char
04721 nfc_getc(FILE *f)
04722 {
04723     nkf_char (*g)(FILE *f) = i_nfc_getc;
04724     nkf_char (*u)(nkf_char c ,FILE *f) = i_nfc_ungetc;
04725     nkf_buf_t *buf = nkf_state->nfc_buf;
04726     const unsigned char *array;
          /* [lower, upper] is the range of table entries still being searched */
04727     int lower=0, upper=NORMALIZATION_TABLE_LENGTH-1;
04728     nkf_char c = (*g)(f);
04729 
04730     if (c == EOF || c > 0xFF || (c & 0xc0) == 0x80) return c;
04731 
04732     nkf_buf_push(buf, c);
          /*
           * lower/upper are not reset after a match, so the outer loop runs
           * the search again on the replaced buffer until the range is empty.
           */
04733     do {
04734         while (lower <= upper) {
04735             int mid = (lower+upper) / 2;
04736             int len;
04737             array = normalization_table[mid].nfd;
              /* compare the entry byte by byte, reading more input as needed */
04738             for (len=0; len < NORMALIZATION_TABLE_NFD_LENGTH && array[len]; len++) {
04739                 if (len >= nkf_buf_length(buf)) {
04740                     c = (*g)(f);
04741                     if (c == EOF) {
                          /* input ended: empty the range so both loops stop */
04742                         len = 0;
04743                         lower = 1, upper = 0;
04744                         break;
04745                     }
04746                     nkf_buf_push(buf, c);
04747                 }
04748                 if (array[len] != nkf_buf_at(buf, len)) {
                      /* mismatch: narrow the range; len = 0 marks "no match" */
04749                     if (array[len] < nkf_buf_at(buf, len)) lower = mid + 1;
04750                     else  upper = mid - 1;
04751                     len = 0;
04752                     break;
04753                 }
04754             }
              /* len > 0 here means every byte of the .nfd entry matched */
04755             if (len > 0) {
04756                 int i;
04757                 array = normalization_table[mid].nfc;
04758                 nkf_buf_clear(buf);
04759                 for (i=0; i < NORMALIZATION_TABLE_NFC_LENGTH && array[i]; i++)
04760                     nkf_buf_push(buf, array[i]);
04761                 break;
04762             }
04763         }
04764     } while (lower <= upper);
04765 
          /* hand back everything except the first buffered byte */
04766     while (nkf_buf_length(buf) > 1) (*u)(nkf_buf_pop(buf), f);
04767     c = nkf_buf_pop(buf);
04768 
04769     return c;
04770 }
04771 
04772 static nkf_char
04773 nfc_ungetc(nkf_char c, FILE *f)
04774 {
04775     return (*i_nfc_ungetc)(c, f);
04776 }
04777 #endif /* UNICODE_NORMALIZATION */
04778 
04779 
04780 static nkf_char
04781 base64decode(nkf_char c)
04782 {
04783     int             i;
04784     if (c > '@') {
04785         if (c < '[') {
04786             i = c - 'A';                        /* A..Z 0-25 */
04787         } else if (c == '_') {
04788             i = '?'         /* 63 */ ;          /* _  63 */
04789         } else {
04790             i = c - 'G'     /* - 'a' + 26 */ ;  /* a..z 26-51 */
04791         }
04792     } else if (c > '/') {
04793         i = c - '0' + '4'   /* - '0' + 52 */ ;  /* 0..9 52-61 */
04794     } else if (c == '+' || c == '-') {
04795         i = '>'             /* 62 */ ;          /* + and -  62 */
04796     } else {
04797         i = '?'             /* 63 */ ;          /* / 63 */
04798     }
04799     return (i);
04800 }
04801 
/*
 * Return the next input character while MIME decoding is active.
 *
 * Bytes already stored in the MIME input buffer are returned first.  After
 * that the behaviour depends on mime_decode_mode:
 *   1 or FALSE  leave MIME mode (unswitch_mime_getc) and read from i_getc
 *   'Q'         decode "=XX" escapes, '_' and the "?=" terminator
 *   'B'         decode four base64 characters into up to three bytes
 *   other       clear the mode and read from i_mgetc
 * Returns EOF when the underlying reader reports EOF.
 */
04802 static nkf_char
04803 mime_getc(FILE *f)
04804 {
04805     nkf_char c1, c2, c3, c4, cc;
04806     nkf_char t1, t2, t3, t4, mode, exit_mode;
04807     nkf_char lwsp_count;
04808     char *lwsp_buf;
04809     char *lwsp_buf_new;
04810     nkf_char lwsp_size = 128;
04811 
04812     if (mime_input_state.top != mime_input_state.last) {  /* Something is in FIFO */
04813         return  mime_input_buf(mime_input_state.top++);
04814     }
          /* buffer is empty and nothing is left to decode: drop out of MIME mode */
04815     if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
04816         mime_decode_mode=FALSE;
04817         unswitch_mime_getc();
04818         return (*i_getc)(f);
04819     }
04820 
          /* exit_mode is the value mime_decode_mode gets when decoding stops */
04821     if (mimebuf_f == FIXED_MIME)
04822         exit_mode = mime_decode_mode;
04823     else
04824         exit_mode = FALSE;
04825     if (mime_decode_mode == 'Q') {
04826         if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
04827       restart_mime_q:
              /* '_' stands for a space, except in FIXED_MIME mode */
04828         if (c1=='_' && mimebuf_f != FIXED_MIME) return SP;
04829         if (c1<=SP || DEL<=c1) {
04830             mime_decode_mode = exit_mode; /* prepare for quit */
04831             return c1;
04832         }
              /* anything but '=' (and '?' outside FIXED_MIME) is literal */
04833         if (c1!='=' && (c1!='?' || mimebuf_f == FIXED_MIME)) {
04834             return c1;
04835         }
04836 
04837         mime_decode_mode = exit_mode; /* prepare for quit */
04838         if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
04839         if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
04840             /* end Q encoding */
04841             input_mode = exit_mode;
                  /*
                   * Scan what follows the terminator: SP/TAB are collected in
                   * lwsp_buf, and a line break followed by a blank is replaced
                   * by a pushed-back SP so scanning continues on the next line.
                   */
04842             lwsp_count = 0;
04843             lwsp_buf = nkf_xmalloc((lwsp_size+5)*sizeof(char));
04844             while ((c1=(*i_getc)(f))!=EOF) {
04845                 switch (c1) {
04846                 case LF:
04847                 case CR:
04848                     if (c1==LF) {
04849                         if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
04850                             i_ungetc(SP,f);
04851                             continue;
04852                         } else {
04853                             i_ungetc(c1,f);
04854                         }
04855                         c1 = LF;
04856                     } else {
04857                         if ((c1=(*i_getc)(f))!=EOF && c1 == LF) {
04858                             if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
04859                                 i_ungetc(SP,f);
04860                                 continue;
04861                             } else {
04862                                 i_ungetc(c1,f);
04863                             }
04864                             i_ungetc(LF,f);
04865                         } else {
04866                             i_ungetc(c1,f);
04867                         }
04868                         c1 = CR;
04869                     }
04870                     break;
04871                 case SP:
04872                 case TAB:
04873                     lwsp_buf[lwsp_count] = (unsigned char)c1;
                          /* grow the buffer (doubling lwsp_size) once it is full */
04874                     if (lwsp_count++>lwsp_size){
04875                         lwsp_size <<= 1;
04876                         lwsp_buf_new = nkf_xrealloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
04877                         lwsp_buf = lwsp_buf_new;
04878                     }
04879                     continue;
04880                 }
04881                 break;
04882             }
                  /*
                   * If white space was collected and the next character is not
                   * '=', push everything back and return the first white space
                   * character.  Otherwise the collected white space is dropped
                   * and the stopping character itself is returned.
                   */
04883             if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) {
04884                 i_ungetc(c1,f);
04885                 for(lwsp_count--;lwsp_count>0;lwsp_count--)
04886                     i_ungetc(lwsp_buf[lwsp_count],f);
04887                 c1 = lwsp_buf[0];
04888             }
04889             nkf_xfree(lwsp_buf);
04890             return c1;
04891         }
04892         if (c1=='='&&c2<SP) { /* this is soft wrap */
04893             while((c1 =  (*i_mgetc)(f)) <=SP) {
04894                 if (c1 == EOF) return (EOF);
04895             }
04896             mime_decode_mode = 'Q'; /* still in MIME */
04897             goto restart_mime_q;
04898         }
              /* a '?' not followed by '=': return it and re-read c2 next time */
04899         if (c1=='?') {
04900             mime_decode_mode = 'Q'; /* still in MIME */
04901             (*i_mungetc)(c2,f);
04902             return c1;
04903         }
              /* "=XY": c2 and c3 are the two hex digits of one byte */
04904         if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
04905         if (c2<=SP) return c2;
04906         mime_decode_mode = 'Q'; /* still in MIME */
04907         return ((hex2bin(c2)<<4) + hex2bin(c3));
04908     }
04909 
04910     if (mime_decode_mode != 'B') {
04911         mime_decode_mode = FALSE;
04912         return (*i_mgetc)(f);
04913     }
04914 
04915 
04916     /* Base64 encoding */
04917     /*
04918        MIME allows line break in the middle of
04919        Base64, but we are very pessimistic in decoding
04920        in unbuf mode because MIME encoded code may broken by
04921        less or editor's control sequence (such as ESC-[-K in unbuffered
04922        mode. ignore incomplete MIME.
04923      */
04924     mode = mime_decode_mode;
04925     mime_decode_mode = exit_mode;  /* prepare for quit */
04926 
          /* skip characters <= SP before the first base64 character */
04927     while ((c1 = (*i_mgetc)(f))<=SP) {
04928         if (c1==EOF)
04929             return (EOF);
04930     }
          /*
           * For c2, c3 and c4: a character <= SP is skipped unless mime_f is
           * STRICT_MIME, in which case it is returned as-is.
           */
04931   mime_c2_retry:
04932     if ((c2 = (*i_mgetc)(f))<=SP) {
04933         if (c2==EOF)
04934             return (EOF);
04935         if (mime_f != STRICT_MIME) goto mime_c2_retry;
04936         if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
04937         return c2;
04938     }
          /* "?=" terminator: same trailing white space scan as in the Q branch */
04939     if ((c1 == '?') && (c2 == '=')) {
04940         input_mode = ASCII;
04941         lwsp_count = 0;
04942         lwsp_buf = nkf_xmalloc((lwsp_size+5)*sizeof(char));
04943         while ((c1=(*i_getc)(f))!=EOF) {
04944             switch (c1) {
04945             case LF:
04946             case CR:
04947                 if (c1==LF) {
04948                     if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
04949                         i_ungetc(SP,f);
04950                         continue;
04951                     } else {
04952                         i_ungetc(c1,f);
04953                     }
04954                     c1 = LF;
04955                 } else {
                          /*
                           * NOTE(review): unlike the Q branch, the character
                           * after CR is not compared with LF before the next
                           * one is read - confirm this is intended.
                           */
04956                     if ((c1=(*i_getc)(f))!=EOF) {
04957                         if (c1==SP) {
04958                             i_ungetc(SP,f);
04959                             continue;
04960                         } else if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
04961                             i_ungetc(SP,f);
04962                             continue;
04963                         } else {
04964                             i_ungetc(c1,f);
04965                         }
04966                         i_ungetc(LF,f);
04967                     } else {
04968                         i_ungetc(c1,f);
04969                     }
04970                     c1 = CR;
04971                 }
04972                 break;
04973             case SP:
04974             case TAB:
04975                 lwsp_buf[lwsp_count] = (unsigned char)c1;
04976                 if (lwsp_count++>lwsp_size){
04977                     lwsp_size <<= 1;
04978                     lwsp_buf_new = nkf_xrealloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
04979                     lwsp_buf = lwsp_buf_new;
04980                 }
04981                 continue;
04982             }
04983             break;
04984         }
04985         if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) {
04986             i_ungetc(c1,f);
04987             for(lwsp_count--;lwsp_count>0;lwsp_count--)
04988                 i_ungetc(lwsp_buf[lwsp_count],f);
04989             c1 = lwsp_buf[0];
04990         }
04991         nkf_xfree(lwsp_buf);
04992         return c1;
04993     }
04994   mime_c3_retry:
04995     if ((c3 = (*i_mgetc)(f))<=SP) {
04996         if (c3==EOF)
04997             return (EOF);
04998         if (mime_f != STRICT_MIME) goto mime_c3_retry;
04999         if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
05000         return c3;
05001     }
05002   mime_c4_retry:
05003     if ((c4 = (*i_mgetc)(f))<=SP) {
05004         if (c4==EOF)
05005             return (EOF);
05006         if (mime_f != STRICT_MIME) goto mime_c4_retry;
05007         if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
05008         return c4;
05009     }
05010 
05011     mime_decode_mode = mode; /* still in MIME sigh... */
05012 
05013     /* BASE 64 decoding */
05014 
05015     t1 = 0x3f & base64decode(c1);
05016     t2 = 0x3f & base64decode(c2);
05017     t3 = 0x3f & base64decode(c3);
05018     t4 = 0x3f & base64decode(c4);
          /*
           * Rebuild up to three bytes from the four 6-bit values and append
           * them to the MIME input buffer; a '=' in c3 or c4 ends the group
           * early.  If c2 is '=' nothing is stored and c1 is returned.
           */
05019     cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
05020     if (c2 != '=') {
05021         mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
05022         cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
05023         if (c3 != '=') {
05024             mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
05025             cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
05026             if (c4 != '=')
05027                 mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
05028         }
05029     } else {
05030         return c1;
05031     }
          /* hand out the first of the bytes just stored */
05032     return  mime_input_buf(mime_input_state.top++);
05033 }
05034 
/* Base64 output alphabet, indexed by 6-bit value (used by the MIME encoder below). */
05035 static const char basis_64[] =
05036     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
05037 
/*
 * Characters held back by the MIME output code before they are emitted.
 * buf has room for MIMEOUT_BUF_LENGTH+1 bytes; count is the number of
 * bytes currently stored in buf.
 */
05038 #define MIMEOUT_BUF_LENGTH 74
05039 static struct {
05040     unsigned char buf[MIMEOUT_BUF_LENGTH+1];
05041     int count;
05042 } mimeout_state;
05043 
05044 /*nkf_char mime_lastchar2, mime_lastchar1;*/
05045 
05046 static void
05047 open_mime(nkf_char mode)
05048 {
05049     const unsigned char *p;
05050     int i;
05051     int j;
05052     p  = mime_pattern[0];
05053     for(i=0;mime_pattern[i];i++) {
05054         if (mode == mime_encode[i]) {
05055             p = mime_pattern[i];
05056             break;
05057         }
05058     }
05059     mimeout_mode = mime_encode_method[i];
05060     i = 0;
05061     if (base64_count>45) {
05062         if (mimeout_state.count>0 && nkf_isblank(mimeout_state.buf[i])){
05063             (*o_mputc)(mimeout_state.buf[i]);
05064             i++;
05065         }
05066         put_newline(o_mputc);
05067         (*o_mputc)(SP);
05068         base64_count = 1;
05069         if (mimeout_state.count>0 && nkf_isspace(mimeout_state.buf[i])) {
05070             i++;
05071         }
05072     }
05073     for (;i<mimeout_state.count;i++) {
05074         if (nkf_isspace(mimeout_state.buf[i])) {
05075             (*o_mputc)(mimeout_state.buf[i]);
05076             base64_count ++;
05077         } else {
05078             break;
05079         }
05080     }
05081     while(*p) {
05082         (*o_mputc)(*p++);
05083         base64_count ++;
05084     }
05085     j = mimeout_state.count;
05086     mimeout_state.count = 0;
05087     for (;i<j;i++) {
05088         mime_putc(mimeout_state.buf[i]);
05089     }
05090 }
05091 
05092 static void
05093 mime_prechar(nkf_char c2, nkf_char c1)
05094 {
05095     if (mimeout_mode > 0){
05096         if (c2 == EOF){
05097             if (base64_count + mimeout_state.count/3*4> 73){
05098                 (*o_base64conv)(EOF,0);
05099                 oconv_newline(o_base64conv);
05100                 (*o_base64conv)(0,SP);
05101                 base64_count = 1;
05102             }
05103         } else {
05104             if ((c2 != 0 || c1 > DEL) && base64_count + mimeout_state.count/3*4> 66) {
05105                 (*o_base64conv)(EOF,0);
05106                 oconv_newline(o_base64conv);
05107                 (*o_base64conv)(0,SP);
05108                 base64_count = 1;
05109                 mimeout_mode = -1;
05110             }
05111         }
05112     } else if (c2) {
05113         if (c2 != EOF && base64_count + mimeout_state.count/3*4> 60) {
05114             mimeout_mode =  (output_mode==ASCII ||output_mode == ISO_8859_1) ? 'Q' : 'B';
05115             open_mime(output_mode);
05116             (*o_base64conv)(EOF,0);
05117             oconv_newline(o_base64conv);
05118             (*o_base64conv)(0,SP);
05119             base64_count = 1;
05120             mimeout_mode = -1;
05121         }
05122     }
05123 }
05124 
05125 static void
05126 close_mime(void)
05127 {
05128     (*o_mputc)('?');
05129     (*o_mputc)('=');
05130     base64_count += 2;
05131     mimeout_mode = 0;
05132 }
05133 
05134 static void
05135 eof_mime(void)
05136 {
05137     switch(mimeout_mode) {
05138     case 'Q':
05139     case 'B':
05140         break;
05141     case 2:
05142         (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0x3)<< 4)]);
05143         (*o_mputc)('=');
05144         (*o_mputc)('=');
05145         base64_count += 3;
05146         break;
05147     case 1:
05148         (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0xF) << 2)]);
05149         (*o_mputc)('=');
05150         base64_count += 2;
05151         break;
05152     }
05153     if (mimeout_mode > 0) {
05154         if (mimeout_f!=FIXED_MIME) {
05155             close_mime();
05156         } else if (mimeout_mode != 'Q')
05157             mimeout_mode = 'B';
05158     }
05159 }
05160 
05161 static void
05162 mimeout_addchar(nkf_char c)
05163 {
05164     switch(mimeout_mode) {
05165     case 'Q':
05166         if (c==CR||c==LF) {
05167             (*o_mputc)(c);
05168             base64_count = 0;
05169         } else if(!nkf_isalnum(c)) {
05170             (*o_mputc)('=');
05171             (*o_mputc)(bin2hex(((c>>4)&0xf)));
05172             (*o_mputc)(bin2hex((c&0xf)));
05173             base64_count += 3;
05174         } else {
05175             (*o_mputc)(c);
05176             base64_count++;
05177         }
05178         break;
05179     case 'B':
05180         nkf_state->mimeout_state=c;
05181         (*o_mputc)(basis_64[c>>2]);
05182         mimeout_mode=2;
05183         base64_count ++;
05184         break;
05185     case 2:
05186         (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
05187         nkf_state->mimeout_state=c;
05188         mimeout_mode=1;
05189         base64_count ++;
05190         break;
05191     case 1:
05192         (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0xF) << 2) | ((c & 0xC0) >>6)]);
05193         (*o_mputc)(basis_64[c & 0x3F]);
05194         mimeout_mode='B';
05195         base64_count += 2;
05196         break;
05197     default:
05198         (*o_mputc)(c);
05199         base64_count++;
05200         break;
05201     }
05202 }
05203 
/*
 * Output one character c with MIME encoding applied; c == EOF flushes.
 *
 * With mimeout_f == FIXED_MIME the character is encoded directly through
 * mimeout_addchar(), with line breaks inserted once base64_count exceeds
 * 71.  Otherwise characters may be held in mimeout_state.buf, and
 * encoded-words are opened and closed (open_mime / close_mime / eof_mime)
 * depending on mimeout_mode, output_mode and the character itself.
 */
05204 static void
05205 mime_putc(nkf_char c)
05206 {
05207     int i, j;
05208     nkf_char lastchar;
05209 
05210     if (mimeout_f == FIXED_MIME){
05211         if (mimeout_mode == 'Q'){
                  /* line full: "=" plus newline, unless c is itself a line break */
05212             if (base64_count > 71){
05213                 if (c!=CR && c!=LF) {
05214                     (*o_mputc)('=');
05215                     put_newline(o_mputc);
05216                 }
05217                 base64_count = 0;
05218             }
05219         }else{
                  /* line full: finish the current group and start a new line */
05220             if (base64_count > 71){
05221                 eof_mime();
05222                 put_newline(o_mputc);
05223                 base64_count = 0;
05224             }
05225             if (c == EOF) { /* c==EOF */
05226                 eof_mime();
05227             }
05228         }
05229         if (c != EOF) { /* c==EOF */
05230             mimeout_addchar(c);
05231         }
05232         return;
05233     }
05234 
05235     /* mimeout_f != FIXED_MIME */
05236 
          /* EOF: write out whatever is still pending and close the encoded-word */
05237     if (c == EOF) { /* c==EOF */
05238         if (mimeout_mode == -1 && mimeout_state.count > 1) open_mime(output_mode);
05239         j = mimeout_state.count;
05240         mimeout_state.count = 0;
05241         i = 0;
05242         if (mimeout_mode > 0) {
05243             if (!nkf_isblank(mimeout_state.buf[j-1])) {
                      /*
                       * Encode pending characters up to the first white space
                       * (only while base64_count < 71), close the word, then
                       * write the rest.
                       */
05244                 for (;i<j;i++) {
05245                     if (nkf_isspace(mimeout_state.buf[i]) && base64_count < 71){
05246                         break;
05247                     }
05248                     mimeout_addchar(mimeout_state.buf[i]);
05249                 }
05250                 eof_mime();
05251                 for (;i<j;i++) {
05252                     mimeout_addchar(mimeout_state.buf[i]);
05253                 }
05254             } else {
05255                 for (;i<j;i++) {
05256                     mimeout_addchar(mimeout_state.buf[i]);
05257                 }
05258                 eof_mime();
05259             }
05260         } else {
05261             for (;i<j;i++) {
05262                 mimeout_addchar(mimeout_state.buf[i]);
05263             }
05264         }
05265         return;
05266     }
05267 
          /* lastchar: newest pending character, or -1 when nothing is pending */
05268     if (mimeout_state.count > 0){
05269         lastchar = mimeout_state.buf[mimeout_state.count - 1];
05270     }else{
05271         lastchar = -1;
05272     }
05273 
          /* currently inside a Q-encoded word */
05274     if (mimeout_mode=='Q') {
05275         if (c <= DEL && (output_mode==ASCII ||output_mode == ISO_8859_1)) {
05276             if (c == CR || c == LF) {
05277                 close_mime();
05278                 (*o_mputc)(c);
05279                 base64_count = 0;
05280                 return;
05281             } else if (c <= SP) {
05282                 close_mime();
05283                 if (base64_count > 70) {
05284                     put_newline(o_mputc);
05285                     base64_count = 0;
05286                 }
05287                 if (!nkf_isblank(c)) {
05288                     (*o_mputc)(SP);
05289                     base64_count++;
05290                 }
05291             } else {
                      /* line too long: close, fold, and reopen the encoded-word */
05292                 if (base64_count > 70) {
05293                     close_mime();
05294                     put_newline(o_mputc);
05295                     (*o_mputc)(SP);
05296                     base64_count = 1;
05297                     open_mime(output_mode);
05298                 }
05299                 if (!nkf_noescape_mime(c)) {
05300                     mimeout_addchar(c);
05301                     return;
05302                 }
05303             }
                  /* every character except ESC (0x1B) is written unencoded here */
05304             if (c != 0x1B) {
05305                 (*o_mputc)(c);
05306                 base64_count++;
05307                 return;
05308             }
05309         }
05310     }
05311 
          /* not inside an encoded-word (mimeout_mode is 0 or -1) */
05312     if (mimeout_mode <= 0) {
05313         if (c <= DEL && (output_mode==ASCII || output_mode == ISO_8859_1 ||
05314                     output_mode == UTF_8)) {
05315             if (nkf_isspace(c)) {
                      /* white space: write out the pending characters; c becomes the only pending one */
05316                 int flag = 0;
05317                 if (mimeout_mode == -1) {
05318                     flag = 1;
05319                 }
05320                 if (c==CR || c==LF) {
05321                     if (flag) {
05322                         open_mime(output_mode);
05323                         output_mode = 0;
05324                     } else {
05325                         base64_count = 0;
05326                     }
05327                 }
05328                 for (i=0;i<mimeout_state.count;i++) {
05329                     (*o_mputc)(mimeout_state.buf[i]);
05330                     if (mimeout_state.buf[i] == CR || mimeout_state.buf[i] == LF){
05331                         base64_count = 0;
05332                     }else{
05333                         base64_count++;
05334                     }
05335                 }
05336                 if (flag) {
05337                     eof_mime();
05338                     base64_count = 0;
05339                     mimeout_mode = 0;
05340                 }
05341                 mimeout_state.buf[0] = (char)c;
05342                 mimeout_state.count = 1;
05343             }else{
                      /*
                       * Non-space character: if the pending text would push
                       * the line past 76 columns, break the line first.
                       */
05344                 if (base64_count > 1
05345                     && base64_count + mimeout_state.count > 76
05346                     && mimeout_state.buf[0] != CR && mimeout_state.buf[0] != LF){
05347                     static const char *str = "boundary=\"";
05348                     static int len = 10;
05349                     i = 0;
05350 
                          /* look for the text boundary=" inside the pending buffer */
05351                     for (; i < mimeout_state.count - len; ++i) {
05352                         if (!strncmp((char *)(mimeout_state.buf+i), str, len)) {
05353                             i += len - 2;
05354                             break;
05355                         }
05356                     }
05357 
05358                     if (i == 0 || i == mimeout_state.count - len) {
05359                         put_newline(o_mputc);
05360                         base64_count = 0;
05361                         if (!nkf_isspace(mimeout_state.buf[0])){
05362                             (*o_mputc)(SP);
05363                             base64_count++;
05364                         }
05365                     }
05366                     else {
                              /* write buf[0..i], break the line, and shift the rest to the front */
05367                         int j;
05368                         for (j = 0; j <= i; ++j) {
05369                             (*o_mputc)(mimeout_state.buf[j]);
05370                         }
05371                         put_newline(o_mputc);
05372                         base64_count = 1;
05373                         for (; j <= mimeout_state.count; ++j) {
05374                             mimeout_state.buf[j - i] = mimeout_state.buf[j];
05375                         }
05376                         mimeout_state.count -= i;
05377                     }
05378                 }
05379                 mimeout_state.buf[mimeout_state.count++] = (char)c;
                      /* buffer full: open an encoded-word, which consumes the buffer */
05380                 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
05381                     open_mime(output_mode);
05382                 }
05383             }
05384             return;
05385         }else{
                  /* c needs encoding: settle the pending characters, then open an encoded-word */
05386             if (lastchar==CR || lastchar == LF){
05387                 for (i=0;i<mimeout_state.count;i++) {
05388                     (*o_mputc)(mimeout_state.buf[i]);
05389                 }
05390                 base64_count = 0;
05391                 mimeout_state.count = 0;
05392             }
05393             if (lastchar==SP) {
                      /* write all but the trailing SP; keep that SP as the only pending character */
05394                 for (i=0;i<mimeout_state.count-1;i++) {
05395                     (*o_mputc)(mimeout_state.buf[i]);
05396                     base64_count++;
05397                 }
05398                 mimeout_state.buf[0] = SP;
05399                 mimeout_state.count = 1;
05400             }
05401             open_mime(output_mode);
05402         }
05403     }else{
05404         /* mimeout_mode == 'B', 1, 2 */
05405         if (c <= DEL && (output_mode==ASCII || output_mode == ISO_8859_1 ||
05406                     output_mode == UTF_8)) {
05407             if (lastchar == CR || lastchar == LF){
                      /*
                       * Pending text ends in a line break: a following blank
                       * keeps it inside the encoded-word, anything else ends
                       * the word and the pending text is written raw.
                       */
05408                 if (nkf_isblank(c)) {
05409                     for (i=0;i<mimeout_state.count;i++) {
05410                         mimeout_addchar(mimeout_state.buf[i]);
05411                     }
05412                     mimeout_state.count = 0;
05413                 } else {
05414                     eof_mime();
05415                     for (i=0;i<mimeout_state.count;i++) {
05416                         (*o_mputc)(mimeout_state.buf[i]);
05417                     }
05418                     base64_count = 0;
05419                     mimeout_state.count = 0;
05420                 }
05421                 mimeout_state.buf[mimeout_state.count++] = (char)c;
05422                 return;
05423             }
05424             if (nkf_isspace(c)) {
                      /*
                       * If the pending text contains a character between SP
                       * and DEL, end the encoded-word and write it raw.
                       */
05425                 for (i=0;i<mimeout_state.count;i++) {
05426                     if (SP<mimeout_state.buf[i] && mimeout_state.buf[i]<DEL) {
05427                         eof_mime();
05428                         for (i=0;i<mimeout_state.count;i++) {
05429                             (*o_mputc)(mimeout_state.buf[i]);
05430                             base64_count++;
05431                         }
05432                         mimeout_state.count = 0;
05433                     }
05434                 }
05435                 mimeout_state.buf[mimeout_state.count++] = (char)c;
05436                 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
05437                     eof_mime();
05438                     for (i=0;i<mimeout_state.count;i++) {
05439                         (*o_mputc)(mimeout_state.buf[i]);
05440                         base64_count++;
05441                     }
05442                     mimeout_state.count = 0;
05443                 }
05444                 return;
05445             }
                  /* something is already pending: hold this character back as well */
05446             if (mimeout_state.count>0 && SP<c && c!='=') {
05447                 mimeout_state.buf[mimeout_state.count++] = (char)c;
05448                 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
05449                     j = mimeout_state.count;
05450                     mimeout_state.count = 0;
05451                     for (i=0;i<j;i++) {
05452                         mimeout_addchar(mimeout_state.buf[i]);
05453                     }
05454                 }
05455                 return;
05456             }
05457         }
05458     }
          /*
           * Encode the pending characters up to the first CR/LF.  If a line
           * break is found, end the encoded-word, write the rest raw and open
           * a new encoded-word.  Finally encode c itself.
           */
05459     if (mimeout_state.count>0) {
05460         j = mimeout_state.count;
05461         mimeout_state.count = 0;
05462         for (i=0;i<j;i++) {
05463             if (mimeout_state.buf[i]==CR || mimeout_state.buf[i]==LF)
05464                 break;
05465             mimeout_addchar(mimeout_state.buf[i]);
05466         }
05467         if (i<j) {
05468             eof_mime();
05469             base64_count=0;
05470             for (;i<j;i++) {
05471                 (*o_mputc)(mimeout_state.buf[i]);
05472             }
05473             open_mime(output_mode);
05474         }
05475     }
05476     mimeout_addchar(c);
05477 }
05478 
05479 static void
05480 base64_conv(nkf_char c2, nkf_char c1)
05481 {
05482     mime_prechar(c2, c1);
05483     (*o_base64conv)(c2,c1);
05484 }
05485 
05486 #ifdef HAVE_ICONV_H
/*
 * State for one iconv(3)-based conversion.
 *
 *   cd                 - conversion descriptor obtained from iconv_open().
 *   input_buffer       - staging area for bytes read but not yet converted.
 *   input_buffer_size  - capacity of input_buffer in bytes.
 *   output_buffer      - staging area iconv() writes converted bytes into.
 *   output_buffer_size - capacity of output_buffer in bytes.
 */
typedef struct nkf_iconv_t {
    iconv_t cd;
    char *input_buffer;
    size_t input_buffer_size;
    char *output_buffer;
    size_t output_buffer_size;
} nkf_iconv_t;
05494 
05495 static nkf_iconv_t
05496 nkf_iconv_new(char *tocode, char *fromcode)
05497 {
05498     nkf_iconv_t converter;
05499 
05500     converter->input_buffer_size = IOBUF_SIZE;
05501     converter->input_buffer = nkf_xmalloc(converter->input_buffer_size);
05502     converter->output_buffer_size = IOBUF_SIZE * 2;
05503     converter->output_buffer = nkf_xmalloc(converter->output_buffer_size);
05504     converter->cd = iconv_open(tocode, fromcode);
05505     if (converter->cd == (iconv_t)-1)
05506     {
05507         switch (errno) {
05508         case EINVAL:
05509             perror(fprintf("iconv doesn't support %s to %s conversion.", fromcode, tocode));
05510             return -1;
05511         default:
05512             perror("can't iconv_open");
05513         }
05514     }
05515 }
05516 
05517 static size_t
05518 nkf_iconv_convert(nkf_iconv_t *converter, FILE *input)
05519 {
05520     size_t invalid = (size_t)0;
05521     char *input_buffer = converter->input_buffer;
05522     size_t input_length = (size_t)0;
05523     char *output_buffer = converter->output_buffer;
05524     size_t output_length = converter->output_buffer_size;
05525     int c;
05526 
05527     do {
05528         if (c != EOF) {
05529             while ((c = (*i_getc)(f)) != EOF) {
05530                 input_buffer[input_length++] = c;
05531                 if (input_length < converter->input_buffer_size) break;
05532             }
05533         }
05534 
05535         size_t ret = iconv(converter->cd, &input_buffer, &input_length, &output_buffer, &output_length);
05536         while (output_length-- > 0) {
05537             (*o_putc)(output_buffer[converter->output_buffer_size-output_length]);
05538         }
05539         if (ret == (size_t) - 1) {
05540             switch (errno) {
05541             case EINVAL:
05542                 if (input_buffer != converter->input_buffer)
05543                     memmove(converter->input_buffer, input_buffer, input_length);
05544                 break;
05545             case E2BIG:
05546                 converter->output_buffer_size *= 2;
05547                 output_buffer = realloc(converter->outbuf, converter->output_buffer_size);
05548                 if (output_buffer == NULL) {
05549                     perror("can't realloc");
05550                     return -1;
05551                 }
05552                 converter->output_buffer = output_buffer;
05553                 break;
05554             default:
05555                 perror("can't iconv");
05556                 return -1;
05557             }
05558         } else {
05559             invalid += ret;
05560         }
05561     } while (1);
05562 
05563     return invalid;
05564 }
05565 
05566 
05567 static void
05568 nkf_iconv_close(nkf_iconv_t *convert)
05569 {
05570     nkf_xfree(converter->inbuf);
05571     nkf_xfree(converter->outbuf);
05572     iconv_close(converter->cd);
05573 }
05574 #endif
05575 
05576 
05577 static void
05578 reinit(void)
05579 {
05580     {
05581         struct input_code *p = input_code_list;
05582         while (p->name){
05583             status_reinit(p++);
05584         }
05585     }
05586     unbuf_f = FALSE;
05587     estab_f = FALSE;
05588     nop_f = FALSE;
05589     binmode_f = TRUE;
05590     rot_f = FALSE;
05591     hira_f = FALSE;
05592     alpha_f = FALSE;
05593     mime_f = MIME_DECODE_DEFAULT;
05594     mime_decode_f = FALSE;
05595     mimebuf_f = FALSE;
05596     broken_f = FALSE;
05597     iso8859_f = FALSE;
05598     mimeout_f = FALSE;
05599     x0201_f = NKF_UNSPECIFIED;
05600     iso2022jp_f = FALSE;
05601 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
05602     ms_ucs_map_f = UCS_MAP_ASCII;
05603 #endif
05604 #ifdef UTF8_INPUT_ENABLE
05605     no_cp932ext_f = FALSE;
05606     no_best_fit_chars_f = FALSE;
05607     encode_fallback = NULL;
05608     unicode_subchar  = '?';
05609     input_endian = ENDIAN_BIG;
05610 #endif
05611 #ifdef UTF8_OUTPUT_ENABLE
05612     output_bom_f = FALSE;
05613     output_endian = ENDIAN_BIG;
05614 #endif
05615 #ifdef UNICODE_NORMALIZATION
05616     nfc_f = FALSE;
05617 #endif
05618 #ifdef INPUT_OPTION
05619     cap_f = FALSE;
05620     url_f = FALSE;
05621     numchar_f = FALSE;
05622 #endif
05623 #ifdef CHECK_OPTION
05624     noout_f = FALSE;
05625     debug_f = FALSE;
05626 #endif
05627     guess_f = 0;
05628 #ifdef EXEC_IO
05629     exec_f = 0;
05630 #endif
05631 #ifdef SHIFTJIS_CP932
05632     cp51932_f = TRUE;
05633     cp932inv_f = TRUE;
05634 #endif
05635 #ifdef X0212_ENABLE
05636     x0212_f = FALSE;
05637     x0213_f = FALSE;
05638 #endif
05639     {
05640         int i;
05641         for (i = 0; i < 256; i++){
05642             prefix_table[i] = 0;
05643         }
05644     }
05645     hold_count = 0;
05646     mimeout_state.count = 0;
05647     mimeout_mode = 0;
05648     base64_count = 0;
05649     f_line = 0;
05650     f_prev = 0;
05651     fold_preserve_f = FALSE;
05652     fold_f = FALSE;
05653     fold_len = 0;
05654     kanji_intro = DEFAULT_J;
05655     ascii_intro = DEFAULT_R;
05656     fold_margin  = FOLD_MARGIN;
05657     o_zconv = no_connection;
05658     o_fconv = no_connection;
05659     o_eol_conv = no_connection;
05660     o_rot_conv = no_connection;
05661     o_hira_conv = no_connection;
05662     o_base64conv = no_connection;
05663     o_iso2022jp_check_conv = no_connection;
05664     o_putc = std_putc;
05665     i_getc = std_getc;
05666     i_ungetc = std_ungetc;
05667     i_bgetc = std_getc;
05668     i_bungetc = std_ungetc;
05669     o_mputc = std_putc;
05670     i_mgetc = std_getc;
05671     i_mungetc  = std_ungetc;
05672     i_mgetc_buf = std_getc;
05673     i_mungetc_buf = std_ungetc;
05674     output_mode = ASCII;
05675     input_mode =  ASCII;
05676     mime_decode_mode = FALSE;
05677     file_out_f = FALSE;
05678     eolmode_f = 0;
05679     input_eol = 0;
05680     prev_cr = 0;
05681     option_mode = 0;
05682     z_prev2=0,z_prev1=0;
05683 #ifdef CHECK_OPTION
05684     iconv_for_check = 0;
05685 #endif
05686     input_codename = NULL;
05687     input_encoding = NULL;
05688     output_encoding = NULL;
05689     nkf_state_init();
05690 #ifdef WIN32DLL
05691     reinitdll();
05692 #endif /*WIN32DLL*/
05693 }
05694 
05695 static int
05696 module_connection(void)
05697 {
05698     if (input_encoding) set_input_encoding(input_encoding);
05699     if (!output_encoding) {
05700         output_encoding = nkf_default_encoding();
05701     }
05702     if (!output_encoding) {
05703         if (noout_f || guess_f) output_encoding = nkf_enc_from_index(ISO_2022_JP);
05704         else return -1;
05705     }
05706     set_output_encoding(output_encoding);
05707     oconv = nkf_enc_to_oconv(output_encoding);
05708     o_putc = std_putc;
05709     if (nkf_enc_unicode_p(output_encoding))
05710         output_mode = UTF_8;
05711 
05712         if (x0201_f == NKF_UNSPECIFIED) {
05713                 x0201_f = X0201_DEFAULT;
05714         }
05715 
05716     /* replace continucation module, from output side */
05717 
05718     /* output redicrection */
05719 #ifdef CHECK_OPTION
05720     if (noout_f || guess_f){
05721         o_putc = no_putc;
05722     }
05723 #endif
05724     if (mimeout_f) {
05725         o_mputc = o_putc;
05726         o_putc = mime_putc;
05727         if (mimeout_f == TRUE) {
05728             o_base64conv = oconv; oconv = base64_conv;
05729         }
05730         /* base64_count = 0; */
05731     }
05732 
05733     if (eolmode_f || guess_f) {
05734         o_eol_conv = oconv; oconv = eol_conv;
05735     }
05736     if (rot_f) {
05737         o_rot_conv = oconv; oconv = rot_conv;
05738     }
05739     if (iso2022jp_f) {
05740         o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
05741     }
05742     if (hira_f) {
05743         o_hira_conv = oconv; oconv = hira_conv;
05744     }
05745     if (fold_f) {
05746         o_fconv = oconv; oconv = fold_conv;
05747         f_line = 0;
05748     }
05749     if (alpha_f || x0201_f) {
05750         o_zconv = oconv; oconv = z_conv;
05751     }
05752 
05753     i_getc = std_getc;
05754     i_ungetc = std_ungetc;
05755     /* input redicrection */
05756 #ifdef INPUT_OPTION
05757     if (cap_f){
05758         i_cgetc = i_getc; i_getc = cap_getc;
05759         i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
05760     }
05761     if (url_f){
05762         i_ugetc = i_getc; i_getc = url_getc;
05763         i_uungetc = i_ungetc; i_ungetc= url_ungetc;
05764     }
05765 #endif
05766 #ifdef NUMCHAR_OPTION
05767     if (numchar_f){
05768         i_ngetc = i_getc; i_getc = numchar_getc;
05769         i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
05770     }
05771 #endif
05772 #ifdef UNICODE_NORMALIZATION
05773     if (nfc_f){
05774         i_nfc_getc = i_getc; i_getc = nfc_getc;
05775         i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
05776     }
05777 #endif
05778     if (mime_f && mimebuf_f==FIXED_MIME) {
05779         i_mgetc = i_getc; i_getc = mime_getc;
05780         i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
05781     }
05782     if (broken_f & 1) {
05783         i_bgetc = i_getc; i_getc = broken_getc;
05784         i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
05785     }
05786     if (input_encoding) {
05787         set_iconv(-TRUE, nkf_enc_to_iconv(input_encoding));
05788     } else {
05789         set_iconv(FALSE, e_iconv);
05790     }
05791 
05792     {
05793         struct input_code *p = input_code_list;
05794         while (p->name){
05795             status_reinit(p++);
05796         }
05797     }
05798     return 0;
05799 }
05800 
05801 /*
05802    Conversion main loop. Code detection only.
05803  */
05804 
05805 #if !defined(PERL_XS) && !defined(WIN32DLL)
05806 static nkf_char
05807 noconvert(FILE *f)
05808 {
05809     nkf_char    c;
05810 
05811     if (nop_f == 2)
05812         module_connection();
05813     while ((c = (*i_getc)(f)) != EOF)
05814         (*o_putc)(c);
05815     (*o_putc)(EOF);
05816     return 1;
05817 }
05818 #endif
05819 
/*
 * Loop-control shorthands.  Each expands to a bare `continue`/`break` (or to
 * an empty expression), so they are only meaningful inside a loop body;
 * SKIP and MORE additionally require variables named c2 (and c1) in scope.
 *
 * SKIP and MORE expand to TWO statements.  Used as the unbraced body of an
 * `if`/`else`, only the assignment would be conditional, so always brace
 * them.  They cannot be wrapped in do { } while (0), because `continue`
 * would then bind to that inner loop instead of the enclosing one.
 */
#define NEXT continue        /* no output, get next */
#define SKIP c2=0;continue        /* no output, get next */
#define MORE c2=c1;continue  /* need one more byte */
#define SEND (void)0         /* output c1 and c2, get next */
#define LAST break           /* end of loop, go closing  */
/*
 * Switch to input mode `mode`: stores it in input_mode, resets shift_mode
 * (a variable that must be in scope where the macro is expanded), and
 * reports "ISO-2022-JP" through set_input_codename() and debug().
 */
#define set_input_mode(mode) do { \
    input_mode = mode; \
    shift_mode = 0; \
    set_input_codename("ISO-2022-JP"); \
    debug("ISO-2022-JP"); \
} while (0)
05831 
05832 static int
05833 kanji_convert(FILE *f)
05834 {
05835     nkf_char c1=0, c2=0, c3=0, c4=0;
05836     int shift_mode = 0; /* 0, 1, 2, 3 */
05837     int g2 = 0;
05838     int is_8bit = FALSE;
05839 
05840     if (input_encoding && !nkf_enc_asciicompat(input_encoding)) {
05841         is_8bit = TRUE;
05842     }
05843 
05844     input_mode = ASCII;
05845     output_mode = ASCII;
05846 
05847     if (module_connection() < 0) {
05848 #if !defined(PERL_XS) && !defined(WIN32DLL)
05849         fprintf(stderr, "no output encoding given\n");
05850 #endif
05851         return -1;
05852     }
05853     check_bom(f);
05854 
05855 #ifdef UTF8_INPUT_ENABLE
05856     if(iconv == w_iconv32){
05857         while ((c1 = (*i_getc)(f)) != EOF &&
05858                (c2 = (*i_getc)(f)) != EOF &&
05859                (c3 = (*i_getc)(f)) != EOF &&
05860                (c4 = (*i_getc)(f)) != EOF) {
05861             nkf_char c5, c6, c7, c8;
05862             if (nkf_iconv_utf_32(c1, c2, c3, c4) == (size_t)NKF_ICONV_WAIT_COMBINING_CHAR) {
05863                 if ((c5 = (*i_getc)(f)) != EOF &&
05864                     (c6 = (*i_getc)(f)) != EOF &&
05865                     (c7 = (*i_getc)(f)) != EOF &&
05866                     (c8 = (*i_getc)(f)) != EOF) {
05867                     if (nkf_iconv_utf_32_combine(c1, c2, c3, c4, c5, c6, c7, c8)) {
05868                         (*i_ungetc)(c8, f);
05869                         (*i_ungetc)(c7, f);
05870                         (*i_ungetc)(c6, f);
05871                         (*i_ungetc)(c5, f);
05872                         nkf_iconv_utf_32_nocombine(c1, c2, c3, c4);
05873                     }
05874                 } else {
05875                     nkf_iconv_utf_32_nocombine(c1, c2, c3, c4);
05876                 }
05877             }
05878         }
05879         goto finished;
05880     }
05881     else if (iconv == w_iconv16) {
05882         while ((c1 = (*i_getc)(f)) != EOF &&
05883                (c2 = (*i_getc)(f)) != EOF) {
05884             size_t ret = nkf_iconv_utf_16(c1, c2, 0, 0);
05885             if (ret == NKF_ICONV_NEED_TWO_MORE_BYTES &&
05886                 (c3 = (*i_getc)(f)) != EOF &&
05887                 (c4 = (*i_getc)(f)) != EOF) {
05888                 nkf_iconv_utf_16(c1, c2, c3, c4);
05889             } else if (ret == (size_t)NKF_ICONV_WAIT_COMBINING_CHAR) {
05890                 if ((c3 = (*i_getc)(f)) != EOF &&
05891                     (c4 = (*i_getc)(f)) != EOF) {
05892                     if (nkf_iconv_utf_16_combine(c1, c2, c3, c4)) {
05893                         (*i_ungetc)(c4, f);
05894                         (*i_ungetc)(c3, f);
05895                         nkf_iconv_utf_16_nocombine(c1, c2);
05896                     }
05897                 } else {
05898                     nkf_iconv_utf_16_nocombine(c1, c2);
05899                 }
05900             }
05901         }
05902         goto finished;
05903     }
05904 #endif
05905 
05906     while ((c1 = (*i_getc)(f)) != EOF) {
05907 #ifdef INPUT_CODE_FIX
05908         if (!input_encoding)
05909 #endif
05910             code_status(c1);
05911         if (c2) {
05912             /* second byte */
05913             if (c2 > ((input_encoding && nkf_enc_cp5022x_p(input_encoding)) ? 0x92 : DEL)) {
05914                 /* in case of 8th bit is on */
05915                 if (!estab_f&&!mime_decode_mode) {
05916                     /* in case of not established yet */
05917                     /* It is still ambiguious */
05918                     if (h_conv(f, c2, c1)==EOF) {
05919                         LAST;
05920                     }
05921                     else {
05922                         SKIP;
05923                     }
05924                 }
05925                 else {
05926                     /* in case of already established */
05927                     if (c1 < 0x40) {
05928                         /* ignore bogus code */
05929                         SKIP;
05930                     } else {
05931                         SEND;
05932                     }
05933                 }
05934             }
05935             else {
05936                 /* 2nd byte of 7 bit code or SJIS */
05937                 SEND;
05938             }
05939         }
05940         else if (nkf_char_unicode_p(c1)) {
05941             (*oconv)(0, c1);
05942             NEXT;
05943         }
05944         else {
05945             /* first byte */
05946             if (input_mode == JIS_X_0208 && DEL <= c1 && c1 < 0x92) {
05947                 /* CP5022x */
05948                 MORE;
05949             }else if (input_codename && input_codename[0] == 'I' &&
05950                     0xA1 <= c1 && c1 <= 0xDF) {
05951                 /* JIS X 0201 Katakana in 8bit JIS */
05952                 c2 = JIS_X_0201_1976_K;
05953                 c1 &= 0x7f;
05954                 SEND;
05955             } else if (c1 > DEL) {
05956                 /* 8 bit code */
05957                 if (!estab_f && !iso8859_f) {
05958                     /* not established yet */
05959                     MORE;
05960                 } else { /* estab_f==TRUE */
05961                     if (iso8859_f) {
05962                         c2 = ISO_8859_1;
05963                         c1 &= 0x7f;
05964                         SEND;
05965                     }
05966                     else if ((iconv == s_iconv && 0xA0 <= c1 && c1 <= 0xDF) ||
05967                              (ms_ucs_map_f == UCS_MAP_CP10001 && (c1 == 0xFD || c1 == 0xFE))) {
05968                         /* JIS X 0201 */
05969                         c2 = JIS_X_0201_1976_K;
05970                         c1 &= 0x7f;
05971                         SEND;
05972                     }
05973                     else {
05974                         /* already established */
05975                         MORE;
05976                     }
05977                 }
05978             } else if (SP < c1 && c1 < DEL) {
05979                 /* in case of Roman characters */
05980                 if (shift_mode) {
05981                     /* output 1 shifted byte */
05982                     if (iso8859_f) {
05983                         c2 = ISO_8859_1;
05984                         SEND;
05985                     } else if (nkf_byte_jisx0201_katakana_p(c1)){
05986                         /* output 1 shifted byte */
05987                         c2 = JIS_X_0201_1976_K;
05988                         SEND;
05989                     } else {
05990                         /* look like bogus code */
05991                         SKIP;
05992                     }
05993                 } else if (input_mode == JIS_X_0208 || input_mode == JIS_X_0212 ||
05994                            input_mode == JIS_X_0213_1 || input_mode == JIS_X_0213_2) {
05995                     /* in case of Kanji shifted */
05996                     MORE;
05997                 } else if (c1 == '=' && mime_f && !mime_decode_mode) {
05998                     /* Check MIME code */
05999                     if ((c1 = (*i_getc)(f)) == EOF) {
06000                         (*oconv)(0, '=');
06001                         LAST;
06002                     } else if (c1 == '?') {
06003                         /* =? is mime conversion start sequence */
06004                         if(mime_f == STRICT_MIME) {
06005                             /* check in real detail */
06006                             if (mime_begin_strict(f) == EOF)
06007                                 LAST;
06008                             SKIP;
06009                         } else if (mime_begin(f) == EOF)
06010                             LAST;
06011                         SKIP;
06012                     } else {
06013                         (*oconv)(0, '=');
06014                         (*i_ungetc)(c1,f);
06015                         SKIP;
06016                     }
06017                 } else {
06018                     /* normal ASCII code */
06019                     SEND;
06020                 }
06021             } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
06022                 shift_mode = 0;
06023                 SKIP;
06024             } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
06025                 shift_mode = 1;
06026                 SKIP;
06027             } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
06028                 if ((c1 = (*i_getc)(f)) == EOF) {
06029                     (*oconv)(0, ESC);
06030                     LAST;
06031                 }
06032                 else if (c1 == '&') {
06033                     /* IRR */
06034                     if ((c1 = (*i_getc)(f)) == EOF) {
06035                         LAST;
06036                     } else {
06037                         SKIP;
06038                     }
06039                 }
06040                 else if (c1 == '$') {
06041                     /* GZDMx */
06042                     if ((c1 = (*i_getc)(f)) == EOF) {
06043                         /* don't send bogus code
06044                            (*oconv)(0, ESC);
06045                            (*oconv)(0, '$'); */
06046                         LAST;
06047                     } else if (c1 == '@' || c1 == 'B') {
06048                         /* JIS X 0208 */
06049                         set_input_mode(JIS_X_0208);
06050                         SKIP;
06051                     } else if (c1 == '(') {
06052                         /* GZDM4 */
06053                         if ((c1 = (*i_getc)(f)) == EOF) {
06054                             /* don't send bogus code
06055                                (*oconv)(0, ESC);
06056                                (*oconv)(0, '$');
06057                                (*oconv)(0, '(');
06058                              */
06059                             LAST;
06060                         } else if (c1 == '@'|| c1 == 'B') {
06061                             /* JIS X 0208 */
06062                             set_input_mode(JIS_X_0208);
06063                             SKIP;
06064 #ifdef X0212_ENABLE
06065                         } else if (c1 == 'D'){
06066                             set_input_mode(JIS_X_0212);
06067                             SKIP;
06068 #endif /* X0212_ENABLE */
06069                         } else if (c1 == 'O' || c1 == 'Q'){
06070                             set_input_mode(JIS_X_0213_1);
06071                             SKIP;
06072                         } else if (c1 == 'P'){
06073                             set_input_mode(JIS_X_0213_2);
06074                             SKIP;
06075                         } else {
06076                             /* could be some special code */
06077                             (*oconv)(0, ESC);
06078                             (*oconv)(0, '$');
06079                             (*oconv)(0, '(');
06080                             (*oconv)(0, c1);
06081                             SKIP;
06082                         }
06083                     } else if (broken_f&0x2) {
06084                         /* accept any ESC-(-x as broken code ... */
06085                         input_mode = JIS_X_0208;
06086                         shift_mode = 0;
06087                         SKIP;
06088                     } else {
06089                         (*oconv)(0, ESC);
06090                         (*oconv)(0, '$');
06091                         (*oconv)(0, c1);
06092                         SKIP;
06093                     }
06094                 } else if (c1 == '(') {
06095                     /* GZD4 */
06096                     if ((c1 = (*i_getc)(f)) == EOF) {
06097                         /* don't send bogus code
06098                            (*oconv)(0, ESC);
06099                            (*oconv)(0, '('); */
06100                         LAST;
06101                     }
06102                     else if (c1 == 'I') {
06103                         /* JIS X 0201 Katakana */
06104                         set_input_mode(JIS_X_0201_1976_K);
06105                         shift_mode = 1;
06106                         SKIP;
06107                     }
06108                     else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
06109                         /* ISO-646IRV:1983 or JIS X 0201 Roman or JUNET */
06110                         set_input_mode(ASCII);
06111                         SKIP;
06112                     }
06113                     else if (broken_f&0x2) {
06114                         set_input_mode(ASCII);
06115                         SKIP;
06116                     }
06117                     else {
06118                         (*oconv)(0, ESC);
06119                         (*oconv)(0, '(');
06120                         SEND;
06121                     }
06122                 }
06123                 else if (c1 == '.') {
06124                     /* G2D6 */
06125                     if ((c1 = (*i_getc)(f)) == EOF) {
06126                         LAST;
06127                     }
06128                     else if (c1 == 'A') {
06129                         /* ISO-8859-1 */
06130                         g2 = ISO_8859_1;
06131                         SKIP;
06132                     }
06133                     else {
06134                         (*oconv)(0, ESC);
06135                         (*oconv)(0, '.');
06136                         SEND;
06137                     }
06138                 }
06139                 else if (c1 == 'N') {
06140                     /* SS2 */
06141                     c1 = (*i_getc)(f);
06142                     if (g2 == ISO_8859_1) {
06143                         c2 = ISO_8859_1;
06144                         SEND;
06145                     }else{
06146                         (*i_ungetc)(c1, f);
06147                         /* lonely ESC  */
06148                         (*oconv)(0, ESC);
06149                         SEND;
06150                     }
06151                 }
06152                 else {
06153                     /* lonely ESC  */
06154                     (*oconv)(0, ESC);
06155                     SEND;
06156                 }
06157             } else if (c1 == ESC && iconv == s_iconv) {
06158                 /* ESC in Shift_JIS */
06159                 if ((c1 = (*i_getc)(f)) == EOF) {
06160                     (*oconv)(0, ESC);
06161                     LAST;
06162                 } else if (c1 == '$') {
06163                     /* J-PHONE emoji */
06164                     if ((c1 = (*i_getc)(f)) == EOF) {
06165                         LAST;
06166                     } else if (('E' <= c1 && c1 <= 'G') ||
06167                                ('O' <= c1 && c1 <= 'Q')) {
06168                         /*
06169                            NUM : 0 1 2 3 4 5
06170                            BYTE: G E F O P Q
06171                            C%7 : 1 6 0 2 3 4
06172                            C%7 : 0 1 2 3 4 5 6
06173                            NUM : 2 0 3 4 5 X 1
06174                          */
06175                         static const nkf_char jphone_emoji_first_table[7] =
06176                         {0xE1E0, 0xDFE0, 0xE2E0, 0xE3E0, 0xE4E0, 0xDFE0, 0xE0E0};
06177                         c3 = nkf_char_unicode_new(jphone_emoji_first_table[c1 % 7]);
06178                         if ((c1 = (*i_getc)(f)) == EOF) LAST;
06179                         while (SP <= c1 && c1 <= 'z') {
06180                             (*oconv)(0, c1 + c3);
06181                             if ((c1 = (*i_getc)(f)) == EOF) LAST;
06182                         }
06183                         SKIP;
06184                     }
06185                     else {
06186                         (*oconv)(0, ESC);
06187                         (*oconv)(0, '$');
06188                         SEND;
06189                     }
06190                 }
06191                 else {
06192                     /* lonely ESC  */
06193                     (*oconv)(0, ESC);
06194                     SEND;
06195                 }
06196             } else if (c1 == LF || c1 == CR) {
06197                 if (broken_f&4) {
06198                     input_mode = ASCII; set_iconv(FALSE, 0);
06199                     SEND;
06200                 } else if (mime_decode_f && !mime_decode_mode){
06201                     if (c1 == LF) {
06202                         if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
06203                             i_ungetc(SP,f);
06204                             continue;
06205                         } else {
06206                             i_ungetc(c1,f);
06207                         }
06208                         c1 = LF;
06209                         SEND;
06210                     } else  { /* if (c1 == CR)*/
06211                         if ((c1=(*i_getc)(f))!=EOF) {
06212                             if (c1==SP) {
06213                                 i_ungetc(SP,f);
06214                                 continue;
06215                             } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
06216                                 i_ungetc(SP,f);
06217                                 continue;
06218                             } else {
06219                                 i_ungetc(c1,f);
06220                             }
06221                             i_ungetc(LF,f);
06222                         } else {
06223                             i_ungetc(c1,f);
06224                         }
06225                         c1 = CR;
06226                         SEND;
06227                     }
06228                 }
06229             } else
06230                 SEND;
06231         }
06232         /* send: */
06233         switch(input_mode){
06234         case ASCII:
06235             switch ((*iconv)(c2, c1, 0)) {  /* can be EUC / SJIS / UTF-8 */
06236             case -2:
06237                 /* 4 bytes UTF-8 */
06238                 if ((c3 = (*i_getc)(f)) != EOF) {
06239                     code_status(c3);
06240                     c3 <<= 8;
06241                     if ((c4 = (*i_getc)(f)) != EOF) {
06242                         code_status(c4);
06243                         (*iconv)(c2, c1, c3|c4);
06244                     }
06245                 }
06246                 break;
06247             case -3:
06248                 /* 4 bytes UTF-8 (check combining character) */
06249                 if ((c3 = (*i_getc)(f)) != EOF) {
06250                     if ((c4 = (*i_getc)(f)) != EOF) {
06251                         if (w_iconv_combine(c2, c1, 0, c3, c4, 0)) {
06252                             (*i_ungetc)(c4, f);
06253                             (*i_ungetc)(c3, f);
06254                             w_iconv_nocombine(c2, c1, 0);
06255                         }
06256                     } else {
06257                         (*i_ungetc)(c3, f);
06258                         w_iconv_nocombine(c2, c1, 0);
06259                     }
06260                 } else {
06261                     w_iconv_nocombine(c2, c1, 0);
06262                 }
06263                 break;
06264             case -1:
06265                 /* 3 bytes EUC or UTF-8 */
06266                 if ((c3 = (*i_getc)(f)) != EOF) {
06267                     code_status(c3);
06268                     if ((*iconv)(c2, c1, c3) == -3) {
06269                         /* 6 bytes UTF-8 (check combining character) */
06270                         nkf_char c5, c6;
06271                         if ((c4 = (*i_getc)(f)) != EOF) {
06272                             if ((c5 = (*i_getc)(f)) != EOF) {
06273                                 if ((c6 = (*i_getc)(f)) != EOF) {
06274                                     if (w_iconv_combine(c2, c1, c3, c4, c5, c6)) {
06275                                         (*i_ungetc)(c6, f);
06276                                         (*i_ungetc)(c5, f);
06277                                         (*i_ungetc)(c4, f);
06278                                         w_iconv_nocombine(c2, c1, c3);
06279                                     }
06280                                 } else {
06281                                     (*i_ungetc)(c5, f);
06282                                     (*i_ungetc)(c4, f);
06283                                     w_iconv_nocombine(c2, c1, c3);
06284                                 }
06285                             } else {
06286                                 (*i_ungetc)(c4, f);
06287                                 w_iconv_nocombine(c2, c1, c3);
06288                             }
06289                         } else {
06290                             w_iconv_nocombine(c2, c1, c3);
06291                         }
06292                     }
06293                 }
06294                 break;
06295             }
06296             break;
06297         case JIS_X_0208:
06298         case JIS_X_0213_1:
06299             if (ms_ucs_map_f &&
06300                 0x7F <= c2 && c2 <= 0x92 &&
06301                 0x21 <= c1 && c1 <= 0x7E) {
06302                 /* CP932 UDC */
06303                 c1 = nkf_char_unicode_new((c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000);
06304                 c2 = 0;
06305             }
06306             (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
06307             break;
06308 #ifdef X0212_ENABLE
06309         case JIS_X_0212:
06310             (*oconv)(PREFIX_EUCG3 | c2, c1);
06311             break;
06312 #endif /* X0212_ENABLE */
06313         case JIS_X_0213_2:
06314             (*oconv)(PREFIX_EUCG3 | c2, c1);
06315             break;
06316         default:
06317             (*oconv)(input_mode, c1);  /* other special case */
06318         }
06319 
06320         c2 = 0;
06321         c3 = 0;
06322         continue;
06323         /* goto next_word */
06324     }
06325 
06326 finished:
06327     /* epilogue */
06328     (*iconv)(EOF, 0, 0);
06329     if (!input_codename)
06330     {
06331         if (is_8bit) {
06332             struct input_code *p = input_code_list;
06333             struct input_code *result = p;
06334             while (p->name){
06335                 if (p->score < result->score) result = p;
06336                 ++p;
06337             }
06338             set_input_codename(result->name);
06339 #ifdef CHECK_OPTION
06340             debug(result->name);
06341 #endif
06342         }
06343     }
06344     return 0;
06345 }
06346 
06347 /*
06348  * int options(unsigned char *cp)
06349  *
       * Parse one option string and store the result in the file-level
       * conversion settings (input_encoding, output_encoding, fold_f, ...).
       *
       * cp: NUL-terminated option text.  Everything up to and including the
       *     first '-' is skipped.  Short options may be bundled ("-sW8");
       *     a long option starts with a second '-' ("--ic=UTF-8").  Several
       *     groups separated by a space are accepted in a single string.
       *
       * Long options are looked up in long_option[].  If the entry has a
       * non-empty alias, the alias text is parsed as short options and the
       * parser then resumes in the original string (see cp_back).  Entries
       * without an alias are handled by the strcmp() chain below.
       *
       * Notes:
       *  - Once a bare "--" has been seen, option_mode is set to 1 and every
       *    later call returns 0 immediately.
       *  - "--help", "-v" and "-V" print their output and call exit() unless
       *    PERL_XS is defined.
       *  - "--exec-in" / "--exec-out" set exec_f and return 0 at once.
       *  - An encoding name that nkf_enc_find() does not know is silently
       *    ignored for "--ic=" and "--oc=".
       *
06350  * return values:
06351  *    0: success
06352  *   -1: ArgumentError
       *       (unknown long option, long option without a handler,
       *        or unknown short option)
06353  */
06354 static int
06355 options(unsigned char *cp)
06356 {
06357     nkf_char i, j;
06358     unsigned char *p;
06359     unsigned char *cp_back = NULL; /* where to resume once an alias string has been consumed */
06360     nkf_encoding *enc;
06361
06362     if (option_mode==1) /* set below once a bare "--" has been seen */
06363         return 0;
06364     while(*cp && *cp++!='-'); /* skip up to and including the first '-' */
06365     while (*cp || cp_back) {
06366         if(!*cp){ /* alias text exhausted: go back to the original argument */
06367             cp = cp_back;
06368             cp_back = NULL;
06369             continue;
06370         }
06371         p = 0;
06372         switch (*cp++) {
06373         case '-':  /* literal options */
06374             if (!*cp || *cp == SP) {        /* ignore the rest of arguments */
06375                 option_mode = 1;
06376                 return 0;
06377             }
06378             for (i=0;i<(int)(sizeof(long_option)/sizeof(long_option[0]));i++) {
06379                 p = (unsigned char *)long_option[i].name;
06380                 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
                      /* Accept when name and argument stop on the same character
                         (both NUL or both '='), or when the argument reaches a space. */
06381                 if (*p == cp[j] || cp[j] == SP){
06382                     p = &cp[j] + 1; /* for "name=value" this is the first character of value */
06383                     break;
06384                 }
06385                 p = 0;
06386             }
06387             if (p == 0) {
06388 #if !defined(PERL_XS) && !defined(WIN32DLL)
06389                 fprintf(stderr, "unknown long option: --%s\n", cp);
06390 #endif
06391                 return -1;
06392             }
06393             while(*cp && *cp != SP && cp++); /* skip the remainder of this long option */
06394             if (long_option[i].alias[0]){
                      /* Parse the alias as short options, then resume at cp_back. */
06395                 cp_back = cp;
06396                 cp = (unsigned char *)long_option[i].alias;
06397             }else{
06398 #ifndef PERL_XS
06399                 if (strcmp(long_option[i].name, "help") == 0){
06400                     usage();
06401                     exit(EXIT_SUCCESS);
06402                 }
06403 #endif
06404                 if (strcmp(long_option[i].name, "ic=") == 0){
06405                     enc = nkf_enc_find((char *)p);
06406                     if (!enc) continue; /* unknown encoding name: silently ignored */
06407                     input_encoding = enc;
06408                     continue;
06409                 }
06410                 if (strcmp(long_option[i].name, "oc=") == 0){
06411                     enc = nkf_enc_find((char *)p);
06412                     /* if (enc <= 0) continue; */
06413                     if (!enc) continue; /* unknown encoding name: silently ignored */
06414                     output_encoding = enc;
06415                     continue;
06416                 }
06417                 if (strcmp(long_option[i].name, "guess=") == 0){
06418                     if (p[0] == '0' || p[0] == '1') {
06419                         guess_f = 1;
06420                     } else {
06421                         guess_f = 2;
06422                     }
06423                     continue;
06424                 }
06425 #ifdef OVERWRITE
06426                 if (strcmp(long_option[i].name, "overwrite") == 0){
06427                     file_out_f = TRUE;
06428                     overwrite_f = TRUE;
06429                     preserve_time_f = TRUE;
06430                     continue;
06431                 }
06432                 if (strcmp(long_option[i].name, "overwrite=") == 0){
06433                     file_out_f = TRUE;
06434                     overwrite_f = TRUE;
06435                     preserve_time_f = TRUE;
06436                     backup_f = TRUE;
06437                     backup_suffix = (char *)p; /* points into the caller's option string, not a copy */
06438                     continue;
06439                 }
06440                 if (strcmp(long_option[i].name, "in-place") == 0){
06441                     file_out_f = TRUE;
06442                     overwrite_f = TRUE;
06443                     preserve_time_f = FALSE;
06444                     continue;
06445                 }
06446                 if (strcmp(long_option[i].name, "in-place=") == 0){
06447                     file_out_f = TRUE;
06448                     overwrite_f = TRUE;
06449                     preserve_time_f = FALSE;
06450                     backup_f = TRUE;
06451                     backup_suffix = (char *)p; /* points into the caller's option string, not a copy */
06452                     continue;
06453                 }
06454 #endif
06455 #ifdef INPUT_OPTION
06456                 if (strcmp(long_option[i].name, "cap-input") == 0){
06457                     cap_f = TRUE;
06458                     continue;
06459                 }
06460                 if (strcmp(long_option[i].name, "url-input") == 0){
06461                     url_f = TRUE;
06462                     continue;
06463                 }
06464 #endif
06465 #ifdef NUMCHAR_OPTION
06466                 if (strcmp(long_option[i].name, "numchar-input") == 0){
06467                     numchar_f = TRUE;
06468                     continue;
06469                 }
06470 #endif
06471 #ifdef CHECK_OPTION
06472                 if (strcmp(long_option[i].name, "no-output") == 0){
06473                     noout_f = TRUE;
06474                     continue;
06475                 }
06476                 if (strcmp(long_option[i].name, "debug") == 0){
06477                     debug_f = TRUE;
06478                     continue;
06479                 }
06480 #endif
06481                 if (strcmp(long_option[i].name, "cp932") == 0){
06482 #ifdef SHIFTJIS_CP932
06483                     cp51932_f = TRUE;
06484                     cp932inv_f = -TRUE;
06485 #endif
06486 #ifdef UTF8_OUTPUT_ENABLE
06487                     ms_ucs_map_f = UCS_MAP_CP932;
06488 #endif
06489                     continue;
06490                 }
06491                 if (strcmp(long_option[i].name, "no-cp932") == 0){
06492 #ifdef SHIFTJIS_CP932
06493                     cp51932_f = FALSE;
06494                     cp932inv_f = FALSE;
06495 #endif
06496 #ifdef UTF8_OUTPUT_ENABLE
06497                     ms_ucs_map_f = UCS_MAP_ASCII;
06498 #endif
06499                     continue;
06500                 }
06501 #ifdef SHIFTJIS_CP932
06502                 if (strcmp(long_option[i].name, "cp932inv") == 0){
06503                     cp932inv_f = -TRUE;
06504                     continue;
06505                 }
06506 #endif
06507
06508 #ifdef X0212_ENABLE
06509                 if (strcmp(long_option[i].name, "x0212") == 0){
06510                     x0212_f = TRUE;
06511                     continue;
06512                 }
06513 #endif
06514
06515 #ifdef EXEC_IO
06516                 if (strcmp(long_option[i].name, "exec-in") == 0){
06517                     exec_f = 1;
06518                     return 0; /* stop parsing here; nothing after this option is examined */
06519                 }
06520                 if (strcmp(long_option[i].name, "exec-out") == 0){
06521                     exec_f = -1;
06522                     return 0; /* stop parsing here; nothing after this option is examined */
06523                 }
06524 #endif
06525 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
06526                 if (strcmp(long_option[i].name, "no-cp932ext") == 0){
06527                     no_cp932ext_f = TRUE;
06528                     continue;
06529                 }
06530                 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
06531                     no_best_fit_chars_f = TRUE;
06532                     continue;
06533                 }
06534                 if (strcmp(long_option[i].name, "fb-skip") == 0){
06535                     encode_fallback = NULL;
06536                     continue;
06537                 }
06538                 if (strcmp(long_option[i].name, "fb-html") == 0){
06539                     encode_fallback = encode_fallback_html;
06540                     continue;
06541                 }
06542                 if (strcmp(long_option[i].name, "fb-xml") == 0){
06543                     encode_fallback = encode_fallback_xml;
06544                     continue;
06545                 }
06546                 if (strcmp(long_option[i].name, "fb-java") == 0){
06547                     encode_fallback = encode_fallback_java;
06548                     continue;
06549                 }
06550                 if (strcmp(long_option[i].name, "fb-perl") == 0){
06551                     encode_fallback = encode_fallback_perl;
06552                     continue;
06553                 }
06554                 if (strcmp(long_option[i].name, "fb-subchar") == 0){
06555                     encode_fallback = encode_fallback_subchar;
06556                     continue;
06557                 }
06558                 if (strcmp(long_option[i].name, "fb-subchar=") == 0){
                          /* The value is read C-style: leading "0x"/"0X" is hex, a leading
                             '0' is octal, anything else is decimal.
                             NOTE: i and j are reused as scratch from here on, so the
                             long_option index is lost; that is fine only because this
                             branch ends with continue. */
06559                     encode_fallback = encode_fallback_subchar;
06560                     unicode_subchar = 0;
06561                     if (p[0] != '0'){
06562                         /* decimal number */
06563                         for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
06564                             unicode_subchar *= 10;
06565                             unicode_subchar += hex2bin(p[i]);
06566                         }
06567                     }else if(p[1] == 'x' || p[1] == 'X'){
06568                         /* hexadecimal number */
06569                         for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
06570                             unicode_subchar <<= 4;
06571                             unicode_subchar |= hex2bin(p[i]);
06572                         }
06573                     }else{
06574                         /* octal number */
06575                         for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
06576                             unicode_subchar *= 8;
06577                             unicode_subchar += hex2bin(p[i]);
06578                         }
06579                     }
06580                     w16e_conv(unicode_subchar, &i, &j);
06581                     unicode_subchar = i<<8 | j;
06582                     continue;
06583                 }
06584 #endif
06585 #ifdef UTF8_OUTPUT_ENABLE
06586                 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
06587                     ms_ucs_map_f = UCS_MAP_MS;
06588                     continue;
06589                 }
06590 #endif
06591 #ifdef UNICODE_NORMALIZATION
06592                 if (strcmp(long_option[i].name, "utf8mac-input") == 0){
06593                     nfc_f = TRUE;
06594                     continue;
06595                 }
06596 #endif
06597                 if (strcmp(long_option[i].name, "prefix=") == 0){
                          /* p[0] is the prefix character; every following graphic
                             character gets p[0] recorded in prefix_table. */
06598                     if (nkf_isgraph(p[0])){
06599                         for (i = 1; nkf_isgraph(p[i]); i++){
06600                             prefix_table[p[i]] = p[0];
06601                         }
06602                     }
06603                     continue;
06604                 }
                      /* Listed in long_option[] without an alias, but no handler above. */
06605 #if !defined(PERL_XS) && !defined(WIN32DLL)
06606                 fprintf(stderr, "unsupported long option: --%s\n", long_option[i].name);
06607 #endif
06608                 return -1;
06609             }
06610             continue;
06611         case 'b':           /* buffered mode */
06612             unbuf_f = FALSE;
06613             continue;
06614         case 'u':           /* non-buffered mode */
06615             unbuf_f = TRUE;
06616             continue;
06617         case 't':           /* transparent mode */
06618             if (*cp=='1') {
06619                 /* alias of -t */
06620                 cp++;
06621                 nop_f = TRUE;
06622             } else if (*cp=='2') {
06623                 /*
06624                  * -t with put/get
06625                  *
06626                  * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
06627                  *
06628                  */
06629                 cp++;
06630                 nop_f = 2;
06631             } else
06632                 nop_f = TRUE;
06633             continue;
06634         case 'j':           /* JIS output */
06635         case 'n':
06636             output_encoding = nkf_enc_from_index(ISO_2022_JP);
06637             continue;
06638         case 'e':           /* AT&T EUC output */
06639             output_encoding = nkf_enc_from_index(EUCJP_NKF);
06640             continue;
06641         case 's':           /* SJIS output */
06642             output_encoding = nkf_enc_from_index(SHIFT_JIS);
06643             continue;
06644         case 'l':           /* ISO8859 Latin-1 support, no conversion */
06645             iso8859_f = TRUE;  /* Only compatible with ISO-2022-JP */
06646             input_encoding = nkf_enc_from_index(ISO_8859_1);
06647             continue;
06648         case 'i':           /* Kanji IN ESC-$-@/B */
06649             if (*cp=='@'||*cp=='B')
06650                 kanji_intro = *cp++;
06651             continue;
06652         case 'o':           /* ASCII IN ESC-(-J/B/H */
06653             /* ESC ( H was used in initial JUNET messages */
06654             if (*cp=='J'||*cp=='B'||*cp=='H')
06655                 ascii_intro = *cp++;
06656             continue;
06657         case 'h':
06658             /*
06659                bit:1   katakana->hiragana
06660                bit:2   hiragana->katakana
06661              */
06662             if ('9'>= *cp && *cp>='0')
06663                 hira_f |= (*cp++ -'0'); /* the digit's value is OR-ed in as-is */
06664             else
06665                 hira_f |= 1;
06666             continue;
06667         case 'r':
06668             rot_f = TRUE;
06669             continue;
06670 #if defined(MSDOS) || defined(__OS2__)
06671         case 'T':
06672             binmode_f = FALSE;
06673             continue;
06674 #endif
06675 #ifndef PERL_XS
06676         case 'V':
06677             show_configuration();
06678             exit(EXIT_SUCCESS);
06679             break;
06680         case 'v':
06681             version();
06682             exit(EXIT_SUCCESS);
06683             break;
06684 #endif
06685 #ifdef UTF8_OUTPUT_ENABLE
06686         case 'w':           /* UTF-{8,16,32} output */
06687             if (cp[0] == '8') {
06688                 cp++;
06689                 if (cp[0] == '0'){
06690                     cp++;
06691                     output_encoding = nkf_enc_from_index(UTF_8N);
06692                 } else {
06693                     output_bom_f = TRUE;
06694                     output_encoding = nkf_enc_from_index(UTF_8_BOM);
06695                 }
06696             } else {
06697                 int enc_idx;
06698                 if ('1'== cp[0] && '6'==cp[1]) {
06699                     cp += 2;
06700                     enc_idx = UTF_16;
06701                 } else if ('3'== cp[0] && '2'==cp[1]) {
06702                     cp += 2;
06703                     enc_idx = UTF_32;
06704                 } else {
06705                     output_encoding = nkf_enc_from_index(UTF_8);
06706                     continue;
06707                 }
                      /* Optional endianness letter: 'L' (little) or 'B'; either one turns the BOM on. */
06708                 if (cp[0]=='L') {
06709                     cp++;
06710                     output_endian = ENDIAN_LITTLE;
06711                     output_bom_f = TRUE;
06712                 } else if (cp[0] == 'B') {
06713                     cp++;
06714                     output_bom_f = TRUE;
06715                 }
                      /* A trailing '0' selects the variant without BOM. */
06716                 if (cp[0] == '0'){
06717                     output_bom_f = FALSE;
06718                     cp++;
06719                     enc_idx = enc_idx == UTF_16
06720                         ? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
06721                         : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
06722                 } else {
06723                     enc_idx = enc_idx == UTF_16
06724                         ? (output_endian == ENDIAN_LITTLE ? UTF_16LE_BOM : UTF_16BE_BOM)
06725                         : (output_endian == ENDIAN_LITTLE ? UTF_32LE_BOM : UTF_32BE_BOM);
06726                 }
06727                 output_encoding = nkf_enc_from_index(enc_idx);
06728             }
06729             continue;
06730 #endif
06731 #ifdef UTF8_INPUT_ENABLE
06732         case 'W':           /* UTF input */
06733             if (cp[0] == '8') {
06734                 cp++;
06735                 input_encoding = nkf_enc_from_index(UTF_8);
06736             }else{
06737                 int enc_idx;
06738                 if ('1'== cp[0] && '6'==cp[1]) {
06739                     cp += 2;
06740                     input_endian = ENDIAN_BIG;
06741                     enc_idx = UTF_16;
06742                 } else if ('3'== cp[0] && '2'==cp[1]) {
06743                     cp += 2;
06744                     input_endian = ENDIAN_BIG;
06745                     enc_idx = UTF_32;
06746                 } else {
06747                     input_encoding = nkf_enc_from_index(UTF_8);
06748                     continue;
06749                 }
                      /* Optional endianness letter; big endian was preset above. */
06750                 if (cp[0]=='L') {
06751                     cp++;
06752                     input_endian = ENDIAN_LITTLE;
06753                 } else if (cp[0] == 'B') {
06754                     cp++;
06755                     input_endian = ENDIAN_BIG;
06756                 }
06757                 enc_idx = (enc_idx == UTF_16
06758                     ? (input_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
06759                     : (input_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE));
06760                 input_encoding = nkf_enc_from_index(enc_idx);
06761             }
06762             continue;
06763 #endif
06764             /* Input code assumption */
06765         case 'J':   /* ISO-2022-JP input */
06766             input_encoding = nkf_enc_from_index(ISO_2022_JP);
06767             continue;
06768         case 'E':   /* EUC-JP input */
06769             input_encoding = nkf_enc_from_index(EUCJP_NKF);
06770             continue;
06771         case 'S':   /* Shift_JIS input */
06772             input_encoding = nkf_enc_from_index(SHIFT_JIS);
06773             continue;
06774         case 'Z':   /* Convert X0208 alphabet to ASCII */
06775             /* alpha_f
06776                bit:0   Convert JIS X 0208 Alphabet to ASCII
06777                bit:1   Convert Kankaku to one space
06778                bit:2   Convert Kankaku to two spaces
06779                bit:3   Convert HTML Entity
06780                bit:4   Convert JIS X 0208 Katakana to JIS X 0201 Katakana
06781              */
06782             while ('0'<= *cp && *cp <='4') {
06783                 alpha_f |= 1 << (*cp++ - '0');
06784             }
06785             alpha_f |= 1; /* bit 0 is always set, with or without digits */
06786             continue;
06787         case 'x':   /* Convert X0201 kana to X0208 or X0201 Conversion */
06788             x0201_f = FALSE;    /* No X0201->X0208 conversion */
06789             /* accept  X0201
06790                ESC-(-I     in JIS, EUC, MS Kanji
06791                SI/SO       in JIS, EUC, MS Kanji
06792                SS2         in EUC, JIS, not in MS Kanji
06793                MS Kanji (0xa0-0xdf)
06794                output  X0201
06795                ESC-(-I     in JIS (0x20-0x5f)
06796                SS2         in EUC (0xa0-0xdf)
06797                0xa0-0xd    in MS Kanji (0xa0-0xdf)
06798              */
06799             continue;
06800         case 'X':   /* Convert X0201 kana to X0208 */
06801             x0201_f = TRUE;
06802             continue;
06803         case 'F':   /* preserve new lines */
06804             fold_preserve_f = TRUE; /* no continue: falls through into 'f' on purpose */
06805         case 'f':   /* folding -f60 or -f */
06806             fold_f = TRUE;
06807             fold_len = 0;
06808             while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
06809                 fold_len *= 10;
06810                 fold_len += *cp++ - '0';
06811             }
                  /* NOTE(review): the digit loops here accumulate without an upper bound
                     before the range check; a very long digit string could overflow. */
06812             if (!(0<fold_len && fold_len<BUFSIZ))
06813                 fold_len = DEFAULT_FOLD;
06814             if (*cp=='-') {
06815                 fold_margin = 0;
06816                 cp++;
06817                 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
06818                     fold_margin *= 10;
06819                     fold_margin += *cp++ - '0';
06820                 }
06821             }
06822             continue;
06823         case 'm':   /* MIME support */
06824             /* mime_decode_f = TRUE; */ /* this has too large side effects... */
06825             if (*cp=='B'||*cp=='Q') {
06826                 mime_decode_mode = *cp++;
06827                 mimebuf_f = FIXED_MIME;
06828             } else if (*cp=='N') {
06829                 mime_f = TRUE; cp++;
06830             } else if (*cp=='S') {
06831                 mime_f = STRICT_MIME; cp++;
06832             } else if (*cp=='0') {
06833                 mime_decode_f = FALSE;
06834                 mime_f = FALSE; cp++;
06835             } else {
06836                 mime_f = STRICT_MIME;
06837             }
06838             continue;
06839         case 'M':   /* MIME output */
06840             if (*cp=='B') {
06841                 mimeout_mode = 'B';
06842                 mimeout_f = FIXED_MIME; cp++;
06843             } else if (*cp=='Q') {
06844                 mimeout_mode = 'Q';
06845                 mimeout_f = FIXED_MIME; cp++;
06846             } else {
06847                 mimeout_f = TRUE;
06848             }
06849             continue;
06850         case 'B':   /* Broken JIS support */
06851             /*  bit:0   no ESC JIS
06852                bit:1   allow any x on ESC-(-x or ESC-$-x
06853                bit:2   reset to ascii on NL
06854              */
06855             if ('9'>= *cp && *cp>='0')
06856                 broken_f |= 1<<(*cp++ -'0');
06857             else
06858                 broken_f |= TRUE;
06859             continue;
06860 #ifndef PERL_XS
06861         case 'O':/* for Output file */
06862             file_out_f = TRUE;
06863             continue;
06864 #endif
06865         case 'c':/* add cr code */
06866             eolmode_f = CRLF;
06867             continue;
06868         case 'd':/* delete cr code */
06869             eolmode_f = LF;
06870             continue;
06871         case 'I':   /* ISO-2022-JP output */
06872             iso2022jp_f = TRUE;
06873             continue;
06874         case 'L':  /* line mode */
06875             if (*cp=='u') {         /* unix */
06876                 eolmode_f = LF; cp++;
06877             } else if (*cp=='m') { /* mac */
06878                 eolmode_f = CR; cp++;
06879             } else if (*cp=='w') { /* windows */
06880                 eolmode_f = CRLF; cp++;
06881             } else if (*cp=='0') { /* no conversion  */
06882                 eolmode_f = 0; cp++;
06883             }
06884             continue;
06885 #ifndef PERL_XS
06886         case 'g':
06887             if ('2' <= *cp && *cp <= '9') {
06888                 guess_f = 2;
06889                 cp++;
06890             } else if (*cp == '0' || *cp == '1') {
06891                 guess_f = 1;
06892                 cp++;
06893             } else {
06894                 guess_f = 1;
06895             }
06896             continue;
06897 #endif
06898         case SP:
06899             /* multiple options in one string are allowed for the Perl module */
06900             while(*cp && *cp++!='-'); /* skip to just past the next '-' */
06901             continue;
06902         default:
06903 #if !defined(PERL_XS) && !defined(WIN32DLL)
06904             fprintf(stderr, "unknown option: -%c\n", *(cp-1));
06905 #endif
06906             /* bogus option: stop parsing and report the error to the caller */
06907             return -1;
06908         }
06909     }
06910     return 0;
06911 }
06912 
06913 #ifdef WIN32DLL
06914 #include "nkf32dll.c"
06915 #elif defined(PERL_XS)
06916 #else /* WIN32DLL */
06917 int
06918 main(int argc, char **argv)
06919 {
06920     FILE  *fin;
06921     unsigned char  *cp;
06922 
06923     char *outfname = NULL;
06924     char *origfname;
06925 
06926 #ifdef EASYWIN /*Easy Win */
06927     _BufferSize.y = 400;/*Set Scroll Buffer Size*/
06928 #endif
06929 #ifdef DEFAULT_CODE_LOCALE
06930     setlocale(LC_CTYPE, "");
06931 #endif
06932     nkf_state_init();
06933 
06934     for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
06935         cp = (unsigned char *)*argv;
06936         options(cp);
06937 #ifdef EXEC_IO
06938         if (exec_f){
06939             int fds[2], pid;
06940             if (pipe(fds) < 0 || (pid = fork()) < 0){
06941                 abort();
06942             }
06943             if (pid == 0){
06944                 if (exec_f > 0){
06945                     close(fds[0]);
06946                     dup2(fds[1], 1);
06947                 }else{
06948                     close(fds[1]);
06949                     dup2(fds[0], 0);
06950                 }
06951                 execvp(argv[1], &argv[1]);
06952             }
06953             if (exec_f > 0){
06954                 close(fds[1]);
06955                 dup2(fds[0], 0);
06956             }else{
06957                 close(fds[0]);
06958                 dup2(fds[1], 1);
06959             }
06960             argc = 0;
06961             break;
06962         }
06963 #endif
06964     }
06965 
06966     if (guess_f) {
06967 #ifdef CHECK_OPTION
06968         int debug_f_back = debug_f;
06969 #endif
06970 #ifdef EXEC_IO
06971         int exec_f_back = exec_f;
06972 #endif
06973 #ifdef X0212_ENABLE
06974         int x0212_f_back = x0212_f;
06975 #endif
06976         int x0213_f_back = x0213_f;
06977         int guess_f_back = guess_f;
06978         reinit();
06979         guess_f = guess_f_back;
06980         mime_f = FALSE;
06981 #ifdef CHECK_OPTION
06982         debug_f = debug_f_back;
06983 #endif
06984 #ifdef EXEC_IO
06985         exec_f = exec_f_back;
06986 #endif
06987         x0212_f = x0212_f_back;
06988         x0213_f = x0213_f_back;
06989     }
06990 
06991     if (binmode_f == TRUE)
06992 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
06993         if (freopen("","wb",stdout) == NULL)
06994             return (-1);
06995 #else
06996     setbinmode(stdout);
06997 #endif
06998 
06999     if (unbuf_f)
07000         setbuf(stdout, (char *) NULL);
07001     else
07002         setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
07003 
07004     if (argc == 0) {
07005         if (binmode_f == TRUE)
07006 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
07007             if (freopen("","rb",stdin) == NULL) return (-1);
07008 #else
07009         setbinmode(stdin);
07010 #endif
07011         setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
07012         if (nop_f)
07013             noconvert(stdin);
07014         else {
07015             kanji_convert(stdin);
07016             if (guess_f) print_guessed_code(NULL);
07017         }
07018     } else {
07019         int nfiles = argc;
07020         int is_argument_error = FALSE;
07021         while (argc--) {
07022             input_codename = NULL;
07023             input_eol = 0;
07024 #ifdef CHECK_OPTION
07025             iconv_for_check = 0;
07026 #endif
07027             if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
07028                 perror(*(argv-1));
07029                 is_argument_error = TRUE;
07030                 continue;
07031             } else {
07032 #ifdef OVERWRITE
07033                 int fd = 0;
07034                 int fd_backup = 0;
07035 #endif
07036 
07037                 /* reopen file for stdout */
07038                 if (file_out_f == TRUE) {
07039 #ifdef OVERWRITE
07040                     if (overwrite_f){
07041                         outfname = nkf_xmalloc(strlen(origfname)
07042                                           + strlen(".nkftmpXXXXXX")
07043                                           + 1);
07044                         strcpy(outfname, origfname);
07045 #ifdef MSDOS
07046                         {
07047                             int i;
07048                             for (i = strlen(outfname); i; --i){
07049                                 if (outfname[i - 1] == '/'
07050                                     || outfname[i - 1] == '\\'){
07051                                     break;
07052                                 }
07053                             }
07054                             outfname[i] = '\0';
07055                         }
07056                         strcat(outfname, "ntXXXXXX");
07057                         mktemp(outfname);
07058                         fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
07059                                   S_IREAD | S_IWRITE);
07060 #else
07061                         strcat(outfname, ".nkftmpXXXXXX");
07062                         fd = mkstemp(outfname);
07063 #endif
07064                         if (fd < 0
07065                             || (fd_backup = dup(fileno(stdout))) < 0
07066                             || dup2(fd, fileno(stdout)) < 0
07067                            ){
07068                             perror(origfname);
07069                             return -1;
07070                         }
07071                     }else
07072 #endif
07073                     if(argc == 1) {
07074                         outfname = *argv++;
07075                         argc--;
07076                     } else {
07077                         outfname = "nkf.out";
07078                     }
07079 
07080                     if(freopen(outfname, "w", stdout) == NULL) {
07081                         perror (outfname);
07082                         return (-1);
07083                     }
07084                     if (binmode_f == TRUE) {
07085 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
07086                         if (freopen("","wb",stdout) == NULL)
07087                             return (-1);
07088 #else
07089                         setbinmode(stdout);
07090 #endif
07091                     }
07092                 }
07093                 if (binmode_f == TRUE)
07094 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
07095                     if (freopen("","rb",fin) == NULL)
07096                         return (-1);
07097 #else
07098                 setbinmode(fin);
07099 #endif
07100                 setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
07101                 if (nop_f)
07102                     noconvert(fin);
07103                 else {
07104                     char *filename = NULL;
07105                     kanji_convert(fin);
07106                     if (nfiles > 1) filename = origfname;
07107                     if (guess_f) print_guessed_code(filename);
07108                 }
07109                 fclose(fin);
07110 #ifdef OVERWRITE
07111                 if (overwrite_f) {
07112                     struct stat     sb;
07113 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
07114                     time_t tb[2];
07115 #else
07116                     struct utimbuf  tb;
07117 #endif
07118 
07119                     fflush(stdout);
07120                     close(fd);
07121                     if (dup2(fd_backup, fileno(stdout)) < 0){
07122                         perror("dup2");
07123                     }
07124                     if (stat(origfname, &sb)) {
07125                         fprintf(stderr, "Can't stat %s\n", origfname);
07126                     }
07127                     /* Restore the permissions (copy the original file's mode onto the output file) */
07128                     if (chmod(outfname, sb.st_mode)) {
07129                         fprintf(stderr, "Can't set permission %s\n", outfname);
07130                     }
07131 
07132                     /* Restore the timestamp (copy the original file's times onto the output file) */
07133                     if(preserve_time_f){
07134 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
07135                         tb[0] = tb[1] = sb.st_mtime;
07136                         if (utime(outfname, tb)) {
07137                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
07138                         }
07139 #else
07140                         tb.actime  = sb.st_atime;
07141                         tb.modtime = sb.st_mtime;
07142                         if (utime(outfname, &tb)) {
07143                             fprintf(stderr, "Can't set timestamp %s\n", outfname);
07144                         }
07145 #endif
07146                     }
07147                     if(backup_f){
07148                         char *backup_filename = get_backup_filename(backup_suffix, origfname);
07149 #ifdef MSDOS
07150                         unlink(backup_filename);
07151 #endif
07152                         if (rename(origfname, backup_filename)) {
07153                             perror(backup_filename);
07154                             fprintf(stderr, "Can't rename %s to %s\n",
07155                                     origfname, backup_filename);
07156                         }
07157                         nkf_xfree(backup_filename);
07158                     }else{
07159 #ifdef MSDOS
07160                         if (unlink(origfname)){
07161                             perror(origfname);
07162                         }
07163 #endif
07164                     }
07165                     if (rename(outfname, origfname)) {
07166                         perror(origfname);
07167                         fprintf(stderr, "Can't rename %s to %s\n",
07168                                 outfname, origfname);
07169                     }
07170                     nkf_xfree(outfname);
07171                 }
07172 #endif
07173             }
07174         }
07175         if (is_argument_error)
07176             return(-1);
07177     }
07178 #ifdef EASYWIN /*Easy Win */
07179     if (file_out_f == FALSE)
07180         scanf("%d",&end_check);
07181     else
07182         fclose(stdout);
07183 #else /* for Other OS */
07184     if (file_out_f == TRUE)
07185         fclose(stdout);
07186 #endif /*Easy Win */
07187     return (0);
07188 }
07189 #endif /* WIN32DLL */
07190