Ruby 2.0.0p247 (2013-06-27 revision 41674)
regenc.c
Go to the documentation of this file.
00001 /**********************************************************************
00002   regenc.c -  Onigmo (Oniguruma-mod) (regular expression library)
00003 **********************************************************************/
00004 /*-
00005  * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
00006  * Copyright (c) 2011       K.Takata  <kentkt AT csc DOT jp>
00007  * All rights reserved.
00008  *
00009  * Redistribution and use in source and binary forms, with or without
00010  * modification, are permitted provided that the following conditions
00011  * are met:
00012  * 1. Redistributions of source code must retain the above copyright
00013  *    notice, this list of conditions and the following disclaimer.
00014  * 2. Redistributions in binary form must reproduce the above copyright
00015  *    notice, this list of conditions and the following disclaimer in the
00016  *    documentation and/or other materials provided with the distribution.
00017  *
00018  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
00019  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00020  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00021  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
00022  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00023  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00024  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00025  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00026  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00027  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00028  * SUCH DAMAGE.
00029  */
00030 
00031 #include "regint.h"
00032 
/* Default character encoding for the library.  Returned by
 * onigenc_get_default_encoding() and replaced by
 * onigenc_set_default_encoding(); starts as ONIG_ENCODING_INIT_DEFAULT. */
00033 OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
00034 
/*
 * Encoding subsystem initialisation hook.
 * There is nothing to set up, so this always reports success (0).
 */
extern int
onigenc_init(void)
{
  const int status = 0;

  return status;
}
00040 
00041 extern OnigEncoding
00042 onigenc_get_default_encoding(void)
00043 {
00044   return OnigEncDefaultCharEncoding;
00045 }
00046 
00047 extern int
00048 onigenc_set_default_encoding(OnigEncoding enc)
00049 {
00050   OnigEncDefaultCharEncoding = enc;
00051   return 0;
00052 }
00053 
00054 extern int
00055 onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc)
00056 {
00057   int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e);
00058   if (ONIGENC_MBCLEN_CHARFOUND_P(ret))
00059     return ONIGENC_MBCLEN_CHARFOUND_LEN(ret);
00060   else if (ONIGENC_MBCLEN_NEEDMORE_P(ret))
00061     return (int)(e-p)+ONIGENC_MBCLEN_NEEDMORE_LEN(ret);
00062   return 1;
00063 }
00064 
00065 extern UChar*
00066 onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end)
00067 {
00068   UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);
00069   if (p < s) {
00070     p += enclen(enc, p, end);
00071   }
00072   return p;
00073 }
00074 
00075 extern UChar*
00076 onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
00077                                    const UChar* start, const UChar* s, const UChar* end, const UChar** prev)
00078 {
00079   UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);
00080 
00081   if (p < s) {
00082     if (prev) *prev = (const UChar* )p;
00083     p += enclen(enc, p, end);
00084   }
00085   else {
00086     if (prev) *prev = (const UChar* )NULL; /* Sorry */
00087   }
00088   return p;
00089 }
00090 
00091 extern UChar*
00092 onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end)
00093 {
00094   if (s <= start)
00095     return (UChar* )NULL;
00096 
00097   return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end);
00098 }
00099 
00100 extern UChar*
00101 onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end, int n)
00102 {
00103   while (ONIG_IS_NOT_NULL(s) && n-- > 0) {
00104     if (s <= start)
00105       return (UChar* )NULL;
00106 
00107     s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end);
00108   }
00109   return (UChar* )s;
00110 }
00111 
00112 extern UChar*
00113 onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n)
00114 {
00115   UChar* q = (UChar* )p;
00116   while (n-- > 0) {
00117     q += ONIGENC_MBC_ENC_LEN(enc, q, end);
00118   }
00119   return (q <= end ? q : NULL);
00120 }
00121 
00122 extern int
00123 onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end)
00124 {
00125   int n = 0;
00126   UChar* q = (UChar* )p;
00127 
00128   while (q < end) {
00129     q += ONIGENC_MBC_ENC_LEN(enc, q, end);
00130     n++;
00131   }
00132   return n;
00133 }
00134 
00135 extern int
00136 onigenc_strlen_null(OnigEncoding enc, const UChar* s)
00137 {
00138   int n = 0;
00139   UChar* p = (UChar* )s;
00140   UChar* e;
00141 
00142   while (1) {
00143     if (*p == '\0') {
00144       UChar* q;
00145       int len = ONIGENC_MBC_MINLEN(enc);
00146 
00147       if (len == 1) return n;
00148       q = p + 1;
00149       while (len > 1) {
00150         if (*q != '\0') break;
00151         q++;
00152         len--;
00153       }
00154       if (len == 1) return n;
00155     }
00156     e = p + ONIGENC_MBC_MAXLEN(enc);
00157     p += ONIGENC_MBC_ENC_LEN(enc, p, e);
00158     n++;
00159   }
00160 }
00161 
00162 extern int
00163 onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
00164 {
00165   UChar* start = (UChar* )s;
00166   UChar* p = (UChar* )s;
00167   UChar* e;
00168 
00169   while (1) {
00170     if (*p == '\0') {
00171       UChar* q;
00172       int len = ONIGENC_MBC_MINLEN(enc);
00173 
00174       if (len == 1) return (int )(p - start);
00175       q = p + 1;
00176       while (len > 1) {
00177         if (*q != '\0') break;
00178         q++;
00179         len--;
00180       }
00181       if (len == 1) return (int )(p - start);
00182     }
00183     e = p + ONIGENC_MBC_MAXLEN(enc);
00184     p += ONIGENC_MBC_ENC_LEN(enc, p, e);
00185   }
00186 }
00187 
/* Byte-indexed ASCII lower-casing table (256 entries, 8 per row).
 * Entries 0101..0132 ('A'..'Z') hold 0141..0172 ('a'..'z'); every other
 * entry holds its own index, including all bytes 0200 and above. */
00188 const UChar OnigEncAsciiToLowerCaseTable[] = {
00189   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
00190   '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
00191   '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
00192   '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
00193   '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
00194   '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
00195   '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
00196   '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
00197   '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
00198   '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
00199   '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
00200   '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
00201   '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
00202   '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
00203   '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
00204   '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
00205   '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
00206   '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
00207   '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
00208   '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
00209   '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
00210   '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
00211   '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
00212   '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
00213   '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
00214   '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
00215   '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
00216   '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
00217   '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
00218   '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
00219   '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
00220   '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
00221 };
00222 
/* Byte-indexed ASCII upper-casing table, only compiled when
 * USE_UPPER_CASE_TABLE is defined.  Entries 0141..0172 ('a'..'z') hold
 * 0101..0132 ('A'..'Z'); every other entry holds its own index. */
00223 #ifdef USE_UPPER_CASE_TABLE
00224 const UChar OnigEncAsciiToUpperCaseTable[256] = {
00225   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
00226   '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
00227   '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
00228   '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
00229   '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
00230   '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
00231   '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
00232   '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
00233   '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
00234   '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
00235   '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
00236   '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
00237   '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
00238   '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
00239   '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
00240   '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
00241   '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
00242   '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
00243   '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
00244   '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
00245   '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
00246   '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
00247   '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
00248   '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
00249   '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
00250   '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
00251   '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
00252   '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
00253   '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
00254   '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
00255   '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
00256   '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
00257 };
00258 #endif
00259 
/* Byte-indexed table of 16-bit character-class masks (256 entries, 8 per
 * row).  Entries for bytes 0x80..0xff are all 0x0000.
 * NOTE(review): the individual bits presumably correspond to the
 * ONIGENC_CTYPE_* classes tested via ONIGENC_IS_ASCII_CODE_CTYPE -- confirm
 * against the header that defines those macros. */
00260 const unsigned short OnigEncAsciiCtypeTable[256] = {
00261   0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
00262   0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
00263   0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
00264   0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
00265   0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
00266   0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
00267   0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
00268   0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
00269   0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
00270   0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
00271   0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
00272   0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
00273   0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
00274   0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
00275   0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
00276   0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
00277   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00278   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00279   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00280   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00281   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00282   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00283   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00284   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00285   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00286   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00287   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00288   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00289   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00290   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00291   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00292   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
00293 };
00294 
/* Byte-indexed ISO-8859-1 lower-casing table (256 entries, 8 per row).
 * Entries 0101..0132 hold 0141..0172, and entries 0300..0336 hold
 * 0340..0376 except 0327, which holds itself.  Every other entry,
 * including 0337, holds its own index. */
00295 const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
00296   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
00297   '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
00298   '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
00299   '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
00300   '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
00301   '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
00302   '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
00303   '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
00304   '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
00305   '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
00306   '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
00307   '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
00308   '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
00309   '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
00310   '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
00311   '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
00312   '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
00313   '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
00314   '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
00315   '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
00316   '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
00317   '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
00318   '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
00319   '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
00320   '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
00321   '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
00322   '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
00323   '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
00324   '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
00325   '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
00326   '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
00327   '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
00328 };
00329 
/* Byte-indexed ISO-8859-1 upper-casing table, only compiled when
 * USE_UPPER_CASE_TABLE is defined.  Entries 0141..0172 hold 0101..0132,
 * and entries 0340..0376 hold 0300..0336 except 0367, which holds itself.
 * Every other entry, including 0377, holds its own index. */
00330 #ifdef USE_UPPER_CASE_TABLE
00331 const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
00332   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
00333   '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
00334   '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
00335   '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
00336   '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
00337   '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
00338   '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
00339   '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
00340   '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
00341   '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
00342   '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
00343   '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
00344   '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
00345   '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
00346   '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
00347   '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
00348   '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
00349   '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
00350   '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
00351   '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
00352   '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
00353   '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
00354   '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
00355   '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
00356   '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
00357   '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
00358   '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
00359   '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
00360   '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
00361   '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
00362   '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\367',
00363   '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\377',
00364 };
00365 #endif
00366 
00367 extern void
00368 onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED)
00369 {
00370   /* nothing */
00371   /* obsoleted. */
00372 }
00373 
00374 extern UChar*
00375 onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end)
00376 {
00377   return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);
00378 }
00379 
00380 const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = {
00381   { 0x41, 0x61 },
00382   { 0x42, 0x62 },
00383   { 0x43, 0x63 },
00384   { 0x44, 0x64 },
00385   { 0x45, 0x65 },
00386   { 0x46, 0x66 },
00387   { 0x47, 0x67 },
00388   { 0x48, 0x68 },
00389   { 0x49, 0x69 },
00390   { 0x4a, 0x6a },
00391   { 0x4b, 0x6b },
00392   { 0x4c, 0x6c },
00393   { 0x4d, 0x6d },
00394   { 0x4e, 0x6e },
00395   { 0x4f, 0x6f },
00396   { 0x50, 0x70 },
00397   { 0x51, 0x71 },
00398   { 0x52, 0x72 },
00399   { 0x53, 0x73 },
00400   { 0x54, 0x74 },
00401   { 0x55, 0x75 },
00402   { 0x56, 0x76 },
00403   { 0x57, 0x77 },
00404   { 0x58, 0x78 },
00405   { 0x59, 0x79 },
00406   { 0x5a, 0x7a }
00407 };
00408 
00409 extern int
00410 onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
00411                                   OnigApplyAllCaseFoldFunc f, void* arg,
00412                                   OnigEncoding enc ARG_UNUSED)
00413 {
00414   OnigCodePoint code;
00415   int i, r;
00416 
00417   for (i = 0;
00418        i < (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes));
00419        i++) {
00420     code = OnigAsciiLowerMap[i].to;
00421     r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg);
00422     if (r != 0) return r;
00423 
00424     code = OnigAsciiLowerMap[i].from;
00425     r = (*f)(OnigAsciiLowerMap[i].to, &code, 1, arg);
00426     if (r != 0) return r;
00427   }
00428 
00429   return 0;
00430 }
00431 
00432 extern int
00433 onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,
00434     const OnigUChar* p, const OnigUChar* end ARG_UNUSED, OnigCaseFoldCodeItem items[],
00435      OnigEncoding enc ARG_UNUSED)
00436 {
00437   if (0x41 <= *p && *p <= 0x5a) {
00438     items[0].byte_len = 1;
00439     items[0].code_len = 1;
00440     items[0].code[0] = (OnigCodePoint )(*p + 0x20);
00441     return 1;
00442   }
00443   else if (0x61 <= *p && *p <= 0x7a) {
00444     items[0].byte_len = 1;
00445     items[0].code_len = 1;
00446     items[0].code[0] = (OnigCodePoint )(*p - 0x20);
00447     return 1;
00448   }
00449   else
00450     return 0;
00451 }
00452 
00453 static int
00454 ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
00455                        OnigApplyAllCaseFoldFunc f, void* arg)
00456 {
00457   OnigCodePoint ss[] = { 0x73, 0x73 };
00458 
00459   return (*f)((OnigCodePoint )0xdf, ss, 2, arg);
00460 }
00461 
00462 extern int
00463 onigenc_apply_all_case_fold_with_map(int map_size,
00464     const OnigPairCaseFoldCodes map[],
00465     int ess_tsett_flag, OnigCaseFoldType flag,
00466     OnigApplyAllCaseFoldFunc f, void* arg)
00467 {
00468   OnigCodePoint code;
00469   int i, r;
00470 
00471   r = onigenc_ascii_apply_all_case_fold(flag, f, arg, 0);
00472   if (r != 0) return r;
00473 
00474   for (i = 0; i < map_size; i++) {
00475     code = map[i].to;
00476     r = (*f)(map[i].from, &code, 1, arg);
00477     if (r != 0) return r;
00478 
00479     code = map[i].from;
00480     r = (*f)(map[i].to, &code, 1, arg);
00481     if (r != 0) return r;
00482   }
00483 
00484   if (ess_tsett_flag != 0)
00485     return ss_apply_all_case_fold(flag, f, arg);
00486 
00487   return 0;
00488 }
00489 
00490 extern int
00491 onigenc_get_case_fold_codes_by_str_with_map(int map_size,
00492     const OnigPairCaseFoldCodes map[],
00493     int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED,
00494     const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
00495 {
00496   if (0x41 <= *p && *p <= 0x5a) {
00497     items[0].byte_len = 1;
00498     items[0].code_len = 1;
00499     items[0].code[0] = (OnigCodePoint )(*p + 0x20);
00500     if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1
00501         && (*(p+1) == 0x53 || *(p+1) == 0x73)) {
00502       /* SS */
00503       items[1].byte_len = 2;
00504       items[1].code_len = 1;
00505       items[1].code[0] = (OnigCodePoint )0xdf;
00506       return 2;
00507     }
00508     else
00509       return 1;
00510   }
00511   else if (0x61 <= *p && *p <= 0x7a) {
00512     items[0].byte_len = 1;
00513     items[0].code_len = 1;
00514     items[0].code[0] = (OnigCodePoint )(*p - 0x20);
00515     if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1
00516         && (*(p+1) == 0x73 || *(p+1) == 0x53)) {
00517       /* ss */
00518       items[1].byte_len = 2;
00519       items[1].code_len = 1;
00520       items[1].code[0] = (OnigCodePoint )0xdf;
00521       return 2;
00522     }
00523     else
00524       return 1;
00525   }
00526   else if (*p == 0xdf && ess_tsett_flag != 0) {
00527     items[0].byte_len = 1;
00528     items[0].code_len = 2;
00529     items[0].code[0] = (OnigCodePoint )'s';
00530     items[0].code[1] = (OnigCodePoint )'s';
00531 
00532     items[1].byte_len = 1;
00533     items[1].code_len = 2;
00534     items[1].code[0] = (OnigCodePoint )'S';
00535     items[1].code[1] = (OnigCodePoint )'S';
00536 
00537     items[2].byte_len = 1;
00538     items[2].code_len = 2;
00539     items[2].code[0] = (OnigCodePoint )'s';
00540     items[2].code[1] = (OnigCodePoint )'S';
00541 
00542     items[3].byte_len = 1;
00543     items[3].code_len = 2;
00544     items[3].code[0] = (OnigCodePoint )'S';
00545     items[3].code[1] = (OnigCodePoint )'s';
00546 
00547     return 4;
00548   }
00549   else {
00550     int i;
00551 
00552     for (i = 0; i < map_size; i++) {
00553       if (*p == map[i].from) {
00554         items[0].byte_len = 1;
00555         items[0].code_len = 1;
00556         items[0].code[0] = map[i].to;
00557         return 1;
00558       }
00559       else if (*p == map[i].to) {
00560         items[0].byte_len = 1;
00561         items[0].code_len = 1;
00562         items[0].code[0] = map[i].from;
00563         return 1;
00564       }
00565     }
00566   }
00567 
00568   return 0;
00569 }
00570 
00571 
00572 extern int
00573 onigenc_not_support_get_ctype_code_range(OnigCtype ctype,
00574                        OnigCodePoint* sb_out, const OnigCodePoint* ranges[],
00575                        OnigEncoding enc)
00576 {
00577   return ONIG_NO_SUPPORT_CONFIG;
00578 }
00579 
00580 extern int
00581 onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end, OnigEncoding enc ARG_UNUSED)
00582 {
00583   if (p < end) {
00584     if (*p == 0x0a) return 1;
00585   }
00586   return 0;
00587 }
00588 
00589 /* for single byte encodings */
00590 extern int
00591 onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p,
00592                             const UChar*end, UChar* lower, OnigEncoding enc ARG_UNUSED)
00593 {
00594   *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);
00595 
00596   (*p)++;
00597   return 1; /* return byte length of converted char to lower */
00598 }
00599 
/* Disabled code (compiled out by #if 0).  As written it advances *pp by
 * one byte and returns ONIGENC_IS_ASCII_CODE_CASE_AMBIG() of the byte that
 * was at the old position. */
00600 #if 0
00601 extern int
00602 onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag ARG_UNUSED,
00603                                const UChar** pp, const UChar* end ARG_UNUSED)
00604 {
00605   const UChar* p = *pp;
00606 
00607   (*pp)++;
00608   return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
00609 }
00610 #endif
00611 
00612 extern int
00613 onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED, const UChar* e ARG_UNUSED,
00614                                 OnigEncoding enc ARG_UNUSED)
00615 {
00616   return 1;
00617 }
00618 
00619 extern OnigCodePoint
00620 onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED,
00621                                 OnigEncoding enc ARG_UNUSED)
00622 {
00623   return (OnigCodePoint )(*p);
00624 }
00625 
00626 extern int
00627 onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED, OnigEncoding enc ARG_UNUSED)
00628 {
00629   return 1;
00630 }
00631 
00632 extern int
00633 onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED)
00634 {
00635   if (code > 0xff)
00636       rb_raise(rb_eRangeError, "%u out of char range", code);
00637   *buf = (UChar )(code & 0xff);
00638   return 1;
00639 }
00640 
00641 extern UChar*
00642 onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED, const UChar* s,
00643                                           const UChar* end,
00644                                           OnigEncoding enc ARG_UNUSED)
00645 {
00646   return (UChar* )s;
00647 }
00648 
00649 extern int
00650 onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED, const UChar* end ARG_UNUSED,
00651                                              OnigEncoding enc ARG_UNUSED)
00652 {
00653   return TRUE;
00654 }
00655 
00656 extern int
00657 onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED, const UChar* end ARG_UNUSED,
00658                                               OnigEncoding enc ARG_UNUSED)
00659 {
00660   return FALSE;
00661 }
00662 
00663 extern int
00664 onigenc_ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype,
00665                             OnigEncoding enc ARG_UNUSED)
00666 {
00667   if (code < 128)
00668     return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
00669   else
00670     return FALSE;
00671 }
00672 
00673 extern OnigCodePoint
00674 onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
00675 {
00676   int c, i, len;
00677   OnigCodePoint n;
00678 
00679   len = enclen(enc, p, end);
00680   n = (OnigCodePoint )(*p++);
00681   if (len == 1) return n;
00682 
00683   for (i = 1; i < len; i++) {
00684     if (p >= end) break;
00685     c = *p++;
00686     n <<= 8;  n += c;
00687   }
00688   return n;
00689 }
00690 
00691 extern int
00692 onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,
00693                           const UChar** pp, const UChar* end ARG_UNUSED,
00694                           UChar* lower)
00695 {
00696   int len;
00697   const UChar *p = *pp;
00698 
00699   if (ONIGENC_IS_MBC_ASCII(p)) {
00700     *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
00701     (*pp)++;
00702     return 1;
00703   }
00704   else {
00705     int i;
00706 
00707     len = enclen(enc, p, end);
00708     for (i = 0; i < len; i++) {
00709       *lower++ = *p++;
00710     }
00711     (*pp) += len;
00712     return len; /* return byte length of converted to lower char */
00713   }
00714 }
00715 
/* Disabled code (compiled out by #if 0).  As written: for an ASCII lead
 * byte it advances *pp by one and returns
 * ONIGENC_IS_ASCII_CODE_CASE_AMBIG() of that byte; otherwise it skips one
 * character and returns FALSE.
 * NOTE(review): enclen() is called here with two arguments, while the live
 * code in this file passes three (enc, p, end) -- this would need updating
 * before the block could be re-enabled. */
00716 #if 0
00717 extern int
00718 onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag,
00719                              const UChar** pp ARG_UNUSED, const UChar* end ARG_UNUSED)
00720 {
00721   const UChar* p = *pp;
00722 
00723   if (ONIGENC_IS_MBC_ASCII(p)) {
00724     (*pp)++;
00725     return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
00726   }
00727 
00728   (*pp) += enclen(enc, p);
00729   return FALSE;
00730 }
00731 #endif
00732 
00733 extern int
00734 onigenc_mb2_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
00735 {
00736   if (code <= 0xff) return 1;
00737   if (code <= 0xffff) return 2;
00738   return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
00739 }
00740 
00741 extern int
00742 onigenc_mb4_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
00743 {
00744        if ((code & 0xff000000) != 0) return 4;
00745   else if ((code & 0xff0000) != 0) return 3;
00746   else if ((code & 0xff00) != 0) return 2;
00747   else return 1;
00748 }
00749 
00750 extern int
00751 onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
00752 {
00753   UChar *p = buf;
00754 
00755   if ((code & 0xff00) != 0) {
00756     *p++ = (UChar )((code >>  8) & 0xff);
00757   }
00758   *p++ = (UChar )(code & 0xff);
00759 
00760 #if 1
00761   if (enclen(enc, buf, p) != (p - buf))
00762     return ONIGERR_INVALID_CODE_POINT_VALUE;
00763 #endif
00764   return (int )(p - buf);
00765 }
00766 
00767 extern int
00768 onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
00769 {
00770   UChar *p = buf;
00771 
00772   if ((code & 0xff000000) != 0) {
00773     *p++ = (UChar )((code >> 24) & 0xff);
00774   }
00775   if ((code & 0xff0000) != 0 || p != buf) {
00776     *p++ = (UChar )((code >> 16) & 0xff);
00777   }
00778   if ((code & 0xff00) != 0 || p != buf) {
00779     *p++ = (UChar )((code >> 8) & 0xff);
00780   }
00781   *p++ = (UChar )(code & 0xff);
00782 
00783 #if 1
00784   if (enclen(enc, buf, p) != (p - buf))
00785     return ONIGERR_INVALID_CODE_POINT_VALUE;
00786 #endif
00787   return (int )(p - buf);
00788 }
00789 
00790 extern int
00791 onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
00792 {
00793   static const PosixBracketEntryType PBS[] = {
00794     PosixBracketEntryInit("Alnum",  ONIGENC_CTYPE_ALNUM),
00795     PosixBracketEntryInit("Alpha",  ONIGENC_CTYPE_ALPHA),
00796     PosixBracketEntryInit("Blank",  ONIGENC_CTYPE_BLANK),
00797     PosixBracketEntryInit("Cntrl",  ONIGENC_CTYPE_CNTRL),
00798     PosixBracketEntryInit("Digit",  ONIGENC_CTYPE_DIGIT),
00799     PosixBracketEntryInit("Graph",  ONIGENC_CTYPE_GRAPH),
00800     PosixBracketEntryInit("Lower",  ONIGENC_CTYPE_LOWER),
00801     PosixBracketEntryInit("Print",  ONIGENC_CTYPE_PRINT),
00802     PosixBracketEntryInit("Punct",  ONIGENC_CTYPE_PUNCT),
00803     PosixBracketEntryInit("Space",  ONIGENC_CTYPE_SPACE),
00804     PosixBracketEntryInit("Upper",  ONIGENC_CTYPE_UPPER),
00805     PosixBracketEntryInit("XDigit", ONIGENC_CTYPE_XDIGIT),
00806     PosixBracketEntryInit("ASCII",  ONIGENC_CTYPE_ASCII),
00807     PosixBracketEntryInit("Word",   ONIGENC_CTYPE_WORD),
00808   };
00809 
00810   const PosixBracketEntryType *pb, *pbe;
00811   int len;
00812 
00813   len = onigenc_strlen(enc, p, end);
00814   for (pbe = (pb = PBS) + sizeof(PBS)/sizeof(PBS[0]); pb < pbe; ++pb) {
00815     if (len == pb->len &&
00816         onigenc_with_ascii_strnicmp(enc, p, end, pb->name, pb->len) == 0)
00817       return pb->ctype;
00818   }
00819 
00820   return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
00821 }
00822 
00823 extern int
00824 onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
00825                           unsigned int ctype)
00826 {
00827   if (code < 128)
00828     return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
00829   else {
00830     if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
00831       return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
00832     }
00833   }
00834 
00835   return FALSE;
00836 }
00837 
00838 extern int
00839 onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
00840                           unsigned int ctype)
00841 {
00842   if (code < 128)
00843     return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
00844   else {
00845     if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
00846       return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
00847     }
00848   }
00849 
00850   return FALSE;
00851 }
00852 
00853 extern int
00854 onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end,
00855                            const UChar* sascii /* ascii */, int n)
00856 {
00857   int x, c;
00858 
00859   while (n-- > 0) {
00860     if (p >= end) return (int )(*sascii);
00861 
00862     c = (int )ONIGENC_MBC_TO_CODE(enc, p, end);
00863     x = *sascii - c;
00864     if (x) return x;
00865 
00866     sascii++;
00867     p += enclen(enc, p, end);
00868   }
00869   return 0;
00870 }
00871 
00872 extern int
00873 onigenc_with_ascii_strnicmp(OnigEncoding enc, const UChar* p, const UChar* end,
00874                             const UChar* sascii /* ascii */, int n)
00875 {
00876   int x, c;
00877 
00878   while (n-- > 0) {
00879     if (p >= end) return (int )(*sascii);
00880 
00881     c = (int )ONIGENC_MBC_TO_CODE(enc, p, end);
00882     if (ONIGENC_IS_ASCII_CODE(c))
00883       c = ONIGENC_ASCII_CODE_TO_LOWER_CASE(c);
00884     x = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*sascii) - c;
00885     if (x) return x;
00886 
00887     sascii++;
00888     p += enclen(enc, p, end);
00889   }
00890   return 0;
00891 }
00892 
00893 /* Property management */
00894 static int
00895 resize_property_list(int new_size, const OnigCodePoint*** plist, int* psize)
00896 {
00897   size_t size;
00898   const OnigCodePoint **list = *plist;
00899 
00900   size = sizeof(OnigCodePoint*) * new_size;
00901   if (IS_NULL(list)) {
00902     list = (const OnigCodePoint** )xmalloc(size);
00903   }
00904   else {
00905     list = (const OnigCodePoint** )xrealloc((void* )list, size);
00906   }
00907 
00908   if (IS_NULL(list)) return ONIGERR_MEMORY;
00909 
00910   *plist = list;
00911   *psize = new_size;
00912 
00913   return 0;
00914 }
00915 
00916 extern int
00917 onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop,
00918      hash_table_type **table, const OnigCodePoint*** plist, int *pnum,
00919      int *psize)
00920 {
00921 #define PROP_INIT_SIZE     16
00922 
00923   int r;
00924 
00925   if (*psize <= *pnum) {
00926     int new_size = (*psize == 0 ? PROP_INIT_SIZE : *psize * 2);
00927     r = resize_property_list(new_size, plist, psize);
00928     if (r != 0) return r;
00929   }
00930 
00931   (*plist)[*pnum] = prop;
00932 
00933   if (ONIG_IS_NULL(*table)) {
00934     *table = onig_st_init_strend_table_with_size(PROP_INIT_SIZE);
00935     if (ONIG_IS_NULL(*table)) return ONIGERR_MEMORY;
00936   }
00937 
00938   *pnum = *pnum + 1;
00939   onig_st_insert_strend(*table, name, name + strlen((char* )name),
00940                         (hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE));
00941   return 0;
00942 }
00943 
00944 extern int
00945 onigenc_property_list_init(int (*f)(void))
00946 {
00947   int r;
00948 
00949   THREAD_ATOMIC_START;
00950 
00951   r = f();
00952 
00953   THREAD_ATOMIC_END;
00954   return r;
00955 }
00956