Ruby
2.0.0p247 (2013-06-27 revision 41674)
|
00001 /********************************************************************** 00002 regenc.c - Onigmo (Oniguruma-mod) (regular expression library) 00003 **********************************************************************/ 00004 /*- 00005 * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> 00006 * Copyright (c) 2011 K.Takata <kentkt AT csc DOT jp> 00007 * All rights reserved. 00008 * 00009 * Redistribution and use in source and binary forms, with or without 00010 * modification, are permitted provided that the following conditions 00011 * are met: 00012 * 1. Redistributions of source code must retain the above copyright 00013 * notice, this list of conditions and the following disclaimer. 00014 * 2. Redistributions in binary form must reproduce the above copyright 00015 * notice, this list of conditions and the following disclaimer in the 00016 * documentation and/or other materials provided with the distribution. 00017 * 00018 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 00019 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00020 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 00021 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 00022 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 00023 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 00024 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 00025 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00026 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 00027 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 00028 * SUCH DAMAGE. 
00029 */ 00030 00031 #include "regint.h" 00032 00033 OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT; 00034 00035 extern int 00036 onigenc_init(void) 00037 { 00038 return 0; 00039 } 00040 00041 extern OnigEncoding 00042 onigenc_get_default_encoding(void) 00043 { 00044 return OnigEncDefaultCharEncoding; 00045 } 00046 00047 extern int 00048 onigenc_set_default_encoding(OnigEncoding enc) 00049 { 00050 OnigEncDefaultCharEncoding = enc; 00051 return 0; 00052 } 00053 00054 extern int 00055 onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc) 00056 { 00057 int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e); 00058 if (ONIGENC_MBCLEN_CHARFOUND_P(ret)) 00059 return ONIGENC_MBCLEN_CHARFOUND_LEN(ret); 00060 else if (ONIGENC_MBCLEN_NEEDMORE_P(ret)) 00061 return (int)(e-p)+ONIGENC_MBCLEN_NEEDMORE_LEN(ret); 00062 return 1; 00063 } 00064 00065 extern UChar* 00066 onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end) 00067 { 00068 UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end); 00069 if (p < s) { 00070 p += enclen(enc, p, end); 00071 } 00072 return p; 00073 } 00074 00075 extern UChar* 00076 onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc, 00077 const UChar* start, const UChar* s, const UChar* end, const UChar** prev) 00078 { 00079 UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end); 00080 00081 if (p < s) { 00082 if (prev) *prev = (const UChar* )p; 00083 p += enclen(enc, p, end); 00084 } 00085 else { 00086 if (prev) *prev = (const UChar* )NULL; /* Sorry */ 00087 } 00088 return p; 00089 } 00090 00091 extern UChar* 00092 onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end) 00093 { 00094 if (s <= start) 00095 return (UChar* )NULL; 00096 00097 return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end); 00098 } 00099 00100 extern UChar* 00101 onigenc_step_back(OnigEncoding enc, const UChar* 
start, const UChar* s, const UChar* end, int n) 00102 { 00103 while (ONIG_IS_NOT_NULL(s) && n-- > 0) { 00104 if (s <= start) 00105 return (UChar* )NULL; 00106 00107 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end); 00108 } 00109 return (UChar* )s; 00110 } 00111 00112 extern UChar* 00113 onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n) 00114 { 00115 UChar* q = (UChar* )p; 00116 while (n-- > 0) { 00117 q += ONIGENC_MBC_ENC_LEN(enc, q, end); 00118 } 00119 return (q <= end ? q : NULL); 00120 } 00121 00122 extern int 00123 onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end) 00124 { 00125 int n = 0; 00126 UChar* q = (UChar* )p; 00127 00128 while (q < end) { 00129 q += ONIGENC_MBC_ENC_LEN(enc, q, end); 00130 n++; 00131 } 00132 return n; 00133 } 00134 00135 extern int 00136 onigenc_strlen_null(OnigEncoding enc, const UChar* s) 00137 { 00138 int n = 0; 00139 UChar* p = (UChar* )s; 00140 UChar* e; 00141 00142 while (1) { 00143 if (*p == '\0') { 00144 UChar* q; 00145 int len = ONIGENC_MBC_MINLEN(enc); 00146 00147 if (len == 1) return n; 00148 q = p + 1; 00149 while (len > 1) { 00150 if (*q != '\0') break; 00151 q++; 00152 len--; 00153 } 00154 if (len == 1) return n; 00155 } 00156 e = p + ONIGENC_MBC_MAXLEN(enc); 00157 p += ONIGENC_MBC_ENC_LEN(enc, p, e); 00158 n++; 00159 } 00160 } 00161 00162 extern int 00163 onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s) 00164 { 00165 UChar* start = (UChar* )s; 00166 UChar* p = (UChar* )s; 00167 UChar* e; 00168 00169 while (1) { 00170 if (*p == '\0') { 00171 UChar* q; 00172 int len = ONIGENC_MBC_MINLEN(enc); 00173 00174 if (len == 1) return (int )(p - start); 00175 q = p + 1; 00176 while (len > 1) { 00177 if (*q != '\0') break; 00178 q++; 00179 len--; 00180 } 00181 if (len == 1) return (int )(p - start); 00182 } 00183 e = p + ONIGENC_MBC_MAXLEN(enc); 00184 p += ONIGENC_MBC_ENC_LEN(enc, p, e); 00185 } 00186 } 00187 00188 const UChar OnigEncAsciiToLowerCaseTable[] = { 00189 '\000', 
'\001', '\002', '\003', '\004', '\005', '\006', '\007', 00190 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', 00191 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', 00192 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', 00193 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', 00194 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', 00195 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', 00196 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', 00197 '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', 00198 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', 00199 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', 00200 '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', 00201 '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', 00202 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', 00203 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', 00204 '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', 00205 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', 00206 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', 00207 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', 00208 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', 00209 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', 00210 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', 00211 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', 00212 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', 00213 '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', 00214 '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', 00215 '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', 00216 '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', 00217 '\340', '\341', '\342', '\343', '\344', '\345', 
'\346', '\347', 00218 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', 00219 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', 00220 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377', 00221 }; 00222 00223 #ifdef USE_UPPER_CASE_TABLE 00224 const UChar OnigEncAsciiToUpperCaseTable[256] = { 00225 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', 00226 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', 00227 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', 00228 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', 00229 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', 00230 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', 00231 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', 00232 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', 00233 '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107', 00234 '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117', 00235 '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127', 00236 '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137', 00237 '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107', 00238 '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117', 00239 '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127', 00240 '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177', 00241 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', 00242 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', 00243 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', 00244 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', 00245 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', 00246 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', 00247 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', 00248 '\270', '\271', '\272', '\273', '\274', '\275', 
'\276', '\277', 00249 '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', 00250 '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', 00251 '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', 00252 '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', 00253 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', 00254 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', 00255 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', 00256 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377', 00257 }; 00258 #endif 00259 00260 const unsigned short OnigEncAsciiCtypeTable[256] = { 00261 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 00262 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, 00263 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 00264 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 00265 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 00266 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 00267 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 00268 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 00269 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, 00270 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 00271 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 00272 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, 00273 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, 00274 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 00275 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 00276 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, 00277 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 00278 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 00279 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 00280 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 00281 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 00282 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 00283 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 00284 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 00285 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 00286 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 00287 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 00288 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 00289 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 00290 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 00291 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 00292 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 00293 }; 00294 00295 const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = { 00296 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', 00297 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', 00298 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', 00299 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', 00300 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', 00301 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', 00302 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', 00303 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', 00304 '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', 00305 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', 00306 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', 00307 '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', 00308 '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', 00309 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', 00310 '\160', '\161', '\162', '\163', 
'\164', '\165', '\166', '\167', 00311 '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', 00312 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', 00313 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', 00314 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', 00315 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', 00316 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', 00317 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', 00318 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', 00319 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', 00320 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', 00321 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', 00322 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327', 00323 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', 00324 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', 00325 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', 00326 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', 00327 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' 00328 }; 00329 00330 #ifdef USE_UPPER_CASE_TABLE 00331 const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = { 00332 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', 00333 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', 00334 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', 00335 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', 00336 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', 00337 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', 00338 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', 00339 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', 00340 '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107', 00341 '\110', '\111', '\112', '\113', 
'\114', '\115', '\116', '\117', 00342 '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127', 00343 '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137', 00344 '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107', 00345 '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117', 00346 '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127', 00347 '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177', 00348 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', 00349 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', 00350 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', 00351 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', 00352 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', 00353 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', 00354 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', 00355 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', 00356 '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', 00357 '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', 00358 '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', 00359 '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', 00360 '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', 00361 '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', 00362 '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\367', 00363 '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\377', 00364 }; 00365 #endif 00366 00367 extern void 00368 onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED) 00369 { 00370 /* nothing */ 00371 /* obsoleted. 
*/ 00372 } 00373 00374 extern UChar* 00375 onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end) 00376 { 00377 return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end); 00378 } 00379 00380 const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = { 00381 { 0x41, 0x61 }, 00382 { 0x42, 0x62 }, 00383 { 0x43, 0x63 }, 00384 { 0x44, 0x64 }, 00385 { 0x45, 0x65 }, 00386 { 0x46, 0x66 }, 00387 { 0x47, 0x67 }, 00388 { 0x48, 0x68 }, 00389 { 0x49, 0x69 }, 00390 { 0x4a, 0x6a }, 00391 { 0x4b, 0x6b }, 00392 { 0x4c, 0x6c }, 00393 { 0x4d, 0x6d }, 00394 { 0x4e, 0x6e }, 00395 { 0x4f, 0x6f }, 00396 { 0x50, 0x70 }, 00397 { 0x51, 0x71 }, 00398 { 0x52, 0x72 }, 00399 { 0x53, 0x73 }, 00400 { 0x54, 0x74 }, 00401 { 0x55, 0x75 }, 00402 { 0x56, 0x76 }, 00403 { 0x57, 0x77 }, 00404 { 0x58, 0x78 }, 00405 { 0x59, 0x79 }, 00406 { 0x5a, 0x7a } 00407 }; 00408 00409 extern int 00410 onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED, 00411 OnigApplyAllCaseFoldFunc f, void* arg, 00412 OnigEncoding enc ARG_UNUSED) 00413 { 00414 OnigCodePoint code; 00415 int i, r; 00416 00417 for (i = 0; 00418 i < (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes)); 00419 i++) { 00420 code = OnigAsciiLowerMap[i].to; 00421 r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg); 00422 if (r != 0) return r; 00423 00424 code = OnigAsciiLowerMap[i].from; 00425 r = (*f)(OnigAsciiLowerMap[i].to, &code, 1, arg); 00426 if (r != 0) return r; 00427 } 00428 00429 return 0; 00430 } 00431 00432 extern int 00433 onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED, 00434 const OnigUChar* p, const OnigUChar* end ARG_UNUSED, OnigCaseFoldCodeItem items[], 00435 OnigEncoding enc ARG_UNUSED) 00436 { 00437 if (0x41 <= *p && *p <= 0x5a) { 00438 items[0].byte_len = 1; 00439 items[0].code_len = 1; 00440 items[0].code[0] = (OnigCodePoint )(*p + 0x20); 00441 return 1; 00442 } 00443 else if (0x61 <= *p && *p <= 0x7a) { 00444 items[0].byte_len = 1; 00445 
items[0].code_len = 1; 00446 items[0].code[0] = (OnigCodePoint )(*p - 0x20); 00447 return 1; 00448 } 00449 else 00450 return 0; 00451 } 00452 00453 static int 00454 ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED, 00455 OnigApplyAllCaseFoldFunc f, void* arg) 00456 { 00457 OnigCodePoint ss[] = { 0x73, 0x73 }; 00458 00459 return (*f)((OnigCodePoint )0xdf, ss, 2, arg); 00460 } 00461 00462 extern int 00463 onigenc_apply_all_case_fold_with_map(int map_size, 00464 const OnigPairCaseFoldCodes map[], 00465 int ess_tsett_flag, OnigCaseFoldType flag, 00466 OnigApplyAllCaseFoldFunc f, void* arg) 00467 { 00468 OnigCodePoint code; 00469 int i, r; 00470 00471 r = onigenc_ascii_apply_all_case_fold(flag, f, arg, 0); 00472 if (r != 0) return r; 00473 00474 for (i = 0; i < map_size; i++) { 00475 code = map[i].to; 00476 r = (*f)(map[i].from, &code, 1, arg); 00477 if (r != 0) return r; 00478 00479 code = map[i].from; 00480 r = (*f)(map[i].to, &code, 1, arg); 00481 if (r != 0) return r; 00482 } 00483 00484 if (ess_tsett_flag != 0) 00485 return ss_apply_all_case_fold(flag, f, arg); 00486 00487 return 0; 00488 } 00489 00490 extern int 00491 onigenc_get_case_fold_codes_by_str_with_map(int map_size, 00492 const OnigPairCaseFoldCodes map[], 00493 int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED, 00494 const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) 00495 { 00496 if (0x41 <= *p && *p <= 0x5a) { 00497 items[0].byte_len = 1; 00498 items[0].code_len = 1; 00499 items[0].code[0] = (OnigCodePoint )(*p + 0x20); 00500 if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1 00501 && (*(p+1) == 0x53 || *(p+1) == 0x73)) { 00502 /* SS */ 00503 items[1].byte_len = 2; 00504 items[1].code_len = 1; 00505 items[1].code[0] = (OnigCodePoint )0xdf; 00506 return 2; 00507 } 00508 else 00509 return 1; 00510 } 00511 else if (0x61 <= *p && *p <= 0x7a) { 00512 items[0].byte_len = 1; 00513 items[0].code_len = 1; 00514 items[0].code[0] = (OnigCodePoint )(*p - 0x20); 00515 if (*p == 0x73 
&& ess_tsett_flag != 0 && end > p + 1 00516 && (*(p+1) == 0x73 || *(p+1) == 0x53)) { 00517 /* ss */ 00518 items[1].byte_len = 2; 00519 items[1].code_len = 1; 00520 items[1].code[0] = (OnigCodePoint )0xdf; 00521 return 2; 00522 } 00523 else 00524 return 1; 00525 } 00526 else if (*p == 0xdf && ess_tsett_flag != 0) { 00527 items[0].byte_len = 1; 00528 items[0].code_len = 2; 00529 items[0].code[0] = (OnigCodePoint )'s'; 00530 items[0].code[1] = (OnigCodePoint )'s'; 00531 00532 items[1].byte_len = 1; 00533 items[1].code_len = 2; 00534 items[1].code[0] = (OnigCodePoint )'S'; 00535 items[1].code[1] = (OnigCodePoint )'S'; 00536 00537 items[2].byte_len = 1; 00538 items[2].code_len = 2; 00539 items[2].code[0] = (OnigCodePoint )'s'; 00540 items[2].code[1] = (OnigCodePoint )'S'; 00541 00542 items[3].byte_len = 1; 00543 items[3].code_len = 2; 00544 items[3].code[0] = (OnigCodePoint )'S'; 00545 items[3].code[1] = (OnigCodePoint )'s'; 00546 00547 return 4; 00548 } 00549 else { 00550 int i; 00551 00552 for (i = 0; i < map_size; i++) { 00553 if (*p == map[i].from) { 00554 items[0].byte_len = 1; 00555 items[0].code_len = 1; 00556 items[0].code[0] = map[i].to; 00557 return 1; 00558 } 00559 else if (*p == map[i].to) { 00560 items[0].byte_len = 1; 00561 items[0].code_len = 1; 00562 items[0].code[0] = map[i].from; 00563 return 1; 00564 } 00565 } 00566 } 00567 00568 return 0; 00569 } 00570 00571 00572 extern int 00573 onigenc_not_support_get_ctype_code_range(OnigCtype ctype, 00574 OnigCodePoint* sb_out, const OnigCodePoint* ranges[], 00575 OnigEncoding enc) 00576 { 00577 return ONIG_NO_SUPPORT_CONFIG; 00578 } 00579 00580 extern int 00581 onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end, OnigEncoding enc ARG_UNUSED) 00582 { 00583 if (p < end) { 00584 if (*p == 0x0a) return 1; 00585 } 00586 return 0; 00587 } 00588 00589 /* for single byte encodings */ 00590 extern int 00591 onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p, 00592 const UChar*end, 
UChar* lower, OnigEncoding enc ARG_UNUSED) 00593 { 00594 *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p); 00595 00596 (*p)++; 00597 return 1; /* return byte length of converted char to lower */ 00598 } 00599 00600 #if 0 00601 extern int 00602 onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag ARG_UNUSED, 00603 const UChar** pp, const UChar* end ARG_UNUSED) 00604 { 00605 const UChar* p = *pp; 00606 00607 (*pp)++; 00608 return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p); 00609 } 00610 #endif 00611 00612 extern int 00613 onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED, const UChar* e ARG_UNUSED, 00614 OnigEncoding enc ARG_UNUSED) 00615 { 00616 return 1; 00617 } 00618 00619 extern OnigCodePoint 00620 onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED, 00621 OnigEncoding enc ARG_UNUSED) 00622 { 00623 return (OnigCodePoint )(*p); 00624 } 00625 00626 extern int 00627 onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED, OnigEncoding enc ARG_UNUSED) 00628 { 00629 return 1; 00630 } 00631 00632 extern int 00633 onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED) 00634 { 00635 if (code > 0xff) 00636 rb_raise(rb_eRangeError, "%u out of char range", code); 00637 *buf = (UChar )(code & 0xff); 00638 return 1; 00639 } 00640 00641 extern UChar* 00642 onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED, const UChar* s, 00643 const UChar* end, 00644 OnigEncoding enc ARG_UNUSED) 00645 { 00646 return (UChar* )s; 00647 } 00648 00649 extern int 00650 onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED, const UChar* end ARG_UNUSED, 00651 OnigEncoding enc ARG_UNUSED) 00652 { 00653 return TRUE; 00654 } 00655 00656 extern int 00657 onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED, const UChar* end ARG_UNUSED, 00658 OnigEncoding enc ARG_UNUSED) 00659 { 00660 return FALSE; 00661 } 00662 00663 extern int 00664 onigenc_ascii_is_code_ctype(OnigCodePoint 
code, unsigned int ctype, 00665 OnigEncoding enc ARG_UNUSED) 00666 { 00667 if (code < 128) 00668 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); 00669 else 00670 return FALSE; 00671 } 00672 00673 extern OnigCodePoint 00674 onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end) 00675 { 00676 int c, i, len; 00677 OnigCodePoint n; 00678 00679 len = enclen(enc, p, end); 00680 n = (OnigCodePoint )(*p++); 00681 if (len == 1) return n; 00682 00683 for (i = 1; i < len; i++) { 00684 if (p >= end) break; 00685 c = *p++; 00686 n <<= 8; n += c; 00687 } 00688 return n; 00689 } 00690 00691 extern int 00692 onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED, 00693 const UChar** pp, const UChar* end ARG_UNUSED, 00694 UChar* lower) 00695 { 00696 int len; 00697 const UChar *p = *pp; 00698 00699 if (ONIGENC_IS_MBC_ASCII(p)) { 00700 *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); 00701 (*pp)++; 00702 return 1; 00703 } 00704 else { 00705 int i; 00706 00707 len = enclen(enc, p, end); 00708 for (i = 0; i < len; i++) { 00709 *lower++ = *p++; 00710 } 00711 (*pp) += len; 00712 return len; /* return byte length of converted to lower char */ 00713 } 00714 } 00715 00716 #if 0 00717 extern int 00718 onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag, 00719 const UChar** pp ARG_UNUSED, const UChar* end ARG_UNUSED) 00720 { 00721 const UChar* p = *pp; 00722 00723 if (ONIGENC_IS_MBC_ASCII(p)) { 00724 (*pp)++; 00725 return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p); 00726 } 00727 00728 (*pp) += enclen(enc, p); 00729 return FALSE; 00730 } 00731 #endif 00732 00733 extern int 00734 onigenc_mb2_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED) 00735 { 00736 if (code <= 0xff) return 1; 00737 if (code <= 0xffff) return 2; 00738 return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; 00739 } 00740 00741 extern int 00742 onigenc_mb4_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED) 00743 { 00744 if ((code & 0xff000000) != 0) return 4; 
00745 else if ((code & 0xff0000) != 0) return 3; 00746 else if ((code & 0xff00) != 0) return 2; 00747 else return 1; 00748 } 00749 00750 extern int 00751 onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) 00752 { 00753 UChar *p = buf; 00754 00755 if ((code & 0xff00) != 0) { 00756 *p++ = (UChar )((code >> 8) & 0xff); 00757 } 00758 *p++ = (UChar )(code & 0xff); 00759 00760 #if 1 00761 if (enclen(enc, buf, p) != (p - buf)) 00762 return ONIGERR_INVALID_CODE_POINT_VALUE; 00763 #endif 00764 return (int )(p - buf); 00765 } 00766 00767 extern int 00768 onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) 00769 { 00770 UChar *p = buf; 00771 00772 if ((code & 0xff000000) != 0) { 00773 *p++ = (UChar )((code >> 24) & 0xff); 00774 } 00775 if ((code & 0xff0000) != 0 || p != buf) { 00776 *p++ = (UChar )((code >> 16) & 0xff); 00777 } 00778 if ((code & 0xff00) != 0 || p != buf) { 00779 *p++ = (UChar )((code >> 8) & 0xff); 00780 } 00781 *p++ = (UChar )(code & 0xff); 00782 00783 #if 1 00784 if (enclen(enc, buf, p) != (p - buf)) 00785 return ONIGERR_INVALID_CODE_POINT_VALUE; 00786 #endif 00787 return (int )(p - buf); 00788 } 00789 00790 extern int 00791 onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end) 00792 { 00793 static const PosixBracketEntryType PBS[] = { 00794 PosixBracketEntryInit("Alnum", ONIGENC_CTYPE_ALNUM), 00795 PosixBracketEntryInit("Alpha", ONIGENC_CTYPE_ALPHA), 00796 PosixBracketEntryInit("Blank", ONIGENC_CTYPE_BLANK), 00797 PosixBracketEntryInit("Cntrl", ONIGENC_CTYPE_CNTRL), 00798 PosixBracketEntryInit("Digit", ONIGENC_CTYPE_DIGIT), 00799 PosixBracketEntryInit("Graph", ONIGENC_CTYPE_GRAPH), 00800 PosixBracketEntryInit("Lower", ONIGENC_CTYPE_LOWER), 00801 PosixBracketEntryInit("Print", ONIGENC_CTYPE_PRINT), 00802 PosixBracketEntryInit("Punct", ONIGENC_CTYPE_PUNCT), 00803 PosixBracketEntryInit("Space", ONIGENC_CTYPE_SPACE), 00804 PosixBracketEntryInit("Upper", ONIGENC_CTYPE_UPPER), 00805 
PosixBracketEntryInit("XDigit", ONIGENC_CTYPE_XDIGIT), 00806 PosixBracketEntryInit("ASCII", ONIGENC_CTYPE_ASCII), 00807 PosixBracketEntryInit("Word", ONIGENC_CTYPE_WORD), 00808 }; 00809 00810 const PosixBracketEntryType *pb, *pbe; 00811 int len; 00812 00813 len = onigenc_strlen(enc, p, end); 00814 for (pbe = (pb = PBS) + sizeof(PBS)/sizeof(PBS[0]); pb < pbe; ++pb) { 00815 if (len == pb->len && 00816 onigenc_with_ascii_strnicmp(enc, p, end, pb->name, pb->len) == 0) 00817 return pb->ctype; 00818 } 00819 00820 return ONIGERR_INVALID_CHAR_PROPERTY_NAME; 00821 } 00822 00823 extern int 00824 onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code, 00825 unsigned int ctype) 00826 { 00827 if (code < 128) 00828 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); 00829 else { 00830 if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) { 00831 return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE); 00832 } 00833 } 00834 00835 return FALSE; 00836 } 00837 00838 extern int 00839 onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code, 00840 unsigned int ctype) 00841 { 00842 if (code < 128) 00843 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); 00844 else { 00845 if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) { 00846 return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? 
TRUE : FALSE); 00847 } 00848 } 00849 00850 return FALSE; 00851 } 00852 00853 extern int 00854 onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end, 00855 const UChar* sascii /* ascii */, int n) 00856 { 00857 int x, c; 00858 00859 while (n-- > 0) { 00860 if (p >= end) return (int )(*sascii); 00861 00862 c = (int )ONIGENC_MBC_TO_CODE(enc, p, end); 00863 x = *sascii - c; 00864 if (x) return x; 00865 00866 sascii++; 00867 p += enclen(enc, p, end); 00868 } 00869 return 0; 00870 } 00871 00872 extern int 00873 onigenc_with_ascii_strnicmp(OnigEncoding enc, const UChar* p, const UChar* end, 00874 const UChar* sascii /* ascii */, int n) 00875 { 00876 int x, c; 00877 00878 while (n-- > 0) { 00879 if (p >= end) return (int )(*sascii); 00880 00881 c = (int )ONIGENC_MBC_TO_CODE(enc, p, end); 00882 if (ONIGENC_IS_ASCII_CODE(c)) 00883 c = ONIGENC_ASCII_CODE_TO_LOWER_CASE(c); 00884 x = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*sascii) - c; 00885 if (x) return x; 00886 00887 sascii++; 00888 p += enclen(enc, p, end); 00889 } 00890 return 0; 00891 } 00892 00893 /* Property management */ 00894 static int 00895 resize_property_list(int new_size, const OnigCodePoint*** plist, int* psize) 00896 { 00897 size_t size; 00898 const OnigCodePoint **list = *plist; 00899 00900 size = sizeof(OnigCodePoint*) * new_size; 00901 if (IS_NULL(list)) { 00902 list = (const OnigCodePoint** )xmalloc(size); 00903 } 00904 else { 00905 list = (const OnigCodePoint** )xrealloc((void* )list, size); 00906 } 00907 00908 if (IS_NULL(list)) return ONIGERR_MEMORY; 00909 00910 *plist = list; 00911 *psize = new_size; 00912 00913 return 0; 00914 } 00915 00916 extern int 00917 onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop, 00918 hash_table_type **table, const OnigCodePoint*** plist, int *pnum, 00919 int *psize) 00920 { 00921 #define PROP_INIT_SIZE 16 00922 00923 int r; 00924 00925 if (*psize <= *pnum) { 00926 int new_size = (*psize == 0 ? 
PROP_INIT_SIZE : *psize * 2); 00927 r = resize_property_list(new_size, plist, psize); 00928 if (r != 0) return r; 00929 } 00930 00931 (*plist)[*pnum] = prop; 00932 00933 if (ONIG_IS_NULL(*table)) { 00934 *table = onig_st_init_strend_table_with_size(PROP_INIT_SIZE); 00935 if (ONIG_IS_NULL(*table)) return ONIGERR_MEMORY; 00936 } 00937 00938 *pnum = *pnum + 1; 00939 onig_st_insert_strend(*table, name, name + strlen((char* )name), 00940 (hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE)); 00941 return 0; 00942 } 00943 00944 extern int 00945 onigenc_property_list_init(int (*f)(void)) 00946 { 00947 int r; 00948 00949 THREAD_ATOMIC_START; 00950 00951 r = f(); 00952 00953 THREAD_ATOMIC_END; 00954 return r; 00955 } 00956