Ruby
2.0.0p247(2013-06-27revision41674)
|
00001 /********************************************************************** 00002 regcomp.c - Onigmo (Oniguruma-mod) (regular expression library) 00003 **********************************************************************/ 00004 /*- 00005 * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> 00006 * Copyright (c) 2011-2013 K.Takata <kentkt AT csc DOT jp> 00007 * All rights reserved. 00008 * 00009 * Redistribution and use in source and binary forms, with or without 00010 * modification, are permitted provided that the following conditions 00011 * are met: 00012 * 1. Redistributions of source code must retain the above copyright 00013 * notice, this list of conditions and the following disclaimer. 00014 * 2. Redistributions in binary form must reproduce the above copyright 00015 * notice, this list of conditions and the following disclaimer in the 00016 * documentation and/or other materials provided with the distribution. 00017 * 00018 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 00019 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00020 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 00021 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 00022 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 00023 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 00024 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 00025 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00026 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 00027 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 00028 * SUCH DAMAGE. 00029 */ 00030 00031 #include "regparse.h" 00032 00033 OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN; 00034 00035 extern OnigCaseFoldType 00036 onig_get_default_case_fold_flag(void) 00037 { 00038 return OnigDefaultCaseFoldFlag; 00039 } 00040 00041 extern int 00042 onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag) 00043 { 00044 OnigDefaultCaseFoldFlag = case_fold_flag; 00045 return 0; 00046 } 00047 00048 00049 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS 00050 static unsigned char PadBuf[WORD_ALIGNMENT_SIZE]; 00051 #endif 00052 00053 #if 0 00054 static UChar* 00055 str_dup(UChar* s, UChar* end) 00056 { 00057 ptrdiff_t len = end - s; 00058 00059 if (len > 0) { 00060 UChar* r = (UChar* )xmalloc(len + 1); 00061 CHECK_NULL_RETURN(r); 00062 xmemcpy(r, s, len); 00063 r[len] = (UChar )0; 00064 return r; 00065 } 00066 else return NULL; 00067 } 00068 #endif 00069 00070 static void 00071 swap_node(Node* a, Node* b) 00072 { 00073 Node c; 00074 c = *a; *a = *b; *b = c; 00075 00076 if (NTYPE(a) == NT_STR) { 00077 StrNode* sn = NSTR(a); 00078 if (sn->capa == 0) { 00079 size_t len = sn->end - sn->s; 00080 sn->s = sn->buf; 00081 sn->end = sn->s + len; 00082 } 00083 } 00084 00085 if (NTYPE(b) == NT_STR) { 00086 StrNode* sn = NSTR(b); 00087 if (sn->capa == 0) { 00088 size_t len = sn->end - sn->s; 00089 sn->s = sn->buf; 00090 sn->end = sn->s + len; 00091 } 00092 } 00093 } 00094 00095 static OnigDistance 00096 distance_add(OnigDistance d1, OnigDistance d2) 00097 { 00098 if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE) 00099 return ONIG_INFINITE_DISTANCE; 00100 else { 00101 if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2; 00102 else return ONIG_INFINITE_DISTANCE; 00103 } 00104 } 00105 00106 static OnigDistance 00107 distance_multiply(OnigDistance d, int m) 00108 { 00109 if (m == 0) return 0; 00110 00111 if (d < ONIG_INFINITE_DISTANCE / m) 00112 return d * m; 00113 else 00114 return ONIG_INFINITE_DISTANCE; 00115 } 00116 00117 static int 00118 bitset_is_empty(BitSetRef bs) 00119 { 00120 int i; 00121 for (i = 0; i < BITSET_SIZE; i++) { 00122 if (bs[i] != 0) return 0; 00123 } 00124 return 1; 00125 } 00126 00127 #ifdef ONIG_DEBUG 00128 static int 00129 onig_is_prelude(void) 00130 { 00131 return !rb_const_defined(rb_cThread, rb_intern_const("MUTEX_FOR_THREAD_EXCLUSIVE")); 00132 } 00133 00134 static int 00135 bitset_on_num(BitSetRef bs) 00136 { 00137 int i, n; 00138 00139 n = 0; 00140 for (i = 0; i < SINGLE_BYTE_SIZE; i++) { 00141 if (BITSET_AT(bs, i)) n++; 00142 } 00143 return n; 00144 } 00145 #endif 00146 00147 extern int 00148 onig_bbuf_init(BBuf* buf, OnigDistance size) 00149 { 00150 if (size <= 0) { 00151 size = 0; 00152 buf->p = NULL; 00153 } 00154 else { 00155 buf->p = (UChar* )xmalloc(size); 00156 if (IS_NULL(buf->p)) return(ONIGERR_MEMORY); 00157 } 00158 00159 buf->alloc = (unsigned int )size; 00160 buf->used = 0; 00161 return 0; 00162 } 00163 00164 00165 #ifdef USE_SUBEXP_CALL 00166 00167 static int 00168 unset_addr_list_init(UnsetAddrList* uslist, int size) 00169 { 00170 UnsetAddr* p; 00171 00172 p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size); 00173 CHECK_NULL_RETURN_MEMERR(p); 00174 uslist->num = 0; 00175 uslist->alloc = size; 00176 uslist->us = p; 00177 return 0; 00178 } 00179 00180 static void 00181 unset_addr_list_end(UnsetAddrList* uslist) 00182 { 00183 if (IS_NOT_NULL(uslist->us)) 00184 xfree(uslist->us); 00185 } 00186 00187 static int 00188 unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node) 00189 { 00190 UnsetAddr* p; 00191 int size; 00192 00193 if (uslist->num >= uslist->alloc) { 00194 size = uslist->alloc * 2; 00195 p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size); 00196 CHECK_NULL_RETURN_MEMERR(p); 00197 uslist->alloc = size; 00198 uslist->us = p; 00199 } 00200 00201 uslist->us[uslist->num].offset = offset; 00202 uslist->us[uslist->num].target = node; 00203 uslist->num++; 00204 return 0; 00205 } 00206 #endif /* USE_SUBEXP_CALL */ 00207 00208 00209 static int 00210 add_opcode(regex_t* reg, int opcode) 00211 { 00212 BBUF_ADD1(reg, opcode); 00213 return 0; 00214 } 00215 00216 #ifdef USE_COMBINATION_EXPLOSION_CHECK 00217 static int 00218 add_state_check_num(regex_t* reg, int num) 00219 { 00220 StateCheckNumType n = (StateCheckNumType )num; 00221 00222 BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM); 00223 return 0; 00224 } 00225 #endif 00226 00227 static int 00228 add_rel_addr(regex_t* reg, int addr) 00229 { 00230 RelAddrType ra = (RelAddrType )addr; 00231 00232 BBUF_ADD(reg, &ra, SIZE_RELADDR); 00233 return 0; 00234 } 00235 00236 static int 00237 add_abs_addr(regex_t* reg, int addr) 00238 { 00239 AbsAddrType ra = (AbsAddrType )addr; 00240 00241 BBUF_ADD(reg, &ra, SIZE_ABSADDR); 00242 return 0; 00243 } 00244 00245 static int 00246 add_length(regex_t* reg, OnigDistance len) 00247 { 00248 LengthType l = (LengthType )len; 00249 00250 BBUF_ADD(reg, &l, SIZE_LENGTH); 00251 return 0; 00252 } 00253 00254 static int 00255 add_mem_num(regex_t* reg, int num) 00256 { 00257 MemNumType n = (MemNumType )num; 00258 00259 BBUF_ADD(reg, &n, SIZE_MEMNUM); 00260 return 0; 00261 } 00262 00263 static int 00264 add_pointer(regex_t* reg, void* addr) 00265 { 00266 PointerType ptr = (PointerType )addr; 00267 00268 BBUF_ADD(reg, &ptr, SIZE_POINTER); 00269 return 0; 00270 } 00271 00272 static int 00273 add_option(regex_t* reg, OnigOptionType option) 00274 { 00275 BBUF_ADD(reg, &option, SIZE_OPTION); 00276 return 0; 00277 } 00278 00279 static int 00280 add_opcode_rel_addr(regex_t* reg, int opcode, int addr) 00281 { 00282 int r; 00283 00284 r = add_opcode(reg, opcode); 00285 if (r) return r; 00286 r = add_rel_addr(reg, addr); 00287 return r; 00288 } 00289 00290 static int 00291 add_bytes(regex_t* reg, UChar* bytes, OnigDistance len) 00292 { 00293 BBUF_ADD(reg, bytes, len); 00294 return 0; 00295 } 00296 00297 static int 00298 add_bitset(regex_t* reg, BitSetRef bs) 00299 { 00300 BBUF_ADD(reg, bs, SIZE_BITSET); 00301 return 0; 00302 } 00303 00304 static int 00305 add_opcode_option(regex_t* reg, int opcode, OnigOptionType option) 00306 { 00307 int r; 00308 00309 r = add_opcode(reg, opcode); 00310 if (r) return r; 00311 r = add_option(reg, option); 00312 return r; 00313 } 00314 00315 static int compile_length_tree(Node* node, regex_t* reg); 00316 static int compile_tree(Node* node, regex_t* reg); 00317 00318 00319 #define IS_NEED_STR_LEN_OP_EXACT(op) \ 00320 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\ 00321 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC) 00322 00323 static int 00324 select_str_opcode(int mb_len, OnigDistance str_len, int ignore_case) 00325 { 00326 int op; 00327 00328 if (ignore_case) { 00329 switch (str_len) { 00330 case 1: op = OP_EXACT1_IC; break; 00331 default: op = OP_EXACTN_IC; break; 00332 } 00333 } 00334 else { 00335 switch (mb_len) { 00336 case 1: 00337 switch (str_len) { 00338 case 1: op = OP_EXACT1; break; 00339 case 2: op = OP_EXACT2; break; 00340 case 3: op = OP_EXACT3; break; 00341 case 4: op = OP_EXACT4; break; 00342 case 5: op = OP_EXACT5; break; 00343 default: op = OP_EXACTN; break; 00344 } 00345 break; 00346 00347 case 2: 00348 switch (str_len) { 00349 case 1: op = OP_EXACTMB2N1; break; 00350 case 2: op = OP_EXACTMB2N2; break; 00351 case 3: op = OP_EXACTMB2N3; break; 00352 default: op = OP_EXACTMB2N; break; 00353 } 00354 break; 00355 00356 case 3: 00357 op = OP_EXACTMB3N; 00358 break; 00359 00360 default: 00361 op = OP_EXACTMBN; 00362 break; 00363 } 00364 } 00365 return op; 00366 } 00367 00368 static int 00369 compile_tree_empty_check(Node* node, regex_t* reg, int empty_info) 00370 { 00371 int r; 00372 int saved_num_null_check = reg->num_null_check; 00373 00374 if (empty_info != 0) { 00375 r = add_opcode(reg, OP_NULL_CHECK_START); 00376 if (r) return r; 00377 r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */ 00378 if (r) return r; 00379 reg->num_null_check++; 00380 } 00381 00382 r = compile_tree(node, reg); 00383 if (r) return r; 00384 00385 if (empty_info != 0) { 00386 if (empty_info == NQ_TARGET_IS_EMPTY) 00387 r = add_opcode(reg, OP_NULL_CHECK_END); 00388 else if (empty_info == NQ_TARGET_IS_EMPTY_MEM) 00389 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST); 00390 else if (empty_info == NQ_TARGET_IS_EMPTY_REC) 00391 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH); 00392 00393 if (r) return r; 00394 r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */ 00395 } 00396 return r; 00397 } 00398 00399 #ifdef USE_SUBEXP_CALL 00400 static int 00401 compile_call(CallNode* node, regex_t* reg) 00402 { 00403 int r; 00404 00405 r = add_opcode(reg, OP_CALL); 00406 if (r) return r; 00407 r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg), 00408 node->target); 00409 if (r) return r; 00410 r = add_abs_addr(reg, 0 /*dummy addr.*/); 00411 return r; 00412 } 00413 #endif 00414 00415 static int 00416 compile_tree_n_times(Node* node, int n, regex_t* reg) 00417 { 00418 int i, r; 00419 00420 for (i = 0; i < n; i++) { 00421 r = compile_tree(node, reg); 00422 if (r) return r; 00423 } 00424 return 0; 00425 } 00426 00427 static int 00428 add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance str_len, 00429 regex_t* reg ARG_UNUSED, int ignore_case) 00430 { 00431 int len; 00432 int op = select_str_opcode(mb_len, str_len, ignore_case); 00433 00434 len = SIZE_OPCODE; 00435 00436 if (op == OP_EXACTMBN) len += SIZE_LENGTH; 00437 if (IS_NEED_STR_LEN_OP_EXACT(op)) 00438 len += SIZE_LENGTH; 00439 00440 len += mb_len * (int )str_len; 00441 return len; 00442 } 00443 00444 static int 00445 add_compile_string(UChar* s, int mb_len, OnigDistance str_len, 00446 regex_t* reg, int ignore_case) 00447 { 00448 int op = select_str_opcode(mb_len, str_len, ignore_case); 00449 add_opcode(reg, op); 00450 00451 if (op == OP_EXACTMBN) 00452 add_length(reg, mb_len); 00453 00454 if (IS_NEED_STR_LEN_OP_EXACT(op)) { 00455 if (op == OP_EXACTN_IC) 00456 add_length(reg, mb_len * str_len); 00457 else 00458 add_length(reg, str_len); 00459 } 00460 00461 add_bytes(reg, s, mb_len * str_len); 00462 return 0; 00463 } 00464 00465 00466 static int 00467 compile_length_string_node(Node* node, regex_t* reg) 00468 { 00469 int rlen, r, len, prev_len, slen, ambig; 00470 OnigEncoding enc = reg->enc; 00471 UChar *p, *prev; 00472 StrNode* sn; 00473 00474 sn = NSTR(node); 00475 if (sn->end <= sn->s) 00476 return 0; 00477 00478 ambig = NSTRING_IS_AMBIG(node); 00479 00480 p = prev = sn->s; 00481 prev_len = enclen(enc, p, sn->end); 00482 p += prev_len; 00483 slen = 1; 00484 rlen = 0; 00485 00486 for (; p < sn->end; ) { 00487 len = enclen(enc, p, sn->end); 00488 if (len == prev_len) { 00489 slen++; 00490 } 00491 else { 00492 r = add_compile_string_length(prev, prev_len, slen, reg, ambig); 00493 rlen += r; 00494 prev = p; 00495 slen = 1; 00496 prev_len = len; 00497 } 00498 p += len; 00499 } 00500 r = add_compile_string_length(prev, prev_len, slen, reg, ambig); 00501 rlen += r; 00502 return rlen; 00503 } 00504 00505 static int 00506 compile_length_string_raw_node(StrNode* sn, regex_t* reg) 00507 { 00508 if (sn->end <= sn->s) 00509 return 0; 00510 00511 return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0); 00512 } 00513 00514 static int 00515 compile_string_node(Node* node, regex_t* reg) 00516 { 00517 int r, len, prev_len, slen, ambig; 00518 OnigEncoding enc = reg->enc; 00519 UChar *p, *prev, *end; 00520 StrNode* sn; 00521 00522 sn = NSTR(node); 00523 if (sn->end <= sn->s) 00524 return 0; 00525 00526 end = sn->end; 00527 ambig = NSTRING_IS_AMBIG(node); 00528 00529 p = prev = sn->s; 00530 prev_len = enclen(enc, p, end); 00531 p += prev_len; 00532 slen = 1; 00533 00534 for (; p < end; ) { 00535 len = enclen(enc, p, end); 00536 if (len == prev_len) { 00537 slen++; 00538 } 00539 else { 00540 r = add_compile_string(prev, prev_len, slen, reg, ambig); 00541 if (r) return r; 00542 00543 prev = p; 00544 slen = 1; 00545 prev_len = len; 00546 } 00547 00548 p += len; 00549 } 00550 return add_compile_string(prev, prev_len, slen, reg, ambig); 00551 } 00552 00553 static int 00554 compile_string_raw_node(StrNode* sn, regex_t* reg) 00555 { 00556 if (sn->end <= sn->s) 00557 return 0; 00558 00559 return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0); 00560 } 00561 00562 static int 00563 add_multi_byte_cclass(BBuf* mbuf, regex_t* reg) 00564 { 00565 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS 00566 add_length(reg, mbuf->used); 00567 return add_bytes(reg, mbuf->p, mbuf->used); 00568 #else 00569 int r, pad_size; 00570 UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH; 00571 00572 GET_ALIGNMENT_PAD_SIZE(p, pad_size); 00573 add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1)); 00574 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size); 00575 00576 r = add_bytes(reg, mbuf->p, mbuf->used); 00577 00578 /* padding for return value from compile_length_cclass_node() to be fix. */ 00579 pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size; 00580 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size); 00581 return r; 00582 #endif 00583 } 00584 00585 static int 00586 compile_length_cclass_node(CClassNode* cc, regex_t* reg) 00587 { 00588 int len; 00589 00590 if (IS_NCCLASS_SHARE(cc)) { 00591 len = SIZE_OPCODE + SIZE_POINTER; 00592 return len; 00593 } 00594 00595 if (IS_NULL(cc->mbuf)) { 00596 len = SIZE_OPCODE + SIZE_BITSET; 00597 } 00598 else { 00599 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) { 00600 len = SIZE_OPCODE; 00601 } 00602 else { 00603 len = SIZE_OPCODE + SIZE_BITSET; 00604 } 00605 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS 00606 len += SIZE_LENGTH + cc->mbuf->used; 00607 #else 00608 len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1); 00609 #endif 00610 } 00611 00612 return len; 00613 } 00614 00615 static int 00616 compile_cclass_node(CClassNode* cc, regex_t* reg) 00617 { 00618 int r; 00619 00620 if (IS_NCCLASS_SHARE(cc)) { 00621 add_opcode(reg, OP_CCLASS_NODE); 00622 r = add_pointer(reg, cc); 00623 return r; 00624 } 00625 00626 if (IS_NULL(cc->mbuf)) { 00627 if (IS_NCCLASS_NOT(cc)) 00628 add_opcode(reg, OP_CCLASS_NOT); 00629 else 00630 add_opcode(reg, OP_CCLASS); 00631 00632 r = add_bitset(reg, cc->bs); 00633 } 00634 else { 00635 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) { 00636 if (IS_NCCLASS_NOT(cc)) 00637 add_opcode(reg, OP_CCLASS_MB_NOT); 00638 else 00639 add_opcode(reg, OP_CCLASS_MB); 00640 00641 r = add_multi_byte_cclass(cc->mbuf, reg); 00642 } 00643 else { 00644 if (IS_NCCLASS_NOT(cc)) 00645 add_opcode(reg, OP_CCLASS_MIX_NOT); 00646 else 00647 add_opcode(reg, OP_CCLASS_MIX); 00648 00649 r = add_bitset(reg, cc->bs); 00650 if (r) return r; 00651 r = add_multi_byte_cclass(cc->mbuf, reg); 00652 } 00653 } 00654 00655 return r; 00656 } 00657 00658 static int 00659 entry_repeat_range(regex_t* reg, int id, int lower, int upper) 00660 { 00661 #define REPEAT_RANGE_ALLOC 4 00662 00663 OnigRepeatRange* p; 00664 00665 if (reg->repeat_range_alloc == 0) { 00666 p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC); 00667 CHECK_NULL_RETURN_MEMERR(p); 00668 reg->repeat_range = p; 00669 reg->repeat_range_alloc = REPEAT_RANGE_ALLOC; 00670 } 00671 else if (reg->repeat_range_alloc <= id) { 00672 int n; 00673 n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC; 00674 p = (OnigRepeatRange* )xrealloc(reg->repeat_range, 00675 sizeof(OnigRepeatRange) * n); 00676 CHECK_NULL_RETURN_MEMERR(p); 00677 reg->repeat_range = p; 00678 reg->repeat_range_alloc = n; 00679 } 00680 else { 00681 p = reg->repeat_range; 00682 } 00683 00684 p[id].lower = lower; 00685 p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper); 00686 return 0; 00687 } 00688 00689 static int 00690 compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info, 00691 regex_t* reg) 00692 { 00693 int r; 00694 int num_repeat = reg->num_repeat; 00695 00696 r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG); 00697 if (r) return r; 00698 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */ 00699 reg->num_repeat++; 00700 if (r) return r; 00701 r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC); 00702 if (r) return r; 00703 00704 r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper); 00705 if (r) return r; 00706 00707 r = compile_tree_empty_check(qn->target, reg, empty_info); 00708 if (r) return r; 00709 00710 if ( 00711 #ifdef USE_SUBEXP_CALL 00712 reg->num_call > 0 || 00713 #endif 00714 IS_QUANTIFIER_IN_REPEAT(qn)) { 00715 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG); 00716 } 00717 else { 00718 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG); 00719 } 00720 if (r) return r; 00721 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */ 00722 return r; 00723 } 00724 00725 static int 00726 is_anychar_star_quantifier(QtfrNode* qn) 00727 { 00728 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) && 00729 NTYPE(qn->target) == NT_CANY) 00730 return 1; 00731 else 00732 return 0; 00733 } 00734 00735 #define QUANTIFIER_EXPAND_LIMIT_SIZE 50 00736 #define CKN_ON (ckn > 0) 00737 00738 #ifdef USE_COMBINATION_EXPLOSION_CHECK 00739 00740 static int 00741 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) 00742 { 00743 int len, mod_tlen, cklen; 00744 int ckn; 00745 int infinite = IS_REPEAT_INFINITE(qn->upper); 00746 int empty_info = qn->target_empty_info; 00747 int tlen = compile_length_tree(qn->target, reg); 00748 00749 if (tlen < 0) return tlen; 00750 00751 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0); 00752 00753 cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0); 00754 00755 /* anychar repeat */ 00756 if (NTYPE(qn->target) == NT_CANY) { 00757 if (qn->greedy && infinite) { 00758 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) 00759 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen; 00760 else 00761 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen; 00762 } 00763 } 00764 00765 if (empty_info != 0) 00766 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); 00767 else 00768 mod_tlen = tlen; 00769 00770 if (infinite && qn->lower <= 1) { 00771 if (qn->greedy) { 00772 if (qn->lower == 1) 00773 len = SIZE_OP_JUMP; 00774 else 00775 len = 0; 00776 00777 len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP; 00778 } 00779 else { 00780 if (qn->lower == 0) 00781 len = SIZE_OP_JUMP; 00782 else 00783 len = 0; 00784 00785 len += mod_tlen + SIZE_OP_PUSH + cklen; 00786 } 00787 } 00788 else if (qn->upper == 0) { 00789 if (qn->is_refered != 0) /* /(?<n>..){0}/ */ 00790 len = SIZE_OP_JUMP + tlen; 00791 else 00792 len = 0; 00793 } 00794 else if (qn->upper == 1 && qn->greedy) { 00795 if (qn->lower == 0) { 00796 if (CKN_ON) { 00797 len = SIZE_OP_STATE_CHECK_PUSH + tlen; 00798 } 00799 else { 00800 len = SIZE_OP_PUSH + tlen; 00801 } 00802 } 00803 else { 00804 len = tlen; 00805 } 00806 } 00807 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ 00808 len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen; 00809 } 00810 else { 00811 len = SIZE_OP_REPEAT_INC 00812 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM; 00813 if (CKN_ON) 00814 len += SIZE_OP_STATE_CHECK; 00815 } 00816 00817 return len; 00818 } 00819 00820 static int 00821 compile_quantifier_node(QtfrNode* qn, regex_t* reg) 00822 { 00823 int r, mod_tlen; 00824 int ckn; 00825 int infinite = IS_REPEAT_INFINITE(qn->upper); 00826 int empty_info = qn->target_empty_info; 00827 int tlen = compile_length_tree(qn->target, reg); 00828 00829 if (tlen < 0) return tlen; 00830 00831 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0); 00832 00833 if (is_anychar_star_quantifier(qn)) { 00834 r = compile_tree_n_times(qn->target, qn->lower, reg); 00835 if (r) return r; 00836 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) { 00837 if (IS_MULTILINE(reg->options)) 00838 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); 00839 else 00840 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); 00841 if (r) return r; 00842 if (CKN_ON) { 00843 r = add_state_check_num(reg, ckn); 00844 if (r) return r; 00845 } 00846 00847 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); 00848 } 00849 else { 00850 if (IS_MULTILINE(reg->options)) { 00851 r = add_opcode(reg, (CKN_ON ? 00852 OP_STATE_CHECK_ANYCHAR_ML_STAR 00853 : OP_ANYCHAR_ML_STAR)); 00854 } 00855 else { 00856 r = add_opcode(reg, (CKN_ON ? 00857 OP_STATE_CHECK_ANYCHAR_STAR 00858 : OP_ANYCHAR_STAR)); 00859 } 00860 if (r) return r; 00861 if (CKN_ON) 00862 r = add_state_check_num(reg, ckn); 00863 00864 return r; 00865 } 00866 } 00867 00868 if (empty_info != 0) 00869 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); 00870 else 00871 mod_tlen = tlen; 00872 00873 if (infinite && qn->lower <= 1) { 00874 if (qn->greedy) { 00875 if (qn->lower == 1) { 00876 r = add_opcode_rel_addr(reg, OP_JUMP, 00877 (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)); 00878 if (r) return r; 00879 } 00880 00881 if (CKN_ON) { 00882 r = add_opcode(reg, OP_STATE_CHECK_PUSH); 00883 if (r) return r; 00884 r = add_state_check_num(reg, ckn); 00885 if (r) return r; 00886 r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP); 00887 } 00888 else { 00889 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP); 00890 } 00891 if (r) return r; 00892 r = compile_tree_empty_check(qn->target, reg, empty_info); 00893 if (r) return r; 00894 r = add_opcode_rel_addr(reg, OP_JUMP, 00895 -(mod_tlen + (int )SIZE_OP_JUMP 00896 + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH))); 00897 } 00898 else { 00899 if (qn->lower == 0) { 00900 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen); 00901 if (r) return r; 00902 } 00903 r = compile_tree_empty_check(qn->target, reg, empty_info); 00904 if (r) return r; 00905 if (CKN_ON) { 00906 r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP); 00907 if (r) return r; 00908 r = add_state_check_num(reg, ckn); 00909 if (r) return r; 00910 r = add_rel_addr(reg, 00911 -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP)); 00912 } 00913 else 00914 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH)); 00915 } 00916 } 00917 else if (qn->upper == 0) { 00918 if (qn->is_refered != 0) { /* /(?<n>..){0}/ */ 00919 r = add_opcode_rel_addr(reg, OP_JUMP, tlen); 00920 if (r) return r; 00921 r = compile_tree(qn->target, reg); 00922 } 00923 else 00924 r = 0; 00925 } 00926 else if (qn->upper == 1 && qn->greedy) { 00927 if (qn->lower == 0) { 00928 if (CKN_ON) { 00929 r = add_opcode(reg, OP_STATE_CHECK_PUSH); 00930 if (r) return r; 00931 r = add_state_check_num(reg, ckn); 00932 if (r) return r; 00933 r = add_rel_addr(reg, tlen); 00934 } 00935 else { 00936 r = add_opcode_rel_addr(reg, OP_PUSH, tlen); 00937 } 00938 if (r) return r; 00939 } 00940 00941 r = compile_tree(qn->target, reg); 00942 } 00943 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ 00944 if (CKN_ON) { 00945 r = add_opcode(reg, OP_STATE_CHECK_PUSH); 00946 if (r) return r; 00947 r = add_state_check_num(reg, ckn); 00948 if (r) return r; 00949 r = add_rel_addr(reg, SIZE_OP_JUMP); 00950 } 00951 else { 00952 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP); 00953 } 00954 00955 if (r) return r; 00956 r = add_opcode_rel_addr(reg, OP_JUMP, tlen); 00957 if (r) return r; 00958 r = compile_tree(qn->target, reg); 00959 } 00960 else { 00961 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg); 00962 if (CKN_ON) { 00963 if (r) return r; 00964 r = add_opcode(reg, OP_STATE_CHECK); 00965 if (r) return r; 00966 r = add_state_check_num(reg, ckn); 00967 } 00968 } 00969 return r; 00970 } 00971 00972 #else /* USE_COMBINATION_EXPLOSION_CHECK */ 00973 00974 static int 00975 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) 00976 { 00977 int len, mod_tlen; 00978 int infinite = IS_REPEAT_INFINITE(qn->upper); 00979 int empty_info = qn->target_empty_info; 00980 int tlen = compile_length_tree(qn->target, reg); 00981 00982 if (tlen < 0) return tlen; 00983 00984 /* anychar repeat */ 00985 if (NTYPE(qn->target) == NT_CANY) { 00986 if (qn->greedy && infinite) { 00987 if (IS_NOT_NULL(qn->next_head_exact)) 00988 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower; 00989 else 00990 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower; 00991 } 00992 } 00993 00994 if (empty_info != 0) 00995 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); 00996 else 00997 mod_tlen = tlen; 00998 00999 if (infinite && 01000 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { 01001 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { 01002 len = SIZE_OP_JUMP; 01003 } 01004 else { 01005 len = tlen * qn->lower; 01006 } 01007 01008 if (qn->greedy) { 01009 if (IS_NOT_NULL(qn->head_exact)) 01010 len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP; 01011 else if (IS_NOT_NULL(qn->next_head_exact)) 01012 len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP; 01013 else 01014 len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP; 01015 } 01016 else 01017 len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH; 01018 } 01019 else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */ 01020 len = SIZE_OP_JUMP + tlen; 01021 } 01022 else if (!infinite && qn->greedy && 01023 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper 01024 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { 01025 len = tlen * qn->lower; 01026 len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower); 01027 } 01028 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ 01029 len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen; 01030 } 01031 else { 01032 len = SIZE_OP_REPEAT_INC 01033 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM; 01034 } 01035 01036 return len; 01037 } 01038 01039 static int 01040 compile_quantifier_node(QtfrNode* qn, regex_t* reg) 01041 { 01042 int i, r, mod_tlen; 01043 int infinite = IS_REPEAT_INFINITE(qn->upper); 01044 int empty_info = qn->target_empty_info; 01045 int tlen = compile_length_tree(qn->target, reg); 01046 01047 if (tlen < 0) return tlen; 01048 01049 if (is_anychar_star_quantifier(qn)) { 01050 r = compile_tree_n_times(qn->target, qn->lower, reg); 01051 if (r) return r; 01052 if (IS_NOT_NULL(qn->next_head_exact)) { 01053 if (IS_MULTILINE(reg->options)) 01054 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); 01055 else 01056 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); 01057 if (r) return r; 01058 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); 01059 } 01060 else { 01061 if (IS_MULTILINE(reg->options)) 01062 return add_opcode(reg, OP_ANYCHAR_ML_STAR); 01063 else 01064 return add_opcode(reg, OP_ANYCHAR_STAR); 01065 } 01066 } 01067 01068 if (empty_info != 0) 01069 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); 01070 else 01071 mod_tlen = tlen; 01072 01073 if (infinite && 01074 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { 01075 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { 01076 if (qn->greedy) { 01077 if (IS_NOT_NULL(qn->head_exact)) 01078 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1); 01079 else if (IS_NOT_NULL(qn->next_head_exact)) 01080 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT); 01081 else 01082 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH); 01083 } 01084 else { 01085 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP); 01086 } 01087 if (r) return r; 01088 } 01089 else { 01090 r = compile_tree_n_times(qn->target, qn->lower, reg); 01091 if (r) return r; 01092 } 01093 01094 if (qn->greedy) { 01095 if (IS_NOT_NULL(qn->head_exact)) { 01096 r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1, 01097 mod_tlen + SIZE_OP_JUMP); 01098 if (r) return r; 01099 add_bytes(reg, NSTR(qn->head_exact)->s, 1); 01100 r = compile_tree_empty_check(qn->target, reg, empty_info); 01101 if (r) return r; 01102 r = add_opcode_rel_addr(reg, OP_JUMP, 01103 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1)); 01104 } 01105 else if (IS_NOT_NULL(qn->next_head_exact)) { 01106 r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT, 01107 mod_tlen + SIZE_OP_JUMP); 01108 if (r) return r; 01109 add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); 01110 r = compile_tree_empty_check(qn->target, reg, empty_info); 01111 if (r) return r; 01112 r = add_opcode_rel_addr(reg, OP_JUMP, 01113 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT)); 01114 } 01115 else { 01116 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP); 01117 if (r) return r; 01118 r = compile_tree_empty_check(qn->target, reg, empty_info); 01119 if (r) return r; 01120 r = add_opcode_rel_addr(reg, OP_JUMP, 01121 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH)); 01122 } 01123 } 01124 else { 01125 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen); 01126 if (r) return r; 01127 r = compile_tree_empty_check(qn->target, reg, empty_info); 01128 if (r) return r; 01129 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH)); 01130 } 01131 } 01132 else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */ 01133 r = add_opcode_rel_addr(reg, OP_JUMP, tlen); 01134 if (r) return r; 01135 r = compile_tree(qn->target, reg); 01136 } 01137 else if (!infinite && qn->greedy && 01138 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper 01139 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { 01140 int n = qn->upper - qn->lower; 01141 01142 r = compile_tree_n_times(qn->target, qn->lower, reg); 01143 if (r) return r; 01144 01145 for (i = 0; i < n; i++) { 01146 r = add_opcode_rel_addr(reg, OP_PUSH, 01147 (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH); 01148 if (r) return r; 01149 r = compile_tree(qn->target, reg); 01150 if (r) return r; 01151 } 01152 } 01153 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ 01154 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP); 01155 if (r) return r; 01156 r = add_opcode_rel_addr(reg, OP_JUMP, tlen); 01157 if (r) return r; 01158 r = compile_tree(qn->target, reg); 01159 } 01160 else { 01161 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg); 01162 } 01163 return r; 01164 } 01165 #endif /* USE_COMBINATION_EXPLOSION_CHECK */ 01166 01167 static int 01168 compile_length_option_node(EncloseNode* node, regex_t* reg) 01169 { 01170 int tlen; 01171 OnigOptionType prev = reg->options; 01172 01173 reg->options = node->option; 01174 tlen = compile_length_tree(node->target, reg); 01175 reg->options = prev; 01176 01177 if (tlen < 0) return tlen; 01178 01179 if (IS_DYNAMIC_OPTION(prev ^ node->option)) { 01180 return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL 01181 + tlen + SIZE_OP_SET_OPTION; 01182 } 01183 else 01184 return tlen; 01185 } 01186 01187 static int 01188 compile_option_node(EncloseNode* node, regex_t* reg) 01189 { 01190 int r; 01191 OnigOptionType prev = reg->options; 01192 01193 if (IS_DYNAMIC_OPTION(prev ^ node->option)) { 01194 r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option); 01195 if (r) return r; 01196 r = add_opcode_option(reg, OP_SET_OPTION, prev); 01197 if (r) return r; 01198 r = add_opcode(reg, OP_FAIL); 01199 if (r) return r; 01200 } 01201 01202 reg->options = node->option; 01203 r = compile_tree(node->target, reg); 01204 reg->options = prev; 01205 01206 if (IS_DYNAMIC_OPTION(prev ^ node->option)) { 01207 if (r) return r; 01208 r = add_opcode_option(reg, OP_SET_OPTION, prev); 01209 } 01210 return r; 01211 } 01212 01213 static int 01214 compile_length_enclose_node(EncloseNode* node, regex_t* reg) 01215 { 01216 int len; 01217 int tlen; 01218 01219 if (node->type == ENCLOSE_OPTION) 01220 return compile_length_option_node(node, reg); 01221 01222 if (node->target) { 01223 tlen = compile_length_tree(node->target, reg); 01224 if (tlen < 0) return tlen; 01225 } 01226 else 01227 tlen = 0; 01228 01229 switch (node->type) { 01230 case ENCLOSE_MEMORY: 01231 #ifdef USE_SUBEXP_CALL 01232 if (IS_ENCLOSE_CALLED(node)) { 01233 len = SIZE_OP_MEMORY_START_PUSH + tlen 01234 + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN; 01235 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) 01236 len += (IS_ENCLOSE_RECURSION(node) 01237 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); 01238 else 01239 len += (IS_ENCLOSE_RECURSION(node) 01240 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); 01241 } 01242 else 01243 #endif 01244 { 01245 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum)) 01246 len = SIZE_OP_MEMORY_START_PUSH; 01247 else 01248 len = SIZE_OP_MEMORY_START; 01249 01250 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum) 01251 ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END); 01252 } 01253 break; 01254 01255 case ENCLOSE_STOP_BACKTRACK: 01256 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) { 01257 QtfrNode* qn = NQTFR(node->target); 01258 tlen = compile_length_tree(qn->target, reg); 01259 if (tlen < 0) return tlen; 01260 01261 len = tlen * qn->lower 01262 + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP; 01263 } 01264 else { 01265 len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT; 01266 } 01267 break; 01268 01269 case ENCLOSE_CONDITION: 01270 len = SIZE_OP_CONDITION; 01271 if (NTYPE(node->target) == NT_ALT) { 01272 Node* x = node->target; 01273 01274 tlen = compile_length_tree(NCAR(x), reg); /* yes-node */ 01275 if (tlen < 0) return tlen; 01276 len += tlen + SIZE_OP_JUMP; 01277 if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG; 01278 x = NCDR(x); 01279 tlen = compile_length_tree(NCAR(x), reg); /* no-node */ 01280 if (tlen < 0) return tlen; 01281 len += tlen; 01282 if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN; 01283 } 01284 else { 01285 return ONIGERR_PARSER_BUG; 01286 } 01287 break; 01288 01289 default: 01290 return ONIGERR_TYPE_BUG; 01291 break; 01292 } 01293 01294 return len; 01295 } 01296 01297 static int get_char_length_tree(Node* node, regex_t* reg, int* len); 01298 01299 static int 01300 compile_enclose_node(EncloseNode* node, regex_t* reg) 01301 { 01302 int r, len; 01303 01304 if (node->type == ENCLOSE_OPTION) 01305 return compile_option_node(node, reg); 01306 01307 switch (node->type) { 01308 case ENCLOSE_MEMORY: 01309 #ifdef USE_SUBEXP_CALL 01310 if (IS_ENCLOSE_CALLED(node)) { 01311 r = add_opcode(reg, OP_CALL); 01312 if (r) return r; 01313 node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP; 01314 node->state |= NST_ADDR_FIXED; 01315 r = add_abs_addr(reg, (int )node->call_addr); 01316 if (r) return r; 01317 len = compile_length_tree(node->target, reg); 01318 len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN); 01319 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) 01320 len += (IS_ENCLOSE_RECURSION(node) 01321 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); 01322 else 01323 len += (IS_ENCLOSE_RECURSION(node) 01324 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); 01325 01326 r = add_opcode_rel_addr(reg, OP_JUMP, len); 01327 if (r) return r; 01328 } 01329 #endif 01330 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum)) 01331 r = add_opcode(reg, OP_MEMORY_START_PUSH); 01332 else 01333 r = add_opcode(reg, OP_MEMORY_START); 01334 if (r) return r; 01335 r = add_mem_num(reg, node->regnum); 01336 if (r) return r; 01337 r = compile_tree(node->target, reg); 01338 if (r) return r; 01339 #ifdef USE_SUBEXP_CALL 01340 if (IS_ENCLOSE_CALLED(node)) { 01341 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) 01342 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node) 01343 ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH)); 01344 else 01345 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node) 01346 ? OP_MEMORY_END_REC : OP_MEMORY_END)); 01347 01348 if (r) return r; 01349 r = add_mem_num(reg, node->regnum); 01350 if (r) return r; 01351 r = add_opcode(reg, OP_RETURN); 01352 } 01353 else 01354 #endif 01355 { 01356 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) 01357 r = add_opcode(reg, OP_MEMORY_END_PUSH); 01358 else 01359 r = add_opcode(reg, OP_MEMORY_END); 01360 if (r) return r; 01361 r = add_mem_num(reg, node->regnum); 01362 } 01363 break; 01364 01365 case ENCLOSE_STOP_BACKTRACK: 01366 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) { 01367 QtfrNode* qn = NQTFR(node->target); 01368 r = compile_tree_n_times(qn->target, qn->lower, reg); 01369 if (r) return r; 01370 01371 len = compile_length_tree(qn->target, reg); 01372 if (len < 0) return len; 01373 01374 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP); 01375 if (r) return r; 01376 r = compile_tree(qn->target, reg); 01377 if (r) return r; 01378 r = add_opcode(reg, OP_POP); 01379 if (r) return r; 01380 r = add_opcode_rel_addr(reg, OP_JUMP, 01381 -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP)); 01382 } 01383 else { 01384 r = add_opcode(reg, OP_PUSH_STOP_BT); 01385 if (r) return r; 01386 r = compile_tree(node->target, reg); 01387 if (r) return r; 01388 r = add_opcode(reg, OP_POP_STOP_BT); 01389 } 01390 break; 01391 01392 case ENCLOSE_CONDITION: 01393 r = add_opcode(reg, OP_CONDITION); 01394 if (r) return r; 01395 r = add_mem_num(reg, node->regnum); 01396 if (r) return r; 01397 01398 if (NTYPE(node->target) == NT_ALT) { 01399 Node* x = node->target; 01400 int len2; 01401 01402 len = compile_length_tree(NCAR(x), reg); /* yes-node */ 01403 if (len < 0) return len; 01404 if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG; 01405 x = NCDR(x); 01406 len2 = compile_length_tree(NCAR(x), reg); /* no-node */ 01407 if (len2 < 0) return len2; 01408 if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN; 01409 01410 x = node->target; 01411 r = add_rel_addr(reg, len + SIZE_OP_JUMP); 01412 if (r) return r; 01413 r = compile_tree(NCAR(x), reg); /* yes-node */ 01414 if (r) return r; 01415 r = add_opcode_rel_addr(reg, OP_JUMP, len2); 01416 if (r) return r; 01417 x = NCDR(x); 01418 r = compile_tree(NCAR(x), reg); /* no-node */ 01419 } 01420 else { 01421 return ONIGERR_PARSER_BUG; 01422 } 01423 break; 01424 01425 default: 01426 return ONIGERR_TYPE_BUG; 01427 break; 01428 } 01429 01430 return r; 01431 } 01432 01433 static int 01434 compile_length_anchor_node(AnchorNode* node, regex_t* reg) 01435 { 01436 int len; 01437 int tlen = 0; 01438 01439 if (node->target) { 01440 tlen = compile_length_tree(node->target, reg); 01441 if (tlen < 0) return tlen; 01442 } 01443 01444 switch (node->type) { 01445 case ANCHOR_PREC_READ: 01446 len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS; 01447 break; 01448 case ANCHOR_PREC_READ_NOT: 01449 len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS; 01450 break; 01451 case ANCHOR_LOOK_BEHIND: 01452 len = SIZE_OP_LOOK_BEHIND + tlen; 01453 break; 01454 case ANCHOR_LOOK_BEHIND_NOT: 01455 len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT; 01456 break; 01457 01458 default: 01459 len = SIZE_OPCODE; 01460 break; 01461 } 01462 01463 return len; 01464 } 01465 01466 static int 01467 compile_anchor_node(AnchorNode* node, regex_t* reg) 01468 { 01469 int r, len; 01470 01471 switch (node->type) { 01472 case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break; 01473 case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break; 01474 case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break; 01475 case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break; 01476 case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break; 01477 case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break; 01478 01479 /* used for implicit anchor optimization: /.*a/ ==> /(?:^|\G).*a/ */ 01480 case ANCHOR_ANYCHAR_STAR: r = add_opcode(reg, OP_BEGIN_POS_OR_LINE); break; 01481 01482 case ANCHOR_WORD_BOUND: 01483 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BOUND); 01484 else r = add_opcode(reg, OP_WORD_BOUND); 01485 break; 01486 case ANCHOR_NOT_WORD_BOUND: 01487 if (node->ascii_range) r = add_opcode(reg, OP_NOT_ASCII_WORD_BOUND); 01488 else r = add_opcode(reg, OP_NOT_WORD_BOUND); 01489 break; 01490 #ifdef USE_WORD_BEGIN_END 01491 case ANCHOR_WORD_BEGIN: 01492 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BEGIN); 01493 else r = add_opcode(reg, OP_WORD_BEGIN); 01494 break; 01495 case ANCHOR_WORD_END: 01496 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_END); 01497 else r = add_opcode(reg, OP_WORD_END); 01498 break; 01499 #endif 01500 case ANCHOR_KEEP: r = add_opcode(reg, OP_KEEP); break; 01501 01502 case ANCHOR_PREC_READ: 01503 r = add_opcode(reg, OP_PUSH_POS); 01504 if (r) return r; 01505 r = compile_tree(node->target, reg); 01506 if (r) return r; 01507 r = add_opcode(reg, OP_POP_POS); 01508 break; 01509 01510 case ANCHOR_PREC_READ_NOT: 01511 len = compile_length_tree(node->target, reg); 01512 if (len < 0) return len; 01513 r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS); 01514 if (r) return r; 01515 r = compile_tree(node->target, reg); 01516 if (r) return r; 01517 r = add_opcode(reg, OP_FAIL_POS); 01518 break; 01519 01520 case ANCHOR_LOOK_BEHIND: 01521 { 01522 int n; 01523 r = add_opcode(reg, OP_LOOK_BEHIND); 01524 if (r) return r; 01525 if (node->char_len < 0) { 01526 r = get_char_length_tree(node->target, reg, &n); 01527 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; 01528 } 01529 else 01530 n = node->char_len; 01531 r = add_length(reg, n); 01532 if (r) return r; 01533 r = compile_tree(node->target, reg); 01534 } 01535 break; 01536 01537 case ANCHOR_LOOK_BEHIND_NOT: 01538 { 01539 int n; 01540 len = compile_length_tree(node->target, reg); 01541 r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT, 01542 len + SIZE_OP_FAIL_LOOK_BEHIND_NOT); 01543 if (r) return r; 01544 if (node->char_len < 0) { 01545 r = get_char_length_tree(node->target, reg, &n); 01546 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; 01547 } 01548 else 01549 n = node->char_len; 01550 r = add_length(reg, n); 01551 if (r) return r; 01552 r = compile_tree(node->target, reg); 01553 if (r) return r; 01554 r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT); 01555 } 01556 break; 01557 01558 default: 01559 return ONIGERR_TYPE_BUG; 01560 break; 01561 } 01562 01563 return r; 01564 } 01565 01566 static int 01567 compile_length_tree(Node* node, regex_t* reg) 01568 { 01569 int len, type, r; 01570 01571 type = NTYPE(node); 01572 switch (type) { 01573 case NT_LIST: 01574 len = 0; 01575 do { 01576 r = compile_length_tree(NCAR(node), reg); 01577 if (r < 0) return r; 01578 len += r; 01579 } while (IS_NOT_NULL(node = NCDR(node))); 01580 r = len; 01581 break; 01582 01583 case NT_ALT: 01584 { 01585 int n; 01586 01587 n = r = 0; 01588 do { 01589 r += compile_length_tree(NCAR(node), reg); 01590 n++; 01591 } while (IS_NOT_NULL(node = NCDR(node))); 01592 r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1); 01593 } 01594 break; 01595 01596 case NT_STR: 01597 if (NSTRING_IS_RAW(node)) 01598 r = compile_length_string_raw_node(NSTR(node), reg); 01599 else 01600 r = compile_length_string_node(node, reg); 01601 break; 01602 01603 case NT_CCLASS: 01604 r = compile_length_cclass_node(NCCLASS(node), reg); 01605 break; 01606 01607 case NT_CTYPE: 01608 case NT_CANY: 01609 r = SIZE_OPCODE; 01610 break; 01611 01612 case NT_BREF: 01613 { 01614 BRefNode* br = NBREF(node); 01615 01616 #ifdef USE_BACKREF_WITH_LEVEL 01617 if (IS_BACKREF_NEST_LEVEL(br)) { 01618 r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH + 01619 SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); 01620 } 01621 else 01622 #endif 01623 if (br->back_num == 1) { 01624 r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2) 01625 ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM)); 01626 } 01627 else { 01628 r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); 01629 } 01630 } 01631 break; 01632 01633 #ifdef USE_SUBEXP_CALL 01634 case NT_CALL: 01635 r = SIZE_OP_CALL; 01636 break; 01637 #endif 01638 01639 case NT_QTFR: 01640 r = compile_length_quantifier_node(NQTFR(node), reg); 01641 break; 01642 01643 case NT_ENCLOSE: 01644 r = compile_length_enclose_node(NENCLOSE(node), reg); 01645 break; 01646 01647 case NT_ANCHOR: 01648 r = compile_length_anchor_node(NANCHOR(node), reg); 01649 break; 01650 01651 default: 01652 return ONIGERR_TYPE_BUG; 01653 break; 01654 } 01655 01656 return r; 01657 } 01658 01659 static int 01660 compile_tree(Node* node, regex_t* reg) 01661 { 01662 int n, type, len, pos, r = 0; 01663 01664 type = NTYPE(node); 01665 switch (type) { 01666 case NT_LIST: 01667 do { 01668 r = compile_tree(NCAR(node), reg); 01669 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 01670 break; 01671 01672 case NT_ALT: 01673 { 01674 Node* x = node; 01675 len = 0; 01676 do { 01677 len += compile_length_tree(NCAR(x), reg); 01678 if (NCDR(x) != NULL) { 01679 len += SIZE_OP_PUSH + SIZE_OP_JUMP; 01680 } 01681 } while (IS_NOT_NULL(x = NCDR(x))); 01682 pos = reg->used + len; /* goal position */ 01683 01684 do { 01685 len = compile_length_tree(NCAR(node), reg); 01686 if (IS_NOT_NULL(NCDR(node))) { 01687 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP); 01688 if (r) break; 01689 } 01690 r = compile_tree(NCAR(node), reg); 01691 if (r) break; 01692 if (IS_NOT_NULL(NCDR(node))) { 01693 len = pos - (reg->used + SIZE_OP_JUMP); 01694 r = add_opcode_rel_addr(reg, OP_JUMP, len); 01695 if (r) break; 01696 } 01697 } while (IS_NOT_NULL(node = NCDR(node))); 01698 } 01699 break; 01700 01701 case NT_STR: 01702 if (NSTRING_IS_RAW(node)) 01703 r = compile_string_raw_node(NSTR(node), reg); 01704 else 01705 r = compile_string_node(node, reg); 01706 break; 01707 01708 case NT_CCLASS: 01709 r = compile_cclass_node(NCCLASS(node), reg); 01710 break; 01711 01712 case NT_CTYPE: 01713 { 01714 int op; 01715 01716 switch (NCTYPE(node)->ctype) { 01717 case ONIGENC_CTYPE_WORD: 01718 if (NCTYPE(node)->ascii_range != 0) { 01719 if (NCTYPE(node)->not != 0) op = OP_NOT_ASCII_WORD; 01720 else op = OP_ASCII_WORD; 01721 } 01722 else { 01723 if (NCTYPE(node)->not != 0) op = OP_NOT_WORD; 01724 else op = OP_WORD; 01725 } 01726 break; 01727 default: 01728 return ONIGERR_TYPE_BUG; 01729 break; 01730 } 01731 r = add_opcode(reg, op); 01732 } 01733 break; 01734 01735 case NT_CANY: 01736 if (IS_MULTILINE(reg->options)) 01737 r = add_opcode(reg, OP_ANYCHAR_ML); 01738 else 01739 r = add_opcode(reg, OP_ANYCHAR); 01740 break; 01741 01742 case NT_BREF: 01743 { 01744 BRefNode* br = NBREF(node); 01745 01746 #ifdef USE_BACKREF_WITH_LEVEL 01747 if (IS_BACKREF_NEST_LEVEL(br)) { 01748 r = add_opcode(reg, OP_BACKREF_WITH_LEVEL); 01749 if (r) return r; 01750 r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE)); 01751 if (r) return r; 01752 r = add_length(reg, br->nest_level); 01753 if (r) return r; 01754 01755 goto add_bacref_mems; 01756 } 01757 else 01758 #endif 01759 if (br->back_num == 1) { 01760 n = br->back_static[0]; 01761 if (IS_IGNORECASE(reg->options)) { 01762 r = add_opcode(reg, OP_BACKREFN_IC); 01763 if (r) return r; 01764 r = add_mem_num(reg, n); 01765 } 01766 else { 01767 switch (n) { 01768 case 1: r = add_opcode(reg, OP_BACKREF1); break; 01769 case 2: r = add_opcode(reg, OP_BACKREF2); break; 01770 default: 01771 r = add_opcode(reg, OP_BACKREFN); 01772 if (r) return r; 01773 r = add_mem_num(reg, n); 01774 break; 01775 } 01776 } 01777 } 01778 else { 01779 int i; 01780 int* p; 01781 01782 if (IS_IGNORECASE(reg->options)) { 01783 r = add_opcode(reg, OP_BACKREF_MULTI_IC); 01784 } 01785 else { 01786 r = add_opcode(reg, OP_BACKREF_MULTI); 01787 } 01788 if (r) return r; 01789 01790 #ifdef USE_BACKREF_WITH_LEVEL 01791 add_bacref_mems: 01792 #endif 01793 r = add_length(reg, br->back_num); 01794 if (r) return r; 01795 p = BACKREFS_P(br); 01796 for (i = br->back_num - 1; i >= 0; i--) { 01797 r = add_mem_num(reg, p[i]); 01798 if (r) return r; 01799 } 01800 } 01801 } 01802 break; 01803 01804 #ifdef USE_SUBEXP_CALL 01805 case NT_CALL: 01806 r = compile_call(NCALL(node), reg); 01807 break; 01808 #endif 01809 01810 case NT_QTFR: 01811 r = compile_quantifier_node(NQTFR(node), reg); 01812 break; 01813 01814 case NT_ENCLOSE: 01815 r = compile_enclose_node(NENCLOSE(node), reg); 01816 break; 01817 01818 case NT_ANCHOR: 01819 r = compile_anchor_node(NANCHOR(node), reg); 01820 break; 01821 01822 default: 01823 #ifdef ONIG_DEBUG 01824 fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node)); 01825 #endif 01826 break; 01827 } 01828 01829 return r; 01830 } 01831 01832 #ifdef USE_NAMED_GROUP 01833 01834 static int 01835 noname_disable_map(Node** plink, GroupNumRemap* map, int* counter) 01836 { 01837 int r = 0; 01838 Node* node = *plink; 01839 01840 switch (NTYPE(node)) { 01841 case NT_LIST: 01842 case NT_ALT: 01843 do { 01844 r = noname_disable_map(&(NCAR(node)), map, counter); 01845 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 01846 break; 01847 01848 case NT_QTFR: 01849 { 01850 Node** ptarget = &(NQTFR(node)->target); 01851 Node* old = *ptarget; 01852 r = noname_disable_map(ptarget, map, counter); 01853 if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) { 01854 onig_reduce_nested_quantifier(node, *ptarget); 01855 } 01856 } 01857 break; 01858 01859 case NT_ENCLOSE: 01860 { 01861 EncloseNode* en = NENCLOSE(node); 01862 if (en->type == ENCLOSE_MEMORY) { 01863 if (IS_ENCLOSE_NAMED_GROUP(en)) { 01864 (*counter)++; 01865 map[en->regnum].new_val = *counter; 01866 en->regnum = *counter; 01867 r = noname_disable_map(&(en->target), map, counter); 01868 } 01869 else { 01870 *plink = en->target; 01871 en->target = NULL_NODE; 01872 onig_node_free(node); 01873 r = noname_disable_map(plink, map, counter); 01874 } 01875 } 01876 else 01877 r = noname_disable_map(&(en->target), map, counter); 01878 } 01879 break; 01880 01881 case NT_ANCHOR: 01882 { 01883 AnchorNode* an = NANCHOR(node); 01884 switch (an->type) { 01885 case ANCHOR_PREC_READ: 01886 case ANCHOR_PREC_READ_NOT: 01887 case ANCHOR_LOOK_BEHIND: 01888 case ANCHOR_LOOK_BEHIND_NOT: 01889 r = noname_disable_map(&(an->target), map, counter); 01890 break; 01891 } 01892 } 01893 break; 01894 01895 default: 01896 break; 01897 } 01898 01899 return r; 01900 } 01901 01902 static int 01903 renumber_node_backref(Node* node, GroupNumRemap* map) 01904 { 01905 int i, pos, n, old_num; 01906 int *backs; 01907 BRefNode* bn = NBREF(node); 01908 01909 if (! IS_BACKREF_NAME_REF(bn)) 01910 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; 01911 01912 old_num = bn->back_num; 01913 if (IS_NULL(bn->back_dynamic)) 01914 backs = bn->back_static; 01915 else 01916 backs = bn->back_dynamic; 01917 01918 for (i = 0, pos = 0; i < old_num; i++) { 01919 n = map[backs[i]].new_val; 01920 if (n > 0) { 01921 backs[pos] = n; 01922 pos++; 01923 } 01924 } 01925 01926 bn->back_num = pos; 01927 return 0; 01928 } 01929 01930 static int 01931 renumber_by_map(Node* node, GroupNumRemap* map) 01932 { 01933 int r = 0; 01934 01935 switch (NTYPE(node)) { 01936 case NT_LIST: 01937 case NT_ALT: 01938 do { 01939 r = renumber_by_map(NCAR(node), map); 01940 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 01941 break; 01942 case NT_QTFR: 01943 r = renumber_by_map(NQTFR(node)->target, map); 01944 break; 01945 case NT_ENCLOSE: 01946 r = renumber_by_map(NENCLOSE(node)->target, map); 01947 break; 01948 01949 case NT_BREF: 01950 r = renumber_node_backref(node, map); 01951 break; 01952 01953 case NT_ANCHOR: 01954 { 01955 AnchorNode* an = NANCHOR(node); 01956 switch (an->type) { 01957 case ANCHOR_PREC_READ: 01958 case ANCHOR_PREC_READ_NOT: 01959 case ANCHOR_LOOK_BEHIND: 01960 case ANCHOR_LOOK_BEHIND_NOT: 01961 r = renumber_by_map(an->target, map); 01962 break; 01963 } 01964 } 01965 break; 01966 01967 default: 01968 break; 01969 } 01970 01971 return r; 01972 } 01973 01974 static int 01975 numbered_ref_check(Node* node) 01976 { 01977 int r = 0; 01978 01979 switch (NTYPE(node)) { 01980 case NT_LIST: 01981 case NT_ALT: 01982 do { 01983 r = numbered_ref_check(NCAR(node)); 01984 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 01985 break; 01986 case NT_QTFR: 01987 r = numbered_ref_check(NQTFR(node)->target); 01988 break; 01989 case NT_ENCLOSE: 01990 r = numbered_ref_check(NENCLOSE(node)->target); 01991 break; 01992 01993 case NT_BREF: 01994 if (! IS_BACKREF_NAME_REF(NBREF(node))) 01995 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; 01996 break; 01997 01998 default: 01999 break; 02000 } 02001 02002 return r; 02003 } 02004 02005 static int 02006 disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env) 02007 { 02008 int r, i, pos, counter; 02009 BitStatusType loc; 02010 GroupNumRemap* map; 02011 02012 map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1)); 02013 CHECK_NULL_RETURN_MEMERR(map); 02014 for (i = 1; i <= env->num_mem; i++) { 02015 map[i].new_val = 0; 02016 } 02017 counter = 0; 02018 r = noname_disable_map(root, map, &counter); 02019 if (r != 0) return r; 02020 02021 r = renumber_by_map(*root, map); 02022 if (r != 0) return r; 02023 02024 for (i = 1, pos = 1; i <= env->num_mem; i++) { 02025 if (map[i].new_val > 0) { 02026 SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i]; 02027 pos++; 02028 } 02029 } 02030 02031 loc = env->capture_history; 02032 BIT_STATUS_CLEAR(env->capture_history); 02033 for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) { 02034 if (BIT_STATUS_AT(loc, i)) { 02035 BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val); 02036 } 02037 } 02038 02039 env->num_mem = env->num_named; 02040 reg->num_mem = env->num_named; 02041 02042 return onig_renumber_name_table(reg, map); 02043 } 02044 #endif /* USE_NAMED_GROUP */ 02045 02046 #ifdef USE_SUBEXP_CALL 02047 static int 02048 unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg) 02049 { 02050 int i, offset; 02051 EncloseNode* en; 02052 AbsAddrType addr; 02053 02054 for (i = 0; i < uslist->num; i++) { 02055 en = NENCLOSE(uslist->us[i].target); 02056 if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG; 02057 addr = en->call_addr; 02058 offset = uslist->us[i].offset; 02059 02060 BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR); 02061 } 02062 return 0; 02063 } 02064 #endif 02065 02066 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT 02067 static int 02068 quantifiers_memory_node_info(Node* node) 02069 { 02070 int r = 0; 02071 02072 switch (NTYPE(node)) { 02073 case NT_LIST: 02074 case NT_ALT: 02075 { 02076 int v; 02077 do { 02078 v = quantifiers_memory_node_info(NCAR(node)); 02079 if (v > r) r = v; 02080 } while (v >= 0 && IS_NOT_NULL(node = NCDR(node))); 02081 } 02082 break; 02083 02084 #ifdef USE_SUBEXP_CALL 02085 case NT_CALL: 02086 if (IS_CALL_RECURSION(NCALL(node))) { 02087 return NQ_TARGET_IS_EMPTY_REC; /* tiny version */ 02088 } 02089 else 02090 r = quantifiers_memory_node_info(NCALL(node)->target); 02091 break; 02092 #endif 02093 02094 case NT_QTFR: 02095 { 02096 QtfrNode* qn = NQTFR(node); 02097 if (qn->upper != 0) { 02098 r = quantifiers_memory_node_info(qn->target); 02099 } 02100 } 02101 break; 02102 02103 case NT_ENCLOSE: 02104 { 02105 EncloseNode* en = NENCLOSE(node); 02106 switch (en->type) { 02107 case ENCLOSE_MEMORY: 02108 return NQ_TARGET_IS_EMPTY_MEM; 02109 break; 02110 02111 case ENCLOSE_OPTION: 02112 case ENCLOSE_STOP_BACKTRACK: 02113 case ENCLOSE_CONDITION: 02114 r = quantifiers_memory_node_info(en->target); 02115 break; 02116 default: 02117 break; 02118 } 02119 } 02120 break; 02121 02122 case NT_BREF: 02123 case NT_STR: 02124 case NT_CTYPE: 02125 case NT_CCLASS: 02126 case NT_CANY: 02127 case NT_ANCHOR: 02128 default: 02129 break; 02130 } 02131 02132 return r; 02133 } 02134 #endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */ 02135 02136 static int 02137 get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env) 02138 { 02139 OnigDistance tmin; 02140 int r = 0; 02141 02142 *min = 0; 02143 switch (NTYPE(node)) { 02144 case NT_BREF: 02145 { 02146 int i; 02147 int* backs; 02148 Node** nodes = SCANENV_MEM_NODES(env); 02149 BRefNode* br = NBREF(node); 02150 if (br->state & NST_RECURSION) break; 02151 02152 backs = BACKREFS_P(br); 02153 if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF; 02154 r = get_min_match_length(nodes[backs[0]], min, env); 02155 if (r != 0) break; 02156 for (i = 1; i < br->back_num; i++) { 02157 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; 02158 r = get_min_match_length(nodes[backs[i]], &tmin, env); 02159 if (r != 0) break; 02160 if (*min > tmin) *min = tmin; 02161 } 02162 } 02163 break; 02164 02165 #ifdef USE_SUBEXP_CALL 02166 case NT_CALL: 02167 if (IS_CALL_RECURSION(NCALL(node))) { 02168 EncloseNode* en = NENCLOSE(NCALL(node)->target); 02169 if (IS_ENCLOSE_MIN_FIXED(en)) 02170 *min = en->min_len; 02171 } 02172 else 02173 r = get_min_match_length(NCALL(node)->target, min, env); 02174 break; 02175 #endif 02176 02177 case NT_LIST: 02178 do { 02179 r = get_min_match_length(NCAR(node), &tmin, env); 02180 if (r == 0) *min += tmin; 02181 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 02182 break; 02183 02184 case NT_ALT: 02185 { 02186 Node *x, *y; 02187 y = node; 02188 do { 02189 x = NCAR(y); 02190 r = get_min_match_length(x, &tmin, env); 02191 if (r != 0) break; 02192 if (y == node) *min = tmin; 02193 else if (*min > tmin) *min = tmin; 02194 } while (r == 0 && IS_NOT_NULL(y = NCDR(y))); 02195 } 02196 break; 02197 02198 case NT_STR: 02199 { 02200 StrNode* sn = NSTR(node); 02201 *min = sn->end - sn->s; 02202 } 02203 break; 02204 02205 case NT_CTYPE: 02206 *min = 1; 02207 break; 02208 02209 case NT_CCLASS: 02210 case NT_CANY: 02211 *min = 1; 02212 break; 02213 02214 case NT_QTFR: 02215 { 02216 QtfrNode* qn = NQTFR(node); 02217 02218 if (qn->lower > 0) { 02219 r = get_min_match_length(qn->target, min, env); 02220 if (r == 0) 02221 *min = distance_multiply(*min, qn->lower); 02222 } 02223 } 02224 break; 02225 02226 case NT_ENCLOSE: 02227 { 02228 EncloseNode* en = NENCLOSE(node); 02229 switch (en->type) { 02230 case ENCLOSE_MEMORY: 02231 #ifdef USE_SUBEXP_CALL 02232 if (IS_ENCLOSE_MIN_FIXED(en)) 02233 *min = en->min_len; 02234 else { 02235 r = get_min_match_length(en->target, min, env); 02236 if (r == 0) { 02237 en->min_len = *min; 02238 SET_ENCLOSE_STATUS(node, NST_MIN_FIXED); 02239 } 02240 } 02241 break; 02242 #endif 02243 case ENCLOSE_OPTION: 02244 case ENCLOSE_STOP_BACKTRACK: 02245 case ENCLOSE_CONDITION: 02246 r = get_min_match_length(en->target, min, env); 02247 break; 02248 } 02249 } 02250 break; 02251 02252 case NT_ANCHOR: 02253 default: 02254 break; 02255 } 02256 02257 return r; 02258 } 02259 02260 static int 02261 get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env) 02262 { 02263 OnigDistance tmax; 02264 int r = 0; 02265 02266 *max = 0; 02267 switch (NTYPE(node)) { 02268 case NT_LIST: 02269 do { 02270 r = get_max_match_length(NCAR(node), &tmax, env); 02271 if (r == 0) 02272 *max = distance_add(*max, tmax); 02273 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 02274 break; 02275 02276 case NT_ALT: 02277 do { 02278 r = get_max_match_length(NCAR(node), &tmax, env); 02279 if (r == 0 && *max < tmax) *max = tmax; 02280 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 02281 break; 02282 02283 case NT_STR: 02284 { 02285 StrNode* sn = NSTR(node); 02286 *max = sn->end - sn->s; 02287 } 02288 break; 02289 02290 case NT_CTYPE: 02291 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc); 02292 break; 02293 02294 case NT_CCLASS: 02295 case NT_CANY: 02296 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc); 02297 break; 02298 02299 case NT_BREF: 02300 { 02301 int i; 02302 int* backs; 02303 Node** nodes = SCANENV_MEM_NODES(env); 02304 BRefNode* br = NBREF(node); 02305 if (br->state & NST_RECURSION) { 02306 *max = ONIG_INFINITE_DISTANCE; 02307 break; 02308 } 02309 backs = BACKREFS_P(br); 02310 for (i = 0; i < br->back_num; i++) { 02311 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; 02312 r = get_max_match_length(nodes[backs[i]], &tmax, env); 02313 if (r != 0) break; 02314 if (*max < tmax) *max = tmax; 02315 } 02316 } 02317 break; 02318 02319 #ifdef USE_SUBEXP_CALL 02320 case NT_CALL: 02321 if (! IS_CALL_RECURSION(NCALL(node))) 02322 r = get_max_match_length(NCALL(node)->target, max, env); 02323 else 02324 *max = ONIG_INFINITE_DISTANCE; 02325 break; 02326 #endif 02327 02328 case NT_QTFR: 02329 { 02330 QtfrNode* qn = NQTFR(node); 02331 02332 if (qn->upper != 0) { 02333 r = get_max_match_length(qn->target, max, env); 02334 if (r == 0 && *max != 0) { 02335 if (! IS_REPEAT_INFINITE(qn->upper)) 02336 *max = distance_multiply(*max, qn->upper); 02337 else 02338 *max = ONIG_INFINITE_DISTANCE; 02339 } 02340 } 02341 } 02342 break; 02343 02344 case NT_ENCLOSE: 02345 { 02346 EncloseNode* en = NENCLOSE(node); 02347 switch (en->type) { 02348 case ENCLOSE_MEMORY: 02349 #ifdef USE_SUBEXP_CALL 02350 if (IS_ENCLOSE_MAX_FIXED(en)) 02351 *max = en->max_len; 02352 else { 02353 r = get_max_match_length(en->target, max, env); 02354 if (r == 0) { 02355 en->max_len = *max; 02356 SET_ENCLOSE_STATUS(node, NST_MAX_FIXED); 02357 } 02358 } 02359 break; 02360 #endif 02361 case ENCLOSE_OPTION: 02362 case ENCLOSE_STOP_BACKTRACK: 02363 case ENCLOSE_CONDITION: 02364 r = get_max_match_length(en->target, max, env); 02365 break; 02366 } 02367 } 02368 break; 02369 02370 case NT_ANCHOR: 02371 default: 02372 break; 02373 } 02374 02375 return r; 02376 } 02377 02378 #define GET_CHAR_LEN_VARLEN -1 02379 #define GET_CHAR_LEN_TOP_ALT_VARLEN -2 02380 02381 /* fixed size pattern node only */ 02382 static int 02383 get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) 02384 { 02385 int tlen; 02386 int r = 0; 02387 02388 level++; 02389 *len = 0; 02390 switch (NTYPE(node)) { 02391 case NT_LIST: 02392 do { 02393 r = get_char_length_tree1(NCAR(node), reg, &tlen, level); 02394 if (r == 0) 02395 *len = (int )distance_add(*len, tlen); 02396 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 02397 break; 02398 02399 case NT_ALT: 02400 { 02401 int tlen2; 02402 int varlen = 0; 02403 02404 r = get_char_length_tree1(NCAR(node), reg, &tlen, level); 02405 while (r == 0 && IS_NOT_NULL(node = NCDR(node))) { 02406 r = get_char_length_tree1(NCAR(node), reg, &tlen2, level); 02407 if (r == 0) { 02408 if (tlen != tlen2) 02409 varlen = 1; 02410 } 02411 } 02412 if (r == 0) { 02413 if (varlen != 0) { 02414 if (level == 1) 02415 r = GET_CHAR_LEN_TOP_ALT_VARLEN; 02416 else 02417 r = GET_CHAR_LEN_VARLEN; 02418 } 02419 else 02420 *len = tlen; 02421 } 02422 } 02423 break; 02424 02425 case NT_STR: 02426 { 02427 StrNode* sn = NSTR(node); 02428 UChar *s = sn->s; 02429 while (s < sn->end) { 02430 s += enclen(reg->enc, s, sn->end); 02431 (*len)++; 02432 } 02433 } 02434 break; 02435 02436 case NT_QTFR: 02437 { 02438 QtfrNode* qn = NQTFR(node); 02439 if (qn->lower == qn->upper) { 02440 r = get_char_length_tree1(qn->target, reg, &tlen, level); 02441 if (r == 0) 02442 *len = (int )distance_multiply(tlen, qn->lower); 02443 } 02444 else 02445 r = GET_CHAR_LEN_VARLEN; 02446 } 02447 break; 02448 02449 #ifdef USE_SUBEXP_CALL 02450 case NT_CALL: 02451 if (! IS_CALL_RECURSION(NCALL(node))) 02452 r = get_char_length_tree1(NCALL(node)->target, reg, len, level); 02453 else 02454 r = GET_CHAR_LEN_VARLEN; 02455 break; 02456 #endif 02457 02458 case NT_CTYPE: 02459 *len = 1; 02460 break; 02461 02462 case NT_CCLASS: 02463 case NT_CANY: 02464 *len = 1; 02465 break; 02466 02467 case NT_ENCLOSE: 02468 { 02469 EncloseNode* en = NENCLOSE(node); 02470 switch (en->type) { 02471 case ENCLOSE_MEMORY: 02472 #ifdef USE_SUBEXP_CALL 02473 if (IS_ENCLOSE_CLEN_FIXED(en)) 02474 *len = en->char_len; 02475 else { 02476 r = get_char_length_tree1(en->target, reg, len, level); 02477 if (r == 0) { 02478 en->char_len = *len; 02479 SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED); 02480 } 02481 } 02482 break; 02483 #endif 02484 case ENCLOSE_OPTION: 02485 case ENCLOSE_STOP_BACKTRACK: 02486 case ENCLOSE_CONDITION: 02487 r = get_char_length_tree1(en->target, reg, len, level); 02488 break; 02489 default: 02490 break; 02491 } 02492 } 02493 break; 02494 02495 case NT_ANCHOR: 02496 break; 02497 02498 default: 02499 r = GET_CHAR_LEN_VARLEN; 02500 break; 02501 } 02502 02503 return r; 02504 } 02505 02506 static int 02507 get_char_length_tree(Node* node, regex_t* reg, int* len) 02508 { 02509 return get_char_length_tree1(node, reg, len, 0); 02510 } 02511 02512 /* x is not included y ==> 1 : 0 */ 02513 static int 02514 is_not_included(Node* x, Node* y, regex_t* reg) 02515 { 02516 int i; 02517 OnigDistance len; 02518 OnigCodePoint code; 02519 UChar *p; 02520 int ytype; 02521 02522 retry: 02523 ytype = NTYPE(y); 02524 switch (NTYPE(x)) { 02525 case NT_CTYPE: 02526 { 02527 switch (ytype) { 02528 case NT_CTYPE: 02529 if (NCTYPE(y)->ctype == NCTYPE(x)->ctype && 02530 NCTYPE(y)->not != NCTYPE(x)->not && 02531 NCTYPE(y)->ascii_range == NCTYPE(x)->ascii_range) 02532 return 1; 02533 else 02534 return 0; 02535 break; 02536 02537 case NT_CCLASS: 02538 swap: 02539 { 02540 Node* tmp; 02541 tmp = x; x = y; y = tmp; 02542 goto retry; 02543 } 02544 break; 02545 02546 case NT_STR: 02547 goto swap; 02548 break; 02549 02550 default: 02551 break; 02552 } 02553 } 02554 break; 02555 02556 case NT_CCLASS: 02557 { 02558 CClassNode* xc = NCCLASS(x); 02559 switch (ytype) { 02560 case NT_CTYPE: 02561 switch (NCTYPE(y)->ctype) { 02562 case ONIGENC_CTYPE_WORD: 02563 if (NCTYPE(y)->not == 0) { 02564 if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) { 02565 for (i = 0; i < SINGLE_BYTE_SIZE; i++) { 02566 if (BITSET_AT(xc->bs, i)) { 02567 if (NCTYPE(y)->ascii_range) { 02568 if (IS_CODE_SB_WORD(reg->enc, i)) return 0; 02569 } 02570 else { 02571 if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0; 02572 } 02573 } 02574 } 02575 return 1; 02576 } 02577 return 0; 02578 } 02579 else { 02580 for (i = 0; i < SINGLE_BYTE_SIZE; i++) { 02581 int is_word; 02582 if (NCTYPE(y)->ascii_range) 02583 is_word = IS_CODE_SB_WORD(reg->enc, i); 02584 else 02585 is_word = ONIGENC_IS_CODE_WORD(reg->enc, i); 02586 if (! is_word) { 02587 if (!IS_NCCLASS_NOT(xc)) { 02588 if (BITSET_AT(xc->bs, i)) 02589 return 0; 02590 } 02591 else { 02592 if (! BITSET_AT(xc->bs, i)) 02593 return 0; 02594 } 02595 } 02596 } 02597 return 1; 02598 } 02599 break; 02600 02601 default: 02602 break; 02603 } 02604 break; 02605 02606 case NT_CCLASS: 02607 { 02608 int v; 02609 CClassNode* yc = NCCLASS(y); 02610 02611 for (i = 0; i < SINGLE_BYTE_SIZE; i++) { 02612 v = BITSET_AT(xc->bs, i); 02613 if ((v != 0 && !IS_NCCLASS_NOT(xc)) || 02614 (v == 0 && IS_NCCLASS_NOT(xc))) { 02615 v = BITSET_AT(yc->bs, i); 02616 if ((v != 0 && !IS_NCCLASS_NOT(yc)) || 02617 (v == 0 && IS_NCCLASS_NOT(yc))) 02618 return 0; 02619 } 02620 } 02621 if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) || 02622 (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc))) 02623 return 1; 02624 return 0; 02625 } 02626 break; 02627 02628 case NT_STR: 02629 goto swap; 02630 break; 02631 02632 default: 02633 break; 02634 } 02635 } 02636 break; 02637 02638 case NT_STR: 02639 { 02640 StrNode* xs = NSTR(x); 02641 if (NSTRING_LEN(x) == 0) 02642 break; 02643 02644 switch (ytype) { 02645 case NT_CTYPE: 02646 switch (NCTYPE(y)->ctype) { 02647 case ONIGENC_CTYPE_WORD: 02648 if (NCTYPE(y)->ascii_range) { 02649 if (ONIGENC_IS_MBC_ASCII_WORD(reg->enc, xs->s, xs->end)) 02650 return NCTYPE(y)->not; 02651 else 02652 return !(NCTYPE(y)->not); 02653 } 02654 else { 02655 if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end)) 02656 return NCTYPE(y)->not; 02657 else 02658 return !(NCTYPE(y)->not); 02659 } 02660 break; 02661 default: 02662 break; 02663 } 02664 break; 02665 02666 case NT_CCLASS: 02667 { 02668 CClassNode* cc = NCCLASS(y); 02669 02670 code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s, 02671 xs->s + ONIGENC_MBC_MAXLEN(reg->enc)); 02672 return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1); 02673 } 02674 break; 02675 02676 case NT_STR: 02677 { 02678 UChar *q; 02679 StrNode* ys = NSTR(y); 02680 len = NSTRING_LEN(x); 02681 if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y); 02682 if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) { 02683 /* tiny version */ 02684 return 0; 02685 } 02686 else { 02687 for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i < len; i++, p++, q++) { 02688 if (*p != *q) return 1; 02689 } 02690 } 02691 } 02692 break; 02693 02694 default: 02695 break; 02696 } 02697 } 02698 break; 02699 02700 default: 02701 break; 02702 } 02703 02704 return 0; 02705 } 02706 02707 static Node* 02708 get_head_value_node(Node* node, int exact, regex_t* reg) 02709 { 02710 Node* n = NULL_NODE; 02711 02712 switch (NTYPE(node)) { 02713 case NT_BREF: 02714 case NT_ALT: 02715 case NT_CANY: 02716 #ifdef USE_SUBEXP_CALL 02717 case NT_CALL: 02718 #endif 02719 break; 02720 02721 case NT_CTYPE: 02722 case NT_CCLASS: 02723 if (exact == 0) { 02724 n = node; 02725 } 02726 break; 02727 02728 case NT_LIST: 02729 n = get_head_value_node(NCAR(node), exact, reg); 02730 break; 02731 02732 case NT_STR: 02733 { 02734 StrNode* sn = NSTR(node); 02735 02736 if (sn->end <= sn->s) 02737 break; 02738 02739 if (exact != 0 && 02740 !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) { 02741 } 02742 else { 02743 n = node; 02744 } 02745 } 02746 break; 02747 02748 case NT_QTFR: 02749 { 02750 QtfrNode* qn = NQTFR(node); 02751 if (qn->lower > 0) { 02752 if (IS_NOT_NULL(qn->head_exact)) 02753 n = qn->head_exact; 02754 else 02755 n = get_head_value_node(qn->target, exact, reg); 02756 } 02757 } 02758 break; 02759 02760 case NT_ENCLOSE: 02761 { 02762 EncloseNode* en = NENCLOSE(node); 02763 switch (en->type) { 02764 case ENCLOSE_OPTION: 02765 { 02766 OnigOptionType options = reg->options; 02767 02768 reg->options = NENCLOSE(node)->option; 02769 n = get_head_value_node(NENCLOSE(node)->target, exact, reg); 02770 reg->options = options; 02771 } 02772 break; 02773 02774 case ENCLOSE_MEMORY: 02775 case ENCLOSE_STOP_BACKTRACK: 02776 case ENCLOSE_CONDITION: 02777 n = get_head_value_node(en->target, exact, reg); 02778 break; 02779 } 02780 } 02781 break; 02782 02783 case NT_ANCHOR: 02784 if (NANCHOR(node)->type == ANCHOR_PREC_READ) 02785 n = get_head_value_node(NANCHOR(node)->target, exact, reg); 02786 break; 02787 02788 default: 02789 break; 02790 } 02791 02792 return n; 02793 } 02794 02795 static int 02796 check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask) 02797 { 02798 int type, r = 0; 02799 02800 type = NTYPE(node); 02801 if ((NTYPE2BIT(type) & type_mask) == 0) 02802 return 1; 02803 02804 switch (type) { 02805 case NT_LIST: 02806 case NT_ALT: 02807 do { 02808 r = check_type_tree(NCAR(node), type_mask, enclose_mask, 02809 anchor_mask); 02810 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 02811 break; 02812 02813 case NT_QTFR: 02814 r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask, 02815 anchor_mask); 02816 break; 02817 02818 case NT_ENCLOSE: 02819 { 02820 EncloseNode* en = NENCLOSE(node); 02821 if ((en->type & enclose_mask) == 0) 02822 return 1; 02823 02824 r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask); 02825 } 02826 break; 02827 02828 case NT_ANCHOR: 02829 type = NANCHOR(node)->type; 02830 if ((type & anchor_mask) == 0) 02831 return 1; 02832 02833 if (NANCHOR(node)->target) 02834 r = check_type_tree(NANCHOR(node)->target, 02835 type_mask, enclose_mask, anchor_mask); 02836 break; 02837 02838 default: 02839 break; 02840 } 02841 return r; 02842 } 02843 02844 #ifdef USE_SUBEXP_CALL 02845 02846 #define RECURSION_EXIST 1 02847 #define RECURSION_INFINITE 2 02848 02849 static int 02850 subexp_inf_recursive_check(Node* node, ScanEnv* env, int head) 02851 { 02852 int type; 02853 int r = 0; 02854 02855 type = NTYPE(node); 02856 switch (type) { 02857 case NT_LIST: 02858 { 02859 Node *x; 02860 OnigDistance min; 02861 int ret; 02862 02863 x = node; 02864 do { 02865 ret = subexp_inf_recursive_check(NCAR(x), env, head); 02866 if (ret < 0 || ret == RECURSION_INFINITE) return ret; 02867 r |= ret; 02868 if (head) { 02869 ret = get_min_match_length(NCAR(x), &min, env); 02870 if (ret != 0) return ret; 02871 if (min != 0) head = 0; 02872 } 02873 } while (IS_NOT_NULL(x = NCDR(x))); 02874 } 02875 break; 02876 02877 case NT_ALT: 02878 { 02879 int ret; 02880 r = RECURSION_EXIST; 02881 do { 02882 ret = subexp_inf_recursive_check(NCAR(node), env, head); 02883 if (ret < 0 || ret == RECURSION_INFINITE) return ret; 02884 r &= ret; 02885 } while (IS_NOT_NULL(node = NCDR(node))); 02886 } 02887 break; 02888 02889 case NT_QTFR: 02890 r = subexp_inf_recursive_check(NQTFR(node)->target, env, head); 02891 if (r == RECURSION_EXIST) { 02892 if (NQTFR(node)->lower == 0) r = 0; 02893 } 02894 break; 02895 02896 case NT_ANCHOR: 02897 { 02898 AnchorNode* an = NANCHOR(node); 02899 switch (an->type) { 02900 case ANCHOR_PREC_READ: 02901 case ANCHOR_PREC_READ_NOT: 02902 case ANCHOR_LOOK_BEHIND: 02903 case ANCHOR_LOOK_BEHIND_NOT: 02904 r = subexp_inf_recursive_check(an->target, env, head); 02905 break; 02906 } 02907 } 02908 break; 02909 02910 case NT_CALL: 02911 r = subexp_inf_recursive_check(NCALL(node)->target, env, head); 02912 break; 02913 02914 case NT_ENCLOSE: 02915 if (IS_ENCLOSE_MARK2(NENCLOSE(node))) 02916 return 0; 02917 else if (IS_ENCLOSE_MARK1(NENCLOSE(node))) 02918 return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE); 02919 else { 02920 SET_ENCLOSE_STATUS(node, NST_MARK2); 02921 r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head); 02922 CLEAR_ENCLOSE_STATUS(node, NST_MARK2); 02923 } 02924 break; 02925 02926 default: 02927 break; 02928 } 02929 02930 return r; 02931 } 02932 02933 static int 02934 subexp_inf_recursive_check_trav(Node* node, ScanEnv* env) 02935 { 02936 int type; 02937 int r = 0; 02938 02939 type = NTYPE(node); 02940 switch (type) { 02941 case NT_LIST: 02942 case NT_ALT: 02943 do { 02944 r = subexp_inf_recursive_check_trav(NCAR(node), env); 02945 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 02946 break; 02947 02948 case NT_QTFR: 02949 r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env); 02950 break; 02951 02952 case NT_ANCHOR: 02953 { 02954 AnchorNode* an = NANCHOR(node); 02955 switch (an->type) { 02956 case ANCHOR_PREC_READ: 02957 case ANCHOR_PREC_READ_NOT: 02958 case ANCHOR_LOOK_BEHIND: 02959 case ANCHOR_LOOK_BEHIND_NOT: 02960 r = subexp_inf_recursive_check_trav(an->target, env); 02961 break; 02962 } 02963 } 02964 break; 02965 02966 case NT_ENCLOSE: 02967 { 02968 EncloseNode* en = NENCLOSE(node); 02969 02970 if (IS_ENCLOSE_RECURSION(en)) { 02971 SET_ENCLOSE_STATUS(node, NST_MARK1); 02972 r = subexp_inf_recursive_check(en->target, env, 1); 02973 if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION; 02974 CLEAR_ENCLOSE_STATUS(node, NST_MARK1); 02975 } 02976 r = subexp_inf_recursive_check_trav(en->target, env); 02977 } 02978 02979 break; 02980 02981 default: 02982 break; 02983 } 02984 02985 return r; 02986 } 02987 02988 static int 02989 subexp_recursive_check(Node* node) 02990 { 02991 int r = 0; 02992 02993 switch (NTYPE(node)) { 02994 case NT_LIST: 02995 case NT_ALT: 02996 do { 02997 r |= subexp_recursive_check(NCAR(node)); 02998 } while (IS_NOT_NULL(node = NCDR(node))); 02999 break; 03000 03001 case NT_QTFR: 03002 r = subexp_recursive_check(NQTFR(node)->target); 03003 break; 03004 03005 case NT_ANCHOR: 03006 { 03007 AnchorNode* an = NANCHOR(node); 03008 switch (an->type) { 03009 case ANCHOR_PREC_READ: 03010 case ANCHOR_PREC_READ_NOT: 03011 case ANCHOR_LOOK_BEHIND: 03012 case ANCHOR_LOOK_BEHIND_NOT: 03013 r = subexp_recursive_check(an->target); 03014 break; 03015 } 03016 } 03017 break; 03018 03019 case NT_CALL: 03020 r = subexp_recursive_check(NCALL(node)->target); 03021 if (r != 0) SET_CALL_RECURSION(node); 03022 break; 03023 03024 case NT_ENCLOSE: 03025 if (IS_ENCLOSE_MARK2(NENCLOSE(node))) 03026 return 0; 03027 else if (IS_ENCLOSE_MARK1(NENCLOSE(node))) 03028 return 1; /* recursion */ 03029 else { 03030 SET_ENCLOSE_STATUS(node, NST_MARK2); 03031 r = subexp_recursive_check(NENCLOSE(node)->target); 03032 CLEAR_ENCLOSE_STATUS(node, NST_MARK2); 03033 } 03034 break; 03035 03036 default: 03037 break; 03038 } 03039 03040 return r; 03041 } 03042 03043 03044 static int 03045 subexp_recursive_check_trav(Node* node, ScanEnv* env) 03046 { 03047 #define FOUND_CALLED_NODE 1 03048 03049 int type; 03050 int r = 0; 03051 03052 type = NTYPE(node); 03053 switch (type) { 03054 case NT_LIST: 03055 case NT_ALT: 03056 { 03057 int ret; 03058 do { 03059 ret = subexp_recursive_check_trav(NCAR(node), env); 03060 if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE; 03061 else if (ret < 0) return ret; 03062 } while (IS_NOT_NULL(node = NCDR(node))); 03063 } 03064 break; 03065 03066 case NT_QTFR: 03067 r = subexp_recursive_check_trav(NQTFR(node)->target, env); 03068 if (NQTFR(node)->upper == 0) { 03069 if (r == FOUND_CALLED_NODE) 03070 NQTFR(node)->is_refered = 1; 03071 } 03072 break; 03073 03074 case NT_ANCHOR: 03075 { 03076 AnchorNode* an = NANCHOR(node); 03077 switch (an->type) { 03078 case ANCHOR_PREC_READ: 03079 case ANCHOR_PREC_READ_NOT: 03080 case ANCHOR_LOOK_BEHIND: 03081 case ANCHOR_LOOK_BEHIND_NOT: 03082 r = subexp_recursive_check_trav(an->target, env); 03083 break; 03084 } 03085 } 03086 break; 03087 03088 case NT_ENCLOSE: 03089 { 03090 EncloseNode* en = NENCLOSE(node); 03091 03092 if (! IS_ENCLOSE_RECURSION(en)) { 03093 if (IS_ENCLOSE_CALLED(en)) { 03094 SET_ENCLOSE_STATUS(node, NST_MARK1); 03095 r = subexp_recursive_check(en->target); 03096 if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION); 03097 CLEAR_ENCLOSE_STATUS(node, NST_MARK1); 03098 } 03099 } 03100 r = subexp_recursive_check_trav(en->target, env); 03101 if (IS_ENCLOSE_CALLED(en)) 03102 r |= FOUND_CALLED_NODE; 03103 } 03104 break; 03105 03106 default: 03107 break; 03108 } 03109 03110 return r; 03111 } 03112 03113 static int 03114 setup_subexp_call(Node* node, ScanEnv* env) 03115 { 03116 int type; 03117 int r = 0; 03118 03119 type = NTYPE(node); 03120 switch (type) { 03121 case NT_LIST: 03122 do { 03123 r = setup_subexp_call(NCAR(node), env); 03124 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 03125 break; 03126 03127 case NT_ALT: 03128 do { 03129 r = setup_subexp_call(NCAR(node), env); 03130 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 03131 break; 03132 03133 case NT_QTFR: 03134 r = setup_subexp_call(NQTFR(node)->target, env); 03135 break; 03136 case NT_ENCLOSE: 03137 r = setup_subexp_call(NENCLOSE(node)->target, env); 03138 break; 03139 03140 case NT_CALL: 03141 { 03142 CallNode* cn = NCALL(node); 03143 Node** nodes = SCANENV_MEM_NODES(env); 03144 03145 if (cn->group_num != 0) { 03146 int gnum = cn->group_num; 03147 03148 #ifdef USE_NAMED_GROUP 03149 if (env->num_named > 0 && 03150 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && 03151 !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) { 03152 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; 03153 } 03154 #endif 03155 if (gnum > env->num_mem) { 03156 onig_scan_env_set_error_string(env, 03157 ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end); 03158 return ONIGERR_UNDEFINED_GROUP_REFERENCE; 03159 } 03160 03161 #ifdef USE_NAMED_GROUP 03162 set_call_attr: 03163 #endif 03164 cn->target = nodes[cn->group_num]; 03165 if (IS_NULL(cn->target)) { 03166 onig_scan_env_set_error_string(env, 03167 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); 03168 return ONIGERR_UNDEFINED_NAME_REFERENCE; 03169 } 03170 SET_ENCLOSE_STATUS(cn->target, NST_CALLED); 03171 BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num); 03172 cn->unset_addr_list = env->unset_addr_list; 03173 } 03174 #ifdef USE_NAMED_GROUP 03175 #ifdef USE_PERL_SUBEXP_CALL 03176 else if (cn->name == cn->name_end) { 03177 goto set_call_attr; 03178 } 03179 #endif 03180 else { 03181 int *refs; 03182 03183 int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, 03184 &refs); 03185 if (n <= 0) { 03186 onig_scan_env_set_error_string(env, 03187 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); 03188 return ONIGERR_UNDEFINED_NAME_REFERENCE; 03189 } 03190 else if (n > 1 && 03191 ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL)) { 03192 onig_scan_env_set_error_string(env, 03193 ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end); 03194 return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL; 03195 } 03196 else { 03197 cn->group_num = refs[0]; 03198 goto set_call_attr; 03199 } 03200 } 03201 #endif 03202 } 03203 break; 03204 03205 case NT_ANCHOR: 03206 { 03207 AnchorNode* an = NANCHOR(node); 03208 03209 switch (an->type) { 03210 case ANCHOR_PREC_READ: 03211 case ANCHOR_PREC_READ_NOT: 03212 case ANCHOR_LOOK_BEHIND: 03213 case ANCHOR_LOOK_BEHIND_NOT: 03214 r = setup_subexp_call(an->target, env); 03215 break; 03216 } 03217 } 03218 break; 03219 03220 default: 03221 break; 03222 } 03223 03224 return r; 03225 } 03226 #endif 03227 03228 /* divide different length alternatives in look-behind. 03229 (?<=A|B) ==> (?<=A)|(?<=B) 03230 (?<!A|B) ==> (?<!A)(?<!B) 03231 */ 03232 static int 03233 divide_look_behind_alternatives(Node* node) 03234 { 03235 Node *head, *np, *insert_node; 03236 AnchorNode* an = NANCHOR(node); 03237 int anc_type = an->type; 03238 03239 head = an->target; 03240 np = NCAR(head); 03241 swap_node(node, head); 03242 NCAR(node) = head; 03243 NANCHOR(head)->target = np; 03244 03245 np = node; 03246 while ((np = NCDR(np)) != NULL_NODE) { 03247 insert_node = onig_node_new_anchor(anc_type); 03248 CHECK_NULL_RETURN_MEMERR(insert_node); 03249 NANCHOR(insert_node)->target = NCAR(np); 03250 NCAR(np) = insert_node; 03251 } 03252 03253 if (anc_type == ANCHOR_LOOK_BEHIND_NOT) { 03254 np = node; 03255 do { 03256 SET_NTYPE(np, NT_LIST); /* alt -> list */ 03257 } while ((np = NCDR(np)) != NULL_NODE); 03258 } 03259 return 0; 03260 } 03261 03262 static int 03263 setup_look_behind(Node* node, regex_t* reg, ScanEnv* env) 03264 { 03265 int r, len; 03266 AnchorNode* an = NANCHOR(node); 03267 03268 r = get_char_length_tree(an->target, reg, &len); 03269 if (r == 0) 03270 an->char_len = len; 03271 else if (r == GET_CHAR_LEN_VARLEN) 03272 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN; 03273 else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) { 03274 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND)) 03275 r = divide_look_behind_alternatives(node); 03276 else 03277 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN; 03278 } 03279 03280 return r; 03281 } 03282 03283 static int 03284 next_setup(Node* node, Node* next_node, int in_root, regex_t* reg) 03285 { 03286 int type; 03287 03288 retry: 03289 type = NTYPE(node); 03290 if (type == NT_QTFR) { 03291 QtfrNode* qn = NQTFR(node); 03292 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) { 03293 #ifdef USE_QTFR_PEEK_NEXT 03294 Node* n = get_head_value_node(next_node, 1, reg); 03295 /* '\0': for UTF-16BE etc... */ 03296 if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') { 03297 qn->next_head_exact = n; 03298 } 03299 #endif 03300 /* automatic possessivation a*b ==> (?>a*)b */ 03301 if (qn->lower <= 1) { 03302 int ttype = NTYPE(qn->target); 03303 if (IS_NODE_TYPE_SIMPLE(ttype)) { 03304 Node *x, *y; 03305 x = get_head_value_node(qn->target, 0, reg); 03306 if (IS_NOT_NULL(x)) { 03307 y = get_head_value_node(next_node, 0, reg); 03308 if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) { 03309 Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK); 03310 CHECK_NULL_RETURN_MEMERR(en); 03311 SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT); 03312 swap_node(node, en); 03313 NENCLOSE(node)->target = en; 03314 } 03315 } 03316 } 03317 } 03318 03319 #ifndef ONIG_DONT_OPTIMIZE 03320 if (NTYPE(node) == NT_QTFR && /* the type may be changed by above block */ 03321 in_root && /* qn->lower == 0 && */ 03322 NTYPE(qn->target) == NT_CANY && 03323 ! IS_MULTILINE(reg->options)) { 03324 /* implicit anchor: /.*a/ ==> /(?:^|\G).*a/ */ 03325 Node *np; 03326 np = onig_node_new_list(NULL_NODE, NULL_NODE); 03327 CHECK_NULL_RETURN_MEMERR(np); 03328 swap_node(node, np); 03329 NCDR(node) = onig_node_new_list(np, NULL_NODE); 03330 if (IS_NULL(NCDR(node))) { 03331 onig_node_free(np); 03332 return ONIGERR_MEMORY; 03333 } 03334 np = onig_node_new_anchor(ANCHOR_ANYCHAR_STAR); /* (?:^|\G) */ 03335 CHECK_NULL_RETURN_MEMERR(np); 03336 NCAR(node) = np; 03337 } 03338 #endif 03339 } 03340 } 03341 else if (type == NT_ENCLOSE) { 03342 EncloseNode* en = NENCLOSE(node); 03343 in_root = 0; 03344 if (en->type == ENCLOSE_MEMORY) { 03345 node = en->target; 03346 goto retry; 03347 } 03348 } 03349 return 0; 03350 } 03351 03352 03353 static int 03354 update_string_node_case_fold(regex_t* reg, Node *node) 03355 { 03356 UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; 03357 UChar *sbuf, *ebuf, *sp; 03358 int r, i, len; 03359 OnigDistance sbuf_size; 03360 StrNode* sn = NSTR(node); 03361 03362 end = sn->end; 03363 sbuf_size = (end - sn->s) * 2; 03364 sbuf = (UChar* )xmalloc(sbuf_size); 03365 CHECK_NULL_RETURN_MEMERR(sbuf); 03366 ebuf = sbuf + sbuf_size; 03367 03368 sp = sbuf; 03369 p = sn->s; 03370 while (p < end) { 03371 len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf); 03372 for (i = 0; i < len; i++) { 03373 if (sp >= ebuf) { 03374 UChar* p = (UChar* )xrealloc(sbuf, sbuf_size * 2); 03375 if (IS_NULL(p)) { 03376 xfree(sbuf); 03377 return ONIGERR_MEMORY; 03378 } 03379 sbuf = p; 03380 sp = sbuf + sbuf_size; 03381 sbuf_size *= 2; 03382 ebuf = sbuf + sbuf_size; 03383 } 03384 03385 *sp++ = buf[i]; 03386 } 03387 } 03388 03389 r = onig_node_str_set(node, sbuf, sp); 03390 if (r != 0) { 03391 xfree(sbuf); 03392 return r; 03393 } 03394 03395 xfree(sbuf); 03396 return 0; 03397 } 03398 03399 static int 03400 expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, 03401 regex_t* reg) 03402 { 03403 int r; 03404 Node *node; 03405 03406 node = onig_node_new_str(s, end); 03407 if (IS_NULL(node)) return ONIGERR_MEMORY; 03408 03409 r = update_string_node_case_fold(reg, node); 03410 if (r != 0) { 03411 onig_node_free(node); 03412 return r; 03413 } 03414 03415 NSTRING_SET_AMBIG(node); 03416 NSTRING_SET_DONT_GET_OPT_INFO(node); 03417 *rnode = node; 03418 return 0; 03419 } 03420 03421 static int 03422 expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], 03423 UChar *p, int slen, UChar *end, 03424 regex_t* reg, Node **rnode) 03425 { 03426 int r, i, j, len, varlen, varclen; 03427 Node *anode, *var_anode, *snode, *xnode, *an; 03428 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; 03429 03430 *rnode = var_anode = NULL_NODE; 03431 03432 varlen = 0; 03433 varclen = 0; 03434 for (i = 0; i < item_num; i++) { 03435 if (items[i].byte_len != slen) { 03436 varlen = 1; 03437 break; 03438 } 03439 if (items[i].code_len != 1) { 03440 varclen = 1; 03441 } 03442 } 03443 03444 if (varlen != 0) { 03445 *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE); 03446 if (IS_NULL(var_anode)) return ONIGERR_MEMORY; 03447 03448 xnode = onig_node_new_list(NULL, NULL); 03449 if (IS_NULL(xnode)) goto mem_err; 03450 NCAR(var_anode) = xnode; 03451 03452 anode = onig_node_new_alt(NULL_NODE, NULL_NODE); 03453 if (IS_NULL(anode)) goto mem_err; 03454 NCAR(xnode) = anode; 03455 } 03456 else { 03457 *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE); 03458 if (IS_NULL(anode)) return ONIGERR_MEMORY; 03459 } 03460 03461 snode = onig_node_new_str(p, p + slen); 03462 if (IS_NULL(snode)) goto mem_err; 03463 03464 NCAR(anode) = snode; 03465 03466 for (i = 0; i < item_num; i++) { 03467 snode = onig_node_new_str(NULL, NULL); 03468 if (IS_NULL(snode)) goto mem_err; 03469 03470 for (j = 0; j < items[i].code_len; j++) { 03471 len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf); 03472 if (len < 0) { 03473 r = len; 03474 goto mem_err2; 03475 } 03476 03477 r = onig_node_str_cat(snode, buf, buf + len); 03478 if (r != 0) goto mem_err2; 03479 } 03480 03481 an = onig_node_new_alt(NULL_NODE, NULL_NODE); 03482 if (IS_NULL(an)) { 03483 goto mem_err2; 03484 } 03485 03486 if (items[i].byte_len != slen) { 03487 Node *rem; 03488 UChar *q = p + items[i].byte_len; 03489 03490 if (q < end) { 03491 r = expand_case_fold_make_rem_string(&rem, q, end, reg); 03492 if (r != 0) { 03493 onig_node_free(an); 03494 goto mem_err2; 03495 } 03496 03497 xnode = onig_node_list_add(NULL_NODE, snode); 03498 if (IS_NULL(xnode)) { 03499 onig_node_free(an); 03500 onig_node_free(rem); 03501 goto mem_err2; 03502 } 03503 if (IS_NULL(onig_node_list_add(xnode, rem))) { 03504 onig_node_free(an); 03505 onig_node_free(xnode); 03506 onig_node_free(rem); 03507 goto mem_err; 03508 } 03509 03510 NCAR(an) = xnode; 03511 } 03512 else { 03513 NCAR(an) = snode; 03514 } 03515 03516 NCDR(var_anode) = an; 03517 var_anode = an; 03518 } 03519 else { 03520 NCAR(an) = snode; 03521 NCDR(anode) = an; 03522 anode = an; 03523 } 03524 } 03525 03526 if (varclen && !varlen) 03527 return 2; 03528 return varlen; 03529 03530 mem_err2: 03531 onig_node_free(snode); 03532 03533 mem_err: 03534 onig_node_free(*rnode); 03535 03536 return ONIGERR_MEMORY; 03537 } 03538 03539 static int 03540 expand_case_fold_string(Node* node, regex_t* reg) 03541 { 03542 #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8 03543 03544 int r, n, len, alt_num; 03545 int varlen = 0; 03546 UChar *start, *end, *p; 03547 Node *top_root, *root, *snode, *prev_node; 03548 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; 03549 StrNode* sn = NSTR(node); 03550 03551 if (NSTRING_IS_AMBIG(node)) return 0; 03552 03553 start = sn->s; 03554 end = sn->end; 03555 if (start >= end) return 0; 03556 03557 r = 0; 03558 top_root = root = prev_node = snode = NULL_NODE; 03559 alt_num = 1; 03560 p = start; 03561 while (p < end) { 03562 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag, 03563 p, end, items); 03564 if (n < 0) { 03565 r = n; 03566 goto err; 03567 } 03568 03569 len = enclen(reg->enc, p, end); 03570 03571 if (n == 0) { 03572 if (IS_NULL(snode)) { 03573 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) { 03574 top_root = root = onig_node_list_add(NULL_NODE, prev_node); 03575 if (IS_NULL(root)) { 03576 onig_node_free(prev_node); 03577 goto mem_err; 03578 } 03579 } 03580 03581 prev_node = snode = onig_node_new_str(NULL, NULL); 03582 if (IS_NULL(snode)) goto mem_err; 03583 if (IS_NOT_NULL(root)) { 03584 if (IS_NULL(onig_node_list_add(root, snode))) { 03585 onig_node_free(snode); 03586 goto mem_err; 03587 } 03588 } 03589 } 03590 03591 r = onig_node_str_cat(snode, p, p + len); 03592 if (r != 0) goto err; 03593 } 03594 else { 03595 alt_num *= (n + 1); 03596 if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break; 03597 03598 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) { 03599 top_root = root = onig_node_list_add(NULL_NODE, prev_node); 03600 if (IS_NULL(root)) { 03601 onig_node_free(prev_node); 03602 goto mem_err; 03603 } 03604 } 03605 03606 r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node); 03607 if (r < 0) goto mem_err; 03608 if (r > 0) varlen = 1; 03609 if (r == 1) { 03610 if (IS_NULL(root)) { 03611 top_root = prev_node; 03612 } 03613 else { 03614 if (IS_NULL(onig_node_list_add(root, prev_node))) { 03615 onig_node_free(prev_node); 03616 goto mem_err; 03617 } 03618 } 03619 03620 root = NCAR(prev_node); 03621 } 03622 else { /* r == 0 || r == 2 */ 03623 if (IS_NOT_NULL(root)) { 03624 if (IS_NULL(onig_node_list_add(root, prev_node))) { 03625 onig_node_free(prev_node); 03626 goto mem_err; 03627 } 03628 } 03629 } 03630 03631 snode = NULL_NODE; 03632 } 03633 03634 p += len; 03635 } 03636 03637 if (p < end) { 03638 Node *srem; 03639 03640 r = expand_case_fold_make_rem_string(&srem, p, end, reg); 03641 if (r != 0) goto mem_err; 03642 03643 if (IS_NOT_NULL(prev_node) && IS_NULL(root)) { 03644 top_root = root = onig_node_list_add(NULL_NODE, prev_node); 03645 if (IS_NULL(root)) { 03646 onig_node_free(srem); 03647 onig_node_free(prev_node); 03648 goto mem_err; 03649 } 03650 } 03651 03652 if (IS_NULL(root)) { 03653 prev_node = srem; 03654 } 03655 else { 03656 if (IS_NULL(onig_node_list_add(root, srem))) { 03657 onig_node_free(srem); 03658 goto mem_err; 03659 } 03660 } 03661 } 03662 03663 /* ending */ 03664 top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node); 03665 if (!varlen) { 03666 /* When all expanded strings are same length, case-insensitive 03667 BM search will be used. */ 03668 r = update_string_node_case_fold(reg, node); 03669 if (r == 0) { 03670 NSTRING_SET_AMBIG(node); 03671 } 03672 } 03673 else { 03674 swap_node(node, top_root); 03675 r = 0; 03676 } 03677 onig_node_free(top_root); 03678 return r; 03679 03680 mem_err: 03681 r = ONIGERR_MEMORY; 03682 03683 err: 03684 onig_node_free(top_root); 03685 return r; 03686 } 03687 03688 03689 #ifdef USE_COMBINATION_EXPLOSION_CHECK 03690 03691 #define CEC_THRES_NUM_BIG_REPEAT 512 03692 #define CEC_INFINITE_NUM 0x7fffffff 03693 03694 #define CEC_IN_INFINITE_REPEAT (1<<0) 03695 #define CEC_IN_FINITE_REPEAT (1<<1) 03696 #define CEC_CONT_BIG_REPEAT (1<<2) 03697 03698 static int 03699 setup_comb_exp_check(Node* node, int state, ScanEnv* env) 03700 { 03701 int type; 03702 int r = state; 03703 03704 type = NTYPE(node); 03705 switch (type) { 03706 case NT_LIST: 03707 { 03708 Node* prev = NULL_NODE; 03709 do { 03710 r = setup_comb_exp_check(NCAR(node), r, env); 03711 prev = NCAR(node); 03712 } while (r >= 0 && IS_NOT_NULL(node = NCDR(node))); 03713 } 03714 break; 03715 03716 case NT_ALT: 03717 { 03718 int ret; 03719 do { 03720 ret = setup_comb_exp_check(NCAR(node), state, env); 03721 r |= ret; 03722 } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node))); 03723 } 03724 break; 03725 03726 case NT_QTFR: 03727 { 03728 int child_state = state; 03729 int add_state = 0; 03730 QtfrNode* qn = NQTFR(node); 03731 Node* target = qn->target; 03732 int var_num; 03733 03734 if (! IS_REPEAT_INFINITE(qn->upper)) { 03735 if (qn->upper > 1) { 03736 /* {0,1}, {1,1} are allowed */ 03737 child_state |= CEC_IN_FINITE_REPEAT; 03738 03739 /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */ 03740 if (env->backrefed_mem == 0) { 03741 if (NTYPE(qn->target) == NT_ENCLOSE) { 03742 EncloseNode* en = NENCLOSE(qn->target); 03743 if (en->type == ENCLOSE_MEMORY) { 03744 if (NTYPE(en->target) == NT_QTFR) { 03745 QtfrNode* q = NQTFR(en->target); 03746 if (IS_REPEAT_INFINITE(q->upper) 03747 && q->greedy == qn->greedy) { 03748 qn->upper = (qn->lower == 0 ? 1 : qn->lower); 03749 if (qn->upper == 1) 03750 child_state = state; 03751 } 03752 } 03753 } 03754 } 03755 } 03756 } 03757 } 03758 03759 if (state & CEC_IN_FINITE_REPEAT) { 03760 qn->comb_exp_check_num = -1; 03761 } 03762 else { 03763 if (IS_REPEAT_INFINITE(qn->upper)) { 03764 var_num = CEC_INFINITE_NUM; 03765 child_state |= CEC_IN_INFINITE_REPEAT; 03766 } 03767 else { 03768 var_num = qn->upper - qn->lower; 03769 } 03770 03771 if (var_num >= CEC_THRES_NUM_BIG_REPEAT) 03772 add_state |= CEC_CONT_BIG_REPEAT; 03773 03774 if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) || 03775 ((state & CEC_CONT_BIG_REPEAT) != 0 && 03776 var_num >= CEC_THRES_NUM_BIG_REPEAT)) { 03777 if (qn->comb_exp_check_num == 0) { 03778 env->num_comb_exp_check++; 03779 qn->comb_exp_check_num = env->num_comb_exp_check; 03780 if (env->curr_max_regnum > env->comb_exp_max_regnum) 03781 env->comb_exp_max_regnum = env->curr_max_regnum; 03782 } 03783 } 03784 } 03785 03786 r = setup_comb_exp_check(target, child_state, env); 03787 r |= add_state; 03788 } 03789 break; 03790 03791 case NT_ENCLOSE: 03792 { 03793 EncloseNode* en = NENCLOSE(node); 03794 03795 switch (en->type) { 03796 case ENCLOSE_MEMORY: 03797 { 03798 if (env->curr_max_regnum < en->regnum) 03799 env->curr_max_regnum = en->regnum; 03800 03801 r = setup_comb_exp_check(en->target, state, env); 03802 } 03803 break; 03804 03805 default: 03806 r = setup_comb_exp_check(en->target, state, env); 03807 break; 03808 } 03809 } 03810 break; 03811 03812 #ifdef USE_SUBEXP_CALL 03813 case NT_CALL: 03814 if (IS_CALL_RECURSION(NCALL(node))) 03815 env->has_recursion = 1; 03816 else 03817 r = setup_comb_exp_check(NCALL(node)->target, state, env); 03818 break; 03819 #endif 03820 03821 default: 03822 break; 03823 } 03824 03825 return r; 03826 } 03827 #endif 03828 03829 #define IN_ALT (1<<0) 03830 #define IN_NOT (1<<1) 03831 #define IN_REPEAT (1<<2) 03832 #define IN_VAR_REPEAT (1<<3) 03833 #define IN_ROOT (1<<4) 03834 03835 /* setup_tree does the following work. 03836 1. check empty loop. (set qn->target_empty_info) 03837 2. expand ignore-case in char class. 03838 3. set memory status bit flags. (reg->mem_stats) 03839 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact]. 03840 5. find invalid patterns in look-behind. 03841 6. expand repeated string. 03842 */ 03843 static int 03844 setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) 03845 { 03846 int type; 03847 int r = 0; 03848 int in_root = state & IN_ROOT; 03849 03850 state &= ~IN_ROOT; 03851 restart: 03852 type = NTYPE(node); 03853 switch (type) { 03854 case NT_LIST: 03855 { 03856 Node* prev = NULL_NODE; 03857 int prev_in_root = 0; 03858 state |= in_root; 03859 do { 03860 r = setup_tree(NCAR(node), reg, state, env); 03861 if (IS_NOT_NULL(prev) && r == 0) { 03862 r = next_setup(prev, NCAR(node), prev_in_root, reg); 03863 } 03864 prev = NCAR(node); 03865 prev_in_root = state & IN_ROOT; 03866 state &= ~IN_ROOT; 03867 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 03868 } 03869 break; 03870 03871 case NT_ALT: 03872 do { 03873 r = setup_tree(NCAR(node), reg, (state | IN_ALT), env); 03874 } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); 03875 break; 03876 03877 case NT_CCLASS: 03878 break; 03879 03880 case NT_STR: 03881 if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) { 03882 r = expand_case_fold_string(node, reg); 03883 } 03884 break; 03885 03886 case NT_CTYPE: 03887 case NT_CANY: 03888 break; 03889 03890 #ifdef USE_SUBEXP_CALL 03891 case NT_CALL: 03892 break; 03893 #endif 03894 03895 case NT_BREF: 03896 { 03897 int i; 03898 int* p; 03899 Node** nodes = SCANENV_MEM_NODES(env); 03900 BRefNode* br = NBREF(node); 03901 p = BACKREFS_P(br); 03902 for (i = 0; i < br->back_num; i++) { 03903 if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; 03904 BIT_STATUS_ON_AT(env->backrefed_mem, p[i]); 03905 BIT_STATUS_ON_AT(env->bt_mem_start, p[i]); 03906 #ifdef USE_BACKREF_WITH_LEVEL 03907 if (IS_BACKREF_NEST_LEVEL(br)) { 03908 BIT_STATUS_ON_AT(env->bt_mem_end, p[i]); 03909 } 03910 #endif 03911 SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED); 03912 } 03913 } 03914 break; 03915 03916 case NT_QTFR: 03917 { 03918 OnigDistance d; 03919 QtfrNode* qn = NQTFR(node); 03920 Node* target = qn->target; 03921 03922 if ((state & IN_REPEAT) != 0) { 03923 qn->state |= NST_IN_REPEAT; 03924 } 03925 03926 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) { 03927 r = get_min_match_length(target, &d, env); 03928 if (r) break; 03929 if (d == 0) { 03930 qn->target_empty_info = NQ_TARGET_IS_EMPTY; 03931 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT 03932 r = quantifiers_memory_node_info(target); 03933 if (r < 0) break; 03934 if (r > 0) { 03935 qn->target_empty_info = r; 03936 } 03937 #endif 03938 #if 0 03939 r = get_max_match_length(target, &d, env); 03940 if (r == 0 && d == 0) { 03941 /* ()* ==> ()?, ()+ ==> () */ 03942 qn->upper = 1; 03943 if (qn->lower > 1) qn->lower = 1; 03944 if (NTYPE(target) == NT_STR) { 03945 qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */ 03946 } 03947 } 03948 #endif 03949 } 03950 } 03951 03952 state |= IN_REPEAT; 03953 if (qn->lower != qn->upper) 03954 state |= IN_VAR_REPEAT; 03955 r = setup_tree(target, reg, state, env); 03956 if (r) break; 03957 03958 /* expand string */ 03959 #define EXPAND_STRING_MAX_LENGTH 100 03960 if (NTYPE(target) == NT_STR) { 03961 if (qn->lower > 1) { 03962 int i, n = qn->lower; 03963 OnigDistance len = NSTRING_LEN(target); 03964 StrNode* sn = NSTR(target); 03965 Node* np; 03966 03967 np = onig_node_new_str(sn->s, sn->end); 03968 if (IS_NULL(np)) return ONIGERR_MEMORY; 03969 NSTR(np)->flag = sn->flag; 03970 03971 for (i = 1; i < n && (i+1) * len <= EXPAND_STRING_MAX_LENGTH; i++) { 03972 r = onig_node_str_cat(np, sn->s, sn->end); 03973 if (r) { 03974 onig_node_free(np); 03975 return r; 03976 } 03977 } 03978 if (i < qn->upper || IS_REPEAT_INFINITE(qn->upper)) { 03979 Node *np1, *np2; 03980 03981 qn->lower -= i; 03982 if (! IS_REPEAT_INFINITE(qn->upper)) 03983 qn->upper -= i; 03984 03985 np1 = onig_node_new_list(np, NULL); 03986 if (IS_NULL(np1)) { 03987 onig_node_free(np); 03988 return ONIGERR_MEMORY; 03989 } 03990 swap_node(np1, node); 03991 np2 = onig_node_list_add(node, np1); 03992 if (IS_NULL(np2)) { 03993 onig_node_free(np1); 03994 return ONIGERR_MEMORY; 03995 } 03996 } 03997 else { 03998 swap_node(np, node); 03999 onig_node_free(np); 04000 } 04001 break; /* break case NT_QTFR: */ 04002 } 04003 } 04004 04005 #ifdef USE_OP_PUSH_OR_JUMP_EXACT 04006 if (qn->greedy && (qn->target_empty_info != 0)) { 04007 if (NTYPE(target) == NT_QTFR) { 04008 QtfrNode* tqn = NQTFR(target); 04009 if (IS_NOT_NULL(tqn->head_exact)) { 04010 qn->head_exact = tqn->head_exact; 04011 tqn->head_exact = NULL; 04012 } 04013 } 04014 else { 04015 qn->head_exact = get_head_value_node(qn->target, 1, reg); 04016 } 04017 } 04018 #endif 04019 } 04020 break; 04021 04022 case NT_ENCLOSE: 04023 { 04024 EncloseNode* en = NENCLOSE(node); 04025 04026 switch (en->type) { 04027 case ENCLOSE_OPTION: 04028 { 04029 OnigOptionType options = reg->options; 04030 state |= in_root; 04031 reg->options = NENCLOSE(node)->option; 04032 r = setup_tree(NENCLOSE(node)->target, reg, state, env); 04033 reg->options = options; 04034 } 04035 break; 04036 04037 case ENCLOSE_MEMORY: 04038 if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) { 04039 BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum); 04040 /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */ 04041 } 04042 r = setup_tree(en->target, reg, state, env); 04043 break; 04044 04045 case ENCLOSE_STOP_BACKTRACK: 04046 { 04047 Node* target = en->target; 04048 r = setup_tree(target, reg, state, env); 04049 if (NTYPE(target) == NT_QTFR) { 04050 QtfrNode* tqn = NQTFR(target); 04051 if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 && 04052 tqn->greedy != 0) { /* (?>a*), a*+ etc... */ 04053 int qtype = NTYPE(tqn->target); 04054 if (IS_NODE_TYPE_SIMPLE(qtype)) 04055 SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT); 04056 } 04057 } 04058 } 04059 break; 04060 04061 case ENCLOSE_CONDITION: 04062 #ifdef USE_NAMED_GROUP 04063 if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node)) && 04064 env->num_named > 0 && 04065 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && 04066 !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) { 04067 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; 04068 } 04069 #endif 04070 r = setup_tree(NENCLOSE(node)->target, reg, state, env); 04071 break; 04072 } 04073 } 04074 break; 04075 04076 case NT_ANCHOR: 04077 { 04078 AnchorNode* an = NANCHOR(node); 04079 04080 switch (an->type) { 04081 case ANCHOR_PREC_READ: 04082 r = setup_tree(an->target, reg, state, env); 04083 break; 04084 case ANCHOR_PREC_READ_NOT: 04085 r = setup_tree(an->target, reg, (state | IN_NOT), env); 04086 break; 04087 04088 /* allowed node types in look-behind */ 04089 #define ALLOWED_TYPE_IN_LB \ 04090 ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \ 04091 BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL ) 04092 04093 #define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY ) 04094 #define ALLOWED_ENCLOSE_IN_LB_NOT 0 04095 04096 #define ALLOWED_ANCHOR_IN_LB \ 04097 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \ 04098 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \ 04099 ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \ 04100 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END ) 04101 #define ALLOWED_ANCHOR_IN_LB_NOT \ 04102 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \ 04103 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \ 04104 ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \ 04105 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END ) 04106 04107 case ANCHOR_LOOK_BEHIND: 04108 { 04109 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB, 04110 ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB); 04111 if (r < 0) return r; 04112 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; 04113 r = setup_look_behind(node, reg, env); 04114 if (r != 0) return r; 04115 if (NTYPE(node) != NT_ANCHOR) goto restart; 04116 r = setup_tree(an->target, reg, state, env); 04117 } 04118 break; 04119 04120 case ANCHOR_LOOK_BEHIND_NOT: 04121 { 04122 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB, 04123 ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT); 04124 if (r < 0) return r; 04125 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; 04126 r = setup_look_behind(node, reg, env); 04127 if (r != 0) return r; 04128 if (NTYPE(node) != NT_ANCHOR) goto restart; 04129 r = setup_tree(an->target, reg, (state | IN_NOT), env); 04130 } 04131 break; 04132 } 04133 } 04134 break; 04135 04136 default: 04137 break; 04138 } 04139 04140 return r; 04141 } 04142 04143 #ifndef USE_SUNDAY_QUICK_SEARCH 04144 /* set skip map for Boyer-Moore search */ 04145 static int 04146 set_bm_skip(UChar* s, UChar* end, regex_t* reg, 04147 UChar skip[], int** int_skip, int ignore_case) 04148 { 04149 OnigDistance i, len; 04150 int clen, flen, n, j, k; 04151 UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN]; 04152 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; 04153 OnigEncoding enc = reg->enc; 04154 04155 len = end - s; 04156 if (len < ONIG_CHAR_TABLE_SIZE) { 04157 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )len; 04158 04159 n = 0; 04160 for (i = 0; i < len - 1; i += clen) { 04161 p = s + i; 04162 if (ignore_case) 04163 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, 04164 p, end, items); 04165 clen = enclen(enc, p, end); 04166 04167 for (j = 0; j < n; j++) { 04168 if ((items[j].code_len != 1) || (items[j].byte_len != clen)) 04169 return 1; /* different length isn't supported. */ 04170 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]); 04171 if (flen != clen) 04172 return 1; /* different length isn't supported. */ 04173 } 04174 for (j = 0; j < clen; j++) { 04175 skip[s[i + j]] = (UChar )(len - 1 - i - j); 04176 for (k = 0; k < n; k++) { 04177 skip[buf[k][j]] = (UChar )(len - 1 - i - j); 04178 } 04179 } 04180 } 04181 } 04182 else { 04183 if (IS_NULL(*int_skip)) { 04184 *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); 04185 if (IS_NULL(*int_skip)) return ONIGERR_MEMORY; 04186 } 04187 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )len; 04188 04189 n = 0; 04190 for (i = 0; i < len - 1; i += clen) { 04191 p = s + i; 04192 if (ignore_case) 04193 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, 04194 p, end, items); 04195 clen = enclen(enc, p, end); 04196 04197 for (j = 0; j < n; j++) { 04198 if ((items[j].code_len != 1) || (items[j].byte_len != clen)) 04199 return 1; /* different length isn't supported. */ 04200 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]); 04201 if (flen != clen) 04202 return 1; /* different length isn't supported. */ 04203 } 04204 for (j = 0; j < clen; j++) { 04205 (*int_skip)[s[i + j]] = (int )(len - 1 - i - j); 04206 for (k = 0; k < n; k++) { 04207 (*int_skip)[buf[k][j]] = (int )(len - 1 - i - j); 04208 } 04209 } 04210 } 04211 } 04212 return 0; 04213 } 04214 04215 #else /* USE_SUNDAY_QUICK_SEARCH */ 04216 04217 /* set skip map for Sunday's quick search */ 04218 static int 04219 set_bm_skip(UChar* s, UChar* end, regex_t* reg, 04220 UChar skip[], int** int_skip, int ignore_case) 04221 { 04222 OnigDistance i, len; 04223 int clen, flen, n, j, k; 04224 UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN]; 04225 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; 04226 OnigEncoding enc = reg->enc; 04227 04228 len = end - s; 04229 if (len < ONIG_CHAR_TABLE_SIZE) { 04230 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )(len + 1); 04231 04232 n = 0; 04233 for (i = 0; i < len; i += clen) { 04234 p = s + i; 04235 if (ignore_case) 04236 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, 04237 p, end, items); 04238 clen = enclen(enc, p, end); 04239 04240 for (j = 0; j < n; j++) { 04241 if ((items[j].code_len != 1) || (items[j].byte_len != clen)) 04242 return 1; /* different length isn't supported. */ 04243 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]); 04244 if (flen != clen) 04245 return 1; /* different length isn't supported. */ 04246 } 04247 for (j = 0; j < clen; j++) { 04248 skip[s[i + j]] = (UChar )(len - i - j); 04249 for (k = 0; k < n; k++) { 04250 skip[buf[k][j]] = (UChar )(len - i - j); 04251 } 04252 } 04253 } 04254 } 04255 else { 04256 if (IS_NULL(*int_skip)) { 04257 *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); 04258 if (IS_NULL(*int_skip)) return ONIGERR_MEMORY; 04259 } 04260 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )(len + 1); 04261 04262 n = 0; 04263 for (i = 0; i < len; i += clen) { 04264 p = s + i; 04265 if (ignore_case) 04266 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, 04267 p, end, items); 04268 clen = enclen(enc, p, end); 04269 04270 for (j = 0; j < n; j++) { 04271 if ((items[j].code_len != 1) || (items[j].byte_len != clen)) 04272 return 1; /* different length isn't supported. */ 04273 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]); 04274 if (flen != clen) 04275 return 1; /* different length isn't supported. */ 04276 } 04277 for (j = 0; j < clen; j++) { 04278 (*int_skip)[s[i + j]] = (int )(len - i - j); 04279 for (k = 0; k < n; k++) { 04280 (*int_skip)[buf[k][j]] = (int )(len - i - j); 04281 } 04282 } 04283 } 04284 } 04285 return 0; 04286 } 04287 #endif /* USE_SUNDAY_QUICK_SEARCH */ 04288 04289 #define OPT_EXACT_MAXLEN 24 04290 04291 typedef struct { 04292 OnigDistance min; /* min byte length */ 04293 OnigDistance max; /* max byte length */ 04294 } MinMaxLen; 04295 04296 typedef struct { 04297 MinMaxLen mmd; 04298 OnigEncoding enc; 04299 OnigOptionType options; 04300 OnigCaseFoldType case_fold_flag; 04301 ScanEnv* scan_env; 04302 } OptEnv; 04303 04304 typedef struct { 04305 int left_anchor; 04306 int right_anchor; 04307 } OptAncInfo; 04308 04309 typedef struct { 04310 MinMaxLen mmd; /* info position */ 04311 OptAncInfo anc; 04312 04313 int reach_end; 04314 int ignore_case; /* -1: unset, 0: case sensitive, 1: ignore case */ 04315 int len; 04316 UChar s[OPT_EXACT_MAXLEN]; 04317 } OptExactInfo; 04318 04319 typedef struct { 04320 MinMaxLen mmd; /* info position */ 04321 OptAncInfo anc; 04322 04323 int value; /* weighted value */ 04324 UChar map[ONIG_CHAR_TABLE_SIZE]; 04325 } OptMapInfo; 04326 04327 typedef struct { 04328 MinMaxLen len; 04329 04330 OptAncInfo anc; 04331 OptExactInfo exb; /* boundary */ 04332 OptExactInfo exm; /* middle */ 04333 OptExactInfo expr; /* prec read (?=...) */ 04334 04335 OptMapInfo map; /* boundary */ 04336 } NodeOptInfo; 04337 04338 04339 static int 04340 map_position_value(OnigEncoding enc, int i) 04341 { 04342 static const short int ByteValTable[] = { 04343 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1, 04344 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 04345 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 04346 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 04347 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 04348 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5, 04349 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 04350 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1 04351 }; 04352 04353 if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) { 04354 if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1) 04355 return 20; 04356 else 04357 return (int )ByteValTable[i]; 04358 } 04359 else 04360 return 4; /* Take it easy. */ 04361 } 04362 04363 static int 04364 distance_value(MinMaxLen* mm) 04365 { 04366 /* 1000 / (min-max-dist + 1) */ 04367 static const short int dist_vals[] = { 04368 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100, 04369 91, 83, 77, 71, 67, 63, 59, 56, 53, 50, 04370 48, 45, 43, 42, 40, 38, 37, 36, 34, 33, 04371 32, 31, 30, 29, 29, 28, 27, 26, 26, 25, 04372 24, 24, 23, 23, 22, 22, 21, 21, 20, 20, 04373 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, 04374 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, 04375 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 04376 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, 04377 11, 11, 11, 11, 11, 10, 10, 10, 10, 10 04378 }; 04379 04380 OnigDistance d; 04381 04382 if (mm->max == ONIG_INFINITE_DISTANCE) return 0; 04383 04384 d = mm->max - mm->min; 04385 if (d < sizeof(dist_vals)/sizeof(dist_vals[0])) 04386 /* return dist_vals[d] * 16 / (mm->min + 12); */ 04387 return (int )dist_vals[d]; 04388 else 04389 return 1; 04390 } 04391 04392 static int 04393 comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2) 04394 { 04395 if (v2 <= 0) return -1; 04396 if (v1 <= 0) return 1; 04397 04398 v1 *= distance_value(d1); 04399 v2 *= distance_value(d2); 04400 04401 if (v2 > v1) return 1; 04402 if (v2 < v1) return -1; 04403 04404 if (d2->min < d1->min) return 1; 04405 if (d2->min > d1->min) return -1; 04406 return 0; 04407 } 04408 04409 static int 04410 is_equal_mml(MinMaxLen* a, MinMaxLen* b) 04411 { 04412 return (a->min == b->min && a->max == b->max) ? 1 : 0; 04413 } 04414 04415 04416 static void 04417 set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max) 04418 { 04419 mml->min = min; 04420 mml->max = max; 04421 } 04422 04423 static void 04424 clear_mml(MinMaxLen* mml) 04425 { 04426 mml->min = mml->max = 0; 04427 } 04428 04429 static void 04430 copy_mml(MinMaxLen* to, MinMaxLen* from) 04431 { 04432 to->min = from->min; 04433 to->max = from->max; 04434 } 04435 04436 static void 04437 add_mml(MinMaxLen* to, MinMaxLen* from) 04438 { 04439 to->min = distance_add(to->min, from->min); 04440 to->max = distance_add(to->max, from->max); 04441 } 04442 04443 #if 0 04444 static void 04445 add_len_mml(MinMaxLen* to, OnigDistance len) 04446 { 04447 to->min = distance_add(to->min, len); 04448 to->max = distance_add(to->max, len); 04449 } 04450 #endif 04451 04452 static void 04453 alt_merge_mml(MinMaxLen* to, MinMaxLen* from) 04454 { 04455 if (to->min > from->min) to->min = from->min; 04456 if (to->max < from->max) to->max = from->max; 04457 } 04458 04459 static void 04460 copy_opt_env(OptEnv* to, OptEnv* from) 04461 { 04462 *to = *from; 04463 } 04464 04465 static void 04466 clear_opt_anc_info(OptAncInfo* anc) 04467 { 04468 anc->left_anchor = 0; 04469 anc->right_anchor = 0; 04470 } 04471 04472 static void 04473 copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from) 04474 { 04475 *to = *from; 04476 } 04477 04478 static void 04479 concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right, 04480 OnigDistance left_len, OnigDistance right_len) 04481 { 04482 clear_opt_anc_info(to); 04483 04484 to->left_anchor = left->left_anchor; 04485 if (left_len == 0) { 04486 to->left_anchor |= right->left_anchor; 04487 } 04488 04489 to->right_anchor = right->right_anchor; 04490 if (right_len == 0) { 04491 to->right_anchor |= left->right_anchor; 04492 } 04493 } 04494 04495 static int 04496 is_left_anchor(int anc) 04497 { 04498 if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF || 04499 anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ || 04500 anc == ANCHOR_PREC_READ_NOT) 04501 return 0; 04502 04503 return 1; 04504 } 04505 04506 static int 04507 is_set_opt_anc_info(OptAncInfo* to, int anc) 04508 { 04509 if ((to->left_anchor & anc) != 0) return 1; 04510 04511 return ((to->right_anchor & anc) != 0 ? 1 : 0); 04512 } 04513 04514 static void 04515 add_opt_anc_info(OptAncInfo* to, int anc) 04516 { 04517 if (is_left_anchor(anc)) 04518 to->left_anchor |= anc; 04519 else 04520 to->right_anchor |= anc; 04521 } 04522 04523 static void 04524 remove_opt_anc_info(OptAncInfo* to, int anc) 04525 { 04526 if (is_left_anchor(anc)) 04527 to->left_anchor &= ~anc; 04528 else 04529 to->right_anchor &= ~anc; 04530 } 04531 04532 static void 04533 alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add) 04534 { 04535 to->left_anchor &= add->left_anchor; 04536 to->right_anchor &= add->right_anchor; 04537 } 04538 04539 static int 04540 is_full_opt_exact_info(OptExactInfo* ex) 04541 { 04542 return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0); 04543 } 04544 04545 static void 04546 clear_opt_exact_info(OptExactInfo* ex) 04547 { 04548 clear_mml(&ex->mmd); 04549 clear_opt_anc_info(&ex->anc); 04550 ex->reach_end = 0; 04551 ex->ignore_case = -1; /* unset */ 04552 ex->len = 0; 04553 ex->s[0] = '\0'; 04554 } 04555 04556 static void 04557 copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from) 04558 { 04559 *to = *from; 04560 } 04561 04562 static void 04563 concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc) 04564 { 04565 int i, j, len; 04566 UChar *p, *end; 04567 OptAncInfo tanc; 04568 04569 if (to->ignore_case < 0) 04570 to->ignore_case = add->ignore_case; 04571 else if (to->ignore_case != add->ignore_case) 04572 return ; /* avoid */ 04573 04574 p = add->s; 04575 end = p + add->len; 04576 for (i = to->len; p < end; ) { 04577 len = enclen(enc, p, end); 04578 if (i + len > OPT_EXACT_MAXLEN) break; 04579 for (j = 0; j < len && p < end; j++) 04580 to->s[i++] = *p++; 04581 } 04582 04583 to->len = i; 04584 to->reach_end = (p == end ? add->reach_end : 0); 04585 04586 concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1); 04587 if (! to->reach_end) tanc.right_anchor = 0; 04588 copy_opt_anc_info(&to->anc, &tanc); 04589 } 04590 04591 static void 04592 concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end, 04593 int raw ARG_UNUSED, OnigEncoding enc) 04594 { 04595 int i, j, len; 04596 UChar *p; 04597 04598 for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) { 04599 len = enclen(enc, p, end); 04600 if (i + len > OPT_EXACT_MAXLEN) break; 04601 for (j = 0; j < len && p < end; j++) 04602 to->s[i++] = *p++; 04603 } 04604 04605 to->len = i; 04606 } 04607 04608 static void 04609 alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env) 04610 { 04611 int i, j, len; 04612 04613 if (add->len == 0 || to->len == 0) { 04614 clear_opt_exact_info(to); 04615 return ; 04616 } 04617 04618 if (! is_equal_mml(&to->mmd, &add->mmd)) { 04619 clear_opt_exact_info(to); 04620 return ; 04621 } 04622 04623 for (i = 0; i < to->len && i < add->len; ) { 04624 if (to->s[i] != add->s[i]) break; 04625 len = enclen(env->enc, to->s + i, to->s + to->len); 04626 04627 for (j = 1; j < len; j++) { 04628 if (to->s[i+j] != add->s[i+j]) break; 04629 } 04630 if (j < len) break; 04631 i += len; 04632 } 04633 04634 if (! add->reach_end || i < add->len || i < to->len) { 04635 to->reach_end = 0; 04636 } 04637 to->len = i; 04638 if (to->ignore_case < 0) 04639 to->ignore_case = add->ignore_case; 04640 else if (add->ignore_case >= 0) 04641 to->ignore_case |= add->ignore_case; 04642 04643 alt_merge_opt_anc_info(&to->anc, &add->anc); 04644 if (! to->reach_end) to->anc.right_anchor = 0; 04645 } 04646 04647 static void 04648 select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt) 04649 { 04650 int v1, v2; 04651 04652 v1 = now->len; 04653 v2 = alt->len; 04654 04655 if (v2 == 0) { 04656 return ; 04657 } 04658 else if (v1 == 0) { 04659 copy_opt_exact_info(now, alt); 04660 return ; 04661 } 04662 else if (v1 <= 2 && v2 <= 2) { 04663 /* ByteValTable[x] is big value --> low price */ 04664 v2 = map_position_value(enc, now->s[0]); 04665 v1 = map_position_value(enc, alt->s[0]); 04666 04667 if (now->len > 1) v1 += 5; 04668 if (alt->len > 1) v2 += 5; 04669 } 04670 04671 if (now->ignore_case <= 0) v1 *= 2; 04672 if (alt->ignore_case <= 0) v2 *= 2; 04673 04674 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0) 04675 copy_opt_exact_info(now, alt); 04676 } 04677 04678 static void 04679 clear_opt_map_info(OptMapInfo* map) 04680 { 04681 static const OptMapInfo clean_info = { 04682 {0, 0}, {0, 0}, 0, 04683 { 04684 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04685 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04686 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04687 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04688 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04689 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04690 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04691 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04692 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04693 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04694 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04695 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04696 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04697 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04698 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 04699 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 04700 } 04701 }; 04702 04703 xmemcpy(map, &clean_info, sizeof(OptMapInfo)); 04704 } 04705 04706 static void 04707 copy_opt_map_info(OptMapInfo* to, OptMapInfo* from) 04708 { 04709 *to = *from; 04710 } 04711 04712 static void 04713 add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc) 04714 { 04715 if (map->map[c] == 0) { 04716 map->map[c] = 1; 04717 map->value += map_position_value(enc, c); 04718 } 04719 } 04720 04721 static int 04722 add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end, 04723 OnigEncoding enc, OnigCaseFoldType case_fold_flag) 04724 { 04725 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; 04726 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; 04727 int i, n; 04728 04729 add_char_opt_map_info(map, p[0], enc); 04730 04731 case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag); 04732 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items); 04733 if (n < 0) return n; 04734 04735 for (i = 0; i < n; i++) { 04736 ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf); 04737 add_char_opt_map_info(map, buf[0], enc); 04738 } 04739 04740 return 0; 04741 } 04742 04743 static void 04744 select_opt_map_info(OptMapInfo* now, OptMapInfo* alt) 04745 { 04746 const int z = 1<<15; /* 32768: something big value */ 04747 04748 int v1, v2; 04749 04750 if (alt->value == 0) return ; 04751 if (now->value == 0) { 04752 copy_opt_map_info(now, alt); 04753 return ; 04754 } 04755 04756 v1 = z / now->value; 04757 v2 = z / alt->value; 04758 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0) 04759 copy_opt_map_info(now, alt); 04760 } 04761 04762 static int 04763 comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m) 04764 { 04765 #define COMP_EM_BASE 20 04766 int ve, vm; 04767 04768 if (m->value <= 0) return -1; 04769 04770 ve = COMP_EM_BASE * e->len * (e->ignore_case > 0 ? 1 : 2); 04771 vm = COMP_EM_BASE * 5 * 2 / m->value; 04772 return comp_distance_value(&e->mmd, &m->mmd, ve, vm); 04773 } 04774 04775 static void 04776 alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add) 04777 { 04778 int i, val; 04779 04780 /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */ 04781 if (to->value == 0) return ; 04782 if (add->value == 0 || to->mmd.max < add->mmd.min) { 04783 clear_opt_map_info(to); 04784 return ; 04785 } 04786 04787 alt_merge_mml(&to->mmd, &add->mmd); 04788 04789 val = 0; 04790 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) { 04791 if (add->map[i]) 04792 to->map[i] = 1; 04793 04794 if (to->map[i]) 04795 val += map_position_value(enc, i); 04796 } 04797 to->value = val; 04798 04799 alt_merge_opt_anc_info(&to->anc, &add->anc); 04800 } 04801 04802 static void 04803 set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd) 04804 { 04805 copy_mml(&(opt->exb.mmd), mmd); 04806 copy_mml(&(opt->expr.mmd), mmd); 04807 copy_mml(&(opt->map.mmd), mmd); 04808 } 04809 04810 static void 04811 clear_node_opt_info(NodeOptInfo* opt) 04812 { 04813 clear_mml(&opt->len); 04814 clear_opt_anc_info(&opt->anc); 04815 clear_opt_exact_info(&opt->exb); 04816 clear_opt_exact_info(&opt->exm); 04817 clear_opt_exact_info(&opt->expr); 04818 clear_opt_map_info(&opt->map); 04819 } 04820 04821 static void 04822 copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from) 04823 { 04824 *to = *from; 04825 } 04826 04827 static void 04828 concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add) 04829 { 04830 int exb_reach, exm_reach; 04831 OptAncInfo tanc; 04832 04833 concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max); 04834 copy_opt_anc_info(&to->anc, &tanc); 04835 04836 if (add->exb.len > 0 && to->len.max == 0) { 04837 concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc, 04838 to->len.max, add->len.max); 04839 copy_opt_anc_info(&add->exb.anc, &tanc); 04840 } 04841 04842 if (add->map.value > 0 && to->len.max == 0) { 04843 if (add->map.mmd.max == 0) 04844 add->map.anc.left_anchor |= to->anc.left_anchor; 04845 } 04846 04847 exb_reach = to->exb.reach_end; 04848 exm_reach = to->exm.reach_end; 04849 04850 if (add->len.max != 0) 04851 to->exb.reach_end = to->exm.reach_end = 0; 04852 04853 if (add->exb.len > 0) { 04854 if (exb_reach) { 04855 concat_opt_exact_info(&to->exb, &add->exb, enc); 04856 clear_opt_exact_info(&add->exb); 04857 } 04858 else if (exm_reach) { 04859 concat_opt_exact_info(&to->exm, &add->exb, enc); 04860 clear_opt_exact_info(&add->exb); 04861 } 04862 } 04863 select_opt_exact_info(enc, &to->exm, &add->exb); 04864 select_opt_exact_info(enc, &to->exm, &add->exm); 04865 04866 if (to->expr.len > 0) { 04867 if (add->len.max > 0) { 04868 if (to->expr.len > (int )add->len.max) 04869 to->expr.len = (int )add->len.max; 04870 04871 if (to->expr.mmd.max == 0) 04872 select_opt_exact_info(enc, &to->exb, &to->expr); 04873 else 04874 select_opt_exact_info(enc, &to->exm, &to->expr); 04875 } 04876 } 04877 else if (add->expr.len > 0) { 04878 copy_opt_exact_info(&to->expr, &add->expr); 04879 } 04880 04881 select_opt_map_info(&to->map, &add->map); 04882 04883 add_mml(&to->len, &add->len); 04884 } 04885 04886 static void 04887 alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env) 04888 { 04889 alt_merge_opt_anc_info (&to->anc, &add->anc); 04890 alt_merge_opt_exact_info(&to->exb, &add->exb, env); 04891 alt_merge_opt_exact_info(&to->exm, &add->exm, env); 04892 alt_merge_opt_exact_info(&to->expr, &add->expr, env); 04893 alt_merge_opt_map_info(env->enc, &to->map, &add->map); 04894 04895 alt_merge_mml(&to->len, &add->len); 04896 } 04897 04898 04899 #define MAX_NODE_OPT_INFO_REF_COUNT 5 04900 04901 static int 04902 optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) 04903 { 04904 int type; 04905 int r = 0; 04906 04907 clear_node_opt_info(opt); 04908 set_bound_node_opt_info(opt, &env->mmd); 04909 04910 type = NTYPE(node); 04911 switch (type) { 04912 case NT_LIST: 04913 { 04914 OptEnv nenv; 04915 NodeOptInfo nopt; 04916 Node* nd = node; 04917 04918 copy_opt_env(&nenv, env); 04919 do { 04920 r = optimize_node_left(NCAR(nd), &nopt, &nenv); 04921 if (r == 0) { 04922 add_mml(&nenv.mmd, &nopt.len); 04923 concat_left_node_opt_info(env->enc, opt, &nopt); 04924 } 04925 } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd))); 04926 } 04927 break; 04928 04929 case NT_ALT: 04930 { 04931 NodeOptInfo nopt; 04932 Node* nd = node; 04933 04934 do { 04935 r = optimize_node_left(NCAR(nd), &nopt, env); 04936 if (r == 0) { 04937 if (nd == node) copy_node_opt_info(opt, &nopt); 04938 else alt_merge_node_opt_info(opt, &nopt, env); 04939 } 04940 } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd))); 04941 } 04942 break; 04943 04944 case NT_STR: 04945 { 04946 StrNode* sn = NSTR(node); 04947 OnigDistance slen = sn->end - sn->s; 04948 int is_raw = NSTRING_IS_RAW(node); 04949 04950 if (! NSTRING_IS_AMBIG(node)) { 04951 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, 04952 is_raw, env->enc); 04953 opt->exb.ignore_case = 0; 04954 if (slen > 0) { 04955 add_char_opt_map_info(&opt->map, *(sn->s), env->enc); 04956 } 04957 set_mml(&opt->len, slen, slen); 04958 } 04959 else { 04960 OnigDistance max; 04961 04962 if (NSTRING_IS_DONT_GET_OPT_INFO(node)) { 04963 int n = onigenc_strlen(env->enc, sn->s, sn->end); 04964 max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n; 04965 } 04966 else { 04967 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, 04968 is_raw, env->enc); 04969 opt->exb.ignore_case = 1; 04970 04971 if (slen > 0) { 04972 r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end, 04973 env->enc, env->case_fold_flag); 04974 if (r != 0) break; 04975 } 04976 04977 max = slen; 04978 } 04979 04980 set_mml(&opt->len, slen, max); 04981 } 04982 04983 if ((OnigDistance )opt->exb.len == slen) 04984 opt->exb.reach_end = 1; 04985 } 04986 break; 04987 04988 case NT_CCLASS: 04989 { 04990 int i, z; 04991 CClassNode* cc = NCCLASS(node); 04992 04993 /* no need to check ignore case. (set in setup_tree()) */ 04994 04995 if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) { 04996 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc); 04997 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc); 04998 04999 set_mml(&opt->len, min, max); 05000 } 05001 else { 05002 for (i = 0; i < SINGLE_BYTE_SIZE; i++) { 05003 z = BITSET_AT(cc->bs, i); 05004 if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) { 05005 add_char_opt_map_info(&opt->map, (UChar )i, env->enc); 05006 } 05007 } 05008 set_mml(&opt->len, 1, 1); 05009 } 05010 } 05011 break; 05012 05013 case NT_CTYPE: 05014 { 05015 int i, min, max; 05016 int maxcode; 05017 05018 max = ONIGENC_MBC_MAXLEN_DIST(env->enc); 05019 05020 if (max == 1) { 05021 min = 1; 05022 05023 maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE; 05024 switch (NCTYPE(node)->ctype) { 05025 case ONIGENC_CTYPE_WORD: 05026 if (NCTYPE(node)->not != 0) { 05027 for (i = 0; i < SINGLE_BYTE_SIZE; i++) { 05028 if (! ONIGENC_IS_CODE_WORD(env->enc, i) || i >= maxcode) { 05029 add_char_opt_map_info(&opt->map, (UChar )i, env->enc); 05030 } 05031 } 05032 } 05033 else { 05034 for (i = 0; i < maxcode; i++) { 05035 if (ONIGENC_IS_CODE_WORD(env->enc, i)) { 05036 add_char_opt_map_info(&opt->map, (UChar )i, env->enc); 05037 } 05038 } 05039 } 05040 break; 05041 } 05042 } 05043 else { 05044 min = ONIGENC_MBC_MINLEN(env->enc); 05045 } 05046 set_mml(&opt->len, min, max); 05047 } 05048 break; 05049 05050 case NT_CANY: 05051 { 05052 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc); 05053 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc); 05054 set_mml(&opt->len, min, max); 05055 } 05056 break; 05057 05058 case NT_ANCHOR: 05059 switch (NANCHOR(node)->type) { 05060 case ANCHOR_BEGIN_BUF: 05061 case ANCHOR_BEGIN_POSITION: 05062 case ANCHOR_BEGIN_LINE: 05063 case ANCHOR_END_BUF: 05064 case ANCHOR_SEMI_END_BUF: 05065 case ANCHOR_END_LINE: 05066 case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */ 05067 add_opt_anc_info(&opt->anc, NANCHOR(node)->type); 05068 break; 05069 05070 case ANCHOR_PREC_READ: 05071 { 05072 NodeOptInfo nopt; 05073 05074 r = optimize_node_left(NANCHOR(node)->target, &nopt, env); 05075 if (r == 0) { 05076 if (nopt.exb.len > 0) 05077 copy_opt_exact_info(&opt->expr, &nopt.exb); 05078 else if (nopt.exm.len > 0) 05079 copy_opt_exact_info(&opt->expr, &nopt.exm); 05080 05081 opt->expr.reach_end = 0; 05082 05083 if (nopt.map.value > 0) 05084 copy_opt_map_info(&opt->map, &nopt.map); 05085 } 05086 } 05087 break; 05088 05089 case ANCHOR_PREC_READ_NOT: 05090 case ANCHOR_LOOK_BEHIND_NOT: 05091 break; 05092 } 05093 break; 05094 05095 case NT_BREF: 05096 { 05097 int i; 05098 int* backs; 05099 OnigDistance min, max, tmin, tmax; 05100 Node** nodes = SCANENV_MEM_NODES(env->scan_env); 05101 BRefNode* br = NBREF(node); 05102 05103 if (br->state & NST_RECURSION) { 05104 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); 05105 break; 05106 } 05107 backs = BACKREFS_P(br); 05108 r = get_min_match_length(nodes[backs[0]], &min, env->scan_env); 05109 if (r != 0) break; 05110 r = get_max_match_length(nodes[backs[0]], &max, env->scan_env); 05111 if (r != 0) break; 05112 for (i = 1; i < br->back_num; i++) { 05113 r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env); 05114 if (r != 0) break; 05115 r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env); 05116 if (r != 0) break; 05117 if (min > tmin) min = tmin; 05118 if (max < tmax) max = tmax; 05119 } 05120 if (r == 0) set_mml(&opt->len, min, max); 05121 } 05122 break; 05123 05124 #ifdef USE_SUBEXP_CALL 05125 case NT_CALL: 05126 if (IS_CALL_RECURSION(NCALL(node))) 05127 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); 05128 else { 05129 OnigOptionType save = env->options; 05130 env->options = NENCLOSE(NCALL(node)->target)->option; 05131 r = optimize_node_left(NCALL(node)->target, opt, env); 05132 env->options = save; 05133 } 05134 break; 05135 #endif 05136 05137 case NT_QTFR: 05138 { 05139 int i; 05140 OnigDistance min, max; 05141 NodeOptInfo nopt; 05142 QtfrNode* qn = NQTFR(node); 05143 05144 r = optimize_node_left(qn->target, &nopt, env); 05145 if (r) break; 05146 05147 if (/*qn->lower == 0 &&*/ IS_REPEAT_INFINITE(qn->upper)) { 05148 if (env->mmd.max == 0 && 05149 NTYPE(qn->target) == NT_CANY && qn->greedy) { 05150 if (IS_MULTILINE(env->options)) 05151 /* implicit anchor: /.*a/ ==> /\A.*a/ */ 05152 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML); 05153 else 05154 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR); 05155 } 05156 } 05157 else { 05158 if (qn->lower > 0) { 05159 copy_node_opt_info(opt, &nopt); 05160 if (nopt.exb.len > 0) { 05161 if (nopt.exb.reach_end) { 05162 for (i = 2; i <= qn->lower && 05163 ! is_full_opt_exact_info(&opt->exb); i++) { 05164 concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc); 05165 } 05166 if (i < qn->lower) { 05167 opt->exb.reach_end = 0; 05168 } 05169 } 05170 } 05171 05172 if (qn->lower != qn->upper) { 05173 opt->exb.reach_end = 0; 05174 opt->exm.reach_end = 0; 05175 } 05176 if (qn->lower > 1) 05177 opt->exm.reach_end = 0; 05178 } 05179 } 05180 05181 min = distance_multiply(nopt.len.min, qn->lower); 05182 if (IS_REPEAT_INFINITE(qn->upper)) 05183 max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0); 05184 else 05185 max = distance_multiply(nopt.len.max, qn->upper); 05186 05187 set_mml(&opt->len, min, max); 05188 } 05189 break; 05190 05191 case NT_ENCLOSE: 05192 { 05193 EncloseNode* en = NENCLOSE(node); 05194 05195 switch (en->type) { 05196 case ENCLOSE_OPTION: 05197 { 05198 OnigOptionType save = env->options; 05199 05200 env->options = en->option; 05201 r = optimize_node_left(en->target, opt, env); 05202 env->options = save; 05203 } 05204 break; 05205 05206 case ENCLOSE_MEMORY: 05207 #ifdef USE_SUBEXP_CALL 05208 en->opt_count++; 05209 if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) { 05210 OnigDistance min, max; 05211 05212 min = 0; 05213 max = ONIG_INFINITE_DISTANCE; 05214 if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len; 05215 if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len; 05216 set_mml(&opt->len, min, max); 05217 } 05218 else 05219 #endif 05220 { 05221 r = optimize_node_left(en->target, opt, env); 05222 05223 if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) { 05224 if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum)) 05225 remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK); 05226 } 05227 } 05228 break; 05229 05230 case ENCLOSE_STOP_BACKTRACK: 05231 case ENCLOSE_CONDITION: 05232 r = optimize_node_left(en->target, opt, env); 05233 break; 05234 } 05235 } 05236 break; 05237 05238 default: 05239 #ifdef ONIG_DEBUG 05240 if (!onig_is_prelude()) fprintf(stderr, "optimize_node_left: undefined node type %d\n", 05241 NTYPE(node)); 05242 #endif 05243 r = ONIGERR_TYPE_BUG; 05244 break; 05245 } 05246 05247 return r; 05248 } 05249 05250 static int 05251 set_optimize_exact_info(regex_t* reg, OptExactInfo* e) 05252 { 05253 int r; 05254 int allow_reverse; 05255 05256 if (e->len == 0) return 0; 05257 05258 reg->exact = (UChar* )xmalloc(e->len); 05259 CHECK_NULL_RETURN_MEMERR(reg->exact); 05260 xmemcpy(reg->exact, e->s, e->len); 05261 reg->exact_end = reg->exact + e->len; 05262 05263 allow_reverse = 05264 ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end); 05265 05266 if (e->ignore_case > 0) { 05267 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { 05268 r = set_bm_skip(reg->exact, reg->exact_end, reg, 05269 reg->map, &(reg->int_map), 1); 05270 if (r == 0) { 05271 reg->optimize = (allow_reverse != 0 05272 ? ONIG_OPTIMIZE_EXACT_BM_IC : ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC); 05273 } 05274 else { 05275 reg->optimize = ONIG_OPTIMIZE_EXACT_IC; 05276 } 05277 } 05278 else { 05279 reg->optimize = ONIG_OPTIMIZE_EXACT_IC; 05280 } 05281 } 05282 else { 05283 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { 05284 r = set_bm_skip(reg->exact, reg->exact_end, reg, 05285 reg->map, &(reg->int_map), 0); 05286 if (r) return r; 05287 05288 reg->optimize = (allow_reverse != 0 05289 ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV); 05290 } 05291 else { 05292 reg->optimize = ONIG_OPTIMIZE_EXACT; 05293 } 05294 } 05295 05296 reg->dmin = e->mmd.min; 05297 reg->dmax = e->mmd.max; 05298 05299 if (reg->dmin != ONIG_INFINITE_DISTANCE) { 05300 reg->threshold_len = (int )(reg->dmin + (reg->exact_end - reg->exact)); 05301 } 05302 05303 return 0; 05304 } 05305 05306 static void 05307 set_optimize_map_info(regex_t* reg, OptMapInfo* m) 05308 { 05309 int i; 05310 05311 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) 05312 reg->map[i] = m->map[i]; 05313 05314 reg->optimize = ONIG_OPTIMIZE_MAP; 05315 reg->dmin = m->mmd.min; 05316 reg->dmax = m->mmd.max; 05317 05318 if (reg->dmin != ONIG_INFINITE_DISTANCE) { 05319 reg->threshold_len = (int )(reg->dmin + 1); 05320 } 05321 } 05322 05323 static void 05324 set_sub_anchor(regex_t* reg, OptAncInfo* anc) 05325 { 05326 reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE; 05327 reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE; 05328 } 05329 05330 #ifdef ONIG_DEBUG 05331 static void print_optimize_info(FILE* f, regex_t* reg); 05332 #endif 05333 05334 static int 05335 set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) 05336 { 05337 05338 int r; 05339 NodeOptInfo opt; 05340 OptEnv env; 05341 05342 env.enc = reg->enc; 05343 env.options = reg->options; 05344 env.case_fold_flag = reg->case_fold_flag; 05345 env.scan_env = scan_env; 05346 clear_mml(&env.mmd); 05347 05348 r = optimize_node_left(node, &opt, &env); 05349 if (r) return r; 05350 05351 reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF | 05352 ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML | 05353 ANCHOR_LOOK_BEHIND); 05354 05355 reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF); 05356 05357 if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) { 05358 reg->anchor_dmin = opt.len.min; 05359 reg->anchor_dmax = opt.len.max; 05360 } 05361 05362 if (opt.exb.len > 0 || opt.exm.len > 0) { 05363 select_opt_exact_info(reg->enc, &opt.exb, &opt.exm); 05364 if (opt.map.value > 0 && 05365 comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) { 05366 goto set_map; 05367 } 05368 else { 05369 r = set_optimize_exact_info(reg, &opt.exb); 05370 set_sub_anchor(reg, &opt.exb.anc); 05371 } 05372 } 05373 else if (opt.map.value > 0) { 05374 set_map: 05375 set_optimize_map_info(reg, &opt.map); 05376 set_sub_anchor(reg, &opt.map.anc); 05377 } 05378 else { 05379 reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE; 05380 if (opt.len.max == 0) 05381 reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE; 05382 } 05383 05384 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH) 05385 if (!onig_is_prelude()) print_optimize_info(stderr, reg); 05386 #endif 05387 return r; 05388 } 05389 05390 static void 05391 clear_optimize_info(regex_t* reg) 05392 { 05393 reg->optimize = ONIG_OPTIMIZE_NONE; 05394 reg->anchor = 0; 05395 reg->anchor_dmin = 0; 05396 reg->anchor_dmax = 0; 05397 reg->sub_anchor = 0; 05398 reg->exact_end = (UChar* )NULL; 05399 reg->threshold_len = 0; 05400 if (IS_NOT_NULL(reg->exact)) { 05401 xfree(reg->exact); 05402 reg->exact = (UChar* )NULL; 05403 } 05404 } 05405 05406 #ifdef ONIG_DEBUG 05407 05408 static void print_enc_string(FILE* fp, OnigEncoding enc, 05409 const UChar *s, const UChar *end) 05410 { 05411 fprintf(fp, "\nPATTERN: /"); 05412 05413 if (ONIGENC_MBC_MINLEN(enc) > 1) { 05414 const UChar *p; 05415 OnigCodePoint code; 05416 05417 p = s; 05418 while (p < end) { 05419 code = ONIGENC_MBC_TO_CODE(enc, p, end); 05420 if (code >= 0x80) { 05421 fprintf(fp, " 0x%04x ", (int )code); 05422 } 05423 else { 05424 fputc((int )code, fp); 05425 } 05426 05427 p += enclen(enc, p, end); 05428 } 05429 } 05430 else { 05431 while (s < end) { 05432 fputc((int )*s, fp); 05433 s++; 05434 } 05435 } 05436 05437 fprintf(fp, "/ (%s)\n", enc->name); 05438 } 05439 05440 static void 05441 print_distance_range(FILE* f, OnigDistance a, OnigDistance b) 05442 { 05443 if (a == ONIG_INFINITE_DISTANCE) 05444 fputs("inf", f); 05445 else 05446 fprintf(f, "(%"PRIuSIZE")", a); 05447 05448 fputs("-", f); 05449 05450 if (b == ONIG_INFINITE_DISTANCE) 05451 fputs("inf", f); 05452 else 05453 fprintf(f, "(%"PRIuSIZE")", b); 05454 } 05455 05456 static void 05457 print_anchor(FILE* f, int anchor) 05458 { 05459 int q = 0; 05460 05461 fprintf(f, "["); 05462 05463 if (anchor & ANCHOR_BEGIN_BUF) { 05464 fprintf(f, "begin-buf"); 05465 q = 1; 05466 } 05467 if (anchor & ANCHOR_BEGIN_LINE) { 05468 if (q) fprintf(f, ", "); 05469 q = 1; 05470 fprintf(f, "begin-line"); 05471 } 05472 if (anchor & ANCHOR_BEGIN_POSITION) { 05473 if (q) fprintf(f, ", "); 05474 q = 1; 05475 fprintf(f, "begin-pos"); 05476 } 05477 if (anchor & ANCHOR_END_BUF) { 05478 if (q) fprintf(f, ", "); 05479 q = 1; 05480 fprintf(f, "end-buf"); 05481 } 05482 if (anchor & ANCHOR_SEMI_END_BUF) { 05483 if (q) fprintf(f, ", "); 05484 q = 1; 05485 fprintf(f, "semi-end-buf"); 05486 } 05487 if (anchor & ANCHOR_END_LINE) { 05488 if (q) fprintf(f, ", "); 05489 q = 1; 05490 fprintf(f, "end-line"); 05491 } 05492 if (anchor & ANCHOR_ANYCHAR_STAR) { 05493 if (q) fprintf(f, ", "); 05494 q = 1; 05495 fprintf(f, "anychar-star"); 05496 } 05497 if (anchor & ANCHOR_ANYCHAR_STAR_ML) { 05498 if (q) fprintf(f, ", "); 05499 fprintf(f, "anychar-star-ml"); 05500 } 05501 05502 fprintf(f, "]"); 05503 } 05504 05505 static void 05506 print_optimize_info(FILE* f, regex_t* reg) 05507 { 05508 static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV", 05509 "EXACT_IC", "MAP", 05510 "EXACT_BM_IC", "EXACT_BM_NOT_REV_IC" }; 05511 05512 fprintf(f, "optimize: %s\n", on[reg->optimize]); 05513 fprintf(f, " anchor: "); print_anchor(f, reg->anchor); 05514 if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0) 05515 print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax); 05516 fprintf(f, "\n"); 05517 05518 if (reg->optimize) { 05519 fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor); 05520 fprintf(f, "\n"); 05521 } 05522 fprintf(f, "\n"); 05523 05524 if (reg->exact) { 05525 UChar *p; 05526 fprintf(f, "exact: ["); 05527 for (p = reg->exact; p < reg->exact_end; p++) { 05528 fputc(*p, f); 05529 } 05530 fprintf(f, "]: length: %ld\n", (reg->exact_end - reg->exact)); 05531 } 05532 else if (reg->optimize & ONIG_OPTIMIZE_MAP) { 05533 int c, i, n = 0; 05534 05535 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) 05536 if (reg->map[i]) n++; 05537 05538 fprintf(f, "map: n=%d\n", n); 05539 if (n > 0) { 05540 c = 0; 05541 fputc('[', f); 05542 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) { 05543 if (reg->map[i] != 0) { 05544 if (c > 0) fputs(", ", f); 05545 c++; 05546 if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 && 05547 ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i)) 05548 fputc(i, f); 05549 else 05550 fprintf(f, "%d", i); 05551 } 05552 } 05553 fprintf(f, "]\n"); 05554 } 05555 } 05556 } 05557 #endif /* ONIG_DEBUG */ 05558 05559 05560 extern void 05561 onig_free_body(regex_t* reg) 05562 { 05563 if (IS_NOT_NULL(reg)) { 05564 if (IS_NOT_NULL(reg->p)) xfree(reg->p); 05565 if (IS_NOT_NULL(reg->exact)) xfree(reg->exact); 05566 if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map); 05567 if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward); 05568 if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range); 05569 if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain); 05570 05571 #ifdef USE_NAMED_GROUP 05572 onig_names_free(reg); 05573 #endif 05574 } 05575 } 05576 05577 extern void 05578 onig_free(regex_t* reg) 05579 { 05580 if (IS_NOT_NULL(reg)) { 05581 onig_free_body(reg); 05582 xfree(reg); 05583 } 05584 } 05585 05586 size_t 05587 onig_memsize(const regex_t *reg) 05588 { 05589 size_t size = sizeof(regex_t); 05590 if (IS_NULL(reg)) return 0; 05591 if (IS_NOT_NULL(reg->p)) size += reg->alloc; 05592 if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact; 05593 if (IS_NOT_NULL(reg->int_map)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE; 05594 if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE; 05595 if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc * sizeof(OnigRepeatRange); 05596 if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain); 05597 05598 return size; 05599 } 05600 05601 size_t 05602 onig_region_memsize(const OnigRegion *regs) 05603 { 05604 size_t size = sizeof(*regs); 05605 if (IS_NULL(regs)) return 0; 05606 size += regs->allocated * (sizeof(*regs->beg) + sizeof(*regs->end)); 05607 return size; 05608 } 05609 05610 #define REGEX_TRANSFER(to,from) do {\ 05611 (to)->state = ONIG_STATE_MODIFY;\ 05612 onig_free_body(to);\ 05613 xmemcpy(to, from, sizeof(regex_t));\ 05614 xfree(from);\ 05615 } while (0) 05616 05617 extern void 05618 onig_transfer(regex_t* to, regex_t* from) 05619 { 05620 THREAD_ATOMIC_START; 05621 REGEX_TRANSFER(to, from); 05622 THREAD_ATOMIC_END; 05623 } 05624 05625 #define REGEX_CHAIN_HEAD(reg) do {\ 05626 while (IS_NOT_NULL((reg)->chain)) {\ 05627 (reg) = (reg)->chain;\ 05628 }\ 05629 } while (0) 05630 05631 extern void 05632 onig_chain_link_add(regex_t* to, regex_t* add) 05633 { 05634 THREAD_ATOMIC_START; 05635 REGEX_CHAIN_HEAD(to); 05636 to->chain = add; 05637 THREAD_ATOMIC_END; 05638 } 05639 05640 extern void 05641 onig_chain_reduce(regex_t* reg) 05642 { 05643 regex_t *head, *prev; 05644 05645 prev = reg; 05646 head = prev->chain; 05647 if (IS_NOT_NULL(head)) { 05648 reg->state = ONIG_STATE_MODIFY; 05649 while (IS_NOT_NULL(head->chain)) { 05650 prev = head; 05651 head = head->chain; 05652 } 05653 prev->chain = (regex_t* )NULL; 05654 REGEX_TRANSFER(reg, head); 05655 } 05656 } 05657 05658 #ifdef ONIG_DEBUG 05659 static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg)); 05660 #endif 05661 #ifdef ONIG_DEBUG_PARSE_TREE 05662 static void print_tree P_((FILE* f, Node* node)); 05663 #endif 05664 05665 extern int 05666 onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, 05667 OnigErrorInfo* einfo, const char *sourcefile, int sourceline) 05668 { 05669 #define COMPILE_INIT_SIZE 20 05670 05671 int r; 05672 OnigDistance init_size; 05673 Node* root; 05674 ScanEnv scan_env = {0}; 05675 #ifdef USE_SUBEXP_CALL 05676 UnsetAddrList uslist; 05677 #endif 05678 05679 if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL; 05680 05681 scan_env.sourcefile = sourcefile; 05682 scan_env.sourceline = sourceline; 05683 reg->state = ONIG_STATE_COMPILING; 05684 05685 #ifdef ONIG_DEBUG 05686 if (!onig_is_prelude()) print_enc_string(stderr, reg->enc, pattern, pattern_end); 05687 #endif 05688 05689 if (reg->alloc == 0) { 05690 init_size = (pattern_end - pattern) * 2; 05691 if (init_size <= 0) init_size = COMPILE_INIT_SIZE; 05692 r = BBUF_INIT(reg, init_size); 05693 if (r != 0) goto end; 05694 } 05695 else 05696 reg->used = 0; 05697 05698 reg->num_mem = 0; 05699 reg->num_repeat = 0; 05700 reg->num_null_check = 0; 05701 reg->repeat_range_alloc = 0; 05702 reg->repeat_range = (OnigRepeatRange* )NULL; 05703 #ifdef USE_COMBINATION_EXPLOSION_CHECK 05704 reg->num_comb_exp_check = 0; 05705 #endif 05706 05707 r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env); 05708 if (r != 0) goto err; 05709 05710 #ifdef ONIG_DEBUG_PARSE_TREE 05711 # if 0 05712 fprintf(stderr, "ORIGINAL PARSE TREE:\n"); 05713 if (!onig_is_prelude()) { 05714 print_tree(stderr, root); 05715 } 05716 # endif 05717 #endif 05718 05719 #ifdef USE_NAMED_GROUP 05720 /* mixed use named group and no-named group */ 05721 if (scan_env.num_named > 0 && 05722 IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && 05723 !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) { 05724 if (scan_env.num_named != scan_env.num_mem) 05725 r = disable_noname_group_capture(&root, reg, &scan_env); 05726 else 05727 r = numbered_ref_check(root); 05728 05729 if (r != 0) goto err; 05730 } 05731 #endif 05732 05733 #ifdef USE_SUBEXP_CALL 05734 if (scan_env.num_call > 0) { 05735 r = unset_addr_list_init(&uslist, scan_env.num_call); 05736 if (r != 0) goto err; 05737 scan_env.unset_addr_list = &uslist; 05738 r = setup_subexp_call(root, &scan_env); 05739 if (r != 0) goto err_unset; 05740 r = subexp_recursive_check_trav(root, &scan_env); 05741 if (r < 0) goto err_unset; 05742 r = subexp_inf_recursive_check_trav(root, &scan_env); 05743 if (r != 0) goto err_unset; 05744 05745 reg->num_call = scan_env.num_call; 05746 } 05747 else 05748 reg->num_call = 0; 05749 #endif 05750 05751 r = setup_tree(root, reg, IN_ROOT, &scan_env); 05752 if (r != 0) goto err_unset; 05753 05754 #ifdef ONIG_DEBUG_PARSE_TREE 05755 if (!onig_is_prelude()) print_tree(stderr, root); 05756 #endif 05757 05758 reg->capture_history = scan_env.capture_history; 05759 reg->bt_mem_start = scan_env.bt_mem_start; 05760 reg->bt_mem_start |= reg->capture_history; 05761 if (IS_FIND_CONDITION(reg->options)) 05762 BIT_STATUS_ON_ALL(reg->bt_mem_end); 05763 else { 05764 reg->bt_mem_end = scan_env.bt_mem_end; 05765 reg->bt_mem_end |= reg->capture_history; 05766 } 05767 05768 #ifdef USE_COMBINATION_EXPLOSION_CHECK 05769 if (scan_env.backrefed_mem == 0 05770 #ifdef USE_SUBEXP_CALL 05771 || scan_env.num_call == 0 05772 #endif 05773 ) { 05774 setup_comb_exp_check(root, 0, &scan_env); 05775 #ifdef USE_SUBEXP_CALL 05776 if (scan_env.has_recursion != 0) { 05777 scan_env.num_comb_exp_check = 0; 05778 } 05779 else 05780 #endif 05781 if (scan_env.comb_exp_max_regnum > 0) { 05782 int i; 05783 for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) { 05784 if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) { 05785 scan_env.num_comb_exp_check = 0; 05786 break; 05787 } 05788 } 05789 } 05790 } 05791 05792 reg->num_comb_exp_check = scan_env.num_comb_exp_check; 05793 #endif 05794 05795 clear_optimize_info(reg); 05796 #ifndef ONIG_DONT_OPTIMIZE 05797 r = set_optimize_info_from_tree(root, reg, &scan_env); 05798 if (r != 0) goto err_unset; 05799 #endif 05800 05801 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) { 05802 xfree(scan_env.mem_nodes_dynamic); 05803 scan_env.mem_nodes_dynamic = (Node** )NULL; 05804 } 05805 05806 r = compile_tree(root, reg); 05807 if (r == 0) { 05808 r = add_opcode(reg, OP_END); 05809 #ifdef USE_SUBEXP_CALL 05810 if (scan_env.num_call > 0) { 05811 r = unset_addr_list_fix(&uslist, reg); 05812 unset_addr_list_end(&uslist); 05813 if (r) goto err; 05814 } 05815 #endif 05816 05817 if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0)) 05818 reg->stack_pop_level = STACK_POP_LEVEL_ALL; 05819 else { 05820 if (reg->bt_mem_start != 0) 05821 reg->stack_pop_level = STACK_POP_LEVEL_MEM_START; 05822 else 05823 reg->stack_pop_level = STACK_POP_LEVEL_FREE; 05824 } 05825 } 05826 #ifdef USE_SUBEXP_CALL 05827 else if (scan_env.num_call > 0) { 05828 unset_addr_list_end(&uslist); 05829 } 05830 #endif 05831 onig_node_free(root); 05832 05833 #ifdef ONIG_DEBUG_COMPILE 05834 #ifdef USE_NAMED_GROUP 05835 if (!onig_is_prelude()) onig_print_names(stderr, reg); 05836 #endif 05837 if (!onig_is_prelude()) print_compiled_byte_code_list(stderr, reg); 05838 #endif 05839 05840 end: 05841 reg->state = ONIG_STATE_NORMAL; 05842 return r; 05843 05844 err_unset: 05845 #ifdef USE_SUBEXP_CALL 05846 if (scan_env.num_call > 0) { 05847 unset_addr_list_end(&uslist); 05848 } 05849 #endif 05850 err: 05851 if (IS_NOT_NULL(scan_env.error)) { 05852 if (IS_NOT_NULL(einfo)) { 05853 einfo->enc = scan_env.enc; 05854 einfo->par = scan_env.error; 05855 einfo->par_end = scan_env.error_end; 05856 } 05857 } 05858 05859 onig_node_free(root); 05860 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) 05861 xfree(scan_env.mem_nodes_dynamic); 05862 return r; 05863 } 05864 05865 #ifdef USE_RECOMPILE_API 05866 extern int 05867 onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, 05868 OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, 05869 OnigErrorInfo* einfo) 05870 { 05871 int r; 05872 regex_t *new_reg; 05873 05874 r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo); 05875 if (r) return r; 05876 if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) { 05877 onig_transfer(reg, new_reg); 05878 } 05879 else { 05880 onig_chain_link_add(reg, new_reg); 05881 } 05882 return 0; 05883 } 05884 #endif 05885 05886 static int onig_inited = 0; 05887 05888 extern int 05889 onig_reg_init(regex_t* reg, OnigOptionType option, 05890 OnigCaseFoldType case_fold_flag, 05891 OnigEncoding enc, const OnigSyntaxType* syntax) 05892 { 05893 if (! onig_inited) 05894 onig_init(); 05895 05896 if (IS_NULL(reg)) 05897 return ONIGERR_INVALID_ARGUMENT; 05898 05899 if (ONIGENC_IS_UNDEF(enc)) 05900 return ONIGERR_DEFAULT_ENCODING_IS_NOT_SET; 05901 05902 if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) 05903 == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) { 05904 return ONIGERR_INVALID_COMBINATION_OF_OPTIONS; 05905 } 05906 05907 (reg)->state = ONIG_STATE_MODIFY; 05908 05909 if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) { 05910 option |= syntax->options; 05911 option &= ~ONIG_OPTION_SINGLELINE; 05912 } 05913 else 05914 option |= syntax->options; 05915 05916 (reg)->enc = enc; 05917 (reg)->options = option; 05918 (reg)->syntax = syntax; 05919 (reg)->optimize = 0; 05920 (reg)->exact = (UChar* )NULL; 05921 (reg)->int_map = (int* )NULL; 05922 (reg)->int_map_backward = (int* )NULL; 05923 (reg)->chain = (regex_t* )NULL; 05924 05925 (reg)->p = (UChar* )NULL; 05926 (reg)->alloc = 0; 05927 (reg)->used = 0; 05928 (reg)->name_table = (void* )NULL; 05929 05930 (reg)->case_fold_flag = case_fold_flag; 05931 return 0; 05932 } 05933 05934 extern int 05935 onig_new_without_alloc(regex_t* reg, const UChar* pattern, 05936 const UChar* pattern_end, OnigOptionType option, OnigEncoding enc, 05937 OnigSyntaxType* syntax, OnigErrorInfo* einfo) 05938 { 05939 int r; 05940 05941 r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax); 05942 if (r) return r; 05943 05944 r = onig_compile(reg, pattern, pattern_end, einfo, NULL, 0); 05945 return r; 05946 } 05947 05948 extern int 05949 onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end, 05950 OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax, 05951 OnigErrorInfo* einfo) 05952 { 05953 int r; 05954 05955 *reg = (regex_t* )xmalloc(sizeof(regex_t)); 05956 if (IS_NULL(*reg)) return ONIGERR_MEMORY; 05957 05958 r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax); 05959 if (r) goto err; 05960 05961 r = onig_compile(*reg, pattern, pattern_end, einfo, NULL, 0); 05962 if (r) { 05963 err: 05964 onig_free(*reg); 05965 *reg = NULL; 05966 } 05967 return r; 05968 } 05969 05970 05971 extern int 05972 onig_init(void) 05973 { 05974 if (onig_inited != 0) 05975 return 0; 05976 05977 THREAD_SYSTEM_INIT; 05978 THREAD_ATOMIC_START; 05979 05980 onig_inited = 1; 05981 05982 onigenc_init(); 05983 /* onigenc_set_default_caseconv_table((UChar* )0); */ 05984 05985 #ifdef ONIG_DEBUG_STATISTICS 05986 onig_statistics_init(); 05987 #endif 05988 05989 THREAD_ATOMIC_END; 05990 return 0; 05991 } 05992 05993 05994 extern int 05995 onig_end(void) 05996 { 05997 THREAD_ATOMIC_START; 05998 05999 #ifdef ONIG_DEBUG_STATISTICS 06000 if (!onig_is_prelude()) onig_print_statistics(stderr); 06001 #endif 06002 06003 #ifdef USE_SHARED_CCLASS_TABLE 06004 onig_free_shared_cclass_table(); 06005 #endif 06006 06007 #ifdef USE_PARSE_TREE_NODE_RECYCLE 06008 onig_free_node_list(); 06009 #endif 06010 06011 onig_inited = 0; 06012 06013 THREAD_ATOMIC_END; 06014 THREAD_SYSTEM_END; 06015 return 0; 06016 } 06017 06018 extern int 06019 onig_is_in_code_range(const UChar* p, OnigCodePoint code) 06020 { 06021 OnigCodePoint n, *data; 06022 OnigCodePoint low, high, x; 06023 06024 GET_CODE_POINT(n, p); 06025 data = (OnigCodePoint* )p; 06026 data++; 06027 06028 for (low = 0, high = n; low < high; ) { 06029 x = (low + high) >> 1; 06030 if (code > data[x * 2 + 1]) 06031 low = x + 1; 06032 else 06033 high = x; 06034 } 06035 06036 return ((low < n && code >= data[low * 2]) ? 1 : 0); 06037 } 06038 06039 extern int 06040 onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc) 06041 { 06042 int found; 06043 06044 if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) { 06045 if (IS_NULL(cc->mbuf)) { 06046 found = 0; 06047 } 06048 else { 06049 found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0); 06050 } 06051 } 06052 else { 06053 found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1); 06054 } 06055 06056 if (IS_NCCLASS_NOT(cc)) 06057 return !found; 06058 else 06059 return found; 06060 } 06061 06062 extern int 06063 onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc) 06064 { 06065 int len; 06066 06067 if (ONIGENC_MBC_MINLEN(enc) > 1) { 06068 len = 2; 06069 } 06070 else { 06071 len = ONIGENC_CODE_TO_MBCLEN(enc, code); 06072 } 06073 return onig_is_code_in_cc_len(len, code, cc); 06074 } 06075 06076 06077 #ifdef ONIG_DEBUG 06078 06079 /* arguments type */ 06080 #define ARG_SPECIAL -1 06081 #define ARG_NON 0 06082 #define ARG_RELADDR 1 06083 #define ARG_ABSADDR 2 06084 #define ARG_LENGTH 3 06085 #define ARG_MEMNUM 4 06086 #define ARG_OPTION 5 06087 #define ARG_STATE_CHECK 6 06088 06089 OnigOpInfoType OnigOpInfo[] = { 06090 { OP_FINISH, "finish", ARG_NON }, 06091 { OP_END, "end", ARG_NON }, 06092 { OP_EXACT1, "exact1", ARG_SPECIAL }, 06093 { OP_EXACT2, "exact2", ARG_SPECIAL }, 06094 { OP_EXACT3, "exact3", ARG_SPECIAL }, 06095 { OP_EXACT4, "exact4", ARG_SPECIAL }, 06096 { OP_EXACT5, "exact5", ARG_SPECIAL }, 06097 { OP_EXACTN, "exactn", ARG_SPECIAL }, 06098 { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL }, 06099 { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL }, 06100 { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL }, 06101 { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL }, 06102 { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL }, 06103 { OP_EXACTMBN, "exactmbn", ARG_SPECIAL }, 06104 { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL }, 06105 { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL }, 06106 { OP_CCLASS, "cclass", ARG_SPECIAL }, 06107 { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL }, 06108 { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL }, 06109 { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL }, 06110 { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL }, 06111 { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL }, 06112 { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL }, 06113 { OP_ANYCHAR, "anychar", ARG_NON }, 06114 { OP_ANYCHAR_ML, "anychar-ml", ARG_NON }, 06115 { OP_ANYCHAR_STAR, "anychar*", ARG_NON }, 06116 { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON }, 06117 { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL }, 06118 { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL }, 06119 { OP_WORD, "word", ARG_NON }, 06120 { OP_NOT_WORD, "not-word", ARG_NON }, 06121 { OP_WORD_BOUND, "word-bound", ARG_NON }, 06122 { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON }, 06123 { OP_WORD_BEGIN, "word-begin", ARG_NON }, 06124 { OP_WORD_END, "word-end", ARG_NON }, 06125 { OP_ASCII_WORD, "ascii-word", ARG_NON }, 06126 { OP_NOT_ASCII_WORD, "not-ascii-word", ARG_NON }, 06127 { OP_ASCII_WORD_BOUND, "ascii-word-bound", ARG_NON }, 06128 { OP_NOT_ASCII_WORD_BOUND,"not-ascii-word-bound", ARG_NON }, 06129 { OP_ASCII_WORD_BEGIN, "ascii-word-begin", ARG_NON }, 06130 { OP_ASCII_WORD_END, "ascii-word-end", ARG_NON }, 06131 { OP_BEGIN_BUF, "begin-buf", ARG_NON }, 06132 { OP_END_BUF, "end-buf", ARG_NON }, 06133 { OP_BEGIN_LINE, "begin-line", ARG_NON }, 06134 { OP_END_LINE, "end-line", ARG_NON }, 06135 { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON }, 06136 { OP_BEGIN_POSITION, "begin-position", ARG_NON }, 06137 { OP_BEGIN_POS_OR_LINE, "begin-pos-or-line", ARG_NON }, 06138 { OP_BACKREF1, "backref1", ARG_NON }, 06139 { OP_BACKREF2, "backref2", ARG_NON }, 06140 { OP_BACKREFN, "backrefn", ARG_MEMNUM }, 06141 { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL }, 06142 { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL }, 06143 { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL }, 06144 { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL }, 06145 { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM }, 06146 { OP_MEMORY_START, "mem-start", ARG_MEMNUM }, 06147 { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM }, 06148 { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM }, 06149 { OP_MEMORY_END, "mem-end", ARG_MEMNUM }, 06150 { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM }, 06151 { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION }, 06152 { OP_SET_OPTION, "set-option", ARG_OPTION }, 06153 { OP_KEEP, "keep", ARG_NON }, 06154 { OP_FAIL, "fail", ARG_NON }, 06155 { OP_JUMP, "jump", ARG_RELADDR }, 06156 { OP_PUSH, "push", ARG_RELADDR }, 06157 { OP_POP, "pop", ARG_NON }, 06158 { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL }, 06159 { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL }, 06160 { OP_REPEAT, "repeat", ARG_SPECIAL }, 06161 { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL }, 06162 { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM }, 06163 { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM }, 06164 { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM }, 06165 { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM }, 06166 { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM }, 06167 { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM }, 06168 { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM }, 06169 { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM }, 06170 { OP_PUSH_POS, "push-pos", ARG_NON }, 06171 { OP_POP_POS, "pop-pos", ARG_NON }, 06172 { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR }, 06173 { OP_FAIL_POS, "fail-pos", ARG_NON }, 06174 { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON }, 06175 { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON }, 06176 { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL }, 06177 { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL }, 06178 { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON }, 06179 { OP_CALL, "call", ARG_ABSADDR }, 06180 { OP_RETURN, "return", ARG_NON }, 06181 { OP_CONDITION, "condition", ARG_SPECIAL }, 06182 { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL }, 06183 { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL }, 06184 { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK }, 06185 { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK }, 06186 { OP_STATE_CHECK_ANYCHAR_ML_STAR, 06187 "state-check-anychar-ml*", ARG_STATE_CHECK }, 06188 { -1, "", ARG_NON } 06189 }; 06190 06191 static const char* 06192 op2name(int opcode) 06193 { 06194 int i; 06195 06196 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { 06197 if (opcode == OnigOpInfo[i].opcode) 06198 return OnigOpInfo[i].name; 06199 } 06200 return ""; 06201 } 06202 06203 static int 06204 op2arg_type(int opcode) 06205 { 06206 int i; 06207 06208 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { 06209 if (opcode == OnigOpInfo[i].opcode) 06210 return OnigOpInfo[i].arg_type; 06211 } 06212 return ARG_SPECIAL; 06213 } 06214 06215 static void 06216 Indent(FILE* f, int indent) 06217 { 06218 int i; 06219 for (i = 0; i < indent; i++) putc(' ', f); 06220 } 06221 06222 static void 06223 p_string(FILE* f, int len, UChar* s) 06224 { 06225 fputs(":", f); 06226 while (len-- > 0) { fputc(*s++, f); } 06227 } 06228 06229 static void 06230 p_len_string(FILE* f, LengthType len, int mb_len, UChar* s) 06231 { 06232 int x = len * mb_len; 06233 06234 fprintf(f, ":%d:", len); 06235 while (x-- > 0) { fputc(*s++, f); } 06236 } 06237 06238 extern void 06239 onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp, 06240 OnigEncoding enc) 06241 { 06242 int i, n, arg_type; 06243 RelAddrType addr; 06244 LengthType len; 06245 MemNumType mem; 06246 StateCheckNumType scn; 06247 OnigCodePoint code; 06248 UChar *q; 06249 06250 fprintf(f, "[%s", op2name(*bp)); 06251 arg_type = op2arg_type(*bp); 06252 if (arg_type != ARG_SPECIAL) { 06253 bp++; 06254 switch (arg_type) { 06255 case ARG_NON: 06256 break; 06257 case ARG_RELADDR: 06258 GET_RELADDR_INC(addr, bp); 06259 fprintf(f, ":(%d)", addr); 06260 break; 06261 case ARG_ABSADDR: 06262 GET_ABSADDR_INC(addr, bp); 06263 fprintf(f, ":(%d)", addr); 06264 break; 06265 case ARG_LENGTH: 06266 GET_LENGTH_INC(len, bp); 06267 fprintf(f, ":%d", len); 06268 break; 06269 case ARG_MEMNUM: 06270 mem = *((MemNumType* )bp); 06271 bp += SIZE_MEMNUM; 06272 fprintf(f, ":%d", mem); 06273 break; 06274 case ARG_OPTION: 06275 { 06276 OnigOptionType option = *((OnigOptionType* )bp); 06277 bp += SIZE_OPTION; 06278 fprintf(f, ":%d", option); 06279 } 06280 break; 06281 06282 case ARG_STATE_CHECK: 06283 scn = *((StateCheckNumType* )bp); 06284 bp += SIZE_STATE_CHECK_NUM; 06285 fprintf(f, ":%d", scn); 06286 break; 06287 } 06288 } 06289 else { 06290 switch (*bp++) { 06291 case OP_EXACT1: 06292 case OP_ANYCHAR_STAR_PEEK_NEXT: 06293 case OP_ANYCHAR_ML_STAR_PEEK_NEXT: 06294 p_string(f, 1, bp++); break; 06295 case OP_EXACT2: 06296 p_string(f, 2, bp); bp += 2; break; 06297 case OP_EXACT3: 06298 p_string(f, 3, bp); bp += 3; break; 06299 case OP_EXACT4: 06300 p_string(f, 4, bp); bp += 4; break; 06301 case OP_EXACT5: 06302 p_string(f, 5, bp); bp += 5; break; 06303 case OP_EXACTN: 06304 GET_LENGTH_INC(len, bp); 06305 p_len_string(f, len, 1, bp); 06306 bp += len; 06307 break; 06308 06309 case OP_EXACTMB2N1: 06310 p_string(f, 2, bp); bp += 2; break; 06311 case OP_EXACTMB2N2: 06312 p_string(f, 4, bp); bp += 4; break; 06313 case OP_EXACTMB2N3: 06314 p_string(f, 6, bp); bp += 6; break; 06315 case OP_EXACTMB2N: 06316 GET_LENGTH_INC(len, bp); 06317 p_len_string(f, len, 2, bp); 06318 bp += len * 2; 06319 break; 06320 case OP_EXACTMB3N: 06321 GET_LENGTH_INC(len, bp); 06322 p_len_string(f, len, 3, bp); 06323 bp += len * 3; 06324 break; 06325 case OP_EXACTMBN: 06326 { 06327 int mb_len; 06328 06329 GET_LENGTH_INC(mb_len, bp); 06330 GET_LENGTH_INC(len, bp); 06331 fprintf(f, ":%d:%d:", mb_len, len); 06332 n = len * mb_len; 06333 while (n-- > 0) { fputc(*bp++, f); } 06334 } 06335 break; 06336 06337 case OP_EXACT1_IC: 06338 len = enclen(enc, bp, bpend); 06339 p_string(f, len, bp); 06340 bp += len; 06341 break; 06342 case OP_EXACTN_IC: 06343 GET_LENGTH_INC(len, bp); 06344 p_len_string(f, len, 1, bp); 06345 bp += len; 06346 break; 06347 06348 case OP_CCLASS: 06349 n = bitset_on_num((BitSetRef )bp); 06350 bp += SIZE_BITSET; 06351 fprintf(f, ":%d", n); 06352 break; 06353 06354 case OP_CCLASS_NOT: 06355 n = bitset_on_num((BitSetRef )bp); 06356 bp += SIZE_BITSET; 06357 fprintf(f, ":%d", n); 06358 break; 06359 06360 case OP_CCLASS_MB: 06361 case OP_CCLASS_MB_NOT: 06362 GET_LENGTH_INC(len, bp); 06363 q = bp; 06364 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS 06365 ALIGNMENT_RIGHT(q); 06366 #endif 06367 GET_CODE_POINT(code, q); 06368 bp += len; 06369 fprintf(f, ":%d:%d", (int )code, len); 06370 break; 06371 06372 case OP_CCLASS_MIX: 06373 case OP_CCLASS_MIX_NOT: 06374 n = bitset_on_num((BitSetRef )bp); 06375 bp += SIZE_BITSET; 06376 GET_LENGTH_INC(len, bp); 06377 q = bp; 06378 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS 06379 ALIGNMENT_RIGHT(q); 06380 #endif 06381 GET_CODE_POINT(code, q); 06382 bp += len; 06383 fprintf(f, ":%d:%d:%d", n, (int )code, len); 06384 break; 06385 06386 case OP_CCLASS_NODE: 06387 { 06388 CClassNode *cc; 06389 06390 GET_POINTER_INC(cc, bp); 06391 n = bitset_on_num(cc->bs); 06392 fprintf(f, ":%"PRIuPTR":%d", (uintptr_t)cc, n); 06393 } 06394 break; 06395 06396 case OP_BACKREFN_IC: 06397 mem = *((MemNumType* )bp); 06398 bp += SIZE_MEMNUM; 06399 fprintf(f, ":%d", mem); 06400 break; 06401 06402 case OP_BACKREF_MULTI_IC: 06403 case OP_BACKREF_MULTI: 06404 fputs(" ", f); 06405 GET_LENGTH_INC(len, bp); 06406 for (i = 0; i < len; i++) { 06407 GET_MEMNUM_INC(mem, bp); 06408 if (i > 0) fputs(", ", f); 06409 fprintf(f, "%d", mem); 06410 } 06411 break; 06412 06413 case OP_BACKREF_WITH_LEVEL: 06414 { 06415 OnigOptionType option; 06416 LengthType level; 06417 06418 GET_OPTION_INC(option, bp); 06419 fprintf(f, ":%d", option); 06420 GET_LENGTH_INC(level, bp); 06421 fprintf(f, ":%d", level); 06422 06423 fputs(" ", f); 06424 GET_LENGTH_INC(len, bp); 06425 for (i = 0; i < len; i++) { 06426 GET_MEMNUM_INC(mem, bp); 06427 if (i > 0) fputs(", ", f); 06428 fprintf(f, "%d", mem); 06429 } 06430 } 06431 break; 06432 06433 case OP_REPEAT: 06434 case OP_REPEAT_NG: 06435 { 06436 mem = *((MemNumType* )bp); 06437 bp += SIZE_MEMNUM; 06438 addr = *((RelAddrType* )bp); 06439 bp += SIZE_RELADDR; 06440 fprintf(f, ":%d:%d", mem, addr); 06441 } 06442 break; 06443 06444 case OP_PUSH_OR_JUMP_EXACT1: 06445 case OP_PUSH_IF_PEEK_NEXT: 06446 addr = *((RelAddrType* )bp); 06447 bp += SIZE_RELADDR; 06448 fprintf(f, ":(%d)", addr); 06449 p_string(f, 1, bp); 06450 bp += 1; 06451 break; 06452 06453 case OP_LOOK_BEHIND: 06454 GET_LENGTH_INC(len, bp); 06455 fprintf(f, ":%d", len); 06456 break; 06457 06458 case OP_PUSH_LOOK_BEHIND_NOT: 06459 GET_RELADDR_INC(addr, bp); 06460 GET_LENGTH_INC(len, bp); 06461 fprintf(f, ":%d:(%d)", len, addr); 06462 break; 06463 06464 case OP_STATE_CHECK_PUSH: 06465 case OP_STATE_CHECK_PUSH_OR_JUMP: 06466 scn = *((StateCheckNumType* )bp); 06467 bp += SIZE_STATE_CHECK_NUM; 06468 addr = *((RelAddrType* )bp); 06469 bp += SIZE_RELADDR; 06470 fprintf(f, ":%d:(%d)", scn, addr); 06471 break; 06472 06473 case OP_CONDITION: 06474 GET_MEMNUM_INC(mem, bp); 06475 GET_RELADDR_INC(addr, bp); 06476 fprintf(f, ":%d:(%d)", mem, addr); 06477 break; 06478 06479 default: 06480 fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n", 06481 *--bp); 06482 } 06483 } 06484 fputs("]", f); 06485 if (nextp) *nextp = bp; 06486 } 06487 06488 static void 06489 print_compiled_byte_code_list(FILE* f, regex_t* reg) 06490 { 06491 int ncode; 06492 UChar* bp = reg->p; 06493 UChar* end = reg->p + reg->used; 06494 06495 fprintf(f, "code length: %d", reg->used); 06496 06497 ncode = -1; 06498 while (bp < end) { 06499 ncode++; 06500 if (ncode % 5 == 0) 06501 fprintf(f, "\n%ld:", bp - reg->p); 06502 else 06503 fprintf(f, " %ld:", bp - reg->p); 06504 onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc); 06505 } 06506 06507 fprintf(f, "\n"); 06508 } 06509 06510 static void 06511 print_indent_tree(FILE* f, Node* node, int indent) 06512 { 06513 int i, type, container_p = 0; 06514 int add = 3; 06515 UChar* p; 06516 06517 Indent(f, indent); 06518 if (IS_NULL(node)) { 06519 fprintf(f, "ERROR: null node!!!\n"); 06520 exit (0); 06521 } 06522 06523 type = NTYPE(node); 06524 switch (type) { 06525 case NT_LIST: 06526 case NT_ALT: 06527 if (NTYPE(node) == NT_LIST) 06528 fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t)node); 06529 else 06530 fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t)node); 06531 06532 print_indent_tree(f, NCAR(node), indent + add); 06533 while (IS_NOT_NULL(node = NCDR(node))) { 06534 if (NTYPE(node) != type) { 06535 fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node)); 06536 exit(0); 06537 } 06538 print_indent_tree(f, NCAR(node), indent + add); 06539 } 06540 break; 06541 06542 case NT_STR: 06543 fprintf(f, "<string%s:%"PRIxPTR">", 06544 (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t)node); 06545 for (p = NSTR(node)->s; p < NSTR(node)->end; p++) { 06546 if (*p >= 0x20 && *p < 0x7f) 06547 fputc(*p, f); 06548 else { 06549 fprintf(f, " 0x%02x", *p); 06550 } 06551 } 06552 break; 06553 06554 case NT_CCLASS: 06555 fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t)node); 06556 if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f); 06557 if (NCCLASS(node)->mbuf) { 06558 BBuf* bbuf = NCCLASS(node)->mbuf; 06559 for (i = 0; i < (int )bbuf->used; i++) { 06560 if (i > 0) fprintf(f, ","); 06561 fprintf(f, "%0x", bbuf->p[i]); 06562 } 06563 } 06564 break; 06565 06566 case NT_CTYPE: 06567 fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t)node); 06568 switch (NCTYPE(node)->ctype) { 06569 case ONIGENC_CTYPE_WORD: 06570 if (NCTYPE(node)->not != 0) 06571 fputs("not word", f); 06572 else 06573 fputs("word", f); 06574 break; 06575 06576 default: 06577 fprintf(f, "ERROR: undefined ctype.\n"); 06578 exit(0); 06579 } 06580 break; 06581 06582 case NT_CANY: 06583 fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t)node); 06584 break; 06585 06586 case NT_ANCHOR: 06587 fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t)node); 06588 switch (NANCHOR(node)->type) { 06589 case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break; 06590 case ANCHOR_END_BUF: fputs("end buf", f); break; 06591 case ANCHOR_BEGIN_LINE: fputs("begin line", f); break; 06592 case ANCHOR_END_LINE: fputs("end line", f); break; 06593 case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break; 06594 case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break; 06595 case ANCHOR_ANYCHAR_STAR: fputs("begin position/line", f); break; 06596 06597 case ANCHOR_WORD_BOUND: fputs("word bound", f); break; 06598 case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break; 06599 #ifdef USE_WORD_BEGIN_END 06600 case ANCHOR_WORD_BEGIN: fputs("word begin", f); break; 06601 case ANCHOR_WORD_END: fputs("word end", f); break; 06602 #endif 06603 case ANCHOR_PREC_READ: fputs("prec read", f); container_p = TRUE; break; 06604 case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); container_p = TRUE; break; 06605 case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); container_p = TRUE; break; 06606 case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); container_p = TRUE; break; 06607 case ANCHOR_KEEP: fputs("keep",f); break; 06608 06609 default: 06610 fprintf(f, "ERROR: undefined anchor type.\n"); 06611 break; 06612 } 06613 break; 06614 06615 case NT_BREF: 06616 { 06617 int* p; 06618 BRefNode* br = NBREF(node); 06619 p = BACKREFS_P(br); 06620 fprintf(f, "<backref:%"PRIxPTR">", (intptr_t)node); 06621 for (i = 0; i < br->back_num; i++) { 06622 if (i > 0) fputs(", ", f); 06623 fprintf(f, "%d", p[i]); 06624 } 06625 } 06626 break; 06627 06628 #ifdef USE_SUBEXP_CALL 06629 case NT_CALL: 06630 { 06631 CallNode* cn = NCALL(node); 06632 fprintf(f, "<call:%"PRIxPTR">", (intptr_t)node); 06633 p_string(f, cn->name_end - cn->name, cn->name); 06634 } 06635 break; 06636 #endif 06637 06638 case NT_QTFR: 06639 fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t)node, 06640 NQTFR(node)->lower, NQTFR(node)->upper, 06641 (NQTFR(node)->greedy ? "" : "?")); 06642 print_indent_tree(f, NQTFR(node)->target, indent + add); 06643 break; 06644 06645 case NT_ENCLOSE: 06646 fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t)node); 06647 switch (NENCLOSE(node)->type) { 06648 case ENCLOSE_OPTION: 06649 fprintf(f, "option:%d\n", NENCLOSE(node)->option); 06650 break; 06651 case ENCLOSE_MEMORY: 06652 fprintf(f, "memory:%d", NENCLOSE(node)->regnum); 06653 break; 06654 case ENCLOSE_STOP_BACKTRACK: 06655 fprintf(f, "stop-bt"); 06656 break; 06657 case ENCLOSE_CONDITION: 06658 fprintf(f, "condition:%d", NENCLOSE(node)->regnum); 06659 break; 06660 06661 default: 06662 break; 06663 } 06664 fprintf(f, "\n"); 06665 print_indent_tree(f, NENCLOSE(node)->target, indent + add); 06666 break; 06667 06668 default: 06669 fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node)); 06670 break; 06671 } 06672 06673 if (type != NT_LIST && type != NT_ALT && type != NT_QTFR && 06674 type != NT_ENCLOSE) 06675 fprintf(f, "\n"); 06676 06677 if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add); 06678 06679 fflush(f); 06680 } 06681 #endif /* ONIG_DEBUG */ 06682 06683 #ifdef ONIG_DEBUG_PARSE_TREE 06684 static void 06685 print_tree(FILE* f, Node* node) 06686 { 06687 print_indent_tree(f, node, 0); 06688 } 06689 #endif 06690