Ruby  2.0.0p247(2013-06-27revision41674)
regcomp.c
Go to the documentation of this file.
00001 /**********************************************************************
00002   regcomp.c -  Onigmo (Oniguruma-mod) (regular expression library)
00003 **********************************************************************/
00004 /*-
00005  * Copyright (c) 2002-2008  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
00006  * Copyright (c) 2011-2013  K.Takata  <kentkt AT csc DOT jp>
00007  * All rights reserved.
00008  *
00009  * Redistribution and use in source and binary forms, with or without
00010  * modification, are permitted provided that the following conditions
00011  * are met:
00012  * 1. Redistributions of source code must retain the above copyright
00013  *    notice, this list of conditions and the following disclaimer.
00014  * 2. Redistributions in binary form must reproduce the above copyright
00015  *    notice, this list of conditions and the following disclaimer in the
00016  *    documentation and/or other materials provided with the distribution.
00017  *
00018  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
00019  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00020  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00021  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
00022  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00023  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00024  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00025  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00026  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00027  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00028  * SUCH DAMAGE.
00029  */
00030 
00031 #include "regparse.h"
00032 
00033 OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
00034 
00035 extern OnigCaseFoldType
00036 onig_get_default_case_fold_flag(void)
00037 {
00038   return OnigDefaultCaseFoldFlag;
00039 }
00040 
00041 extern int
00042 onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
00043 {
00044   OnigDefaultCaseFoldFlag = case_fold_flag;
00045   return 0;
00046 }
00047 
00048 
00049 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
00050 static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
00051 #endif
00052 
00053 #if 0
00054 static UChar*
00055 str_dup(UChar* s, UChar* end)
00056 {
00057   ptrdiff_t len = end - s;
00058 
00059   if (len > 0) {
00060     UChar* r = (UChar* )xmalloc(len + 1);
00061     CHECK_NULL_RETURN(r);
00062     xmemcpy(r, s, len);
00063     r[len] = (UChar )0;
00064     return r;
00065   }
00066   else return NULL;
00067 }
00068 #endif
00069 
00070 static void
00071 swap_node(Node* a, Node* b)
00072 {
00073   Node c;
00074   c = *a; *a = *b; *b = c;
00075 
00076   if (NTYPE(a) == NT_STR) {
00077     StrNode* sn = NSTR(a);
00078     if (sn->capa == 0) {
00079       size_t len = sn->end - sn->s;
00080       sn->s   = sn->buf;
00081       sn->end = sn->s + len;
00082     }
00083   }
00084 
00085   if (NTYPE(b) == NT_STR) {
00086     StrNode* sn = NSTR(b);
00087     if (sn->capa == 0) {
00088       size_t len = sn->end - sn->s;
00089       sn->s   = sn->buf;
00090       sn->end = sn->s + len;
00091     }
00092   }
00093 }
00094 
00095 static OnigDistance
00096 distance_add(OnigDistance d1, OnigDistance d2)
00097 {
00098   if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
00099     return ONIG_INFINITE_DISTANCE;
00100   else {
00101     if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
00102     else return ONIG_INFINITE_DISTANCE;
00103   }
00104 }
00105 
00106 static OnigDistance
00107 distance_multiply(OnigDistance d, int m)
00108 {
00109   if (m == 0) return 0;
00110 
00111   if (d < ONIG_INFINITE_DISTANCE / m)
00112     return d * m;
00113   else
00114     return ONIG_INFINITE_DISTANCE;
00115 }
00116 
00117 static int
00118 bitset_is_empty(BitSetRef bs)
00119 {
00120   int i;
00121   for (i = 0; i < BITSET_SIZE; i++) {
00122     if (bs[i] != 0) return 0;
00123   }
00124   return 1;
00125 }
00126 
00127 #ifdef ONIG_DEBUG
00128 static int
00129 onig_is_prelude(void)
00130 {
00131     return !rb_const_defined(rb_cThread, rb_intern_const("MUTEX_FOR_THREAD_EXCLUSIVE"));
00132 }
00133 
00134 static int
00135 bitset_on_num(BitSetRef bs)
00136 {
00137   int i, n;
00138 
00139   n = 0;
00140   for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
00141     if (BITSET_AT(bs, i)) n++;
00142   }
00143   return n;
00144 }
00145 #endif
00146 
00147 extern int
00148 onig_bbuf_init(BBuf* buf, OnigDistance size)
00149 {
00150   if (size <= 0) {
00151     size   = 0;
00152     buf->p = NULL;
00153   }
00154   else {
00155     buf->p = (UChar* )xmalloc(size);
00156     if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
00157   }
00158 
00159   buf->alloc = (unsigned int )size;
00160   buf->used  = 0;
00161   return 0;
00162 }
00163 
00164 
00165 #ifdef USE_SUBEXP_CALL
00166 
00167 static int
00168 unset_addr_list_init(UnsetAddrList* uslist, int size)
00169 {
00170   UnsetAddr* p;
00171 
00172   p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
00173   CHECK_NULL_RETURN_MEMERR(p);
00174   uslist->num   = 0;
00175   uslist->alloc = size;
00176   uslist->us    = p;
00177   return 0;
00178 }
00179 
00180 static void
00181 unset_addr_list_end(UnsetAddrList* uslist)
00182 {
00183   if (IS_NOT_NULL(uslist->us))
00184     xfree(uslist->us);
00185 }
00186 
00187 static int
00188 unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
00189 {
00190   UnsetAddr* p;
00191   int size;
00192 
00193   if (uslist->num >= uslist->alloc) {
00194     size = uslist->alloc * 2;
00195     p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
00196     CHECK_NULL_RETURN_MEMERR(p);
00197     uslist->alloc = size;
00198     uslist->us    = p;
00199   }
00200 
00201   uslist->us[uslist->num].offset = offset;
00202   uslist->us[uslist->num].target = node;
00203   uslist->num++;
00204   return 0;
00205 }
00206 #endif /* USE_SUBEXP_CALL */
00207 
00208 
00209 static int
00210 add_opcode(regex_t* reg, int opcode)
00211 {
00212   BBUF_ADD1(reg, opcode);
00213   return 0;
00214 }
00215 
00216 #ifdef USE_COMBINATION_EXPLOSION_CHECK
00217 static int
00218 add_state_check_num(regex_t* reg, int num)
00219 {
00220   StateCheckNumType n = (StateCheckNumType )num;
00221 
00222   BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
00223   return 0;
00224 }
00225 #endif
00226 
00227 static int
00228 add_rel_addr(regex_t* reg, int addr)
00229 {
00230   RelAddrType ra = (RelAddrType )addr;
00231 
00232   BBUF_ADD(reg, &ra, SIZE_RELADDR);
00233   return 0;
00234 }
00235 
00236 static int
00237 add_abs_addr(regex_t* reg, int addr)
00238 {
00239   AbsAddrType ra = (AbsAddrType )addr;
00240 
00241   BBUF_ADD(reg, &ra, SIZE_ABSADDR);
00242   return 0;
00243 }
00244 
00245 static int
00246 add_length(regex_t* reg, OnigDistance len)
00247 {
00248   LengthType l = (LengthType )len;
00249 
00250   BBUF_ADD(reg, &l, SIZE_LENGTH);
00251   return 0;
00252 }
00253 
00254 static int
00255 add_mem_num(regex_t* reg, int num)
00256 {
00257   MemNumType n = (MemNumType )num;
00258 
00259   BBUF_ADD(reg, &n, SIZE_MEMNUM);
00260   return 0;
00261 }
00262 
00263 static int
00264 add_pointer(regex_t* reg, void* addr)
00265 {
00266   PointerType ptr = (PointerType )addr;
00267 
00268   BBUF_ADD(reg, &ptr, SIZE_POINTER);
00269   return 0;
00270 }
00271 
00272 static int
00273 add_option(regex_t* reg, OnigOptionType option)
00274 {
00275   BBUF_ADD(reg, &option, SIZE_OPTION);
00276   return 0;
00277 }
00278 
00279 static int
00280 add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
00281 {
00282   int r;
00283 
00284   r = add_opcode(reg, opcode);
00285   if (r) return r;
00286   r = add_rel_addr(reg, addr);
00287   return r;
00288 }
00289 
00290 static int
00291 add_bytes(regex_t* reg, UChar* bytes, OnigDistance len)
00292 {
00293   BBUF_ADD(reg, bytes, len);
00294   return 0;
00295 }
00296 
00297 static int
00298 add_bitset(regex_t* reg, BitSetRef bs)
00299 {
00300   BBUF_ADD(reg, bs, SIZE_BITSET);
00301   return 0;
00302 }
00303 
00304 static int
00305 add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
00306 {
00307   int r;
00308 
00309   r = add_opcode(reg, opcode);
00310   if (r) return r;
00311   r = add_option(reg, option);
00312   return r;
00313 }
00314 
00315 static int compile_length_tree(Node* node, regex_t* reg);
00316 static int compile_tree(Node* node, regex_t* reg);
00317 
00318 
00319 #define IS_NEED_STR_LEN_OP_EXACT(op) \
00320    ((op) == OP_EXACTN    || (op) == OP_EXACTMB2N ||\
00321     (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN  || (op) == OP_EXACTN_IC)
00322 
00323 static int
00324 select_str_opcode(int mb_len, OnigDistance str_len, int ignore_case)
00325 {
00326   int op;
00327 
00328   if (ignore_case) {
00329     switch (str_len) {
00330     case 1:  op = OP_EXACT1_IC; break;
00331     default: op = OP_EXACTN_IC; break;
00332     }
00333   }
00334   else {
00335     switch (mb_len) {
00336     case 1:
00337       switch (str_len) {
00338       case 1:  op = OP_EXACT1; break;
00339       case 2:  op = OP_EXACT2; break;
00340       case 3:  op = OP_EXACT3; break;
00341       case 4:  op = OP_EXACT4; break;
00342       case 5:  op = OP_EXACT5; break;
00343       default: op = OP_EXACTN; break;
00344       }
00345       break;
00346 
00347     case 2:
00348       switch (str_len) {
00349       case 1:  op = OP_EXACTMB2N1; break;
00350       case 2:  op = OP_EXACTMB2N2; break;
00351       case 3:  op = OP_EXACTMB2N3; break;
00352       default: op = OP_EXACTMB2N;  break;
00353       }
00354       break;
00355 
00356     case 3:
00357       op = OP_EXACTMB3N;
00358       break;
00359 
00360     default:
00361       op = OP_EXACTMBN;
00362       break;
00363     }
00364   }
00365   return op;
00366 }
00367 
00368 static int
00369 compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
00370 {
00371   int r;
00372   int saved_num_null_check = reg->num_null_check;
00373 
00374   if (empty_info != 0) {
00375     r = add_opcode(reg, OP_NULL_CHECK_START);
00376     if (r) return r;
00377     r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
00378     if (r) return r;
00379     reg->num_null_check++;
00380   }
00381 
00382   r = compile_tree(node, reg);
00383   if (r) return r;
00384 
00385   if (empty_info != 0) {
00386     if (empty_info == NQ_TARGET_IS_EMPTY)
00387       r = add_opcode(reg, OP_NULL_CHECK_END);
00388     else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
00389       r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
00390     else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
00391       r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
00392 
00393     if (r) return r;
00394     r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
00395   }
00396   return r;
00397 }
00398 
00399 #ifdef USE_SUBEXP_CALL
00400 static int
00401 compile_call(CallNode* node, regex_t* reg)
00402 {
00403   int r;
00404 
00405   r = add_opcode(reg, OP_CALL);
00406   if (r) return r;
00407   r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
00408                           node->target);
00409   if (r) return r;
00410   r = add_abs_addr(reg, 0 /*dummy addr.*/);
00411   return r;
00412 }
00413 #endif
00414 
00415 static int
00416 compile_tree_n_times(Node* node, int n, regex_t* reg)
00417 {
00418   int i, r;
00419 
00420   for (i = 0; i < n; i++) {
00421     r = compile_tree(node, reg);
00422     if (r) return r;
00423   }
00424   return 0;
00425 }
00426 
00427 static int
00428 add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance str_len,
00429                           regex_t* reg ARG_UNUSED, int ignore_case)
00430 {
00431   int len;
00432   int op = select_str_opcode(mb_len, str_len, ignore_case);
00433 
00434   len = SIZE_OPCODE;
00435 
00436   if (op == OP_EXACTMBN)  len += SIZE_LENGTH;
00437   if (IS_NEED_STR_LEN_OP_EXACT(op))
00438     len += SIZE_LENGTH;
00439 
00440   len += mb_len * (int )str_len;
00441   return len;
00442 }
00443 
00444 static int
00445 add_compile_string(UChar* s, int mb_len, OnigDistance str_len,
00446                    regex_t* reg, int ignore_case)
00447 {
00448   int op = select_str_opcode(mb_len, str_len, ignore_case);
00449   add_opcode(reg, op);
00450 
00451   if (op == OP_EXACTMBN)
00452     add_length(reg, mb_len);
00453 
00454   if (IS_NEED_STR_LEN_OP_EXACT(op)) {
00455     if (op == OP_EXACTN_IC)
00456       add_length(reg, mb_len * str_len);
00457     else
00458       add_length(reg, str_len);
00459   }
00460 
00461   add_bytes(reg, s, mb_len * str_len);
00462   return 0;
00463 }
00464 
00465 
00466 static int
00467 compile_length_string_node(Node* node, regex_t* reg)
00468 {
00469   int rlen, r, len, prev_len, slen, ambig;
00470   OnigEncoding enc = reg->enc;
00471   UChar *p, *prev;
00472   StrNode* sn;
00473 
00474   sn = NSTR(node);
00475   if (sn->end <= sn->s)
00476     return 0;
00477 
00478   ambig = NSTRING_IS_AMBIG(node);
00479 
00480   p = prev = sn->s;
00481   prev_len = enclen(enc, p, sn->end);
00482   p += prev_len;
00483   slen = 1;
00484   rlen = 0;
00485 
00486   for (; p < sn->end; ) {
00487     len = enclen(enc, p, sn->end);
00488     if (len == prev_len) {
00489       slen++;
00490     }
00491     else {
00492       r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
00493       rlen += r;
00494       prev = p;
00495       slen = 1;
00496       prev_len = len;
00497     }
00498     p += len;
00499   }
00500   r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
00501   rlen += r;
00502   return rlen;
00503 }
00504 
00505 static int
00506 compile_length_string_raw_node(StrNode* sn, regex_t* reg)
00507 {
00508   if (sn->end <= sn->s)
00509     return 0;
00510 
00511   return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
00512 }
00513 
00514 static int
00515 compile_string_node(Node* node, regex_t* reg)
00516 {
00517   int r, len, prev_len, slen, ambig;
00518   OnigEncoding enc = reg->enc;
00519   UChar *p, *prev, *end;
00520   StrNode* sn;
00521 
00522   sn = NSTR(node);
00523   if (sn->end <= sn->s)
00524     return 0;
00525 
00526   end = sn->end;
00527   ambig = NSTRING_IS_AMBIG(node);
00528 
00529   p = prev = sn->s;
00530   prev_len = enclen(enc, p, end);
00531   p += prev_len;
00532   slen = 1;
00533 
00534   for (; p < end; ) {
00535     len = enclen(enc, p, end);
00536     if (len == prev_len) {
00537       slen++;
00538     }
00539     else {
00540       r = add_compile_string(prev, prev_len, slen, reg, ambig);
00541       if (r) return r;
00542 
00543       prev  = p;
00544       slen  = 1;
00545       prev_len = len;
00546     }
00547 
00548     p += len;
00549   }
00550   return add_compile_string(prev, prev_len, slen, reg, ambig);
00551 }
00552 
00553 static int
00554 compile_string_raw_node(StrNode* sn, regex_t* reg)
00555 {
00556   if (sn->end <= sn->s)
00557     return 0;
00558 
00559   return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
00560 }
00561 
00562 static int
00563 add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
00564 {
00565 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
00566   add_length(reg, mbuf->used);
00567   return add_bytes(reg, mbuf->p, mbuf->used);
00568 #else
00569   int r, pad_size;
00570   UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
00571 
00572   GET_ALIGNMENT_PAD_SIZE(p, pad_size);
00573   add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
00574   if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
00575 
00576   r = add_bytes(reg, mbuf->p, mbuf->used);
00577 
00578   /* padding for return value from compile_length_cclass_node() to be fix. */
00579   pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
00580   if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
00581   return r;
00582 #endif
00583 }
00584 
00585 static int
00586 compile_length_cclass_node(CClassNode* cc, regex_t* reg)
00587 {
00588   int len;
00589 
00590   if (IS_NCCLASS_SHARE(cc)) {
00591     len = SIZE_OPCODE + SIZE_POINTER;
00592     return len;
00593   }
00594 
00595   if (IS_NULL(cc->mbuf)) {
00596     len = SIZE_OPCODE + SIZE_BITSET;
00597   }
00598   else {
00599     if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
00600       len = SIZE_OPCODE;
00601     }
00602     else {
00603       len = SIZE_OPCODE + SIZE_BITSET;
00604     }
00605 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
00606     len += SIZE_LENGTH + cc->mbuf->used;
00607 #else
00608     len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
00609 #endif
00610   }
00611 
00612   return len;
00613 }
00614 
00615 static int
00616 compile_cclass_node(CClassNode* cc, regex_t* reg)
00617 {
00618   int r;
00619 
00620   if (IS_NCCLASS_SHARE(cc)) {
00621     add_opcode(reg, OP_CCLASS_NODE);
00622     r = add_pointer(reg, cc);
00623     return r;
00624   }
00625 
00626   if (IS_NULL(cc->mbuf)) {
00627     if (IS_NCCLASS_NOT(cc))
00628       add_opcode(reg, OP_CCLASS_NOT);
00629     else
00630       add_opcode(reg, OP_CCLASS);
00631 
00632     r = add_bitset(reg, cc->bs);
00633   }
00634   else {
00635     if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
00636       if (IS_NCCLASS_NOT(cc))
00637         add_opcode(reg, OP_CCLASS_MB_NOT);
00638       else
00639         add_opcode(reg, OP_CCLASS_MB);
00640 
00641       r = add_multi_byte_cclass(cc->mbuf, reg);
00642     }
00643     else {
00644       if (IS_NCCLASS_NOT(cc))
00645         add_opcode(reg, OP_CCLASS_MIX_NOT);
00646       else
00647         add_opcode(reg, OP_CCLASS_MIX);
00648 
00649       r = add_bitset(reg, cc->bs);
00650       if (r) return r;
00651       r = add_multi_byte_cclass(cc->mbuf, reg);
00652     }
00653   }
00654 
00655   return r;
00656 }
00657 
00658 static int
00659 entry_repeat_range(regex_t* reg, int id, int lower, int upper)
00660 {
00661 #define REPEAT_RANGE_ALLOC  4
00662 
00663   OnigRepeatRange* p;
00664 
00665   if (reg->repeat_range_alloc == 0) {
00666     p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
00667     CHECK_NULL_RETURN_MEMERR(p);
00668     reg->repeat_range = p;
00669     reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
00670   }
00671   else if (reg->repeat_range_alloc <= id) {
00672     int n;
00673     n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
00674     p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
00675                                     sizeof(OnigRepeatRange) * n);
00676     CHECK_NULL_RETURN_MEMERR(p);
00677     reg->repeat_range = p;
00678     reg->repeat_range_alloc = n;
00679   }
00680   else {
00681     p = reg->repeat_range;
00682   }
00683 
00684   p[id].lower = lower;
00685   p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
00686   return 0;
00687 }
00688 
00689 static int
00690 compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
00691                           regex_t* reg)
00692 {
00693   int r;
00694   int num_repeat = reg->num_repeat;
00695 
00696   r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
00697   if (r) return r;
00698   r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
00699   reg->num_repeat++;
00700   if (r) return r;
00701   r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
00702   if (r) return r;
00703 
00704   r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
00705   if (r) return r;
00706 
00707   r = compile_tree_empty_check(qn->target, reg, empty_info);
00708   if (r) return r;
00709 
00710   if (
00711 #ifdef USE_SUBEXP_CALL
00712       reg->num_call > 0 ||
00713 #endif
00714       IS_QUANTIFIER_IN_REPEAT(qn)) {
00715     r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
00716   }
00717   else {
00718     r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
00719   }
00720   if (r) return r;
00721   r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
00722   return r;
00723 }
00724 
00725 static int
00726 is_anychar_star_quantifier(QtfrNode* qn)
00727 {
00728   if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
00729       NTYPE(qn->target) == NT_CANY)
00730     return 1;
00731   else
00732     return 0;
00733 }
00734 
00735 #define QUANTIFIER_EXPAND_LIMIT_SIZE   50
00736 #define CKN_ON   (ckn > 0)
00737 
00738 #ifdef USE_COMBINATION_EXPLOSION_CHECK
00739 
00740 static int
00741 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
00742 {
00743   int len, mod_tlen, cklen;
00744   int ckn;
00745   int infinite = IS_REPEAT_INFINITE(qn->upper);
00746   int empty_info = qn->target_empty_info;
00747   int tlen = compile_length_tree(qn->target, reg);
00748 
00749   if (tlen < 0) return tlen;
00750 
00751   ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
00752 
00753   cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
00754 
00755   /* anychar repeat */
00756   if (NTYPE(qn->target) == NT_CANY) {
00757     if (qn->greedy && infinite) {
00758       if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
00759         return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
00760       else
00761         return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
00762     }
00763   }
00764 
00765   if (empty_info != 0)
00766     mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
00767   else
00768     mod_tlen = tlen;
00769 
00770   if (infinite && qn->lower <= 1) {
00771     if (qn->greedy) {
00772       if (qn->lower == 1)
00773         len = SIZE_OP_JUMP;
00774       else
00775         len = 0;
00776 
00777       len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
00778     }
00779     else {
00780       if (qn->lower == 0)
00781         len = SIZE_OP_JUMP;
00782       else
00783         len = 0;
00784 
00785       len += mod_tlen + SIZE_OP_PUSH + cklen;
00786     }
00787   }
00788   else if (qn->upper == 0) {
00789     if (qn->is_refered != 0) /* /(?<n>..){0}/ */
00790       len = SIZE_OP_JUMP + tlen;
00791     else
00792       len = 0;
00793   }
00794   else if (qn->upper == 1 && qn->greedy) {
00795     if (qn->lower == 0) {
00796       if (CKN_ON) {
00797         len = SIZE_OP_STATE_CHECK_PUSH + tlen;
00798       }
00799       else {
00800         len = SIZE_OP_PUSH + tlen;
00801       }
00802     }
00803     else {
00804       len = tlen;
00805     }
00806   }
00807   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
00808     len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
00809   }
00810   else {
00811     len = SIZE_OP_REPEAT_INC
00812         + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
00813     if (CKN_ON)
00814       len += SIZE_OP_STATE_CHECK;
00815   }
00816 
00817   return len;
00818 }
00819 
00820 static int
00821 compile_quantifier_node(QtfrNode* qn, regex_t* reg)
00822 {
00823   int r, mod_tlen;
00824   int ckn;
00825   int infinite = IS_REPEAT_INFINITE(qn->upper);
00826   int empty_info = qn->target_empty_info;
00827   int tlen = compile_length_tree(qn->target, reg);
00828 
00829   if (tlen < 0) return tlen;
00830 
00831   ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
00832 
00833   if (is_anychar_star_quantifier(qn)) {
00834     r = compile_tree_n_times(qn->target, qn->lower, reg);
00835     if (r) return r;
00836     if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
00837       if (IS_MULTILINE(reg->options))
00838         r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
00839       else
00840         r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
00841       if (r) return r;
00842       if (CKN_ON) {
00843         r = add_state_check_num(reg, ckn);
00844         if (r) return r;
00845       }
00846 
00847       return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
00848     }
00849     else {
00850       if (IS_MULTILINE(reg->options)) {
00851         r = add_opcode(reg, (CKN_ON ?
00852                                OP_STATE_CHECK_ANYCHAR_ML_STAR
00853                              : OP_ANYCHAR_ML_STAR));
00854       }
00855       else {
00856         r = add_opcode(reg, (CKN_ON ?
00857                                OP_STATE_CHECK_ANYCHAR_STAR
00858                              : OP_ANYCHAR_STAR));
00859       }
00860       if (r) return r;
00861       if (CKN_ON)
00862         r = add_state_check_num(reg, ckn);
00863 
00864       return r;
00865     }
00866   }
00867 
00868   if (empty_info != 0)
00869     mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
00870   else
00871     mod_tlen = tlen;
00872 
00873   if (infinite && qn->lower <= 1) {
00874     if (qn->greedy) {
00875       if (qn->lower == 1) {
00876         r = add_opcode_rel_addr(reg, OP_JUMP,
00877                         (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
00878         if (r) return r;
00879       }
00880 
00881       if (CKN_ON) {
00882         r = add_opcode(reg, OP_STATE_CHECK_PUSH);
00883         if (r) return r;
00884         r = add_state_check_num(reg, ckn);
00885         if (r) return r;
00886         r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
00887       }
00888       else {
00889         r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
00890       }
00891       if (r) return r;
00892       r = compile_tree_empty_check(qn->target, reg, empty_info);
00893       if (r) return r;
00894       r = add_opcode_rel_addr(reg, OP_JUMP,
00895               -(mod_tlen + (int )SIZE_OP_JUMP
00896                 + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
00897     }
00898     else {
00899       if (qn->lower == 0) {
00900         r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
00901         if (r) return r;
00902       }
00903       r = compile_tree_empty_check(qn->target, reg, empty_info);
00904       if (r) return r;
00905       if (CKN_ON) {
00906         r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
00907         if (r) return r;
00908         r = add_state_check_num(reg, ckn);
00909         if (r) return r;
00910         r = add_rel_addr(reg,
00911                  -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
00912       }
00913       else
00914         r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
00915     }
00916   }
00917   else if (qn->upper == 0) {
00918     if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
00919       r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
00920       if (r) return r;
00921       r = compile_tree(qn->target, reg);
00922     }
00923     else
00924       r = 0;
00925   }
00926   else if (qn->upper == 1 && qn->greedy) {
00927     if (qn->lower == 0) {
00928       if (CKN_ON) {
00929         r = add_opcode(reg, OP_STATE_CHECK_PUSH);
00930         if (r) return r;
00931         r = add_state_check_num(reg, ckn);
00932         if (r) return r;
00933         r = add_rel_addr(reg, tlen);
00934       }
00935       else {
00936         r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
00937       }
00938       if (r) return r;
00939     }
00940 
00941     r = compile_tree(qn->target, reg);
00942   }
00943   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
00944     if (CKN_ON) {
00945       r = add_opcode(reg, OP_STATE_CHECK_PUSH);
00946       if (r) return r;
00947       r = add_state_check_num(reg, ckn);
00948       if (r) return r;
00949       r = add_rel_addr(reg, SIZE_OP_JUMP);
00950     }
00951     else {
00952       r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
00953     }
00954 
00955     if (r) return r;
00956     r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
00957     if (r) return r;
00958     r = compile_tree(qn->target, reg);
00959   }
00960   else {
00961     r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
00962     if (CKN_ON) {
00963       if (r) return r;
00964       r = add_opcode(reg, OP_STATE_CHECK);
00965       if (r) return r;
00966       r = add_state_check_num(reg, ckn);
00967     }
00968   }
00969   return r;
00970 }
00971 
00972 #else /* USE_COMBINATION_EXPLOSION_CHECK */
00973 
00974 static int
00975 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
00976 {
00977   int len, mod_tlen;
00978   int infinite = IS_REPEAT_INFINITE(qn->upper);
00979   int empty_info = qn->target_empty_info;
00980   int tlen = compile_length_tree(qn->target, reg);
00981 
00982   if (tlen < 0) return tlen;
00983 
00984   /* anychar repeat */
00985   if (NTYPE(qn->target) == NT_CANY) {
00986     if (qn->greedy && infinite) {
00987       if (IS_NOT_NULL(qn->next_head_exact))
00988         return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
00989       else
00990         return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
00991     }
00992   }
00993 
00994   if (empty_info != 0)
00995     mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
00996   else
00997     mod_tlen = tlen;
00998 
00999   if (infinite &&
01000       (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
01001     if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
01002       len = SIZE_OP_JUMP;
01003     }
01004     else {
01005       len = tlen * qn->lower;
01006     }
01007 
01008     if (qn->greedy) {
01009       if (IS_NOT_NULL(qn->head_exact))
01010         len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
01011       else if (IS_NOT_NULL(qn->next_head_exact))
01012         len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
01013       else
01014         len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
01015     }
01016     else
01017       len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
01018   }
01019   else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
01020     len = SIZE_OP_JUMP + tlen;
01021   }
01022   else if (!infinite && qn->greedy &&
01023            (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
01024                                       <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
01025     len = tlen * qn->lower;
01026     len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
01027   }
01028   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
01029     len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
01030   }
01031   else {
01032     len = SIZE_OP_REPEAT_INC
01033         + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
01034   }
01035 
01036   return len;
01037 }
01038 
01039 static int
01040 compile_quantifier_node(QtfrNode* qn, regex_t* reg)
01041 {
01042   int i, r, mod_tlen;
01043   int infinite = IS_REPEAT_INFINITE(qn->upper);
01044   int empty_info = qn->target_empty_info;
01045   int tlen = compile_length_tree(qn->target, reg);
01046 
01047   if (tlen < 0) return tlen;
01048 
01049   if (is_anychar_star_quantifier(qn)) {
01050     r = compile_tree_n_times(qn->target, qn->lower, reg);
01051     if (r) return r;
01052     if (IS_NOT_NULL(qn->next_head_exact)) {
01053       if (IS_MULTILINE(reg->options))
01054         r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
01055       else
01056         r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
01057       if (r) return r;
01058       return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
01059     }
01060     else {
01061       if (IS_MULTILINE(reg->options))
01062         return add_opcode(reg, OP_ANYCHAR_ML_STAR);
01063       else
01064         return add_opcode(reg, OP_ANYCHAR_STAR);
01065     }
01066   }
01067 
01068   if (empty_info != 0)
01069     mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
01070   else
01071     mod_tlen = tlen;
01072 
01073   if (infinite &&
01074       (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
01075     if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
01076       if (qn->greedy) {
01077         if (IS_NOT_NULL(qn->head_exact))
01078           r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
01079         else if (IS_NOT_NULL(qn->next_head_exact))
01080           r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
01081         else
01082           r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
01083       }
01084       else {
01085         r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
01086       }
01087       if (r) return r;
01088     }
01089     else {
01090       r = compile_tree_n_times(qn->target, qn->lower, reg);
01091       if (r) return r;
01092     }
01093 
01094     if (qn->greedy) {
01095       if (IS_NOT_NULL(qn->head_exact)) {
01096         r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
01097                              mod_tlen + SIZE_OP_JUMP);
01098         if (r) return r;
01099         add_bytes(reg, NSTR(qn->head_exact)->s, 1);
01100         r = compile_tree_empty_check(qn->target, reg, empty_info);
01101         if (r) return r;
01102         r = add_opcode_rel_addr(reg, OP_JUMP,
01103         -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
01104       }
01105       else if (IS_NOT_NULL(qn->next_head_exact)) {
01106         r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
01107                                 mod_tlen + SIZE_OP_JUMP);
01108         if (r) return r;
01109         add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
01110         r = compile_tree_empty_check(qn->target, reg, empty_info);
01111         if (r) return r;
01112         r = add_opcode_rel_addr(reg, OP_JUMP,
01113           -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
01114       }
01115       else {
01116         r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
01117         if (r) return r;
01118         r = compile_tree_empty_check(qn->target, reg, empty_info);
01119         if (r) return r;
01120         r = add_opcode_rel_addr(reg, OP_JUMP,
01121                      -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
01122       }
01123     }
01124     else {
01125       r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
01126       if (r) return r;
01127       r = compile_tree_empty_check(qn->target, reg, empty_info);
01128       if (r) return r;
01129       r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
01130     }
01131   }
01132   else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
01133     r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
01134     if (r) return r;
01135     r = compile_tree(qn->target, reg);
01136   }
01137   else if (!infinite && qn->greedy &&
01138            (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
01139                                   <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
01140     int n = qn->upper - qn->lower;
01141 
01142     r = compile_tree_n_times(qn->target, qn->lower, reg);
01143     if (r) return r;
01144 
01145     for (i = 0; i < n; i++) {
01146       r = add_opcode_rel_addr(reg, OP_PUSH,
01147                            (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
01148       if (r) return r;
01149       r = compile_tree(qn->target, reg);
01150       if (r) return r;
01151     }
01152   }
01153   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
01154     r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
01155     if (r) return r;
01156     r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
01157     if (r) return r;
01158     r = compile_tree(qn->target, reg);
01159   }
01160   else {
01161     r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
01162   }
01163   return r;
01164 }
01165 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
01166 
01167 static int
01168 compile_length_option_node(EncloseNode* node, regex_t* reg)
01169 {
01170   int tlen;
01171   OnigOptionType prev = reg->options;
01172 
01173   reg->options = node->option;
01174   tlen = compile_length_tree(node->target, reg);
01175   reg->options = prev;
01176 
01177   if (tlen < 0) return tlen;
01178 
01179   if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
01180     return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
01181            + tlen + SIZE_OP_SET_OPTION;
01182   }
01183   else
01184     return tlen;
01185 }
01186 
01187 static int
01188 compile_option_node(EncloseNode* node, regex_t* reg)
01189 {
01190   int r;
01191   OnigOptionType prev = reg->options;
01192 
01193   if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
01194     r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
01195     if (r) return r;
01196     r = add_opcode_option(reg, OP_SET_OPTION, prev);
01197     if (r) return r;
01198     r = add_opcode(reg, OP_FAIL);
01199     if (r) return r;
01200   }
01201 
01202   reg->options = node->option;
01203   r = compile_tree(node->target, reg);
01204   reg->options = prev;
01205 
01206   if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
01207     if (r) return r;
01208     r = add_opcode_option(reg, OP_SET_OPTION, prev);
01209   }
01210   return r;
01211 }
01212 
01213 static int
01214 compile_length_enclose_node(EncloseNode* node, regex_t* reg)
01215 {
01216   int len;
01217   int tlen;
01218 
01219   if (node->type == ENCLOSE_OPTION)
01220     return compile_length_option_node(node, reg);
01221 
01222   if (node->target) {
01223     tlen = compile_length_tree(node->target, reg);
01224     if (tlen < 0) return tlen;
01225   }
01226   else
01227     tlen = 0;
01228 
01229   switch (node->type) {
01230   case ENCLOSE_MEMORY:
01231 #ifdef USE_SUBEXP_CALL
01232     if (IS_ENCLOSE_CALLED(node)) {
01233       len = SIZE_OP_MEMORY_START_PUSH + tlen
01234           + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
01235       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
01236         len += (IS_ENCLOSE_RECURSION(node)
01237                 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
01238       else
01239         len += (IS_ENCLOSE_RECURSION(node)
01240                 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
01241     }
01242     else
01243 #endif
01244     {
01245       if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
01246         len = SIZE_OP_MEMORY_START_PUSH;
01247       else
01248         len = SIZE_OP_MEMORY_START;
01249 
01250       len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
01251                      ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
01252     }
01253     break;
01254 
01255   case ENCLOSE_STOP_BACKTRACK:
01256     if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
01257       QtfrNode* qn = NQTFR(node->target);
01258       tlen = compile_length_tree(qn->target, reg);
01259       if (tlen < 0) return tlen;
01260 
01261       len = tlen * qn->lower
01262           + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
01263     }
01264     else {
01265       len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
01266     }
01267     break;
01268 
01269   case ENCLOSE_CONDITION:
01270     len = SIZE_OP_CONDITION;
01271     if (NTYPE(node->target) == NT_ALT) {
01272       Node* x = node->target;
01273 
01274       tlen = compile_length_tree(NCAR(x), reg); /* yes-node */
01275       if (tlen < 0) return tlen;
01276       len += tlen + SIZE_OP_JUMP;
01277       if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
01278       x = NCDR(x);
01279       tlen = compile_length_tree(NCAR(x), reg); /* no-node */
01280       if (tlen < 0) return tlen;
01281       len += tlen;
01282       if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
01283     }
01284     else {
01285       return ONIGERR_PARSER_BUG;
01286     }
01287     break;
01288 
01289   default:
01290     return ONIGERR_TYPE_BUG;
01291     break;
01292   }
01293 
01294   return len;
01295 }
01296 
01297 static int get_char_length_tree(Node* node, regex_t* reg, int* len);
01298 
01299 static int
01300 compile_enclose_node(EncloseNode* node, regex_t* reg)
01301 {
01302   int r, len;
01303 
01304   if (node->type == ENCLOSE_OPTION)
01305     return compile_option_node(node, reg);
01306 
01307   switch (node->type) {
01308   case ENCLOSE_MEMORY:
01309 #ifdef USE_SUBEXP_CALL
01310     if (IS_ENCLOSE_CALLED(node)) {
01311       r = add_opcode(reg, OP_CALL);
01312       if (r) return r;
01313       node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
01314       node->state |= NST_ADDR_FIXED;
01315       r = add_abs_addr(reg, (int )node->call_addr);
01316       if (r) return r;
01317       len = compile_length_tree(node->target, reg);
01318       len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
01319       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
01320         len += (IS_ENCLOSE_RECURSION(node)
01321                 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
01322       else
01323         len += (IS_ENCLOSE_RECURSION(node)
01324                 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
01325 
01326       r = add_opcode_rel_addr(reg, OP_JUMP, len);
01327       if (r) return r;
01328     }
01329 #endif
01330     if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
01331       r = add_opcode(reg, OP_MEMORY_START_PUSH);
01332     else
01333       r = add_opcode(reg, OP_MEMORY_START);
01334     if (r) return r;
01335     r = add_mem_num(reg, node->regnum);
01336     if (r) return r;
01337     r = compile_tree(node->target, reg);
01338     if (r) return r;
01339 #ifdef USE_SUBEXP_CALL
01340     if (IS_ENCLOSE_CALLED(node)) {
01341       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
01342         r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
01343                              ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
01344       else
01345         r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
01346                              ? OP_MEMORY_END_REC : OP_MEMORY_END));
01347 
01348       if (r) return r;
01349       r = add_mem_num(reg, node->regnum);
01350       if (r) return r;
01351       r = add_opcode(reg, OP_RETURN);
01352     }
01353     else
01354 #endif
01355     {
01356       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
01357         r = add_opcode(reg, OP_MEMORY_END_PUSH);
01358       else
01359         r = add_opcode(reg, OP_MEMORY_END);
01360       if (r) return r;
01361       r = add_mem_num(reg, node->regnum);
01362     }
01363     break;
01364 
01365   case ENCLOSE_STOP_BACKTRACK:
01366     if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
01367       QtfrNode* qn = NQTFR(node->target);
01368       r = compile_tree_n_times(qn->target, qn->lower, reg);
01369       if (r) return r;
01370 
01371       len = compile_length_tree(qn->target, reg);
01372       if (len < 0) return len;
01373 
01374       r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
01375       if (r) return r;
01376       r = compile_tree(qn->target, reg);
01377       if (r) return r;
01378       r = add_opcode(reg, OP_POP);
01379       if (r) return r;
01380       r = add_opcode_rel_addr(reg, OP_JUMP,
01381          -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
01382     }
01383     else {
01384       r = add_opcode(reg, OP_PUSH_STOP_BT);
01385       if (r) return r;
01386       r = compile_tree(node->target, reg);
01387       if (r) return r;
01388       r = add_opcode(reg, OP_POP_STOP_BT);
01389     }
01390     break;
01391 
01392   case ENCLOSE_CONDITION:
01393     r = add_opcode(reg, OP_CONDITION);
01394     if (r) return r;
01395     r = add_mem_num(reg, node->regnum);
01396     if (r) return r;
01397 
01398     if (NTYPE(node->target) == NT_ALT) {
01399       Node* x = node->target;
01400       int len2;
01401 
01402       len = compile_length_tree(NCAR(x), reg);  /* yes-node */
01403       if (len < 0) return len;
01404       if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
01405       x = NCDR(x);
01406       len2 = compile_length_tree(NCAR(x), reg); /* no-node */
01407       if (len2 < 0) return len2;
01408       if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
01409 
01410       x = node->target;
01411       r = add_rel_addr(reg, len + SIZE_OP_JUMP);
01412       if (r) return r;
01413       r = compile_tree(NCAR(x), reg);   /* yes-node */
01414       if (r) return r;
01415       r = add_opcode_rel_addr(reg, OP_JUMP, len2);
01416       if (r) return r;
01417       x = NCDR(x);
01418       r = compile_tree(NCAR(x), reg);   /* no-node */
01419     }
01420     else {
01421       return ONIGERR_PARSER_BUG;
01422     }
01423     break;
01424 
01425   default:
01426     return ONIGERR_TYPE_BUG;
01427     break;
01428   }
01429 
01430   return r;
01431 }
01432 
01433 static int
01434 compile_length_anchor_node(AnchorNode* node, regex_t* reg)
01435 {
01436   int len;
01437   int tlen = 0;
01438 
01439   if (node->target) {
01440     tlen = compile_length_tree(node->target, reg);
01441     if (tlen < 0) return tlen;
01442   }
01443 
01444   switch (node->type) {
01445   case ANCHOR_PREC_READ:
01446     len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
01447     break;
01448   case ANCHOR_PREC_READ_NOT:
01449     len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
01450     break;
01451   case ANCHOR_LOOK_BEHIND:
01452     len = SIZE_OP_LOOK_BEHIND + tlen;
01453     break;
01454   case ANCHOR_LOOK_BEHIND_NOT:
01455     len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
01456     break;
01457 
01458   default:
01459     len = SIZE_OPCODE;
01460     break;
01461   }
01462 
01463   return len;
01464 }
01465 
01466 static int
01467 compile_anchor_node(AnchorNode* node, regex_t* reg)
01468 {
01469   int r, len;
01470 
01471   switch (node->type) {
01472   case ANCHOR_BEGIN_BUF:      r = add_opcode(reg, OP_BEGIN_BUF);      break;
01473   case ANCHOR_END_BUF:        r = add_opcode(reg, OP_END_BUF);        break;
01474   case ANCHOR_BEGIN_LINE:     r = add_opcode(reg, OP_BEGIN_LINE);     break;
01475   case ANCHOR_END_LINE:       r = add_opcode(reg, OP_END_LINE);       break;
01476   case ANCHOR_SEMI_END_BUF:   r = add_opcode(reg, OP_SEMI_END_BUF);   break;
01477   case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
01478 
01479   /* used for implicit anchor optimization: /.*a/ ==> /(?:^|\G).*a/ */
01480   case ANCHOR_ANYCHAR_STAR:   r = add_opcode(reg, OP_BEGIN_POS_OR_LINE); break;
01481 
01482   case ANCHOR_WORD_BOUND:
01483     if (node->ascii_range)    r = add_opcode(reg, OP_ASCII_WORD_BOUND);
01484     else                      r = add_opcode(reg, OP_WORD_BOUND);
01485     break;
01486   case ANCHOR_NOT_WORD_BOUND:
01487     if (node->ascii_range)    r = add_opcode(reg, OP_NOT_ASCII_WORD_BOUND);
01488     else                      r = add_opcode(reg, OP_NOT_WORD_BOUND);
01489     break;
01490 #ifdef USE_WORD_BEGIN_END
01491   case ANCHOR_WORD_BEGIN:
01492     if (node->ascii_range)    r = add_opcode(reg, OP_ASCII_WORD_BEGIN);
01493     else                      r = add_opcode(reg, OP_WORD_BEGIN);
01494     break;
01495   case ANCHOR_WORD_END:
01496     if (node->ascii_range)    r = add_opcode(reg, OP_ASCII_WORD_END);
01497     else                      r = add_opcode(reg, OP_WORD_END);
01498     break;
01499 #endif
01500   case ANCHOR_KEEP:           r = add_opcode(reg, OP_KEEP);           break;
01501 
01502   case ANCHOR_PREC_READ:
01503     r = add_opcode(reg, OP_PUSH_POS);
01504     if (r) return r;
01505     r = compile_tree(node->target, reg);
01506     if (r) return r;
01507     r = add_opcode(reg, OP_POP_POS);
01508     break;
01509 
01510   case ANCHOR_PREC_READ_NOT:
01511     len = compile_length_tree(node->target, reg);
01512     if (len < 0) return len;
01513     r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
01514     if (r) return r;
01515     r = compile_tree(node->target, reg);
01516     if (r) return r;
01517     r = add_opcode(reg, OP_FAIL_POS);
01518     break;
01519 
01520   case ANCHOR_LOOK_BEHIND:
01521     {
01522       int n;
01523       r = add_opcode(reg, OP_LOOK_BEHIND);
01524       if (r) return r;
01525       if (node->char_len < 0) {
01526         r = get_char_length_tree(node->target, reg, &n);
01527         if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
01528       }
01529       else
01530         n = node->char_len;
01531       r = add_length(reg, n);
01532       if (r) return r;
01533       r = compile_tree(node->target, reg);
01534     }
01535     break;
01536 
01537   case ANCHOR_LOOK_BEHIND_NOT:
01538     {
01539       int n;
01540       len = compile_length_tree(node->target, reg);
01541       r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
01542                            len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
01543       if (r) return r;
01544       if (node->char_len < 0) {
01545         r = get_char_length_tree(node->target, reg, &n);
01546         if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
01547       }
01548       else
01549         n = node->char_len;
01550       r = add_length(reg, n);
01551       if (r) return r;
01552       r = compile_tree(node->target, reg);
01553       if (r) return r;
01554       r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
01555     }
01556     break;
01557 
01558   default:
01559     return ONIGERR_TYPE_BUG;
01560     break;
01561   }
01562 
01563   return r;
01564 }
01565 
01566 static int
01567 compile_length_tree(Node* node, regex_t* reg)
01568 {
01569   int len, type, r;
01570 
01571   type = NTYPE(node);
01572   switch (type) {
01573   case NT_LIST:
01574     len = 0;
01575     do {
01576       r = compile_length_tree(NCAR(node), reg);
01577       if (r < 0) return r;
01578       len += r;
01579     } while (IS_NOT_NULL(node = NCDR(node)));
01580     r = len;
01581     break;
01582 
01583   case NT_ALT:
01584     {
01585       int n;
01586 
01587       n = r = 0;
01588       do {
01589         r += compile_length_tree(NCAR(node), reg);
01590         n++;
01591       } while (IS_NOT_NULL(node = NCDR(node)));
01592       r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
01593     }
01594     break;
01595 
01596   case NT_STR:
01597     if (NSTRING_IS_RAW(node))
01598       r = compile_length_string_raw_node(NSTR(node), reg);
01599     else
01600       r = compile_length_string_node(node, reg);
01601     break;
01602 
01603   case NT_CCLASS:
01604     r = compile_length_cclass_node(NCCLASS(node), reg);
01605     break;
01606 
01607   case NT_CTYPE:
01608   case NT_CANY:
01609     r = SIZE_OPCODE;
01610     break;
01611 
01612   case NT_BREF:
01613     {
01614       BRefNode* br = NBREF(node);
01615 
01616 #ifdef USE_BACKREF_WITH_LEVEL
01617       if (IS_BACKREF_NEST_LEVEL(br)) {
01618         r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
01619             SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
01620       }
01621       else
01622 #endif
01623       if (br->back_num == 1) {
01624         r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
01625              ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
01626       }
01627       else {
01628         r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
01629       }
01630     }
01631     break;
01632 
01633 #ifdef USE_SUBEXP_CALL
01634   case NT_CALL:
01635     r = SIZE_OP_CALL;
01636     break;
01637 #endif
01638 
01639   case NT_QTFR:
01640     r = compile_length_quantifier_node(NQTFR(node), reg);
01641     break;
01642 
01643   case NT_ENCLOSE:
01644     r = compile_length_enclose_node(NENCLOSE(node), reg);
01645     break;
01646 
01647   case NT_ANCHOR:
01648     r = compile_length_anchor_node(NANCHOR(node), reg);
01649     break;
01650 
01651   default:
01652     return ONIGERR_TYPE_BUG;
01653     break;
01654   }
01655 
01656   return r;
01657 }
01658 
01659 static int
01660 compile_tree(Node* node, regex_t* reg)
01661 {
01662   int n, type, len, pos, r = 0;
01663 
01664   type = NTYPE(node);
01665   switch (type) {
01666   case NT_LIST:
01667     do {
01668       r = compile_tree(NCAR(node), reg);
01669     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
01670     break;
01671 
01672   case NT_ALT:
01673     {
01674       Node* x = node;
01675       len = 0;
01676       do {
01677         len += compile_length_tree(NCAR(x), reg);
01678         if (NCDR(x) != NULL) {
01679           len += SIZE_OP_PUSH + SIZE_OP_JUMP;
01680         }
01681       } while (IS_NOT_NULL(x = NCDR(x)));
01682       pos = reg->used + len;  /* goal position */
01683 
01684       do {
01685         len = compile_length_tree(NCAR(node), reg);
01686         if (IS_NOT_NULL(NCDR(node))) {
01687           r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
01688           if (r) break;
01689         }
01690         r = compile_tree(NCAR(node), reg);
01691         if (r) break;
01692         if (IS_NOT_NULL(NCDR(node))) {
01693           len = pos - (reg->used + SIZE_OP_JUMP);
01694           r = add_opcode_rel_addr(reg, OP_JUMP, len);
01695           if (r) break;
01696         }
01697       } while (IS_NOT_NULL(node = NCDR(node)));
01698     }
01699     break;
01700 
01701   case NT_STR:
01702     if (NSTRING_IS_RAW(node))
01703       r = compile_string_raw_node(NSTR(node), reg);
01704     else
01705       r = compile_string_node(node, reg);
01706     break;
01707 
01708   case NT_CCLASS:
01709     r = compile_cclass_node(NCCLASS(node), reg);
01710     break;
01711 
01712   case NT_CTYPE:
01713     {
01714       int op;
01715 
01716       switch (NCTYPE(node)->ctype) {
01717       case ONIGENC_CTYPE_WORD:
01718         if (NCTYPE(node)->ascii_range != 0) {
01719           if (NCTYPE(node)->not != 0)  op = OP_NOT_ASCII_WORD;
01720           else                         op = OP_ASCII_WORD;
01721         }
01722         else {
01723           if (NCTYPE(node)->not != 0)  op = OP_NOT_WORD;
01724           else                         op = OP_WORD;
01725         }
01726         break;
01727       default:
01728         return ONIGERR_TYPE_BUG;
01729         break;
01730       }
01731       r = add_opcode(reg, op);
01732     }
01733     break;
01734 
01735   case NT_CANY:
01736     if (IS_MULTILINE(reg->options))
01737       r = add_opcode(reg, OP_ANYCHAR_ML);
01738     else
01739       r = add_opcode(reg, OP_ANYCHAR);
01740     break;
01741 
01742   case NT_BREF:
01743     {
01744       BRefNode* br = NBREF(node);
01745 
01746 #ifdef USE_BACKREF_WITH_LEVEL
01747       if (IS_BACKREF_NEST_LEVEL(br)) {
01748         r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
01749         if (r) return r;
01750         r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
01751         if (r) return r;
01752         r = add_length(reg, br->nest_level);
01753         if (r) return r;
01754 
01755         goto add_bacref_mems;
01756       }
01757       else
01758 #endif
01759       if (br->back_num == 1) {
01760         n = br->back_static[0];
01761         if (IS_IGNORECASE(reg->options)) {
01762           r = add_opcode(reg, OP_BACKREFN_IC);
01763           if (r) return r;
01764           r = add_mem_num(reg, n);
01765         }
01766         else {
01767           switch (n) {
01768           case 1:  r = add_opcode(reg, OP_BACKREF1); break;
01769           case 2:  r = add_opcode(reg, OP_BACKREF2); break;
01770           default:
01771             r = add_opcode(reg, OP_BACKREFN);
01772             if (r) return r;
01773             r = add_mem_num(reg, n);
01774             break;
01775           }
01776         }
01777       }
01778       else {
01779         int i;
01780         int* p;
01781 
01782         if (IS_IGNORECASE(reg->options)) {
01783           r = add_opcode(reg, OP_BACKREF_MULTI_IC);
01784         }
01785         else {
01786           r = add_opcode(reg, OP_BACKREF_MULTI);
01787         }
01788         if (r) return r;
01789 
01790 #ifdef USE_BACKREF_WITH_LEVEL
01791       add_bacref_mems:
01792 #endif
01793         r = add_length(reg, br->back_num);
01794         if (r) return r;
01795         p = BACKREFS_P(br);
01796         for (i = br->back_num - 1; i >= 0; i--) {
01797           r = add_mem_num(reg, p[i]);
01798           if (r) return r;
01799         }
01800       }
01801     }
01802     break;
01803 
01804 #ifdef USE_SUBEXP_CALL
01805   case NT_CALL:
01806     r = compile_call(NCALL(node), reg);
01807     break;
01808 #endif
01809 
01810   case NT_QTFR:
01811     r = compile_quantifier_node(NQTFR(node), reg);
01812     break;
01813 
01814   case NT_ENCLOSE:
01815     r = compile_enclose_node(NENCLOSE(node), reg);
01816     break;
01817 
01818   case NT_ANCHOR:
01819     r = compile_anchor_node(NANCHOR(node), reg);
01820     break;
01821 
01822   default:
01823 #ifdef ONIG_DEBUG
01824     fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
01825 #endif
01826     break;
01827   }
01828 
01829   return r;
01830 }
01831 
01832 #ifdef USE_NAMED_GROUP
01833 
01834 static int
01835 noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
01836 {
01837   int r = 0;
01838   Node* node = *plink;
01839 
01840   switch (NTYPE(node)) {
01841   case NT_LIST:
01842   case NT_ALT:
01843     do {
01844       r = noname_disable_map(&(NCAR(node)), map, counter);
01845     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
01846     break;
01847 
01848   case NT_QTFR:
01849     {
01850       Node** ptarget = &(NQTFR(node)->target);
01851       Node*  old = *ptarget;
01852       r = noname_disable_map(ptarget, map, counter);
01853       if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
01854         onig_reduce_nested_quantifier(node, *ptarget);
01855       }
01856     }
01857     break;
01858 
01859   case NT_ENCLOSE:
01860     {
01861       EncloseNode* en = NENCLOSE(node);
01862       if (en->type == ENCLOSE_MEMORY) {
01863         if (IS_ENCLOSE_NAMED_GROUP(en)) {
01864           (*counter)++;
01865           map[en->regnum].new_val = *counter;
01866           en->regnum = *counter;
01867           r = noname_disable_map(&(en->target), map, counter);
01868         }
01869         else {
01870           *plink = en->target;
01871           en->target = NULL_NODE;
01872           onig_node_free(node);
01873           r = noname_disable_map(plink, map, counter);
01874         }
01875       }
01876       else
01877         r = noname_disable_map(&(en->target), map, counter);
01878     }
01879     break;
01880 
01881   case NT_ANCHOR:
01882     {
01883       AnchorNode* an = NANCHOR(node);
01884       switch (an->type) {
01885       case ANCHOR_PREC_READ:
01886       case ANCHOR_PREC_READ_NOT:
01887       case ANCHOR_LOOK_BEHIND:
01888       case ANCHOR_LOOK_BEHIND_NOT:
01889         r = noname_disable_map(&(an->target), map, counter);
01890         break;
01891       }
01892     }
01893     break;
01894 
01895   default:
01896     break;
01897   }
01898 
01899   return r;
01900 }
01901 
01902 static int
01903 renumber_node_backref(Node* node, GroupNumRemap* map)
01904 {
01905   int i, pos, n, old_num;
01906   int *backs;
01907   BRefNode* bn = NBREF(node);
01908 
01909   if (! IS_BACKREF_NAME_REF(bn))
01910     return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
01911 
01912   old_num = bn->back_num;
01913   if (IS_NULL(bn->back_dynamic))
01914     backs = bn->back_static;
01915   else
01916     backs = bn->back_dynamic;
01917 
01918   for (i = 0, pos = 0; i < old_num; i++) {
01919     n = map[backs[i]].new_val;
01920     if (n > 0) {
01921       backs[pos] = n;
01922       pos++;
01923     }
01924   }
01925 
01926   bn->back_num = pos;
01927   return 0;
01928 }
01929 
01930 static int
01931 renumber_by_map(Node* node, GroupNumRemap* map)
01932 {
01933   int r = 0;
01934 
01935   switch (NTYPE(node)) {
01936   case NT_LIST:
01937   case NT_ALT:
01938     do {
01939       r = renumber_by_map(NCAR(node), map);
01940     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
01941     break;
01942   case NT_QTFR:
01943     r = renumber_by_map(NQTFR(node)->target, map);
01944     break;
01945   case NT_ENCLOSE:
01946     r = renumber_by_map(NENCLOSE(node)->target, map);
01947     break;
01948 
01949   case NT_BREF:
01950     r = renumber_node_backref(node, map);
01951     break;
01952 
01953   case NT_ANCHOR:
01954     {
01955       AnchorNode* an = NANCHOR(node);
01956       switch (an->type) {
01957       case ANCHOR_PREC_READ:
01958       case ANCHOR_PREC_READ_NOT:
01959       case ANCHOR_LOOK_BEHIND:
01960       case ANCHOR_LOOK_BEHIND_NOT:
01961         r = renumber_by_map(an->target, map);
01962         break;
01963       }
01964     }
01965     break;
01966 
01967   default:
01968     break;
01969   }
01970 
01971   return r;
01972 }
01973 
01974 static int
01975 numbered_ref_check(Node* node)
01976 {
01977   int r = 0;
01978 
01979   switch (NTYPE(node)) {
01980   case NT_LIST:
01981   case NT_ALT:
01982     do {
01983       r = numbered_ref_check(NCAR(node));
01984     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
01985     break;
01986   case NT_QTFR:
01987     r = numbered_ref_check(NQTFR(node)->target);
01988     break;
01989   case NT_ENCLOSE:
01990     r = numbered_ref_check(NENCLOSE(node)->target);
01991     break;
01992 
01993   case NT_BREF:
01994     if (! IS_BACKREF_NAME_REF(NBREF(node)))
01995       return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
01996     break;
01997 
01998   default:
01999     break;
02000   }
02001 
02002   return r;
02003 }
02004 
02005 static int
02006 disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
02007 {
02008   int r, i, pos, counter;
02009   BitStatusType loc;
02010   GroupNumRemap* map;
02011 
02012   map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
02013   CHECK_NULL_RETURN_MEMERR(map);
02014   for (i = 1; i <= env->num_mem; i++) {
02015     map[i].new_val = 0;
02016   }
02017   counter = 0;
02018   r = noname_disable_map(root, map, &counter);
02019   if (r != 0) return r;
02020 
02021   r = renumber_by_map(*root, map);
02022   if (r != 0) return r;
02023 
02024   for (i = 1, pos = 1; i <= env->num_mem; i++) {
02025     if (map[i].new_val > 0) {
02026       SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
02027       pos++;
02028     }
02029   }
02030 
02031   loc = env->capture_history;
02032   BIT_STATUS_CLEAR(env->capture_history);
02033   for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
02034     if (BIT_STATUS_AT(loc, i)) {
02035       BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
02036     }
02037   }
02038 
02039   env->num_mem = env->num_named;
02040   reg->num_mem = env->num_named;
02041 
02042   return onig_renumber_name_table(reg, map);
02043 }
02044 #endif /* USE_NAMED_GROUP */
02045 
02046 #ifdef USE_SUBEXP_CALL
02047 static int
02048 unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
02049 {
02050   int i, offset;
02051   EncloseNode* en;
02052   AbsAddrType addr;
02053 
02054   for (i = 0; i < uslist->num; i++) {
02055     en = NENCLOSE(uslist->us[i].target);
02056     if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
02057     addr = en->call_addr;
02058     offset = uslist->us[i].offset;
02059 
02060     BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
02061   }
02062   return 0;
02063 }
02064 #endif
02065 
02066 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
02067 static int
02068 quantifiers_memory_node_info(Node* node)
02069 {
02070   int r = 0;
02071 
02072   switch (NTYPE(node)) {
02073   case NT_LIST:
02074   case NT_ALT:
02075     {
02076       int v;
02077       do {
02078         v = quantifiers_memory_node_info(NCAR(node));
02079         if (v > r) r = v;
02080       } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
02081     }
02082     break;
02083 
02084 #ifdef USE_SUBEXP_CALL
02085   case NT_CALL:
02086     if (IS_CALL_RECURSION(NCALL(node))) {
02087       return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
02088     }
02089     else
02090       r = quantifiers_memory_node_info(NCALL(node)->target);
02091     break;
02092 #endif
02093 
02094   case NT_QTFR:
02095     {
02096       QtfrNode* qn = NQTFR(node);
02097       if (qn->upper != 0) {
02098         r = quantifiers_memory_node_info(qn->target);
02099       }
02100     }
02101     break;
02102 
02103   case NT_ENCLOSE:
02104     {
02105       EncloseNode* en = NENCLOSE(node);
02106       switch (en->type) {
02107       case ENCLOSE_MEMORY:
02108         return NQ_TARGET_IS_EMPTY_MEM;
02109         break;
02110 
02111       case ENCLOSE_OPTION:
02112       case ENCLOSE_STOP_BACKTRACK:
02113       case ENCLOSE_CONDITION:
02114         r = quantifiers_memory_node_info(en->target);
02115         break;
02116       default:
02117         break;
02118       }
02119     }
02120     break;
02121 
02122   case NT_BREF:
02123   case NT_STR:
02124   case NT_CTYPE:
02125   case NT_CCLASS:
02126   case NT_CANY:
02127   case NT_ANCHOR:
02128   default:
02129     break;
02130   }
02131 
02132   return r;
02133 }
02134 #endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
02135 
02136 static int
02137 get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
02138 {
02139   OnigDistance tmin;
02140   int r = 0;
02141 
02142   *min = 0;
02143   switch (NTYPE(node)) {
02144   case NT_BREF:
02145     {
02146       int i;
02147       int* backs;
02148       Node** nodes = SCANENV_MEM_NODES(env);
02149       BRefNode* br = NBREF(node);
02150       if (br->state & NST_RECURSION) break;
02151 
02152       backs = BACKREFS_P(br);
02153       if (backs[0] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
02154       r = get_min_match_length(nodes[backs[0]], min, env);
02155       if (r != 0) break;
02156       for (i = 1; i < br->back_num; i++) {
02157         if (backs[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
02158         r = get_min_match_length(nodes[backs[i]], &tmin, env);
02159         if (r != 0) break;
02160         if (*min > tmin) *min = tmin;
02161       }
02162     }
02163     break;
02164 
02165 #ifdef USE_SUBEXP_CALL
02166   case NT_CALL:
02167     if (IS_CALL_RECURSION(NCALL(node))) {
02168       EncloseNode* en = NENCLOSE(NCALL(node)->target);
02169       if (IS_ENCLOSE_MIN_FIXED(en))
02170         *min = en->min_len;
02171     }
02172     else
02173       r = get_min_match_length(NCALL(node)->target, min, env);
02174     break;
02175 #endif
02176 
02177   case NT_LIST:
02178     do {
02179       r = get_min_match_length(NCAR(node), &tmin, env);
02180       if (r == 0) *min += tmin;
02181     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02182     break;
02183 
02184   case NT_ALT:
02185     {
02186       Node *x, *y;
02187       y = node;
02188       do {
02189         x = NCAR(y);
02190         r = get_min_match_length(x, &tmin, env);
02191         if (r != 0) break;
02192         if (y == node) *min = tmin;
02193         else if (*min > tmin) *min = tmin;
02194       } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
02195     }
02196     break;
02197 
02198   case NT_STR:
02199     {
02200       StrNode* sn = NSTR(node);
02201       *min = sn->end - sn->s;
02202     }
02203     break;
02204 
02205   case NT_CTYPE:
02206     *min = 1;
02207     break;
02208 
02209   case NT_CCLASS:
02210   case NT_CANY:
02211     *min = 1;
02212     break;
02213 
02214   case NT_QTFR:
02215     {
02216       QtfrNode* qn = NQTFR(node);
02217 
02218       if (qn->lower > 0) {
02219         r = get_min_match_length(qn->target, min, env);
02220         if (r == 0)
02221           *min = distance_multiply(*min, qn->lower);
02222       }
02223     }
02224     break;
02225 
02226   case NT_ENCLOSE:
02227     {
02228       EncloseNode* en = NENCLOSE(node);
02229       switch (en->type) {
02230       case ENCLOSE_MEMORY:
02231 #ifdef USE_SUBEXP_CALL
02232         if (IS_ENCLOSE_MIN_FIXED(en))
02233           *min = en->min_len;
02234         else {
02235           r = get_min_match_length(en->target, min, env);
02236           if (r == 0) {
02237             en->min_len = *min;
02238             SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
02239           }
02240         }
02241         break;
02242 #endif
02243       case ENCLOSE_OPTION:
02244       case ENCLOSE_STOP_BACKTRACK:
02245       case ENCLOSE_CONDITION:
02246         r = get_min_match_length(en->target, min, env);
02247         break;
02248       }
02249     }
02250     break;
02251 
02252   case NT_ANCHOR:
02253   default:
02254     break;
02255   }
02256 
02257   return r;
02258 }
02259 
02260 static int
02261 get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
02262 {
02263   OnigDistance tmax;
02264   int r = 0;
02265 
02266   *max = 0;
02267   switch (NTYPE(node)) {
02268   case NT_LIST:
02269     do {
02270       r = get_max_match_length(NCAR(node), &tmax, env);
02271       if (r == 0)
02272         *max = distance_add(*max, tmax);
02273     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02274     break;
02275 
02276   case NT_ALT:
02277     do {
02278       r = get_max_match_length(NCAR(node), &tmax, env);
02279       if (r == 0 && *max < tmax) *max = tmax;
02280     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02281     break;
02282 
02283   case NT_STR:
02284     {
02285       StrNode* sn = NSTR(node);
02286       *max = sn->end - sn->s;
02287     }
02288     break;
02289 
02290   case NT_CTYPE:
02291     *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
02292     break;
02293 
02294   case NT_CCLASS:
02295   case NT_CANY:
02296     *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
02297     break;
02298 
02299   case NT_BREF:
02300     {
02301       int i;
02302       int* backs;
02303       Node** nodes = SCANENV_MEM_NODES(env);
02304       BRefNode* br = NBREF(node);
02305       if (br->state & NST_RECURSION) {
02306         *max = ONIG_INFINITE_DISTANCE;
02307         break;
02308       }
02309       backs = BACKREFS_P(br);
02310       for (i = 0; i < br->back_num; i++) {
02311         if (backs[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
02312         r = get_max_match_length(nodes[backs[i]], &tmax, env);
02313         if (r != 0) break;
02314         if (*max < tmax) *max = tmax;
02315       }
02316     }
02317     break;
02318 
02319 #ifdef USE_SUBEXP_CALL
02320   case NT_CALL:
02321     if (! IS_CALL_RECURSION(NCALL(node)))
02322       r = get_max_match_length(NCALL(node)->target, max, env);
02323     else
02324       *max = ONIG_INFINITE_DISTANCE;
02325     break;
02326 #endif
02327 
02328   case NT_QTFR:
02329     {
02330       QtfrNode* qn = NQTFR(node);
02331 
02332       if (qn->upper != 0) {
02333         r = get_max_match_length(qn->target, max, env);
02334         if (r == 0 && *max != 0) {
02335           if (! IS_REPEAT_INFINITE(qn->upper))
02336             *max = distance_multiply(*max, qn->upper);
02337           else
02338             *max = ONIG_INFINITE_DISTANCE;
02339         }
02340       }
02341     }
02342     break;
02343 
02344   case NT_ENCLOSE:
02345     {
02346       EncloseNode* en = NENCLOSE(node);
02347       switch (en->type) {
02348       case ENCLOSE_MEMORY:
02349 #ifdef USE_SUBEXP_CALL
02350         if (IS_ENCLOSE_MAX_FIXED(en))
02351           *max = en->max_len;
02352         else {
02353           r = get_max_match_length(en->target, max, env);
02354           if (r == 0) {
02355             en->max_len = *max;
02356             SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
02357           }
02358         }
02359         break;
02360 #endif
02361       case ENCLOSE_OPTION:
02362       case ENCLOSE_STOP_BACKTRACK:
02363       case ENCLOSE_CONDITION:
02364         r = get_max_match_length(en->target, max, env);
02365         break;
02366       }
02367     }
02368     break;
02369 
02370   case NT_ANCHOR:
02371   default:
02372     break;
02373   }
02374 
02375   return r;
02376 }
02377 
02378 #define GET_CHAR_LEN_VARLEN           -1
02379 #define GET_CHAR_LEN_TOP_ALT_VARLEN   -2
02380 
02381 /* fixed size pattern node only */
02382 static int
02383 get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
02384 {
02385   int tlen;
02386   int r = 0;
02387 
02388   level++;
02389   *len = 0;
02390   switch (NTYPE(node)) {
02391   case NT_LIST:
02392     do {
02393       r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
02394       if (r == 0)
02395         *len = (int )distance_add(*len, tlen);
02396     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02397     break;
02398 
02399   case NT_ALT:
02400     {
02401       int tlen2;
02402       int varlen = 0;
02403 
02404       r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
02405       while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
02406         r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
02407         if (r == 0) {
02408           if (tlen != tlen2)
02409             varlen = 1;
02410         }
02411       }
02412       if (r == 0) {
02413         if (varlen != 0) {
02414           if (level == 1)
02415             r = GET_CHAR_LEN_TOP_ALT_VARLEN;
02416           else
02417             r = GET_CHAR_LEN_VARLEN;
02418         }
02419         else
02420           *len = tlen;
02421       }
02422     }
02423     break;
02424 
02425   case NT_STR:
02426     {
02427       StrNode* sn = NSTR(node);
02428       UChar *s = sn->s;
02429       while (s < sn->end) {
02430         s += enclen(reg->enc, s, sn->end);
02431         (*len)++;
02432       }
02433     }
02434     break;
02435 
02436   case NT_QTFR:
02437     {
02438       QtfrNode* qn = NQTFR(node);
02439       if (qn->lower == qn->upper) {
02440         r = get_char_length_tree1(qn->target, reg, &tlen, level);
02441         if (r == 0)
02442           *len = (int )distance_multiply(tlen, qn->lower);
02443       }
02444       else
02445         r = GET_CHAR_LEN_VARLEN;
02446     }
02447     break;
02448 
02449 #ifdef USE_SUBEXP_CALL
02450   case NT_CALL:
02451     if (! IS_CALL_RECURSION(NCALL(node)))
02452       r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
02453     else
02454       r = GET_CHAR_LEN_VARLEN;
02455     break;
02456 #endif
02457 
02458   case NT_CTYPE:
02459     *len = 1;
02460     break;
02461 
02462   case NT_CCLASS:
02463   case NT_CANY:
02464     *len = 1;
02465     break;
02466 
02467   case NT_ENCLOSE:
02468     {
02469       EncloseNode* en = NENCLOSE(node);
02470       switch (en->type) {
02471       case ENCLOSE_MEMORY:
02472 #ifdef USE_SUBEXP_CALL
02473         if (IS_ENCLOSE_CLEN_FIXED(en))
02474           *len = en->char_len;
02475         else {
02476           r = get_char_length_tree1(en->target, reg, len, level);
02477           if (r == 0) {
02478             en->char_len = *len;
02479             SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
02480           }
02481         }
02482         break;
02483 #endif
02484       case ENCLOSE_OPTION:
02485       case ENCLOSE_STOP_BACKTRACK:
02486       case ENCLOSE_CONDITION:
02487         r = get_char_length_tree1(en->target, reg, len, level);
02488         break;
02489       default:
02490         break;
02491       }
02492     }
02493     break;
02494 
02495   case NT_ANCHOR:
02496     break;
02497 
02498   default:
02499     r = GET_CHAR_LEN_VARLEN;
02500     break;
02501   }
02502 
02503   return r;
02504 }
02505 
02506 static int
02507 get_char_length_tree(Node* node, regex_t* reg, int* len)
02508 {
02509   return get_char_length_tree1(node, reg, len, 0);
02510 }
02511 
02512 /* x is not included y ==>  1 : 0 */
02513 static int
02514 is_not_included(Node* x, Node* y, regex_t* reg)
02515 {
02516   int i;
02517   OnigDistance len;
02518   OnigCodePoint code;
02519   UChar *p;
02520   int ytype;
02521 
02522  retry:
02523   ytype = NTYPE(y);
02524   switch (NTYPE(x)) {
02525   case NT_CTYPE:
02526     {
02527       switch (ytype) {
02528       case NT_CTYPE:
02529         if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
02530             NCTYPE(y)->not   != NCTYPE(x)->not &&
02531             NCTYPE(y)->ascii_range == NCTYPE(x)->ascii_range)
02532           return 1;
02533         else
02534           return 0;
02535         break;
02536 
02537       case NT_CCLASS:
02538       swap:
02539         {
02540           Node* tmp;
02541           tmp = x; x = y; y = tmp;
02542           goto retry;
02543         }
02544         break;
02545 
02546       case NT_STR:
02547         goto swap;
02548         break;
02549 
02550       default:
02551         break;
02552       }
02553     }
02554     break;
02555 
02556   case NT_CCLASS:
02557     {
02558       CClassNode* xc = NCCLASS(x);
02559       switch (ytype) {
02560       case NT_CTYPE:
02561         switch (NCTYPE(y)->ctype) {
02562         case ONIGENC_CTYPE_WORD:
02563           if (NCTYPE(y)->not == 0) {
02564             if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
02565               for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
02566                 if (BITSET_AT(xc->bs, i)) {
02567                   if (NCTYPE(y)->ascii_range) {
02568                     if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
02569                   }
02570                   else {
02571                     if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
02572                   }
02573                 }
02574               }
02575               return 1;
02576             }
02577             return 0;
02578           }
02579           else {
02580             for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
02581               int is_word;
02582               if (NCTYPE(y)->ascii_range)
02583                 is_word = IS_CODE_SB_WORD(reg->enc, i);
02584               else
02585                 is_word = ONIGENC_IS_CODE_WORD(reg->enc, i);
02586               if (! is_word) {
02587                 if (!IS_NCCLASS_NOT(xc)) {
02588                   if (BITSET_AT(xc->bs, i))
02589                     return 0;
02590                 }
02591                 else {
02592                   if (! BITSET_AT(xc->bs, i))
02593                     return 0;
02594                 }
02595               }
02596             }
02597             return 1;
02598           }
02599           break;
02600 
02601         default:
02602           break;
02603         }
02604         break;
02605 
02606       case NT_CCLASS:
02607         {
02608           int v;
02609           CClassNode* yc = NCCLASS(y);
02610 
02611           for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
02612             v = BITSET_AT(xc->bs, i);
02613             if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
02614                 (v == 0 && IS_NCCLASS_NOT(xc))) {
02615               v = BITSET_AT(yc->bs, i);
02616               if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
02617                   (v == 0 && IS_NCCLASS_NOT(yc)))
02618                 return 0;
02619             }
02620           }
02621           if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
02622               (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
02623             return 1;
02624           return 0;
02625         }
02626         break;
02627 
02628       case NT_STR:
02629         goto swap;
02630         break;
02631 
02632       default:
02633         break;
02634       }
02635     }
02636     break;
02637 
02638   case NT_STR:
02639     {
02640       StrNode* xs = NSTR(x);
02641       if (NSTRING_LEN(x) == 0)
02642         break;
02643 
02644       switch (ytype) {
02645       case NT_CTYPE:
02646         switch (NCTYPE(y)->ctype) {
02647         case ONIGENC_CTYPE_WORD:
02648           if (NCTYPE(y)->ascii_range) {
02649             if (ONIGENC_IS_MBC_ASCII_WORD(reg->enc, xs->s, xs->end))
02650               return NCTYPE(y)->not;
02651             else
02652               return !(NCTYPE(y)->not);
02653           }
02654           else {
02655             if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
02656               return NCTYPE(y)->not;
02657             else
02658               return !(NCTYPE(y)->not);
02659           }
02660           break;
02661         default:
02662           break;
02663         }
02664         break;
02665 
02666       case NT_CCLASS:
02667         {
02668           CClassNode* cc = NCCLASS(y);
02669 
02670           code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
02671                                      xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
02672           return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
02673         }
02674         break;
02675 
02676       case NT_STR:
02677         {
02678           UChar *q;
02679           StrNode* ys = NSTR(y);
02680           len = NSTRING_LEN(x);
02681           if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
02682           if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
02683             /* tiny version */
02684             return 0;
02685           }
02686           else {
02687             for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i < len; i++, p++, q++) {
02688               if (*p != *q) return 1;
02689             }
02690           }
02691         }
02692         break;
02693 
02694       default:
02695         break;
02696       }
02697     }
02698     break;
02699 
02700   default:
02701     break;
02702   }
02703 
02704   return 0;
02705 }
02706 
02707 static Node*
02708 get_head_value_node(Node* node, int exact, regex_t* reg)
02709 {
02710   Node* n = NULL_NODE;
02711 
02712   switch (NTYPE(node)) {
02713   case NT_BREF:
02714   case NT_ALT:
02715   case NT_CANY:
02716 #ifdef USE_SUBEXP_CALL
02717   case NT_CALL:
02718 #endif
02719     break;
02720 
02721   case NT_CTYPE:
02722   case NT_CCLASS:
02723     if (exact == 0) {
02724       n = node;
02725     }
02726     break;
02727 
02728   case NT_LIST:
02729     n = get_head_value_node(NCAR(node), exact, reg);
02730     break;
02731 
02732   case NT_STR:
02733     {
02734       StrNode* sn = NSTR(node);
02735 
02736       if (sn->end <= sn->s)
02737         break;
02738 
02739       if (exact != 0 &&
02740           !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
02741       }
02742       else {
02743         n = node;
02744       }
02745     }
02746     break;
02747 
02748   case NT_QTFR:
02749     {
02750       QtfrNode* qn = NQTFR(node);
02751       if (qn->lower > 0) {
02752         if (IS_NOT_NULL(qn->head_exact))
02753           n = qn->head_exact;
02754         else
02755           n = get_head_value_node(qn->target, exact, reg);
02756       }
02757     }
02758     break;
02759 
02760   case NT_ENCLOSE:
02761     {
02762       EncloseNode* en = NENCLOSE(node);
02763       switch (en->type) {
02764       case ENCLOSE_OPTION:
02765         {
02766           OnigOptionType options = reg->options;
02767 
02768           reg->options = NENCLOSE(node)->option;
02769           n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
02770           reg->options = options;
02771         }
02772         break;
02773 
02774       case ENCLOSE_MEMORY:
02775       case ENCLOSE_STOP_BACKTRACK:
02776       case ENCLOSE_CONDITION:
02777         n = get_head_value_node(en->target, exact, reg);
02778         break;
02779       }
02780     }
02781     break;
02782 
02783   case NT_ANCHOR:
02784     if (NANCHOR(node)->type == ANCHOR_PREC_READ)
02785       n = get_head_value_node(NANCHOR(node)->target, exact, reg);
02786     break;
02787 
02788   default:
02789     break;
02790   }
02791 
02792   return n;
02793 }
02794 
02795 static int
02796 check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
02797 {
02798   int type, r = 0;
02799 
02800   type = NTYPE(node);
02801   if ((NTYPE2BIT(type) & type_mask) == 0)
02802     return 1;
02803 
02804   switch (type) {
02805   case NT_LIST:
02806   case NT_ALT:
02807     do {
02808       r = check_type_tree(NCAR(node), type_mask, enclose_mask,
02809                           anchor_mask);
02810     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02811     break;
02812 
02813   case NT_QTFR:
02814     r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
02815                         anchor_mask);
02816     break;
02817 
02818   case NT_ENCLOSE:
02819     {
02820       EncloseNode* en = NENCLOSE(node);
02821       if ((en->type & enclose_mask) == 0)
02822         return 1;
02823 
02824       r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
02825     }
02826     break;
02827 
02828   case NT_ANCHOR:
02829     type = NANCHOR(node)->type;
02830     if ((type & anchor_mask) == 0)
02831       return 1;
02832 
02833     if (NANCHOR(node)->target)
02834       r = check_type_tree(NANCHOR(node)->target,
02835                           type_mask, enclose_mask, anchor_mask);
02836     break;
02837 
02838   default:
02839     break;
02840   }
02841   return r;
02842 }
02843 
02844 #ifdef USE_SUBEXP_CALL
02845 
02846 #define RECURSION_EXIST       1
02847 #define RECURSION_INFINITE    2
02848 
02849 static int
02850 subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
02851 {
02852   int type;
02853   int r = 0;
02854 
02855   type = NTYPE(node);
02856   switch (type) {
02857   case NT_LIST:
02858     {
02859       Node *x;
02860       OnigDistance min;
02861       int ret;
02862 
02863       x = node;
02864       do {
02865         ret = subexp_inf_recursive_check(NCAR(x), env, head);
02866         if (ret < 0 || ret == RECURSION_INFINITE) return ret;
02867         r |= ret;
02868         if (head) {
02869           ret = get_min_match_length(NCAR(x), &min, env);
02870           if (ret != 0) return ret;
02871           if (min != 0) head = 0;
02872         }
02873       } while (IS_NOT_NULL(x = NCDR(x)));
02874     }
02875     break;
02876 
02877   case NT_ALT:
02878     {
02879       int ret;
02880       r = RECURSION_EXIST;
02881       do {
02882         ret = subexp_inf_recursive_check(NCAR(node), env, head);
02883         if (ret < 0 || ret == RECURSION_INFINITE) return ret;
02884         r &= ret;
02885       } while (IS_NOT_NULL(node = NCDR(node)));
02886     }
02887     break;
02888 
02889   case NT_QTFR:
02890     r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
02891     if (r == RECURSION_EXIST) {
02892       if (NQTFR(node)->lower == 0) r = 0;
02893     }
02894     break;
02895 
02896   case NT_ANCHOR:
02897     {
02898       AnchorNode* an = NANCHOR(node);
02899       switch (an->type) {
02900       case ANCHOR_PREC_READ:
02901       case ANCHOR_PREC_READ_NOT:
02902       case ANCHOR_LOOK_BEHIND:
02903       case ANCHOR_LOOK_BEHIND_NOT:
02904         r = subexp_inf_recursive_check(an->target, env, head);
02905         break;
02906       }
02907     }
02908     break;
02909 
02910   case NT_CALL:
02911     r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
02912     break;
02913 
02914   case NT_ENCLOSE:
02915     if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
02916       return 0;
02917     else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
02918       return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
02919     else {
02920       SET_ENCLOSE_STATUS(node, NST_MARK2);
02921       r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
02922       CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
02923     }
02924     break;
02925 
02926   default:
02927     break;
02928   }
02929 
02930   return r;
02931 }
02932 
02933 static int
02934 subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
02935 {
02936   int type;
02937   int r = 0;
02938 
02939   type = NTYPE(node);
02940   switch (type) {
02941   case NT_LIST:
02942   case NT_ALT:
02943     do {
02944       r = subexp_inf_recursive_check_trav(NCAR(node), env);
02945     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02946     break;
02947 
02948   case NT_QTFR:
02949     r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
02950     break;
02951 
02952   case NT_ANCHOR:
02953     {
02954       AnchorNode* an = NANCHOR(node);
02955       switch (an->type) {
02956       case ANCHOR_PREC_READ:
02957       case ANCHOR_PREC_READ_NOT:
02958       case ANCHOR_LOOK_BEHIND:
02959       case ANCHOR_LOOK_BEHIND_NOT:
02960         r = subexp_inf_recursive_check_trav(an->target, env);
02961         break;
02962       }
02963     }
02964     break;
02965 
02966   case NT_ENCLOSE:
02967     {
02968       EncloseNode* en = NENCLOSE(node);
02969 
02970       if (IS_ENCLOSE_RECURSION(en)) {
02971         SET_ENCLOSE_STATUS(node, NST_MARK1);
02972         r = subexp_inf_recursive_check(en->target, env, 1);
02973         if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
02974         CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
02975       }
02976       r = subexp_inf_recursive_check_trav(en->target, env);
02977     }
02978 
02979     break;
02980 
02981   default:
02982     break;
02983   }
02984 
02985   return r;
02986 }
02987 
02988 static int
02989 subexp_recursive_check(Node* node)
02990 {
02991   int r = 0;
02992 
02993   switch (NTYPE(node)) {
02994   case NT_LIST:
02995   case NT_ALT:
02996     do {
02997       r |= subexp_recursive_check(NCAR(node));
02998     } while (IS_NOT_NULL(node = NCDR(node)));
02999     break;
03000 
03001   case NT_QTFR:
03002     r = subexp_recursive_check(NQTFR(node)->target);
03003     break;
03004 
03005   case NT_ANCHOR:
03006     {
03007       AnchorNode* an = NANCHOR(node);
03008       switch (an->type) {
03009       case ANCHOR_PREC_READ:
03010       case ANCHOR_PREC_READ_NOT:
03011       case ANCHOR_LOOK_BEHIND:
03012       case ANCHOR_LOOK_BEHIND_NOT:
03013         r = subexp_recursive_check(an->target);
03014         break;
03015       }
03016     }
03017     break;
03018 
03019   case NT_CALL:
03020     r = subexp_recursive_check(NCALL(node)->target);
03021     if (r != 0) SET_CALL_RECURSION(node);
03022     break;
03023 
03024   case NT_ENCLOSE:
03025     if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
03026       return 0;
03027     else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
03028       return 1; /* recursion */
03029     else {
03030       SET_ENCLOSE_STATUS(node, NST_MARK2);
03031       r = subexp_recursive_check(NENCLOSE(node)->target);
03032       CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
03033     }
03034     break;
03035 
03036   default:
03037     break;
03038   }
03039 
03040   return r;
03041 }
03042 
03043 
03044 static int
03045 subexp_recursive_check_trav(Node* node, ScanEnv* env)
03046 {
03047 #define FOUND_CALLED_NODE    1
03048 
03049   int type;
03050   int r = 0;
03051 
03052   type = NTYPE(node);
03053   switch (type) {
03054   case NT_LIST:
03055   case NT_ALT:
03056     {
03057       int ret;
03058       do {
03059         ret = subexp_recursive_check_trav(NCAR(node), env);
03060         if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
03061         else if (ret < 0) return ret;
03062       } while (IS_NOT_NULL(node = NCDR(node)));
03063     }
03064     break;
03065 
03066   case NT_QTFR:
03067     r = subexp_recursive_check_trav(NQTFR(node)->target, env);
03068     if (NQTFR(node)->upper == 0) {
03069       if (r == FOUND_CALLED_NODE)
03070         NQTFR(node)->is_refered = 1;
03071     }
03072     break;
03073 
03074   case NT_ANCHOR:
03075     {
03076       AnchorNode* an = NANCHOR(node);
03077       switch (an->type) {
03078       case ANCHOR_PREC_READ:
03079       case ANCHOR_PREC_READ_NOT:
03080       case ANCHOR_LOOK_BEHIND:
03081       case ANCHOR_LOOK_BEHIND_NOT:
03082         r = subexp_recursive_check_trav(an->target, env);
03083         break;
03084       }
03085     }
03086     break;
03087 
03088   case NT_ENCLOSE:
03089     {
03090       EncloseNode* en = NENCLOSE(node);
03091 
03092       if (! IS_ENCLOSE_RECURSION(en)) {
03093         if (IS_ENCLOSE_CALLED(en)) {
03094           SET_ENCLOSE_STATUS(node, NST_MARK1);
03095           r = subexp_recursive_check(en->target);
03096           if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
03097           CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
03098         }
03099       }
03100       r = subexp_recursive_check_trav(en->target, env);
03101       if (IS_ENCLOSE_CALLED(en))
03102         r |= FOUND_CALLED_NODE;
03103     }
03104     break;
03105 
03106   default:
03107     break;
03108   }
03109 
03110   return r;
03111 }
03112 
03113 static int
03114 setup_subexp_call(Node* node, ScanEnv* env)
03115 {
03116   int type;
03117   int r = 0;
03118 
03119   type = NTYPE(node);
03120   switch (type) {
03121   case NT_LIST:
03122     do {
03123       r = setup_subexp_call(NCAR(node), env);
03124     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
03125     break;
03126 
03127   case NT_ALT:
03128     do {
03129       r = setup_subexp_call(NCAR(node), env);
03130     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
03131     break;
03132 
03133   case NT_QTFR:
03134     r = setup_subexp_call(NQTFR(node)->target, env);
03135     break;
03136   case NT_ENCLOSE:
03137     r = setup_subexp_call(NENCLOSE(node)->target, env);
03138     break;
03139 
03140   case NT_CALL:
03141     {
03142       CallNode* cn = NCALL(node);
03143       Node** nodes = SCANENV_MEM_NODES(env);
03144 
03145       if (cn->group_num != 0) {
03146         int gnum = cn->group_num;
03147 
03148 #ifdef USE_NAMED_GROUP
03149         if (env->num_named > 0 &&
03150             IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
03151             !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
03152           return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
03153         }
03154 #endif
03155         if (gnum > env->num_mem) {
03156           onig_scan_env_set_error_string(env,
03157                  ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
03158           return ONIGERR_UNDEFINED_GROUP_REFERENCE;
03159         }
03160 
03161 #ifdef USE_NAMED_GROUP
03162       set_call_attr:
03163 #endif
03164         cn->target = nodes[cn->group_num];
03165         if (IS_NULL(cn->target)) {
03166           onig_scan_env_set_error_string(env,
03167                  ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
03168           return ONIGERR_UNDEFINED_NAME_REFERENCE;
03169         }
03170         SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
03171         BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
03172         cn->unset_addr_list = env->unset_addr_list;
03173       }
03174 #ifdef USE_NAMED_GROUP
03175 #ifdef USE_PERL_SUBEXP_CALL
03176       else if (cn->name == cn->name_end) {
03177         goto set_call_attr;
03178       }
03179 #endif
03180       else {
03181         int *refs;
03182 
03183         int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
03184                                            &refs);
03185         if (n <= 0) {
03186           onig_scan_env_set_error_string(env,
03187                  ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
03188           return ONIGERR_UNDEFINED_NAME_REFERENCE;
03189         }
03190         else if (n > 1 &&
03191             ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL)) {
03192           onig_scan_env_set_error_string(env,
03193             ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
03194           return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
03195         }
03196         else {
03197           cn->group_num = refs[0];
03198           goto set_call_attr;
03199         }
03200       }
03201 #endif
03202     }
03203     break;
03204 
03205   case NT_ANCHOR:
03206     {
03207       AnchorNode* an = NANCHOR(node);
03208 
03209       switch (an->type) {
03210       case ANCHOR_PREC_READ:
03211       case ANCHOR_PREC_READ_NOT:
03212       case ANCHOR_LOOK_BEHIND:
03213       case ANCHOR_LOOK_BEHIND_NOT:
03214         r = setup_subexp_call(an->target, env);
03215         break;
03216       }
03217     }
03218     break;
03219 
03220   default:
03221     break;
03222   }
03223 
03224   return r;
03225 }
03226 #endif
03227 
03228 /* divide different length alternatives in look-behind.
03229   (?<=A|B) ==> (?<=A)|(?<=B)
03230   (?<!A|B) ==> (?<!A)(?<!B)
03231 */
03232 static int
03233 divide_look_behind_alternatives(Node* node)
03234 {
03235   Node *head, *np, *insert_node;
03236   AnchorNode* an = NANCHOR(node);
03237   int anc_type = an->type;
03238 
03239   head = an->target;
03240   np = NCAR(head);
03241   swap_node(node, head);
03242   NCAR(node) = head;
03243   NANCHOR(head)->target = np;
03244 
03245   np = node;
03246   while ((np = NCDR(np)) != NULL_NODE) {
03247     insert_node = onig_node_new_anchor(anc_type);
03248     CHECK_NULL_RETURN_MEMERR(insert_node);
03249     NANCHOR(insert_node)->target = NCAR(np);
03250     NCAR(np) = insert_node;
03251   }
03252 
03253   if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
03254     np = node;
03255     do {
03256       SET_NTYPE(np, NT_LIST);  /* alt -> list */
03257     } while ((np = NCDR(np)) != NULL_NODE);
03258   }
03259   return 0;
03260 }
03261 
03262 static int
03263 setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
03264 {
03265   int r, len;
03266   AnchorNode* an = NANCHOR(node);
03267 
03268   r = get_char_length_tree(an->target, reg, &len);
03269   if (r == 0)
03270     an->char_len = len;
03271   else if (r == GET_CHAR_LEN_VARLEN)
03272     r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
03273   else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
03274     if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
03275       r = divide_look_behind_alternatives(node);
03276     else
03277       r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
03278   }
03279 
03280   return r;
03281 }
03282 
03283 static int
03284 next_setup(Node* node, Node* next_node, int in_root, regex_t* reg)
03285 {
03286   int type;
03287 
03288  retry:
03289   type = NTYPE(node);
03290   if (type == NT_QTFR) {
03291     QtfrNode* qn = NQTFR(node);
03292     if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
03293 #ifdef USE_QTFR_PEEK_NEXT
03294       Node* n = get_head_value_node(next_node, 1, reg);
03295       /* '\0': for UTF-16BE etc... */
03296       if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
03297         qn->next_head_exact = n;
03298       }
03299 #endif
03300       /* automatic possessivation a*b ==> (?>a*)b */
03301       if (qn->lower <= 1) {
03302         int ttype = NTYPE(qn->target);
03303         if (IS_NODE_TYPE_SIMPLE(ttype)) {
03304           Node *x, *y;
03305           x = get_head_value_node(qn->target, 0, reg);
03306           if (IS_NOT_NULL(x)) {
03307             y = get_head_value_node(next_node,  0, reg);
03308             if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
03309               Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
03310               CHECK_NULL_RETURN_MEMERR(en);
03311               SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
03312               swap_node(node, en);
03313               NENCLOSE(node)->target = en;
03314             }
03315           }
03316         }
03317       }
03318 
03319 #ifndef ONIG_DONT_OPTIMIZE
03320       if (NTYPE(node) == NT_QTFR && /* the type may be changed by above block */
03321           in_root && /* qn->lower == 0 && */
03322           NTYPE(qn->target) == NT_CANY &&
03323           ! IS_MULTILINE(reg->options)) {
03324         /* implicit anchor: /.*a/ ==> /(?:^|\G).*a/ */
03325         Node *np;
03326         np = onig_node_new_list(NULL_NODE, NULL_NODE);
03327         CHECK_NULL_RETURN_MEMERR(np);
03328         swap_node(node, np);
03329         NCDR(node) = onig_node_new_list(np, NULL_NODE);
03330         if (IS_NULL(NCDR(node))) {
03331           onig_node_free(np);
03332           return ONIGERR_MEMORY;
03333         }
03334         np = onig_node_new_anchor(ANCHOR_ANYCHAR_STAR);   /* (?:^|\G) */
03335         CHECK_NULL_RETURN_MEMERR(np);
03336         NCAR(node) = np;
03337       }
03338 #endif
03339     }
03340   }
03341   else if (type == NT_ENCLOSE) {
03342     EncloseNode* en = NENCLOSE(node);
03343     in_root = 0;
03344     if (en->type == ENCLOSE_MEMORY) {
03345       node = en->target;
03346       goto retry;
03347     }
03348   }
03349   return 0;
03350 }
03351 
03352 
03353 static int
03354 update_string_node_case_fold(regex_t* reg, Node *node)
03355 {
03356   UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
03357   UChar *sbuf, *ebuf, *sp;
03358   int r, i, len;
03359   OnigDistance sbuf_size;
03360   StrNode* sn = NSTR(node);
03361 
03362   end = sn->end;
03363   sbuf_size = (end - sn->s) * 2;
03364   sbuf = (UChar* )xmalloc(sbuf_size);
03365   CHECK_NULL_RETURN_MEMERR(sbuf);
03366   ebuf = sbuf + sbuf_size;
03367 
03368   sp = sbuf;
03369   p = sn->s;
03370   while (p < end) {
03371     len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
03372     for (i = 0; i < len; i++) {
03373       if (sp >= ebuf) {
03374         UChar* p = (UChar* )xrealloc(sbuf, sbuf_size * 2);
03375         if (IS_NULL(p)) {
03376           xfree(sbuf);
03377           return ONIGERR_MEMORY;
03378         }
03379         sbuf = p;
03380         sp = sbuf + sbuf_size;
03381         sbuf_size *= 2;
03382         ebuf = sbuf + sbuf_size;
03383       }
03384 
03385       *sp++ = buf[i];
03386     }
03387   }
03388 
03389   r = onig_node_str_set(node, sbuf, sp);
03390   if (r != 0) {
03391     xfree(sbuf);
03392     return r;
03393   }
03394 
03395   xfree(sbuf);
03396   return 0;
03397 }
03398 
03399 static int
03400 expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
03401                                  regex_t* reg)
03402 {
03403   int r;
03404   Node *node;
03405 
03406   node = onig_node_new_str(s, end);
03407   if (IS_NULL(node)) return ONIGERR_MEMORY;
03408 
03409   r = update_string_node_case_fold(reg, node);
03410   if (r != 0) {
03411     onig_node_free(node);
03412     return r;
03413   }
03414 
03415   NSTRING_SET_AMBIG(node);
03416   NSTRING_SET_DONT_GET_OPT_INFO(node);
03417   *rnode = node;
03418   return 0;
03419 }
03420 
03421 static int
03422 expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
03423                             UChar *p, int slen, UChar *end,
03424                             regex_t* reg, Node **rnode)
03425 {
03426   int r, i, j, len, varlen, varclen;
03427   Node *anode, *var_anode, *snode, *xnode, *an;
03428   UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
03429 
03430   *rnode = var_anode = NULL_NODE;
03431 
03432   varlen = 0;
03433   varclen = 0;
03434   for (i = 0; i < item_num; i++) {
03435     if (items[i].byte_len != slen) {
03436       varlen = 1;
03437       break;
03438     }
03439     if (items[i].code_len != 1) {
03440       varclen = 1;
03441     }
03442   }
03443 
03444   if (varlen != 0) {
03445     *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
03446     if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
03447 
03448     xnode = onig_node_new_list(NULL, NULL);
03449     if (IS_NULL(xnode)) goto mem_err;
03450     NCAR(var_anode) = xnode;
03451 
03452     anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
03453     if (IS_NULL(anode)) goto mem_err;
03454     NCAR(xnode) = anode;
03455   }
03456   else {
03457     *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
03458     if (IS_NULL(anode)) return ONIGERR_MEMORY;
03459   }
03460 
03461   snode = onig_node_new_str(p, p + slen);
03462   if (IS_NULL(snode)) goto mem_err;
03463 
03464   NCAR(anode) = snode;
03465 
03466   for (i = 0; i < item_num; i++) {
03467     snode = onig_node_new_str(NULL, NULL);
03468     if (IS_NULL(snode)) goto mem_err;
03469 
03470     for (j = 0; j < items[i].code_len; j++) {
03471       len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
03472       if (len < 0) {
03473         r = len;
03474         goto mem_err2;
03475       }
03476 
03477       r = onig_node_str_cat(snode, buf, buf + len);
03478       if (r != 0) goto mem_err2;
03479     }
03480 
03481     an = onig_node_new_alt(NULL_NODE, NULL_NODE);
03482     if (IS_NULL(an)) {
03483       goto mem_err2;
03484     }
03485 
03486     if (items[i].byte_len != slen) {
03487       Node *rem;
03488       UChar *q = p + items[i].byte_len;
03489 
03490       if (q < end) {
03491         r = expand_case_fold_make_rem_string(&rem, q, end, reg);
03492         if (r != 0) {
03493           onig_node_free(an);
03494           goto mem_err2;
03495         }
03496 
03497         xnode = onig_node_list_add(NULL_NODE, snode);
03498         if (IS_NULL(xnode)) {
03499           onig_node_free(an);
03500           onig_node_free(rem);
03501           goto mem_err2;
03502         }
03503         if (IS_NULL(onig_node_list_add(xnode, rem))) {
03504           onig_node_free(an);
03505           onig_node_free(xnode);
03506           onig_node_free(rem);
03507           goto mem_err;
03508         }
03509 
03510         NCAR(an) = xnode;
03511       }
03512       else {
03513         NCAR(an) = snode;
03514       }
03515 
03516       NCDR(var_anode) = an;
03517       var_anode = an;
03518     }
03519     else {
03520       NCAR(an)     = snode;
03521       NCDR(anode) = an;
03522       anode = an;
03523     }
03524   }
03525 
03526   if (varclen && !varlen)
03527     return 2;
03528   return varlen;
03529 
03530  mem_err2:
03531   onig_node_free(snode);
03532 
03533  mem_err:
03534   onig_node_free(*rnode);
03535 
03536   return ONIGERR_MEMORY;
03537 }
03538 
03539 static int
03540 expand_case_fold_string(Node* node, regex_t* reg)
03541 {
03542 #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION  8
03543 
03544   int r, n, len, alt_num;
03545   int varlen = 0;
03546   UChar *start, *end, *p;
03547   Node *top_root, *root, *snode, *prev_node;
03548   OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
03549   StrNode* sn = NSTR(node);
03550 
03551   if (NSTRING_IS_AMBIG(node)) return 0;
03552 
03553   start = sn->s;
03554   end   = sn->end;
03555   if (start >= end) return 0;
03556 
03557   r = 0;
03558   top_root = root = prev_node = snode = NULL_NODE;
03559   alt_num = 1;
03560   p = start;
03561   while (p < end) {
03562     n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
03563                                            p, end, items);
03564     if (n < 0) {
03565       r = n;
03566       goto err;
03567     }
03568 
03569     len = enclen(reg->enc, p, end);
03570 
03571     if (n == 0) {
03572       if (IS_NULL(snode)) {
03573         if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
03574           top_root = root = onig_node_list_add(NULL_NODE, prev_node);
03575           if (IS_NULL(root)) {
03576             onig_node_free(prev_node);
03577             goto mem_err;
03578           }
03579         }
03580 
03581         prev_node = snode = onig_node_new_str(NULL, NULL);
03582         if (IS_NULL(snode)) goto mem_err;
03583         if (IS_NOT_NULL(root)) {
03584           if (IS_NULL(onig_node_list_add(root, snode))) {
03585             onig_node_free(snode);
03586             goto mem_err;
03587           }
03588         }
03589       }
03590 
03591       r = onig_node_str_cat(snode, p, p + len);
03592       if (r != 0) goto err;
03593     }
03594     else {
03595       alt_num *= (n + 1);
03596       if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
03597 
03598       if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
03599         top_root = root = onig_node_list_add(NULL_NODE, prev_node);
03600         if (IS_NULL(root)) {
03601           onig_node_free(prev_node);
03602           goto mem_err;
03603         }
03604       }
03605 
03606       r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
03607       if (r < 0) goto mem_err;
03608       if (r > 0) varlen = 1;
03609       if (r == 1) {
03610         if (IS_NULL(root)) {
03611           top_root = prev_node;
03612         }
03613         else {
03614           if (IS_NULL(onig_node_list_add(root, prev_node))) {
03615             onig_node_free(prev_node);
03616             goto mem_err;
03617           }
03618         }
03619 
03620         root = NCAR(prev_node);
03621       }
03622       else { /* r == 0 || r == 2 */
03623         if (IS_NOT_NULL(root)) {
03624           if (IS_NULL(onig_node_list_add(root, prev_node))) {
03625             onig_node_free(prev_node);
03626             goto mem_err;
03627           }
03628         }
03629       }
03630 
03631       snode = NULL_NODE;
03632     }
03633 
03634     p += len;
03635   }
03636 
03637   if (p < end) {
03638     Node *srem;
03639 
03640     r = expand_case_fold_make_rem_string(&srem, p, end, reg);
03641     if (r != 0) goto mem_err;
03642 
03643     if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
03644       top_root = root = onig_node_list_add(NULL_NODE, prev_node);
03645       if (IS_NULL(root)) {
03646         onig_node_free(srem);
03647         onig_node_free(prev_node);
03648         goto mem_err;
03649       }
03650     }
03651 
03652     if (IS_NULL(root)) {
03653       prev_node = srem;
03654     }
03655     else {
03656       if (IS_NULL(onig_node_list_add(root, srem))) {
03657         onig_node_free(srem);
03658         goto mem_err;
03659       }
03660     }
03661   }
03662 
03663   /* ending */
03664   top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
03665   if (!varlen) {
03666     /* When all expanded strings are same length, case-insensitive
03667        BM search will be used. */
03668     r = update_string_node_case_fold(reg, node);
03669     if (r == 0) {
03670       NSTRING_SET_AMBIG(node);
03671     }
03672   }
03673   else {
03674     swap_node(node, top_root);
03675     r = 0;
03676   }
03677   onig_node_free(top_root);
03678   return r;
03679 
03680  mem_err:
03681   r = ONIGERR_MEMORY;
03682 
03683  err:
03684   onig_node_free(top_root);
03685   return r;
03686 }
03687 
03688 
03689 #ifdef USE_COMBINATION_EXPLOSION_CHECK
03690 
03691 #define CEC_THRES_NUM_BIG_REPEAT         512
03692 #define CEC_INFINITE_NUM          0x7fffffff
03693 
03694 #define CEC_IN_INFINITE_REPEAT    (1<<0)
03695 #define CEC_IN_FINITE_REPEAT      (1<<1)
03696 #define CEC_CONT_BIG_REPEAT       (1<<2)
03697 
03698 static int
03699 setup_comb_exp_check(Node* node, int state, ScanEnv* env)
03700 {
03701   int type;
03702   int r = state;
03703 
03704   type = NTYPE(node);
03705   switch (type) {
03706   case NT_LIST:
03707     {
03708       Node* prev = NULL_NODE;
03709       do {
03710         r = setup_comb_exp_check(NCAR(node), r, env);
03711         prev = NCAR(node);
03712       } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
03713     }
03714     break;
03715 
03716   case NT_ALT:
03717     {
03718       int ret;
03719       do {
03720         ret = setup_comb_exp_check(NCAR(node), state, env);
03721         r |= ret;
03722       } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
03723     }
03724     break;
03725 
03726   case NT_QTFR:
03727     {
03728       int child_state = state;
03729       int add_state = 0;
03730       QtfrNode* qn = NQTFR(node);
03731       Node* target = qn->target;
03732       int var_num;
03733 
03734       if (! IS_REPEAT_INFINITE(qn->upper)) {
03735         if (qn->upper > 1) {
03736           /* {0,1}, {1,1} are allowed */
03737           child_state |= CEC_IN_FINITE_REPEAT;
03738 
03739           /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
03740           if (env->backrefed_mem == 0) {
03741             if (NTYPE(qn->target) == NT_ENCLOSE) {
03742               EncloseNode* en = NENCLOSE(qn->target);
03743               if (en->type == ENCLOSE_MEMORY) {
03744                 if (NTYPE(en->target) == NT_QTFR) {
03745                   QtfrNode* q = NQTFR(en->target);
03746                   if (IS_REPEAT_INFINITE(q->upper)
03747                       && q->greedy == qn->greedy) {
03748                     qn->upper = (qn->lower == 0 ? 1 : qn->lower);
03749                     if (qn->upper == 1)
03750                       child_state = state;
03751                   }
03752                 }
03753               }
03754             }
03755           }
03756         }
03757       }
03758 
03759       if (state & CEC_IN_FINITE_REPEAT) {
03760         qn->comb_exp_check_num = -1;
03761       }
03762       else {
03763         if (IS_REPEAT_INFINITE(qn->upper)) {
03764           var_num = CEC_INFINITE_NUM;
03765           child_state |= CEC_IN_INFINITE_REPEAT;
03766         }
03767         else {
03768           var_num = qn->upper - qn->lower;
03769         }
03770 
03771         if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
03772           add_state |= CEC_CONT_BIG_REPEAT;
03773 
03774         if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
03775             ((state & CEC_CONT_BIG_REPEAT) != 0 &&
03776              var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
03777           if (qn->comb_exp_check_num == 0) {
03778             env->num_comb_exp_check++;
03779             qn->comb_exp_check_num = env->num_comb_exp_check;
03780             if (env->curr_max_regnum > env->comb_exp_max_regnum)
03781               env->comb_exp_max_regnum = env->curr_max_regnum;
03782           }
03783         }
03784       }
03785 
03786       r = setup_comb_exp_check(target, child_state, env);
03787       r |= add_state;
03788     }
03789     break;
03790 
03791   case NT_ENCLOSE:
03792     {
03793       EncloseNode* en = NENCLOSE(node);
03794 
03795       switch (en->type) {
03796       case ENCLOSE_MEMORY:
03797         {
03798           if (env->curr_max_regnum < en->regnum)
03799             env->curr_max_regnum = en->regnum;
03800 
03801           r = setup_comb_exp_check(en->target, state, env);
03802         }
03803         break;
03804 
03805       default:
03806         r = setup_comb_exp_check(en->target, state, env);
03807         break;
03808       }
03809     }
03810     break;
03811 
03812 #ifdef USE_SUBEXP_CALL
03813   case NT_CALL:
03814     if (IS_CALL_RECURSION(NCALL(node)))
03815       env->has_recursion = 1;
03816     else
03817       r = setup_comb_exp_check(NCALL(node)->target, state, env);
03818     break;
03819 #endif
03820 
03821   default:
03822     break;
03823   }
03824 
03825   return r;
03826 }
03827 #endif
03828 
03829 #define IN_ALT        (1<<0)
03830 #define IN_NOT        (1<<1)
03831 #define IN_REPEAT     (1<<2)
03832 #define IN_VAR_REPEAT (1<<3)
03833 #define IN_ROOT       (1<<4)
03834 
03835 /* setup_tree does the following work.
03836  1. check empty loop. (set qn->target_empty_info)
03837  2. expand ignore-case in char class.
03838  3. set memory status bit flags. (reg->mem_stats)
03839  4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
03840  5. find invalid patterns in look-behind.
03841  6. expand repeated string.
03842  */
03843 static int
03844 setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
03845 {
03846   int type;
03847   int r = 0;
03848   int in_root = state & IN_ROOT;
03849 
03850   state &= ~IN_ROOT;
03851 restart:
03852   type = NTYPE(node);
03853   switch (type) {
03854   case NT_LIST:
03855     {
03856       Node* prev = NULL_NODE;
03857       int prev_in_root = 0;
03858       state |= in_root;
03859       do {
03860         r = setup_tree(NCAR(node), reg, state, env);
03861         if (IS_NOT_NULL(prev) && r == 0) {
03862           r = next_setup(prev, NCAR(node), prev_in_root, reg);
03863         }
03864         prev = NCAR(node);
03865         prev_in_root = state & IN_ROOT;
03866         state &= ~IN_ROOT;
03867       } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
03868     }
03869     break;
03870 
03871   case NT_ALT:
03872     do {
03873       r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
03874     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
03875     break;
03876 
03877   case NT_CCLASS:
03878     break;
03879 
03880   case NT_STR:
03881     if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
03882       r = expand_case_fold_string(node, reg);
03883     }
03884     break;
03885 
03886   case NT_CTYPE:
03887   case NT_CANY:
03888     break;
03889 
03890 #ifdef USE_SUBEXP_CALL
03891   case NT_CALL:
03892     break;
03893 #endif
03894 
03895   case NT_BREF:
03896     {
03897       int i;
03898       int* p;
03899       Node** nodes = SCANENV_MEM_NODES(env);
03900       BRefNode* br = NBREF(node);
03901       p = BACKREFS_P(br);
03902       for (i = 0; i < br->back_num; i++) {
03903         if (p[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
03904         BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
03905         BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
03906 #ifdef USE_BACKREF_WITH_LEVEL
03907         if (IS_BACKREF_NEST_LEVEL(br)) {
03908           BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
03909         }
03910 #endif
03911         SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
03912       }
03913     }
03914     break;
03915 
03916   case NT_QTFR:
03917     {
03918       OnigDistance d;
03919       QtfrNode* qn = NQTFR(node);
03920       Node* target = qn->target;
03921 
03922       if ((state & IN_REPEAT) != 0) {
03923         qn->state |= NST_IN_REPEAT;
03924       }
03925 
03926       if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
03927         r = get_min_match_length(target, &d, env);
03928         if (r) break;
03929         if (d == 0) {
03930           qn->target_empty_info = NQ_TARGET_IS_EMPTY;
03931 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
03932           r = quantifiers_memory_node_info(target);
03933           if (r < 0) break;
03934           if (r > 0) {
03935             qn->target_empty_info = r;
03936           }
03937 #endif
03938 #if 0
03939           r = get_max_match_length(target, &d, env);
03940           if (r == 0 && d == 0) {
03941             /*  ()* ==> ()?, ()+ ==> ()  */
03942             qn->upper = 1;
03943             if (qn->lower > 1) qn->lower = 1;
03944             if (NTYPE(target) == NT_STR) {
03945               qn->upper = qn->lower = 0;  /* /(?:)+/ ==> // */
03946             }
03947           }
03948 #endif
03949         }
03950       }
03951 
03952       state |= IN_REPEAT;
03953       if (qn->lower != qn->upper)
03954         state |= IN_VAR_REPEAT;
03955       r = setup_tree(target, reg, state, env);
03956       if (r) break;
03957 
03958       /* expand string */
03959 #define EXPAND_STRING_MAX_LENGTH  100
03960       if (NTYPE(target) == NT_STR) {
03961         if (qn->lower > 1) {
03962           int i, n = qn->lower;
03963           OnigDistance len = NSTRING_LEN(target);
03964           StrNode* sn = NSTR(target);
03965           Node* np;
03966 
03967           np = onig_node_new_str(sn->s, sn->end);
03968           if (IS_NULL(np)) return ONIGERR_MEMORY;
03969           NSTR(np)->flag = sn->flag;
03970 
03971           for (i = 1; i < n && (i+1) * len <= EXPAND_STRING_MAX_LENGTH; i++) {
03972             r = onig_node_str_cat(np, sn->s, sn->end);
03973             if (r) {
03974               onig_node_free(np);
03975               return r;
03976             }
03977           }
03978           if (i < qn->upper || IS_REPEAT_INFINITE(qn->upper)) {
03979             Node *np1, *np2;
03980 
03981             qn->lower -= i;
03982             if (! IS_REPEAT_INFINITE(qn->upper))
03983               qn->upper -= i;
03984 
03985             np1 = onig_node_new_list(np, NULL);
03986             if (IS_NULL(np1)) {
03987               onig_node_free(np);
03988               return ONIGERR_MEMORY;
03989             }
03990             swap_node(np1, node);
03991             np2 = onig_node_list_add(node, np1);
03992             if (IS_NULL(np2)) {
03993               onig_node_free(np1);
03994               return ONIGERR_MEMORY;
03995             }
03996           }
03997           else {
03998             swap_node(np, node);
03999             onig_node_free(np);
04000           }
04001           break; /* break case NT_QTFR: */
04002         }
04003       }
04004 
04005 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
04006       if (qn->greedy && (qn->target_empty_info != 0)) {
04007         if (NTYPE(target) == NT_QTFR) {
04008           QtfrNode* tqn = NQTFR(target);
04009           if (IS_NOT_NULL(tqn->head_exact)) {
04010             qn->head_exact  = tqn->head_exact;
04011             tqn->head_exact = NULL;
04012           }
04013         }
04014         else {
04015           qn->head_exact = get_head_value_node(qn->target, 1, reg);
04016         }
04017       }
04018 #endif
04019     }
04020     break;
04021 
04022   case NT_ENCLOSE:
04023     {
04024       EncloseNode* en = NENCLOSE(node);
04025 
04026       switch (en->type) {
04027       case ENCLOSE_OPTION:
04028         {
04029           OnigOptionType options = reg->options;
04030           state |= in_root;
04031           reg->options = NENCLOSE(node)->option;
04032           r = setup_tree(NENCLOSE(node)->target, reg, state, env);
04033           reg->options = options;
04034         }
04035         break;
04036 
04037       case ENCLOSE_MEMORY:
04038         if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {
04039           BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
04040           /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
04041         }
04042         r = setup_tree(en->target, reg, state, env);
04043         break;
04044 
04045       case ENCLOSE_STOP_BACKTRACK:
04046         {
04047           Node* target = en->target;
04048           r = setup_tree(target, reg, state, env);
04049           if (NTYPE(target) == NT_QTFR) {
04050             QtfrNode* tqn = NQTFR(target);
04051             if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
04052                 tqn->greedy != 0) {  /* (?>a*), a*+ etc... */
04053               int qtype = NTYPE(tqn->target);
04054               if (IS_NODE_TYPE_SIMPLE(qtype))
04055                 SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
04056             }
04057           }
04058         }
04059         break;
04060 
04061       case ENCLOSE_CONDITION:
04062 #ifdef USE_NAMED_GROUP
04063         if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node)) &&
04064             env->num_named > 0 &&
04065             IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
04066             !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
04067           return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
04068         }
04069 #endif
04070         r = setup_tree(NENCLOSE(node)->target, reg, state, env);
04071         break;
04072       }
04073     }
04074     break;
04075 
04076   case NT_ANCHOR:
04077     {
04078       AnchorNode* an = NANCHOR(node);
04079 
04080       switch (an->type) {
04081       case ANCHOR_PREC_READ:
04082         r = setup_tree(an->target, reg, state, env);
04083         break;
04084       case ANCHOR_PREC_READ_NOT:
04085         r = setup_tree(an->target, reg, (state | IN_NOT), env);
04086         break;
04087 
04088 /* allowed node types in look-behind */
04089 #define ALLOWED_TYPE_IN_LB  \
04090   ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
04091     BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
04092 
04093 #define ALLOWED_ENCLOSE_IN_LB       ( ENCLOSE_MEMORY )
04094 #define ALLOWED_ENCLOSE_IN_LB_NOT   0
04095 
04096 #define ALLOWED_ANCHOR_IN_LB \
04097 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
04098   ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
04099   ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
04100   ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
04101 #define ALLOWED_ANCHOR_IN_LB_NOT \
04102 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
04103   ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
04104   ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
04105   ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
04106 
04107       case ANCHOR_LOOK_BEHIND:
04108         {
04109           r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
04110                               ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
04111           if (r < 0) return r;
04112           if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
04113           r = setup_look_behind(node, reg, env);
04114           if (r != 0) return r;
04115           if (NTYPE(node) != NT_ANCHOR) goto restart;
04116           r = setup_tree(an->target, reg, state, env);
04117         }
04118         break;
04119 
04120       case ANCHOR_LOOK_BEHIND_NOT:
04121         {
04122           r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
04123                       ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
04124           if (r < 0) return r;
04125           if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
04126           r = setup_look_behind(node, reg, env);
04127           if (r != 0) return r;
04128           if (NTYPE(node) != NT_ANCHOR) goto restart;
04129           r = setup_tree(an->target, reg, (state | IN_NOT), env);
04130         }
04131         break;
04132       }
04133     }
04134     break;
04135 
04136   default:
04137     break;
04138   }
04139 
04140   return r;
04141 }
04142 
04143 #ifndef USE_SUNDAY_QUICK_SEARCH
04144 /* set skip map for Boyer-Moore search */
04145 static int
04146 set_bm_skip(UChar* s, UChar* end, regex_t* reg,
04147             UChar skip[], int** int_skip, int ignore_case)
04148 {
04149   OnigDistance i, len;
04150   int clen, flen, n, j, k;
04151   UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN];
04152   OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
04153   OnigEncoding enc = reg->enc;
04154 
04155   len = end - s;
04156   if (len < ONIG_CHAR_TABLE_SIZE) {
04157     for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )len;
04158 
04159     n = 0;
04160     for (i = 0; i < len - 1; i += clen) {
04161       p = s + i;
04162       if (ignore_case)
04163         n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
04164                                                p, end, items);
04165       clen = enclen(enc, p, end);
04166 
04167       for (j = 0; j < n; j++) {
04168         if ((items[j].code_len != 1) || (items[j].byte_len != clen))
04169           return 1;  /* different length isn't supported. */
04170         flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
04171         if (flen != clen)
04172           return 1;  /* different length isn't supported. */
04173       }
04174       for (j = 0; j < clen; j++) {
04175         skip[s[i + j]] = (UChar )(len - 1 - i - j);
04176         for (k = 0; k < n; k++) {
04177           skip[buf[k][j]] = (UChar )(len - 1 - i - j);
04178         }
04179       }
04180     }
04181   }
04182   else {
04183     if (IS_NULL(*int_skip)) {
04184       *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
04185       if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
04186     }
04187     for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )len;
04188 
04189     n = 0;
04190     for (i = 0; i < len - 1; i += clen) {
04191       p = s + i;
04192       if (ignore_case)
04193         n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
04194                                                p, end, items);
04195       clen = enclen(enc, p, end);
04196 
04197       for (j = 0; j < n; j++) {
04198         if ((items[j].code_len != 1) || (items[j].byte_len != clen))
04199           return 1;  /* different length isn't supported. */
04200         flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
04201         if (flen != clen)
04202           return 1;  /* different length isn't supported. */
04203       }
04204       for (j = 0; j < clen; j++) {
04205         (*int_skip)[s[i + j]] = (int )(len - 1 - i - j);
04206         for (k = 0; k < n; k++) {
04207           (*int_skip)[buf[k][j]] = (int )(len - 1 - i - j);
04208         }
04209       }
04210     }
04211   }
04212   return 0;
04213 }
04214 
04215 #else /* USE_SUNDAY_QUICK_SEARCH */
04216 
04217 /* set skip map for Sunday's quick search */
04218 static int
04219 set_bm_skip(UChar* s, UChar* end, regex_t* reg,
04220             UChar skip[], int** int_skip, int ignore_case)
04221 {
04222   OnigDistance i, len;
04223   int clen, flen, n, j, k;
04224   UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN];
04225   OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
04226   OnigEncoding enc = reg->enc;
04227 
04228   len = end - s;
04229   if (len < ONIG_CHAR_TABLE_SIZE) {
04230     for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )(len + 1);
04231 
04232     n = 0;
04233     for (i = 0; i < len; i += clen) {
04234       p = s + i;
04235       if (ignore_case)
04236         n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
04237                                                p, end, items);
04238       clen = enclen(enc, p, end);
04239 
04240       for (j = 0; j < n; j++) {
04241         if ((items[j].code_len != 1) || (items[j].byte_len != clen))
04242           return 1;  /* different length isn't supported. */
04243         flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
04244         if (flen != clen)
04245           return 1;  /* different length isn't supported. */
04246       }
04247       for (j = 0; j < clen; j++) {
04248         skip[s[i + j]] = (UChar )(len - i - j);
04249         for (k = 0; k < n; k++) {
04250           skip[buf[k][j]] = (UChar )(len - i - j);
04251         }
04252       }
04253     }
04254   }
04255   else {
04256     if (IS_NULL(*int_skip)) {
04257       *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
04258       if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
04259     }
04260     for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )(len + 1);
04261 
04262     n = 0;
04263     for (i = 0; i < len; i += clen) {
04264       p = s + i;
04265       if (ignore_case)
04266         n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
04267                                                p, end, items);
04268       clen = enclen(enc, p, end);
04269 
04270       for (j = 0; j < n; j++) {
04271         if ((items[j].code_len != 1) || (items[j].byte_len != clen))
04272           return 1;  /* different length isn't supported. */
04273         flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
04274         if (flen != clen)
04275           return 1;  /* different length isn't supported. */
04276       }
04277       for (j = 0; j < clen; j++) {
04278         (*int_skip)[s[i + j]] = (int )(len - i - j);
04279         for (k = 0; k < n; k++) {
04280           (*int_skip)[buf[k][j]] = (int )(len - i - j);
04281         }
04282       }
04283     }
04284   }
04285   return 0;
04286 }
04287 #endif /* USE_SUNDAY_QUICK_SEARCH */
04288 
04289 #define OPT_EXACT_MAXLEN   24
04290 
04291 typedef struct {
04292   OnigDistance min;  /* min byte length */
04293   OnigDistance max;  /* max byte length */
04294 } MinMaxLen;
04295 
04296 typedef struct {
04297   MinMaxLen        mmd;
04298   OnigEncoding     enc;
04299   OnigOptionType   options;
04300   OnigCaseFoldType case_fold_flag;
04301   ScanEnv*         scan_env;
04302 } OptEnv;
04303 
04304 typedef struct {
04305   int left_anchor;
04306   int right_anchor;
04307 } OptAncInfo;
04308 
04309 typedef struct {
04310   MinMaxLen  mmd; /* info position */
04311   OptAncInfo anc;
04312 
04313   int   reach_end;
04314   int   ignore_case;  /* -1: unset, 0: case sensitive, 1: ignore case */
04315   int   len;
04316   UChar s[OPT_EXACT_MAXLEN];
04317 } OptExactInfo;
04318 
04319 typedef struct {
04320   MinMaxLen mmd; /* info position */
04321   OptAncInfo anc;
04322 
04323   int   value;      /* weighted value */
04324   UChar map[ONIG_CHAR_TABLE_SIZE];
04325 } OptMapInfo;
04326 
04327 typedef struct {
04328   MinMaxLen    len;
04329 
04330   OptAncInfo   anc;
04331   OptExactInfo exb;    /* boundary */
04332   OptExactInfo exm;    /* middle */
04333   OptExactInfo expr;   /* prec read (?=...) */
04334 
04335   OptMapInfo   map;   /* boundary */
04336 } NodeOptInfo;
04337 
04338 
04339 static int
04340 map_position_value(OnigEncoding enc, int i)
04341 {
04342   static const short int ByteValTable[] = {
04343      5,  1,  1,  1,  1,  1,  1,  1,  1, 10, 10,  1,  1, 10,  1,  1,
04344      1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
04345     12,  4,  7,  4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,
04346      6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  5,
04347      5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
04348      6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  6,  5,  5,  5,
04349      5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
04350      6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  1
04351   };
04352 
04353   if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) {
04354     if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
04355       return 20;
04356     else
04357       return (int )ByteValTable[i];
04358   }
04359   else
04360     return 4;   /* Take it easy. */
04361 }
04362 
04363 static int
04364 distance_value(MinMaxLen* mm)
04365 {
04366   /* 1000 / (min-max-dist + 1) */
04367   static const short int dist_vals[] = {
04368     1000,  500,  333,  250,  200,  167,  143,  125,  111,  100,
04369       91,   83,   77,   71,   67,   63,   59,   56,   53,   50,
04370       48,   45,   43,   42,   40,   38,   37,   36,   34,   33,
04371       32,   31,   30,   29,   29,   28,   27,   26,   26,   25,
04372       24,   24,   23,   23,   22,   22,   21,   21,   20,   20,
04373       20,   19,   19,   19,   18,   18,   18,   17,   17,   17,
04374       16,   16,   16,   16,   15,   15,   15,   15,   14,   14,
04375       14,   14,   14,   14,   13,   13,   13,   13,   13,   13,
04376       12,   12,   12,   12,   12,   12,   11,   11,   11,   11,
04377       11,   11,   11,   11,   11,   10,   10,   10,   10,   10
04378   };
04379 
04380   OnigDistance d;
04381 
04382   if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
04383 
04384   d = mm->max - mm->min;
04385   if (d < sizeof(dist_vals)/sizeof(dist_vals[0]))
04386     /* return dist_vals[d] * 16 / (mm->min + 12); */
04387     return (int )dist_vals[d];
04388   else
04389     return 1;
04390 }
04391 
04392 static int
04393 comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
04394 {
04395   if (v2 <= 0) return -1;
04396   if (v1 <= 0) return  1;
04397 
04398   v1 *= distance_value(d1);
04399   v2 *= distance_value(d2);
04400 
04401   if (v2 > v1) return  1;
04402   if (v2 < v1) return -1;
04403 
04404   if (d2->min < d1->min) return  1;
04405   if (d2->min > d1->min) return -1;
04406   return 0;
04407 }
04408 
04409 static int
04410 is_equal_mml(MinMaxLen* a, MinMaxLen* b)
04411 {
04412   return (a->min == b->min && a->max == b->max) ? 1 : 0;
04413 }
04414 
04415 
04416 static void
04417 set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
04418 {
04419   mml->min = min;
04420   mml->max = max;
04421 }
04422 
04423 static void
04424 clear_mml(MinMaxLen* mml)
04425 {
04426   mml->min = mml->max = 0;
04427 }
04428 
04429 static void
04430 copy_mml(MinMaxLen* to, MinMaxLen* from)
04431 {
04432   to->min = from->min;
04433   to->max = from->max;
04434 }
04435 
04436 static void
04437 add_mml(MinMaxLen* to, MinMaxLen* from)
04438 {
04439   to->min = distance_add(to->min, from->min);
04440   to->max = distance_add(to->max, from->max);
04441 }
04442 
04443 #if 0
04444 static void
04445 add_len_mml(MinMaxLen* to, OnigDistance len)
04446 {
04447   to->min = distance_add(to->min, len);
04448   to->max = distance_add(to->max, len);
04449 }
04450 #endif
04451 
04452 static void
04453 alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
04454 {
04455   if (to->min > from->min) to->min = from->min;
04456   if (to->max < from->max) to->max = from->max;
04457 }
04458 
04459 static void
04460 copy_opt_env(OptEnv* to, OptEnv* from)
04461 {
04462   *to = *from;
04463 }
04464 
04465 static void
04466 clear_opt_anc_info(OptAncInfo* anc)
04467 {
04468   anc->left_anchor  = 0;
04469   anc->right_anchor = 0;
04470 }
04471 
04472 static void
04473 copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
04474 {
04475   *to = *from;
04476 }
04477 
04478 static void
04479 concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
04480                     OnigDistance left_len, OnigDistance right_len)
04481 {
04482   clear_opt_anc_info(to);
04483 
04484   to->left_anchor = left->left_anchor;
04485   if (left_len == 0) {
04486     to->left_anchor |= right->left_anchor;
04487   }
04488 
04489   to->right_anchor = right->right_anchor;
04490   if (right_len == 0) {
04491     to->right_anchor |= left->right_anchor;
04492   }
04493 }
04494 
04495 static int
04496 is_left_anchor(int anc)
04497 {
04498   if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
04499       anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
04500       anc == ANCHOR_PREC_READ_NOT)
04501     return 0;
04502 
04503   return 1;
04504 }
04505 
04506 static int
04507 is_set_opt_anc_info(OptAncInfo* to, int anc)
04508 {
04509   if ((to->left_anchor & anc) != 0) return 1;
04510 
04511   return ((to->right_anchor & anc) != 0 ? 1 : 0);
04512 }
04513 
04514 static void
04515 add_opt_anc_info(OptAncInfo* to, int anc)
04516 {
04517   if (is_left_anchor(anc))
04518     to->left_anchor |= anc;
04519   else
04520     to->right_anchor |= anc;
04521 }
04522 
04523 static void
04524 remove_opt_anc_info(OptAncInfo* to, int anc)
04525 {
04526   if (is_left_anchor(anc))
04527     to->left_anchor &= ~anc;
04528   else
04529     to->right_anchor &= ~anc;
04530 }
04531 
04532 static void
04533 alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
04534 {
04535   to->left_anchor  &= add->left_anchor;
04536   to->right_anchor &= add->right_anchor;
04537 }
04538 
04539 static int
04540 is_full_opt_exact_info(OptExactInfo* ex)
04541 {
04542   return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
04543 }
04544 
04545 static void
04546 clear_opt_exact_info(OptExactInfo* ex)
04547 {
04548   clear_mml(&ex->mmd);
04549   clear_opt_anc_info(&ex->anc);
04550   ex->reach_end   = 0;
04551   ex->ignore_case = -1;   /* unset */
04552   ex->len         = 0;
04553   ex->s[0]        = '\0';
04554 }
04555 
04556 static void
04557 copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
04558 {
04559   *to = *from;
04560 }
04561 
04562 static void
04563 concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
04564 {
04565   int i, j, len;
04566   UChar *p, *end;
04567   OptAncInfo tanc;
04568 
04569   if (to->ignore_case < 0)
04570     to->ignore_case = add->ignore_case;
04571   else if (to->ignore_case != add->ignore_case)
04572     return ;  /* avoid */
04573 
04574   p = add->s;
04575   end = p + add->len;
04576   for (i = to->len; p < end; ) {
04577     len = enclen(enc, p, end);
04578     if (i + len > OPT_EXACT_MAXLEN) break;
04579     for (j = 0; j < len && p < end; j++)
04580       to->s[i++] = *p++;
04581   }
04582 
04583   to->len = i;
04584   to->reach_end = (p == end ? add->reach_end : 0);
04585 
04586   concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
04587   if (! to->reach_end) tanc.right_anchor = 0;
04588   copy_opt_anc_info(&to->anc, &tanc);
04589 }
04590 
04591 static void
04592 concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
04593                           int raw ARG_UNUSED, OnigEncoding enc)
04594 {
04595   int i, j, len;
04596   UChar *p;
04597 
04598   for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
04599     len = enclen(enc, p, end);
04600     if (i + len > OPT_EXACT_MAXLEN) break;
04601     for (j = 0; j < len && p < end; j++)
04602       to->s[i++] = *p++;
04603   }
04604 
04605   to->len = i;
04606 }
04607 
04608 static void
04609 alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
04610 {
04611   int i, j, len;
04612 
04613   if (add->len == 0 || to->len == 0) {
04614     clear_opt_exact_info(to);
04615     return ;
04616   }
04617 
04618   if (! is_equal_mml(&to->mmd, &add->mmd)) {
04619     clear_opt_exact_info(to);
04620     return ;
04621   }
04622 
04623   for (i = 0; i < to->len && i < add->len; ) {
04624     if (to->s[i] != add->s[i]) break;
04625     len = enclen(env->enc, to->s + i, to->s + to->len);
04626 
04627     for (j = 1; j < len; j++) {
04628       if (to->s[i+j] != add->s[i+j]) break;
04629     }
04630     if (j < len) break;
04631     i += len;
04632   }
04633 
04634   if (! add->reach_end || i < add->len || i < to->len) {
04635     to->reach_end = 0;
04636   }
04637   to->len = i;
04638   if (to->ignore_case < 0)
04639     to->ignore_case = add->ignore_case;
04640   else if (add->ignore_case >= 0)
04641     to->ignore_case |= add->ignore_case;
04642 
04643   alt_merge_opt_anc_info(&to->anc, &add->anc);
04644   if (! to->reach_end) to->anc.right_anchor = 0;
04645 }
04646 
04647 static void
04648 select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
04649 {
04650   int v1, v2;
04651 
04652   v1 = now->len;
04653   v2 = alt->len;
04654 
04655   if (v2 == 0) {
04656     return ;
04657   }
04658   else if (v1 == 0) {
04659     copy_opt_exact_info(now, alt);
04660     return ;
04661   }
04662   else if (v1 <= 2 && v2 <= 2) {
04663     /* ByteValTable[x] is big value --> low price */
04664     v2 = map_position_value(enc, now->s[0]);
04665     v1 = map_position_value(enc, alt->s[0]);
04666 
04667     if (now->len > 1) v1 += 5;
04668     if (alt->len > 1) v2 += 5;
04669   }
04670 
04671   if (now->ignore_case <= 0) v1 *= 2;
04672   if (alt->ignore_case <= 0) v2 *= 2;
04673 
04674   if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
04675     copy_opt_exact_info(now, alt);
04676 }
04677 
04678 static void
04679 clear_opt_map_info(OptMapInfo* map)
04680 {
04681   static const OptMapInfo clean_info = {
04682     {0, 0}, {0, 0}, 0,
04683     {
04684       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04685       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04686       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04687       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04688       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04689       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04690       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04691       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04692       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04693       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04694       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04695       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04696       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04697       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04698       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04699       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
04700     }
04701   };
04702 
04703   xmemcpy(map, &clean_info, sizeof(OptMapInfo));
04704 }
04705 
04706 static void
04707 copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
04708 {
04709   *to = *from;
04710 }
04711 
04712 static void
04713 add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
04714 {
04715   if (map->map[c] == 0) {
04716     map->map[c] = 1;
04717     map->value += map_position_value(enc, c);
04718   }
04719 }
04720 
04721 static int
04722 add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
04723                           OnigEncoding enc, OnigCaseFoldType case_fold_flag)
04724 {
04725   OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
04726   UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
04727   int i, n;
04728 
04729   add_char_opt_map_info(map, p[0], enc);
04730 
04731   case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
04732   n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
04733   if (n < 0) return n;
04734 
04735   for (i = 0; i < n; i++) {
04736     ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
04737     add_char_opt_map_info(map, buf[0], enc);
04738   }
04739 
04740   return 0;
04741 }
04742 
04743 static void
04744 select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
04745 {
04746   const int z = 1<<15; /* 32768: something big value */
04747 
04748   int v1, v2;
04749 
04750   if (alt->value == 0) return ;
04751   if (now->value == 0) {
04752     copy_opt_map_info(now, alt);
04753     return ;
04754   }
04755 
04756   v1 = z / now->value;
04757   v2 = z / alt->value;
04758   if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
04759     copy_opt_map_info(now, alt);
04760 }
04761 
04762 static int
04763 comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
04764 {
04765 #define COMP_EM_BASE  20
04766   int ve, vm;
04767 
04768   if (m->value <= 0) return -1;
04769 
04770   ve = COMP_EM_BASE * e->len * (e->ignore_case > 0 ? 1 : 2);
04771   vm = COMP_EM_BASE * 5 * 2 / m->value;
04772   return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
04773 }
04774 
04775 static void
04776 alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
04777 {
04778   int i, val;
04779 
04780   /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
04781   if (to->value == 0) return ;
04782   if (add->value == 0 || to->mmd.max < add->mmd.min) {
04783     clear_opt_map_info(to);
04784     return ;
04785   }
04786 
04787   alt_merge_mml(&to->mmd, &add->mmd);
04788 
04789   val = 0;
04790   for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
04791     if (add->map[i])
04792       to->map[i] = 1;
04793 
04794     if (to->map[i])
04795       val += map_position_value(enc, i);
04796   }
04797   to->value = val;
04798 
04799   alt_merge_opt_anc_info(&to->anc, &add->anc);
04800 }
04801 
04802 static void
04803 set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
04804 {
04805   copy_mml(&(opt->exb.mmd),  mmd);
04806   copy_mml(&(opt->expr.mmd), mmd);
04807   copy_mml(&(opt->map.mmd),  mmd);
04808 }
04809 
04810 static void
04811 clear_node_opt_info(NodeOptInfo* opt)
04812 {
04813   clear_mml(&opt->len);
04814   clear_opt_anc_info(&opt->anc);
04815   clear_opt_exact_info(&opt->exb);
04816   clear_opt_exact_info(&opt->exm);
04817   clear_opt_exact_info(&opt->expr);
04818   clear_opt_map_info(&opt->map);
04819 }
04820 
04821 static void
04822 copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
04823 {
04824   *to = *from;
04825 }
04826 
04827 static void
04828 concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
04829 {
04830   int exb_reach, exm_reach;
04831   OptAncInfo tanc;
04832 
04833   concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
04834   copy_opt_anc_info(&to->anc, &tanc);
04835 
04836   if (add->exb.len > 0 && to->len.max == 0) {
04837     concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
04838                         to->len.max, add->len.max);
04839     copy_opt_anc_info(&add->exb.anc, &tanc);
04840   }
04841 
04842   if (add->map.value > 0 && to->len.max == 0) {
04843     if (add->map.mmd.max == 0)
04844       add->map.anc.left_anchor |= to->anc.left_anchor;
04845   }
04846 
04847   exb_reach = to->exb.reach_end;
04848   exm_reach = to->exm.reach_end;
04849 
04850   if (add->len.max != 0)
04851     to->exb.reach_end = to->exm.reach_end = 0;
04852 
04853   if (add->exb.len > 0) {
04854     if (exb_reach) {
04855       concat_opt_exact_info(&to->exb, &add->exb, enc);
04856       clear_opt_exact_info(&add->exb);
04857     }
04858     else if (exm_reach) {
04859       concat_opt_exact_info(&to->exm, &add->exb, enc);
04860       clear_opt_exact_info(&add->exb);
04861     }
04862   }
04863   select_opt_exact_info(enc, &to->exm, &add->exb);
04864   select_opt_exact_info(enc, &to->exm, &add->exm);
04865 
04866   if (to->expr.len > 0) {
04867     if (add->len.max > 0) {
04868       if (to->expr.len > (int )add->len.max)
04869         to->expr.len = (int )add->len.max;
04870 
04871       if (to->expr.mmd.max == 0)
04872         select_opt_exact_info(enc, &to->exb, &to->expr);
04873       else
04874         select_opt_exact_info(enc, &to->exm, &to->expr);
04875     }
04876   }
04877   else if (add->expr.len > 0) {
04878     copy_opt_exact_info(&to->expr, &add->expr);
04879   }
04880 
04881   select_opt_map_info(&to->map, &add->map);
04882 
04883   add_mml(&to->len, &add->len);
04884 }
04885 
04886 static void
04887 alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
04888 {
04889   alt_merge_opt_anc_info  (&to->anc,  &add->anc);
04890   alt_merge_opt_exact_info(&to->exb,  &add->exb, env);
04891   alt_merge_opt_exact_info(&to->exm,  &add->exm, env);
04892   alt_merge_opt_exact_info(&to->expr, &add->expr, env);
04893   alt_merge_opt_map_info(env->enc, &to->map,  &add->map);
04894 
04895   alt_merge_mml(&to->len, &add->len);
04896 }
04897 
04898 
04899 #define MAX_NODE_OPT_INFO_REF_COUNT    5
04900 
04901 static int
04902 optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
04903 {
04904   int type;
04905   int r = 0;
04906 
04907   clear_node_opt_info(opt);
04908   set_bound_node_opt_info(opt, &env->mmd);
04909 
04910   type = NTYPE(node);
04911   switch (type) {
04912   case NT_LIST:
04913     {
04914       OptEnv nenv;
04915       NodeOptInfo nopt;
04916       Node* nd = node;
04917 
04918       copy_opt_env(&nenv, env);
04919       do {
04920         r = optimize_node_left(NCAR(nd), &nopt, &nenv);
04921         if (r == 0) {
04922           add_mml(&nenv.mmd, &nopt.len);
04923           concat_left_node_opt_info(env->enc, opt, &nopt);
04924         }
04925       } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
04926     }
04927     break;
04928 
04929   case NT_ALT:
04930     {
04931       NodeOptInfo nopt;
04932       Node* nd = node;
04933 
04934       do {
04935         r = optimize_node_left(NCAR(nd), &nopt, env);
04936         if (r == 0) {
04937           if (nd == node) copy_node_opt_info(opt, &nopt);
04938           else            alt_merge_node_opt_info(opt, &nopt, env);
04939         }
04940       } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
04941     }
04942     break;
04943 
04944   case NT_STR:
04945     {
04946       StrNode* sn = NSTR(node);
04947       OnigDistance slen = sn->end - sn->s;
04948       int is_raw = NSTRING_IS_RAW(node);
04949 
04950       if (! NSTRING_IS_AMBIG(node)) {
04951         concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
04952                                   is_raw, env->enc);
04953         opt->exb.ignore_case = 0;
04954         if (slen > 0) {
04955           add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
04956         }
04957         set_mml(&opt->len, slen, slen);
04958       }
04959       else {
04960         OnigDistance max;
04961 
04962         if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
04963           int n = onigenc_strlen(env->enc, sn->s, sn->end);
04964           max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
04965         }
04966         else {
04967           concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
04968                                     is_raw, env->enc);
04969           opt->exb.ignore_case = 1;
04970 
04971           if (slen > 0) {
04972             r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
04973                                           env->enc, env->case_fold_flag);
04974             if (r != 0) break;
04975           }
04976 
04977           max = slen;
04978         }
04979 
04980         set_mml(&opt->len, slen, max);
04981       }
04982 
04983       if ((OnigDistance )opt->exb.len == slen)
04984         opt->exb.reach_end = 1;
04985     }
04986     break;
04987 
04988   case NT_CCLASS:
04989     {
04990       int i, z;
04991       CClassNode* cc = NCCLASS(node);
04992 
04993       /* no need to check ignore case. (set in setup_tree()) */
04994 
04995       if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
04996         OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
04997         OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
04998 
04999         set_mml(&opt->len, min, max);
05000       }
05001       else {
05002         for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
05003           z = BITSET_AT(cc->bs, i);
05004           if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
05005             add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
05006           }
05007         }
05008         set_mml(&opt->len, 1, 1);
05009       }
05010     }
05011     break;
05012 
05013   case NT_CTYPE:
05014     {
05015       int i, min, max;
05016       int maxcode;
05017 
05018       max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
05019 
05020       if (max == 1) {
05021         min = 1;
05022 
05023         maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
05024         switch (NCTYPE(node)->ctype) {
05025         case ONIGENC_CTYPE_WORD:
05026           if (NCTYPE(node)->not != 0) {
05027             for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
05028               if (! ONIGENC_IS_CODE_WORD(env->enc, i) || i >= maxcode) {
05029                 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
05030               }
05031             }
05032           }
05033           else {
05034             for (i = 0; i < maxcode; i++) {
05035               if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
05036                 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
05037               }
05038             }
05039           }
05040           break;
05041         }
05042       }
05043       else {
05044         min = ONIGENC_MBC_MINLEN(env->enc);
05045       }
05046       set_mml(&opt->len, min, max);
05047     }
05048     break;
05049 
05050   case NT_CANY:
05051     {
05052       OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
05053       OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
05054       set_mml(&opt->len, min, max);
05055     }
05056     break;
05057 
05058   case NT_ANCHOR:
05059     switch (NANCHOR(node)->type) {
05060     case ANCHOR_BEGIN_BUF:
05061     case ANCHOR_BEGIN_POSITION:
05062     case ANCHOR_BEGIN_LINE:
05063     case ANCHOR_END_BUF:
05064     case ANCHOR_SEMI_END_BUF:
05065     case ANCHOR_END_LINE:
05066     case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */
05067       add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
05068       break;
05069 
05070     case ANCHOR_PREC_READ:
05071       {
05072         NodeOptInfo nopt;
05073 
05074         r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
05075         if (r == 0) {
05076           if (nopt.exb.len > 0)
05077             copy_opt_exact_info(&opt->expr, &nopt.exb);
05078           else if (nopt.exm.len > 0)
05079             copy_opt_exact_info(&opt->expr, &nopt.exm);
05080 
05081           opt->expr.reach_end = 0;
05082 
05083           if (nopt.map.value > 0)
05084             copy_opt_map_info(&opt->map, &nopt.map);
05085         }
05086       }
05087       break;
05088 
05089     case ANCHOR_PREC_READ_NOT:
05090     case ANCHOR_LOOK_BEHIND_NOT:
05091       break;
05092     }
05093     break;
05094 
05095   case NT_BREF:
05096     {
05097       int i;
05098       int* backs;
05099       OnigDistance min, max, tmin, tmax;
05100       Node** nodes = SCANENV_MEM_NODES(env->scan_env);
05101       BRefNode* br = NBREF(node);
05102 
05103       if (br->state & NST_RECURSION) {
05104         set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
05105         break;
05106       }
05107       backs = BACKREFS_P(br);
05108       r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
05109       if (r != 0) break;
05110       r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
05111       if (r != 0) break;
05112       for (i = 1; i < br->back_num; i++) {
05113         r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
05114         if (r != 0) break;
05115         r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
05116         if (r != 0) break;
05117         if (min > tmin) min = tmin;
05118         if (max < tmax) max = tmax;
05119       }
05120       if (r == 0) set_mml(&opt->len, min, max);
05121     }
05122     break;
05123 
05124 #ifdef USE_SUBEXP_CALL
05125   case NT_CALL:
05126     if (IS_CALL_RECURSION(NCALL(node)))
05127       set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
05128     else {
05129       OnigOptionType save = env->options;
05130       env->options = NENCLOSE(NCALL(node)->target)->option;
05131       r = optimize_node_left(NCALL(node)->target, opt, env);
05132       env->options = save;
05133     }
05134     break;
05135 #endif
05136 
05137   case NT_QTFR:
05138     {
05139       int i;
05140       OnigDistance min, max;
05141       NodeOptInfo nopt;
05142       QtfrNode* qn = NQTFR(node);
05143 
05144       r = optimize_node_left(qn->target, &nopt, env);
05145       if (r) break;
05146 
05147       if (/*qn->lower == 0 &&*/ IS_REPEAT_INFINITE(qn->upper)) {
05148         if (env->mmd.max == 0 &&
05149             NTYPE(qn->target) == NT_CANY && qn->greedy) {
05150           if (IS_MULTILINE(env->options))
05151             /* implicit anchor: /.*a/ ==> /\A.*a/ */
05152             add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
05153           else
05154             add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
05155         }
05156       }
05157       else {
05158         if (qn->lower > 0) {
05159           copy_node_opt_info(opt, &nopt);
05160           if (nopt.exb.len > 0) {
05161             if (nopt.exb.reach_end) {
05162               for (i = 2; i <= qn->lower &&
05163                           ! is_full_opt_exact_info(&opt->exb); i++) {
05164                 concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
05165               }
05166               if (i < qn->lower) {
05167                 opt->exb.reach_end = 0;
05168               }
05169             }
05170           }
05171 
05172           if (qn->lower != qn->upper) {
05173             opt->exb.reach_end = 0;
05174             opt->exm.reach_end = 0;
05175           }
05176           if (qn->lower > 1)
05177             opt->exm.reach_end = 0;
05178         }
05179       }
05180 
05181       min = distance_multiply(nopt.len.min, qn->lower);
05182       if (IS_REPEAT_INFINITE(qn->upper))
05183         max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
05184       else
05185         max = distance_multiply(nopt.len.max, qn->upper);
05186 
05187       set_mml(&opt->len, min, max);
05188     }
05189     break;
05190 
05191   case NT_ENCLOSE:
05192     {
05193       EncloseNode* en = NENCLOSE(node);
05194 
05195       switch (en->type) {
05196       case ENCLOSE_OPTION:
05197         {
05198           OnigOptionType save = env->options;
05199 
05200           env->options = en->option;
05201           r = optimize_node_left(en->target, opt, env);
05202           env->options = save;
05203         }
05204         break;
05205 
05206       case ENCLOSE_MEMORY:
05207 #ifdef USE_SUBEXP_CALL
05208         en->opt_count++;
05209         if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
05210           OnigDistance min, max;
05211 
05212           min = 0;
05213           max = ONIG_INFINITE_DISTANCE;
05214           if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
05215           if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
05216           set_mml(&opt->len, min, max);
05217         }
05218         else
05219 #endif
05220         {
05221           r = optimize_node_left(en->target, opt, env);
05222 
05223           if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
05224             if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
05225               remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
05226           }
05227         }
05228         break;
05229 
05230       case ENCLOSE_STOP_BACKTRACK:
05231       case ENCLOSE_CONDITION:
05232         r = optimize_node_left(en->target, opt, env);
05233         break;
05234       }
05235     }
05236     break;
05237 
05238   default:
05239 #ifdef ONIG_DEBUG
05240     if (!onig_is_prelude()) fprintf(stderr, "optimize_node_left: undefined node type %d\n",
05241             NTYPE(node));
05242 #endif
05243     r = ONIGERR_TYPE_BUG;
05244     break;
05245   }
05246 
05247   return r;
05248 }
05249 
05250 static int
05251 set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
05252 {
05253   int r;
05254   int allow_reverse;
05255 
05256   if (e->len == 0) return 0;
05257 
05258   reg->exact = (UChar* )xmalloc(e->len);
05259   CHECK_NULL_RETURN_MEMERR(reg->exact);
05260   xmemcpy(reg->exact, e->s, e->len);
05261   reg->exact_end = reg->exact + e->len;
05262 
05263   allow_reverse =
05264         ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
05265 
05266   if (e->ignore_case > 0) {
05267     if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
05268       r = set_bm_skip(reg->exact, reg->exact_end, reg,
05269                       reg->map, &(reg->int_map), 1);
05270       if (r == 0) {
05271         reg->optimize = (allow_reverse != 0
05272                          ? ONIG_OPTIMIZE_EXACT_BM_IC : ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC);
05273       }
05274       else {
05275         reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
05276       }
05277     }
05278     else {
05279       reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
05280     }
05281   }
05282   else {
05283     if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
05284       r = set_bm_skip(reg->exact, reg->exact_end, reg,
05285                       reg->map, &(reg->int_map), 0);
05286       if (r) return r;
05287 
05288       reg->optimize = (allow_reverse != 0
05289                        ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
05290     }
05291     else {
05292       reg->optimize = ONIG_OPTIMIZE_EXACT;
05293     }
05294   }
05295 
05296   reg->dmin = e->mmd.min;
05297   reg->dmax = e->mmd.max;
05298 
05299   if (reg->dmin != ONIG_INFINITE_DISTANCE) {
05300     reg->threshold_len = (int )(reg->dmin + (reg->exact_end - reg->exact));
05301   }
05302 
05303   return 0;
05304 }
05305 
05306 static void
05307 set_optimize_map_info(regex_t* reg, OptMapInfo* m)
05308 {
05309   int i;
05310 
05311   for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
05312     reg->map[i] = m->map[i];
05313 
05314   reg->optimize   = ONIG_OPTIMIZE_MAP;
05315   reg->dmin       = m->mmd.min;
05316   reg->dmax       = m->mmd.max;
05317 
05318   if (reg->dmin != ONIG_INFINITE_DISTANCE) {
05319     reg->threshold_len = (int )(reg->dmin + 1);
05320   }
05321 }
05322 
05323 static void
05324 set_sub_anchor(regex_t* reg, OptAncInfo* anc)
05325 {
05326   reg->sub_anchor |= anc->left_anchor  & ANCHOR_BEGIN_LINE;
05327   reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
05328 }
05329 
05330 #ifdef ONIG_DEBUG
05331 static void print_optimize_info(FILE* f, regex_t* reg);
05332 #endif
05333 
05334 static int
05335 set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
05336 {
05337 
05338   int r;
05339   NodeOptInfo opt;
05340   OptEnv env;
05341 
05342   env.enc            = reg->enc;
05343   env.options        = reg->options;
05344   env.case_fold_flag = reg->case_fold_flag;
05345   env.scan_env   = scan_env;
05346   clear_mml(&env.mmd);
05347 
05348   r = optimize_node_left(node, &opt, &env);
05349   if (r) return r;
05350 
05351   reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
05352         ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML |
05353         ANCHOR_LOOK_BEHIND);
05354 
05355   reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF);
05356 
05357   if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
05358     reg->anchor_dmin = opt.len.min;
05359     reg->anchor_dmax = opt.len.max;
05360   }
05361 
05362   if (opt.exb.len > 0 || opt.exm.len > 0) {
05363     select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
05364     if (opt.map.value > 0 &&
05365         comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
05366       goto set_map;
05367     }
05368     else {
05369       r = set_optimize_exact_info(reg, &opt.exb);
05370       set_sub_anchor(reg, &opt.exb.anc);
05371     }
05372   }
05373   else if (opt.map.value > 0) {
05374   set_map:
05375     set_optimize_map_info(reg, &opt.map);
05376     set_sub_anchor(reg, &opt.map.anc);
05377   }
05378   else {
05379     reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
05380     if (opt.len.max == 0)
05381       reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
05382   }
05383 
05384 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
05385   if (!onig_is_prelude()) print_optimize_info(stderr, reg);
05386 #endif
05387   return r;
05388 }
05389 
05390 static void
05391 clear_optimize_info(regex_t* reg)
05392 {
05393   reg->optimize      = ONIG_OPTIMIZE_NONE;
05394   reg->anchor        = 0;
05395   reg->anchor_dmin   = 0;
05396   reg->anchor_dmax   = 0;
05397   reg->sub_anchor    = 0;
05398   reg->exact_end     = (UChar* )NULL;
05399   reg->threshold_len = 0;
05400   if (IS_NOT_NULL(reg->exact)) {
05401     xfree(reg->exact);
05402     reg->exact = (UChar* )NULL;
05403   }
05404 }
05405 
05406 #ifdef ONIG_DEBUG
05407 
05408 static void print_enc_string(FILE* fp, OnigEncoding enc,
05409                              const UChar *s, const UChar *end)
05410 {
05411   fprintf(fp, "\nPATTERN: /");
05412 
05413   if (ONIGENC_MBC_MINLEN(enc) > 1) {
05414     const UChar *p;
05415     OnigCodePoint code;
05416 
05417     p = s;
05418     while (p < end) {
05419       code = ONIGENC_MBC_TO_CODE(enc, p, end);
05420       if (code >= 0x80) {
05421         fprintf(fp, " 0x%04x ", (int )code);
05422       }
05423       else {
05424         fputc((int )code, fp);
05425       }
05426 
05427       p += enclen(enc, p, end);
05428     }
05429   }
05430   else {
05431     while (s < end) {
05432       fputc((int )*s, fp);
05433       s++;
05434     }
05435   }
05436 
05437   fprintf(fp, "/ (%s)\n", enc->name);
05438 }
05439 
05440 static void
05441 print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
05442 {
05443   if (a == ONIG_INFINITE_DISTANCE)
05444     fputs("inf", f);
05445   else
05446     fprintf(f, "(%"PRIuSIZE")", a);
05447 
05448   fputs("-", f);
05449 
05450   if (b == ONIG_INFINITE_DISTANCE)
05451     fputs("inf", f);
05452   else
05453     fprintf(f, "(%"PRIuSIZE")", b);
05454 }
05455 
05456 static void
05457 print_anchor(FILE* f, int anchor)
05458 {
05459   int q = 0;
05460 
05461   fprintf(f, "[");
05462 
05463   if (anchor & ANCHOR_BEGIN_BUF) {
05464     fprintf(f, "begin-buf");
05465     q = 1;
05466   }
05467   if (anchor & ANCHOR_BEGIN_LINE) {
05468     if (q) fprintf(f, ", ");
05469     q = 1;
05470     fprintf(f, "begin-line");
05471   }
05472   if (anchor & ANCHOR_BEGIN_POSITION) {
05473     if (q) fprintf(f, ", ");
05474     q = 1;
05475     fprintf(f, "begin-pos");
05476   }
05477   if (anchor & ANCHOR_END_BUF) {
05478     if (q) fprintf(f, ", ");
05479     q = 1;
05480     fprintf(f, "end-buf");
05481   }
05482   if (anchor & ANCHOR_SEMI_END_BUF) {
05483     if (q) fprintf(f, ", ");
05484     q = 1;
05485     fprintf(f, "semi-end-buf");
05486   }
05487   if (anchor & ANCHOR_END_LINE) {
05488     if (q) fprintf(f, ", ");
05489     q = 1;
05490     fprintf(f, "end-line");
05491   }
05492   if (anchor & ANCHOR_ANYCHAR_STAR) {
05493     if (q) fprintf(f, ", ");
05494     q = 1;
05495     fprintf(f, "anychar-star");
05496   }
05497   if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
05498     if (q) fprintf(f, ", ");
05499     fprintf(f, "anychar-star-ml");
05500   }
05501 
05502   fprintf(f, "]");
05503 }
05504 
05505 static void
05506 print_optimize_info(FILE* f, regex_t* reg)
05507 {
05508   static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
05509                               "EXACT_IC", "MAP",
05510                               "EXACT_BM_IC", "EXACT_BM_NOT_REV_IC" };
05511 
05512   fprintf(f, "optimize: %s\n", on[reg->optimize]);
05513   fprintf(f, "  anchor: "); print_anchor(f, reg->anchor);
05514   if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
05515     print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
05516   fprintf(f, "\n");
05517 
05518   if (reg->optimize) {
05519     fprintf(f, "  sub anchor: "); print_anchor(f, reg->sub_anchor);
05520     fprintf(f, "\n");
05521   }
05522   fprintf(f, "\n");
05523 
05524   if (reg->exact) {
05525     UChar *p;
05526     fprintf(f, "exact: [");
05527     for (p = reg->exact; p < reg->exact_end; p++) {
05528       fputc(*p, f);
05529     }
05530     fprintf(f, "]: length: %ld\n", (reg->exact_end - reg->exact));
05531   }
05532   else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
05533     int c, i, n = 0;
05534 
05535     for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
05536       if (reg->map[i]) n++;
05537 
05538     fprintf(f, "map: n=%d\n", n);
05539     if (n > 0) {
05540       c = 0;
05541       fputc('[', f);
05542       for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
05543         if (reg->map[i] != 0) {
05544           if (c > 0)  fputs(", ", f);
05545           c++;
05546           if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
05547               ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
05548             fputc(i, f);
05549           else
05550             fprintf(f, "%d", i);
05551         }
05552       }
05553       fprintf(f, "]\n");
05554     }
05555   }
05556 }
05557 #endif /* ONIG_DEBUG */
05558 
05559 
05560 extern void
05561 onig_free_body(regex_t* reg)
05562 {
05563   if (IS_NOT_NULL(reg)) {
05564     if (IS_NOT_NULL(reg->p))                xfree(reg->p);
05565     if (IS_NOT_NULL(reg->exact))            xfree(reg->exact);
05566     if (IS_NOT_NULL(reg->int_map))          xfree(reg->int_map);
05567     if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
05568     if (IS_NOT_NULL(reg->repeat_range))     xfree(reg->repeat_range);
05569     if (IS_NOT_NULL(reg->chain))            onig_free(reg->chain);
05570 
05571 #ifdef USE_NAMED_GROUP
05572     onig_names_free(reg);
05573 #endif
05574   }
05575 }
05576 
05577 extern void
05578 onig_free(regex_t* reg)
05579 {
05580   if (IS_NOT_NULL(reg)) {
05581     onig_free_body(reg);
05582     xfree(reg);
05583   }
05584 }
05585 
05586 size_t
05587 onig_memsize(const regex_t *reg)
05588 {
05589     size_t size = sizeof(regex_t);
05590     if (IS_NULL(reg)) return 0;
05591     if (IS_NOT_NULL(reg->p))                size += reg->alloc;
05592     if (IS_NOT_NULL(reg->exact))            size += reg->exact_end - reg->exact;
05593     if (IS_NOT_NULL(reg->int_map))          size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
05594     if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
05595     if (IS_NOT_NULL(reg->repeat_range))     size += reg->repeat_range_alloc * sizeof(OnigRepeatRange);
05596     if (IS_NOT_NULL(reg->chain))            size += onig_memsize(reg->chain);
05597 
05598     return size;
05599 }
05600 
05601 size_t
05602 onig_region_memsize(const OnigRegion *regs)
05603 {
05604     size_t size = sizeof(*regs);
05605     if (IS_NULL(regs)) return 0;
05606     size += regs->allocated * (sizeof(*regs->beg) + sizeof(*regs->end));
05607     return size;
05608 }
05609 
05610 #define REGEX_TRANSFER(to,from) do {\
05611   (to)->state = ONIG_STATE_MODIFY;\
05612   onig_free_body(to);\
05613   xmemcpy(to, from, sizeof(regex_t));\
05614   xfree(from);\
05615 } while (0)
05616 
05617 extern void
05618 onig_transfer(regex_t* to, regex_t* from)
05619 {
05620   THREAD_ATOMIC_START;
05621   REGEX_TRANSFER(to, from);
05622   THREAD_ATOMIC_END;
05623 }
05624 
05625 #define REGEX_CHAIN_HEAD(reg) do {\
05626   while (IS_NOT_NULL((reg)->chain)) {\
05627     (reg) = (reg)->chain;\
05628   }\
05629 } while (0)
05630 
05631 extern void
05632 onig_chain_link_add(regex_t* to, regex_t* add)
05633 {
05634   THREAD_ATOMIC_START;
05635   REGEX_CHAIN_HEAD(to);
05636   to->chain = add;
05637   THREAD_ATOMIC_END;
05638 }
05639 
05640 extern void
05641 onig_chain_reduce(regex_t* reg)
05642 {
05643   regex_t *head, *prev;
05644 
05645   prev = reg;
05646   head = prev->chain;
05647   if (IS_NOT_NULL(head)) {
05648     reg->state = ONIG_STATE_MODIFY;
05649     while (IS_NOT_NULL(head->chain)) {
05650       prev = head;
05651       head = head->chain;
05652     }
05653     prev->chain = (regex_t* )NULL;
05654     REGEX_TRANSFER(reg, head);
05655   }
05656 }
05657 
05658 #ifdef ONIG_DEBUG
05659 static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg));
05660 #endif
05661 #ifdef ONIG_DEBUG_PARSE_TREE
05662 static void print_tree P_((FILE* f, Node* node));
05663 #endif
05664 
05665 extern int
05666 onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
05667               OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
05668 {
05669 #define COMPILE_INIT_SIZE  20
05670 
05671   int r;
05672   OnigDistance init_size;
05673   Node*  root;
05674   ScanEnv  scan_env = {0};
05675 #ifdef USE_SUBEXP_CALL
05676   UnsetAddrList  uslist;
05677 #endif
05678 
05679   if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
05680 
05681   scan_env.sourcefile = sourcefile;
05682   scan_env.sourceline = sourceline;
05683   reg->state = ONIG_STATE_COMPILING;
05684 
05685 #ifdef ONIG_DEBUG
05686   if (!onig_is_prelude()) print_enc_string(stderr, reg->enc, pattern, pattern_end);
05687 #endif
05688 
05689   if (reg->alloc == 0) {
05690     init_size = (pattern_end - pattern) * 2;
05691     if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
05692     r = BBUF_INIT(reg, init_size);
05693     if (r != 0) goto end;
05694   }
05695   else
05696     reg->used = 0;
05697 
05698   reg->num_mem            = 0;
05699   reg->num_repeat         = 0;
05700   reg->num_null_check     = 0;
05701   reg->repeat_range_alloc = 0;
05702   reg->repeat_range       = (OnigRepeatRange* )NULL;
05703 #ifdef USE_COMBINATION_EXPLOSION_CHECK
05704   reg->num_comb_exp_check = 0;
05705 #endif
05706 
05707   r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
05708   if (r != 0) goto err;
05709 
05710 #ifdef ONIG_DEBUG_PARSE_TREE
05711 # if 0
05712   fprintf(stderr, "ORIGINAL PARSE TREE:\n");
05713   if (!onig_is_prelude()) {
05714     print_tree(stderr, root);
05715   }
05716 # endif
05717 #endif
05718 
05719 #ifdef USE_NAMED_GROUP
05720   /* mixed use named group and no-named group */
05721   if (scan_env.num_named > 0 &&
05722       IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
05723       !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
05724     if (scan_env.num_named != scan_env.num_mem)
05725       r = disable_noname_group_capture(&root, reg, &scan_env);
05726     else
05727       r = numbered_ref_check(root);
05728 
05729     if (r != 0) goto err;
05730   }
05731 #endif
05732 
05733 #ifdef USE_SUBEXP_CALL
05734   if (scan_env.num_call > 0) {
05735     r = unset_addr_list_init(&uslist, scan_env.num_call);
05736     if (r != 0) goto err;
05737     scan_env.unset_addr_list = &uslist;
05738     r = setup_subexp_call(root, &scan_env);
05739     if (r != 0) goto err_unset;
05740     r = subexp_recursive_check_trav(root, &scan_env);
05741     if (r  < 0) goto err_unset;
05742     r = subexp_inf_recursive_check_trav(root, &scan_env);
05743     if (r != 0) goto err_unset;
05744 
05745     reg->num_call = scan_env.num_call;
05746   }
05747   else
05748     reg->num_call = 0;
05749 #endif
05750 
05751   r = setup_tree(root, reg, IN_ROOT, &scan_env);
05752   if (r != 0) goto err_unset;
05753 
05754 #ifdef ONIG_DEBUG_PARSE_TREE
05755   if (!onig_is_prelude()) print_tree(stderr, root);
05756 #endif
05757 
05758   reg->capture_history  = scan_env.capture_history;
05759   reg->bt_mem_start     = scan_env.bt_mem_start;
05760   reg->bt_mem_start    |= reg->capture_history;
05761   if (IS_FIND_CONDITION(reg->options))
05762     BIT_STATUS_ON_ALL(reg->bt_mem_end);
05763   else {
05764     reg->bt_mem_end  = scan_env.bt_mem_end;
05765     reg->bt_mem_end |= reg->capture_history;
05766   }
05767 
05768 #ifdef USE_COMBINATION_EXPLOSION_CHECK
05769   if (scan_env.backrefed_mem == 0
05770 #ifdef USE_SUBEXP_CALL
05771       || scan_env.num_call == 0
05772 #endif
05773       ) {
05774     setup_comb_exp_check(root, 0, &scan_env);
05775 #ifdef USE_SUBEXP_CALL
05776     if (scan_env.has_recursion != 0) {
05777       scan_env.num_comb_exp_check = 0;
05778     }
05779     else
05780 #endif
05781     if (scan_env.comb_exp_max_regnum > 0) {
05782       int i;
05783       for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
05784         if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
05785           scan_env.num_comb_exp_check = 0;
05786           break;
05787         }
05788       }
05789     }
05790   }
05791 
05792   reg->num_comb_exp_check = scan_env.num_comb_exp_check;
05793 #endif
05794 
05795   clear_optimize_info(reg);
05796 #ifndef ONIG_DONT_OPTIMIZE
05797   r = set_optimize_info_from_tree(root, reg, &scan_env);
05798   if (r != 0) goto err_unset;
05799 #endif
05800 
05801   if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
05802     xfree(scan_env.mem_nodes_dynamic);
05803     scan_env.mem_nodes_dynamic = (Node** )NULL;
05804   }
05805 
05806   r = compile_tree(root, reg);
05807   if (r == 0) {
05808     r = add_opcode(reg, OP_END);
05809 #ifdef USE_SUBEXP_CALL
05810     if (scan_env.num_call > 0) {
05811       r = unset_addr_list_fix(&uslist, reg);
05812       unset_addr_list_end(&uslist);
05813       if (r) goto err;
05814     }
05815 #endif
05816 
05817     if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
05818       reg->stack_pop_level = STACK_POP_LEVEL_ALL;
05819     else {
05820       if (reg->bt_mem_start != 0)
05821         reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
05822       else
05823         reg->stack_pop_level = STACK_POP_LEVEL_FREE;
05824     }
05825   }
05826 #ifdef USE_SUBEXP_CALL
05827   else if (scan_env.num_call > 0) {
05828     unset_addr_list_end(&uslist);
05829   }
05830 #endif
05831   onig_node_free(root);
05832 
05833 #ifdef ONIG_DEBUG_COMPILE
05834 #ifdef USE_NAMED_GROUP
05835   if (!onig_is_prelude()) onig_print_names(stderr, reg);
05836 #endif
05837   if (!onig_is_prelude()) print_compiled_byte_code_list(stderr, reg);
05838 #endif
05839 
05840  end:
05841   reg->state = ONIG_STATE_NORMAL;
05842   return r;
05843 
05844  err_unset:
05845 #ifdef USE_SUBEXP_CALL
05846   if (scan_env.num_call > 0) {
05847     unset_addr_list_end(&uslist);
05848   }
05849 #endif
05850  err:
05851   if (IS_NOT_NULL(scan_env.error)) {
05852     if (IS_NOT_NULL(einfo)) {
05853       einfo->enc     = scan_env.enc;
05854       einfo->par     = scan_env.error;
05855       einfo->par_end = scan_env.error_end;
05856     }
05857   }
05858 
05859   onig_node_free(root);
05860   if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
05861       xfree(scan_env.mem_nodes_dynamic);
05862   return r;
05863 }
05864 
05865 #ifdef USE_RECOMPILE_API
05866 extern int
05867 onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
05868             OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
05869             OnigErrorInfo* einfo)
05870 {
05871   int r;
05872   regex_t *new_reg;
05873 
05874   r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo);
05875   if (r) return r;
05876   if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
05877     onig_transfer(reg, new_reg);
05878   }
05879   else {
05880     onig_chain_link_add(reg, new_reg);
05881   }
05882   return 0;
05883 }
05884 #endif
05885 
05886 static int onig_inited = 0;
05887 
05888 extern int
05889 onig_reg_init(regex_t* reg, OnigOptionType option,
05890               OnigCaseFoldType case_fold_flag,
05891               OnigEncoding enc, const OnigSyntaxType* syntax)
05892 {
05893   if (! onig_inited)
05894     onig_init();
05895 
05896   if (IS_NULL(reg))
05897     return ONIGERR_INVALID_ARGUMENT;
05898 
05899   if (ONIGENC_IS_UNDEF(enc))
05900     return ONIGERR_DEFAULT_ENCODING_IS_NOT_SET;
05901 
05902   if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
05903       == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
05904     return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
05905   }
05906 
05907   (reg)->state = ONIG_STATE_MODIFY;
05908 
05909   if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
05910     option |= syntax->options;
05911     option &= ~ONIG_OPTION_SINGLELINE;
05912   }
05913   else
05914     option |= syntax->options;
05915 
05916   (reg)->enc              = enc;
05917   (reg)->options          = option;
05918   (reg)->syntax           = syntax;
05919   (reg)->optimize         = 0;
05920   (reg)->exact            = (UChar* )NULL;
05921   (reg)->int_map          = (int* )NULL;
05922   (reg)->int_map_backward = (int* )NULL;
05923   (reg)->chain            = (regex_t* )NULL;
05924 
05925   (reg)->p                = (UChar* )NULL;
05926   (reg)->alloc            = 0;
05927   (reg)->used             = 0;
05928   (reg)->name_table       = (void* )NULL;
05929 
05930   (reg)->case_fold_flag   = case_fold_flag;
05931   return 0;
05932 }
05933 
05934 extern int
05935 onig_new_without_alloc(regex_t* reg, const UChar* pattern,
05936           const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
05937           OnigSyntaxType* syntax, OnigErrorInfo* einfo)
05938 {
05939   int r;
05940 
05941   r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
05942   if (r) return r;
05943 
05944   r = onig_compile(reg, pattern, pattern_end, einfo, NULL, 0);
05945   return r;
05946 }
05947 
05948 extern int
05949 onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
05950           OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
05951           OnigErrorInfo* einfo)
05952 {
05953   int r;
05954 
05955   *reg = (regex_t* )xmalloc(sizeof(regex_t));
05956   if (IS_NULL(*reg)) return ONIGERR_MEMORY;
05957 
05958   r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
05959   if (r) goto err;
05960 
05961   r = onig_compile(*reg, pattern, pattern_end, einfo, NULL, 0);
05962   if (r) {
05963   err:
05964     onig_free(*reg);
05965     *reg = NULL;
05966   }
05967   return r;
05968 }
05969 
05970 
05971 extern int
05972 onig_init(void)
05973 {
05974   if (onig_inited != 0)
05975     return 0;
05976 
05977   THREAD_SYSTEM_INIT;
05978   THREAD_ATOMIC_START;
05979 
05980   onig_inited = 1;
05981 
05982   onigenc_init();
05983   /* onigenc_set_default_caseconv_table((UChar* )0); */
05984 
05985 #ifdef ONIG_DEBUG_STATISTICS
05986   onig_statistics_init();
05987 #endif
05988 
05989   THREAD_ATOMIC_END;
05990   return 0;
05991 }
05992 
05993 
05994 extern int
05995 onig_end(void)
05996 {
05997   THREAD_ATOMIC_START;
05998 
05999 #ifdef ONIG_DEBUG_STATISTICS
06000   if (!onig_is_prelude()) onig_print_statistics(stderr);
06001 #endif
06002 
06003 #ifdef USE_SHARED_CCLASS_TABLE
06004   onig_free_shared_cclass_table();
06005 #endif
06006 
06007 #ifdef USE_PARSE_TREE_NODE_RECYCLE
06008   onig_free_node_list();
06009 #endif
06010 
06011   onig_inited = 0;
06012 
06013   THREAD_ATOMIC_END;
06014   THREAD_SYSTEM_END;
06015   return 0;
06016 }
06017 
06018 extern int
06019 onig_is_in_code_range(const UChar* p, OnigCodePoint code)
06020 {
06021   OnigCodePoint n, *data;
06022   OnigCodePoint low, high, x;
06023 
06024   GET_CODE_POINT(n, p);
06025   data = (OnigCodePoint* )p;
06026   data++;
06027 
06028   for (low = 0, high = n; low < high; ) {
06029     x = (low + high) >> 1;
06030     if (code > data[x * 2 + 1])
06031       low = x + 1;
06032     else
06033       high = x;
06034   }
06035 
06036   return ((low < n && code >= data[low * 2]) ? 1 : 0);
06037 }
06038 
06039 extern int
06040 onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc)
06041 {
06042   int found;
06043 
06044   if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
06045     if (IS_NULL(cc->mbuf)) {
06046       found = 0;
06047     }
06048     else {
06049       found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
06050     }
06051   }
06052   else {
06053     found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
06054   }
06055 
06056   if (IS_NCCLASS_NOT(cc))
06057     return !found;
06058   else
06059     return found;
06060 }
06061 
06062 extern int
06063 onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
06064 {
06065   int len;
06066 
06067   if (ONIGENC_MBC_MINLEN(enc) > 1) {
06068     len = 2;
06069   }
06070   else {
06071     len = ONIGENC_CODE_TO_MBCLEN(enc, code);
06072   }
06073   return onig_is_code_in_cc_len(len, code, cc);
06074 }
06075 
06076 
06077 #ifdef ONIG_DEBUG
06078 
06079 /* arguments type */
06080 #define ARG_SPECIAL     -1
06081 #define ARG_NON          0
06082 #define ARG_RELADDR      1
06083 #define ARG_ABSADDR      2
06084 #define ARG_LENGTH       3
06085 #define ARG_MEMNUM       4
06086 #define ARG_OPTION       5
06087 #define ARG_STATE_CHECK  6
06088 
06089 OnigOpInfoType OnigOpInfo[] = {
06090   { OP_FINISH,            "finish",          ARG_NON },
06091   { OP_END,               "end",             ARG_NON },
06092   { OP_EXACT1,            "exact1",          ARG_SPECIAL },
06093   { OP_EXACT2,            "exact2",          ARG_SPECIAL },
06094   { OP_EXACT3,            "exact3",          ARG_SPECIAL },
06095   { OP_EXACT4,            "exact4",          ARG_SPECIAL },
06096   { OP_EXACT5,            "exact5",          ARG_SPECIAL },
06097   { OP_EXACTN,            "exactn",          ARG_SPECIAL },
06098   { OP_EXACTMB2N1,        "exactmb2-n1",     ARG_SPECIAL },
06099   { OP_EXACTMB2N2,        "exactmb2-n2",     ARG_SPECIAL },
06100   { OP_EXACTMB2N3,        "exactmb2-n3",     ARG_SPECIAL },
06101   { OP_EXACTMB2N,         "exactmb2-n",      ARG_SPECIAL },
06102   { OP_EXACTMB3N,         "exactmb3n"  ,     ARG_SPECIAL },
06103   { OP_EXACTMBN,          "exactmbn",        ARG_SPECIAL },
06104   { OP_EXACT1_IC,         "exact1-ic",       ARG_SPECIAL },
06105   { OP_EXACTN_IC,         "exactn-ic",       ARG_SPECIAL },
06106   { OP_CCLASS,            "cclass",          ARG_SPECIAL },
06107   { OP_CCLASS_MB,         "cclass-mb",       ARG_SPECIAL },
06108   { OP_CCLASS_MIX,        "cclass-mix",      ARG_SPECIAL },
06109   { OP_CCLASS_NOT,        "cclass-not",      ARG_SPECIAL },
06110   { OP_CCLASS_MB_NOT,     "cclass-mb-not",   ARG_SPECIAL },
06111   { OP_CCLASS_MIX_NOT,    "cclass-mix-not",  ARG_SPECIAL },
06112   { OP_CCLASS_NODE,       "cclass-node",     ARG_SPECIAL },
06113   { OP_ANYCHAR,           "anychar",         ARG_NON },
06114   { OP_ANYCHAR_ML,        "anychar-ml",      ARG_NON },
06115   { OP_ANYCHAR_STAR,      "anychar*",        ARG_NON },
06116   { OP_ANYCHAR_ML_STAR,   "anychar-ml*",     ARG_NON },
06117   { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
06118   { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
06119   { OP_WORD,                "word",            ARG_NON },
06120   { OP_NOT_WORD,            "not-word",        ARG_NON },
06121   { OP_WORD_BOUND,          "word-bound",      ARG_NON },
06122   { OP_NOT_WORD_BOUND,      "not-word-bound",  ARG_NON },
06123   { OP_WORD_BEGIN,          "word-begin",      ARG_NON },
06124   { OP_WORD_END,            "word-end",        ARG_NON },
06125   { OP_ASCII_WORD,          "ascii-word",           ARG_NON },
06126   { OP_NOT_ASCII_WORD,      "not-ascii-word",       ARG_NON },
06127   { OP_ASCII_WORD_BOUND,    "ascii-word-bound",     ARG_NON },
06128   { OP_NOT_ASCII_WORD_BOUND,"not-ascii-word-bound", ARG_NON },
06129   { OP_ASCII_WORD_BEGIN,    "ascii-word-begin",     ARG_NON },
06130   { OP_ASCII_WORD_END,      "ascii-word-end",       ARG_NON },
06131   { OP_BEGIN_BUF,           "begin-buf",       ARG_NON },
06132   { OP_END_BUF,             "end-buf",         ARG_NON },
06133   { OP_BEGIN_LINE,          "begin-line",      ARG_NON },
06134   { OP_END_LINE,            "end-line",        ARG_NON },
06135   { OP_SEMI_END_BUF,        "semi-end-buf",    ARG_NON },
06136   { OP_BEGIN_POSITION,      "begin-position",  ARG_NON },
06137   { OP_BEGIN_POS_OR_LINE,   "begin-pos-or-line",    ARG_NON },
06138   { OP_BACKREF1,            "backref1",             ARG_NON },
06139   { OP_BACKREF2,            "backref2",             ARG_NON },
06140   { OP_BACKREFN,            "backrefn",             ARG_MEMNUM  },
06141   { OP_BACKREFN_IC,         "backrefn-ic",          ARG_SPECIAL },
06142   { OP_BACKREF_MULTI,       "backref_multi",        ARG_SPECIAL },
06143   { OP_BACKREF_MULTI_IC,    "backref_multi-ic",     ARG_SPECIAL },
06144   { OP_BACKREF_WITH_LEVEL,  "backref_at_level",     ARG_SPECIAL },
06145   { OP_MEMORY_START_PUSH,   "mem-start-push",       ARG_MEMNUM  },
06146   { OP_MEMORY_START,        "mem-start",            ARG_MEMNUM  },
06147   { OP_MEMORY_END_PUSH,     "mem-end-push",         ARG_MEMNUM  },
06148   { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec",     ARG_MEMNUM  },
06149   { OP_MEMORY_END,          "mem-end",              ARG_MEMNUM  },
06150   { OP_MEMORY_END_REC,      "mem-end-rec",          ARG_MEMNUM  },
06151   { OP_SET_OPTION_PUSH,     "set-option-push",      ARG_OPTION  },
06152   { OP_SET_OPTION,          "set-option",           ARG_OPTION  },
06153   { OP_KEEP,                "keep",                 ARG_NON },
06154   { OP_FAIL,                "fail",                 ARG_NON },
06155   { OP_JUMP,                "jump",                 ARG_RELADDR },
06156   { OP_PUSH,                "push",                 ARG_RELADDR },
06157   { OP_POP,                 "pop",                  ARG_NON },
06158   { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1",      ARG_SPECIAL },
06159   { OP_PUSH_IF_PEEK_NEXT,   "push-if-peek-next",    ARG_SPECIAL },
06160   { OP_REPEAT,              "repeat",               ARG_SPECIAL },
06161   { OP_REPEAT_NG,           "repeat-ng",            ARG_SPECIAL },
06162   { OP_REPEAT_INC,          "repeat-inc",           ARG_MEMNUM  },
06163   { OP_REPEAT_INC_NG,       "repeat-inc-ng",        ARG_MEMNUM  },
06164   { OP_REPEAT_INC_SG,       "repeat-inc-sg",        ARG_MEMNUM  },
06165   { OP_REPEAT_INC_NG_SG,    "repeat-inc-ng-sg",     ARG_MEMNUM  },
06166   { OP_NULL_CHECK_START,    "null-check-start",     ARG_MEMNUM  },
06167   { OP_NULL_CHECK_END,      "null-check-end",       ARG_MEMNUM  },
06168   { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM  },
06169   { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM  },
06170   { OP_PUSH_POS,             "push-pos",             ARG_NON },
06171   { OP_POP_POS,              "pop-pos",              ARG_NON },
06172   { OP_PUSH_POS_NOT,         "push-pos-not",         ARG_RELADDR },
06173   { OP_FAIL_POS,             "fail-pos",             ARG_NON },
06174   { OP_PUSH_STOP_BT,         "push-stop-bt",         ARG_NON },
06175   { OP_POP_STOP_BT,          "pop-stop-bt",          ARG_NON },
06176   { OP_LOOK_BEHIND,          "look-behind",          ARG_SPECIAL },
06177   { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
06178   { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
06179   { OP_CALL,                 "call",                 ARG_ABSADDR },
06180   { OP_RETURN,               "return",               ARG_NON },
06181   { OP_CONDITION,            "condition",            ARG_SPECIAL },
06182   { OP_STATE_CHECK_PUSH,         "state-check-push",         ARG_SPECIAL },
06183   { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
06184   { OP_STATE_CHECK,              "state-check",              ARG_STATE_CHECK },
06185   { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*",     ARG_STATE_CHECK },
06186   { OP_STATE_CHECK_ANYCHAR_ML_STAR,
06187     "state-check-anychar-ml*", ARG_STATE_CHECK },
06188   { -1, "", ARG_NON }
06189 };
06190 
06191 static const char*
06192 op2name(int opcode)
06193 {
06194   int i;
06195 
06196   for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
06197     if (opcode == OnigOpInfo[i].opcode)
06198       return OnigOpInfo[i].name;
06199   }
06200   return "";
06201 }
06202 
06203 static int
06204 op2arg_type(int opcode)
06205 {
06206   int i;
06207 
06208   for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
06209     if (opcode == OnigOpInfo[i].opcode)
06210       return OnigOpInfo[i].arg_type;
06211   }
06212   return ARG_SPECIAL;
06213 }
06214 
06215 static void
06216 Indent(FILE* f, int indent)
06217 {
06218   int i;
06219   for (i = 0; i < indent; i++) putc(' ', f);
06220 }
06221 
06222 static void
06223 p_string(FILE* f, int len, UChar* s)
06224 {
06225   fputs(":", f);
06226   while (len-- > 0) { fputc(*s++, f); }
06227 }
06228 
06229 static void
06230 p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
06231 {
06232   int x = len * mb_len;
06233 
06234   fprintf(f, ":%d:", len);
06235   while (x-- > 0) { fputc(*s++, f); }
06236 }
06237 
06238 extern void
06239 onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
06240                               OnigEncoding enc)
06241 {
06242   int i, n, arg_type;
06243   RelAddrType addr;
06244   LengthType len;
06245   MemNumType mem;
06246   StateCheckNumType scn;
06247   OnigCodePoint code;
06248   UChar *q;
06249 
06250   fprintf(f, "[%s", op2name(*bp));
06251   arg_type = op2arg_type(*bp);
06252   if (arg_type != ARG_SPECIAL) {
06253     bp++;
06254     switch (arg_type) {
06255     case ARG_NON:
06256       break;
06257     case ARG_RELADDR:
06258       GET_RELADDR_INC(addr, bp);
06259       fprintf(f, ":(%d)", addr);
06260       break;
06261     case ARG_ABSADDR:
06262       GET_ABSADDR_INC(addr, bp);
06263       fprintf(f, ":(%d)", addr);
06264       break;
06265     case ARG_LENGTH:
06266       GET_LENGTH_INC(len, bp);
06267       fprintf(f, ":%d", len);
06268       break;
06269     case ARG_MEMNUM:
06270       mem = *((MemNumType* )bp);
06271       bp += SIZE_MEMNUM;
06272       fprintf(f, ":%d", mem);
06273       break;
06274     case ARG_OPTION:
06275       {
06276         OnigOptionType option = *((OnigOptionType* )bp);
06277         bp += SIZE_OPTION;
06278         fprintf(f, ":%d", option);
06279       }
06280       break;
06281 
06282     case ARG_STATE_CHECK:
06283       scn = *((StateCheckNumType* )bp);
06284       bp += SIZE_STATE_CHECK_NUM;
06285       fprintf(f, ":%d", scn);
06286       break;
06287     }
06288   }
06289   else {
06290     switch (*bp++) {
06291     case OP_EXACT1:
06292     case OP_ANYCHAR_STAR_PEEK_NEXT:
06293     case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
06294       p_string(f, 1, bp++); break;
06295     case OP_EXACT2:
06296       p_string(f, 2, bp); bp += 2; break;
06297     case OP_EXACT3:
06298       p_string(f, 3, bp); bp += 3; break;
06299     case OP_EXACT4:
06300       p_string(f, 4, bp); bp += 4; break;
06301     case OP_EXACT5:
06302       p_string(f, 5, bp); bp += 5; break;
06303     case OP_EXACTN:
06304       GET_LENGTH_INC(len, bp);
06305       p_len_string(f, len, 1, bp);
06306       bp += len;
06307       break;
06308 
06309     case OP_EXACTMB2N1:
06310       p_string(f, 2, bp); bp += 2; break;
06311     case OP_EXACTMB2N2:
06312       p_string(f, 4, bp); bp += 4; break;
06313     case OP_EXACTMB2N3:
06314       p_string(f, 6, bp); bp += 6; break;
06315     case OP_EXACTMB2N:
06316       GET_LENGTH_INC(len, bp);
06317       p_len_string(f, len, 2, bp);
06318       bp += len * 2;
06319       break;
06320     case OP_EXACTMB3N:
06321       GET_LENGTH_INC(len, bp);
06322       p_len_string(f, len, 3, bp);
06323       bp += len * 3;
06324       break;
06325     case OP_EXACTMBN:
06326       {
06327         int mb_len;
06328 
06329         GET_LENGTH_INC(mb_len, bp);
06330         GET_LENGTH_INC(len, bp);
06331         fprintf(f, ":%d:%d:", mb_len, len);
06332         n = len * mb_len;
06333         while (n-- > 0) { fputc(*bp++, f); }
06334       }
06335       break;
06336 
06337     case OP_EXACT1_IC:
06338       len = enclen(enc, bp, bpend);
06339       p_string(f, len, bp);
06340       bp += len;
06341       break;
06342     case OP_EXACTN_IC:
06343       GET_LENGTH_INC(len, bp);
06344       p_len_string(f, len, 1, bp);
06345       bp += len;
06346       break;
06347 
06348     case OP_CCLASS:
06349       n = bitset_on_num((BitSetRef )bp);
06350       bp += SIZE_BITSET;
06351       fprintf(f, ":%d", n);
06352       break;
06353 
06354     case OP_CCLASS_NOT:
06355       n = bitset_on_num((BitSetRef )bp);
06356       bp += SIZE_BITSET;
06357       fprintf(f, ":%d", n);
06358       break;
06359 
06360     case OP_CCLASS_MB:
06361     case OP_CCLASS_MB_NOT:
06362       GET_LENGTH_INC(len, bp);
06363       q = bp;
06364 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
06365       ALIGNMENT_RIGHT(q);
06366 #endif
06367       GET_CODE_POINT(code, q);
06368       bp += len;
06369       fprintf(f, ":%d:%d", (int )code, len);
06370       break;
06371 
06372     case OP_CCLASS_MIX:
06373     case OP_CCLASS_MIX_NOT:
06374       n = bitset_on_num((BitSetRef )bp);
06375       bp += SIZE_BITSET;
06376       GET_LENGTH_INC(len, bp);
06377       q = bp;
06378 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
06379       ALIGNMENT_RIGHT(q);
06380 #endif
06381       GET_CODE_POINT(code, q);
06382       bp += len;
06383       fprintf(f, ":%d:%d:%d", n, (int )code, len);
06384       break;
06385 
06386     case OP_CCLASS_NODE:
06387       {
06388         CClassNode *cc;
06389 
06390         GET_POINTER_INC(cc, bp);
06391         n = bitset_on_num(cc->bs);
06392         fprintf(f, ":%"PRIuPTR":%d", (uintptr_t)cc, n);
06393       }
06394       break;
06395 
06396     case OP_BACKREFN_IC:
06397       mem = *((MemNumType* )bp);
06398       bp += SIZE_MEMNUM;
06399       fprintf(f, ":%d", mem);
06400       break;
06401 
06402     case OP_BACKREF_MULTI_IC:
06403     case OP_BACKREF_MULTI:
06404       fputs(" ", f);
06405       GET_LENGTH_INC(len, bp);
06406       for (i = 0; i < len; i++) {
06407         GET_MEMNUM_INC(mem, bp);
06408         if (i > 0) fputs(", ", f);
06409         fprintf(f, "%d", mem);
06410       }
06411       break;
06412 
06413     case OP_BACKREF_WITH_LEVEL:
06414       {
06415         OnigOptionType option;
06416         LengthType level;
06417 
06418         GET_OPTION_INC(option, bp);
06419         fprintf(f, ":%d", option);
06420         GET_LENGTH_INC(level, bp);
06421         fprintf(f, ":%d", level);
06422 
06423         fputs(" ", f);
06424         GET_LENGTH_INC(len, bp);
06425         for (i = 0; i < len; i++) {
06426           GET_MEMNUM_INC(mem, bp);
06427           if (i > 0) fputs(", ", f);
06428           fprintf(f, "%d", mem);
06429         }
06430       }
06431       break;
06432 
06433     case OP_REPEAT:
06434     case OP_REPEAT_NG:
06435       {
06436         mem = *((MemNumType* )bp);
06437         bp += SIZE_MEMNUM;
06438         addr = *((RelAddrType* )bp);
06439         bp += SIZE_RELADDR;
06440         fprintf(f, ":%d:%d", mem, addr);
06441       }
06442       break;
06443 
06444     case OP_PUSH_OR_JUMP_EXACT1:
06445     case OP_PUSH_IF_PEEK_NEXT:
06446       addr = *((RelAddrType* )bp);
06447       bp += SIZE_RELADDR;
06448       fprintf(f, ":(%d)", addr);
06449       p_string(f, 1, bp);
06450       bp += 1;
06451       break;
06452 
06453     case OP_LOOK_BEHIND:
06454       GET_LENGTH_INC(len, bp);
06455       fprintf(f, ":%d", len);
06456       break;
06457 
06458     case OP_PUSH_LOOK_BEHIND_NOT:
06459       GET_RELADDR_INC(addr, bp);
06460       GET_LENGTH_INC(len, bp);
06461       fprintf(f, ":%d:(%d)", len, addr);
06462       break;
06463 
06464     case OP_STATE_CHECK_PUSH:
06465     case OP_STATE_CHECK_PUSH_OR_JUMP:
06466       scn = *((StateCheckNumType* )bp);
06467       bp += SIZE_STATE_CHECK_NUM;
06468       addr = *((RelAddrType* )bp);
06469       bp += SIZE_RELADDR;
06470       fprintf(f, ":%d:(%d)", scn, addr);
06471       break;
06472 
06473     case OP_CONDITION:
06474       GET_MEMNUM_INC(mem, bp);
06475       GET_RELADDR_INC(addr, bp);
06476       fprintf(f, ":%d:(%d)", mem, addr);
06477       break;
06478 
06479     default:
06480       fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
06481               *--bp);
06482     }
06483   }
06484   fputs("]", f);
06485   if (nextp) *nextp = bp;
06486 }
06487 
06488 static void
06489 print_compiled_byte_code_list(FILE* f, regex_t* reg)
06490 {
06491   int ncode;
06492   UChar* bp = reg->p;
06493   UChar* end = reg->p + reg->used;
06494 
06495   fprintf(f, "code length: %d", reg->used);
06496 
06497   ncode = -1;
06498   while (bp < end) {
06499     ncode++;
06500     if (ncode % 5 == 0)
06501       fprintf(f, "\n%ld:", bp - reg->p);
06502     else
06503       fprintf(f, " %ld:", bp - reg->p);
06504     onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc);
06505   }
06506 
06507   fprintf(f, "\n");
06508 }
06509 
06510 static void
06511 print_indent_tree(FILE* f, Node* node, int indent)
06512 {
06513   int i, type, container_p = 0;
06514   int add = 3;
06515   UChar* p;
06516 
06517   Indent(f, indent);
06518   if (IS_NULL(node)) {
06519     fprintf(f, "ERROR: null node!!!\n");
06520     exit (0);
06521   }
06522 
06523   type = NTYPE(node);
06524   switch (type) {
06525   case NT_LIST:
06526   case NT_ALT:
06527     if (NTYPE(node) == NT_LIST)
06528       fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t)node);
06529     else
06530       fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t)node);
06531 
06532     print_indent_tree(f, NCAR(node), indent + add);
06533     while (IS_NOT_NULL(node = NCDR(node))) {
06534       if (NTYPE(node) != type) {
06535         fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
06536         exit(0);
06537       }
06538       print_indent_tree(f, NCAR(node), indent + add);
06539     }
06540     break;
06541 
06542   case NT_STR:
06543     fprintf(f, "<string%s:%"PRIxPTR">",
06544             (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t)node);
06545     for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
06546       if (*p >= 0x20 && *p < 0x7f)
06547         fputc(*p, f);
06548       else {
06549         fprintf(f, " 0x%02x", *p);
06550       }
06551     }
06552     break;
06553 
06554   case NT_CCLASS:
06555     fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t)node);
06556     if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f);
06557     if (NCCLASS(node)->mbuf) {
06558       BBuf* bbuf = NCCLASS(node)->mbuf;
06559       for (i = 0; i < (int )bbuf->used; i++) {
06560         if (i > 0) fprintf(f, ",");
06561         fprintf(f, "%0x", bbuf->p[i]);
06562       }
06563     }
06564     break;
06565 
06566   case NT_CTYPE:
06567     fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t)node);
06568     switch (NCTYPE(node)->ctype) {
06569     case ONIGENC_CTYPE_WORD:
06570       if (NCTYPE(node)->not != 0)
06571         fputs("not word",       f);
06572       else
06573         fputs("word",           f);
06574       break;
06575 
06576     default:
06577       fprintf(f, "ERROR: undefined ctype.\n");
06578       exit(0);
06579     }
06580     break;
06581 
06582   case NT_CANY:
06583     fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t)node);
06584     break;
06585 
06586   case NT_ANCHOR:
06587     fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t)node);
06588     switch (NANCHOR(node)->type) {
06589     case ANCHOR_BEGIN_BUF:      fputs("begin buf",      f); break;
06590     case ANCHOR_END_BUF:        fputs("end buf",        f); break;
06591     case ANCHOR_BEGIN_LINE:     fputs("begin line",     f); break;
06592     case ANCHOR_END_LINE:       fputs("end line",       f); break;
06593     case ANCHOR_SEMI_END_BUF:   fputs("semi end buf",   f); break;
06594     case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
06595     case ANCHOR_ANYCHAR_STAR:   fputs("begin position/line", f); break;
06596 
06597     case ANCHOR_WORD_BOUND:      fputs("word bound",     f); break;
06598     case ANCHOR_NOT_WORD_BOUND:  fputs("not word bound", f); break;
06599 #ifdef USE_WORD_BEGIN_END
06600     case ANCHOR_WORD_BEGIN:      fputs("word begin", f);     break;
06601     case ANCHOR_WORD_END:        fputs("word end", f);       break;
06602 #endif
06603     case ANCHOR_PREC_READ:       fputs("prec read",      f); container_p = TRUE; break;
06604     case ANCHOR_PREC_READ_NOT:   fputs("prec read not",  f); container_p = TRUE; break;
06605     case ANCHOR_LOOK_BEHIND:     fputs("look_behind",    f); container_p = TRUE; break;
06606     case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); container_p = TRUE; break;
06607     case ANCHOR_KEEP:            fputs("keep",f);            break;
06608 
06609     default:
06610       fprintf(f, "ERROR: undefined anchor type.\n");
06611       break;
06612     }
06613     break;
06614 
06615   case NT_BREF:
06616     {
06617       int* p;
06618       BRefNode* br = NBREF(node);
06619       p = BACKREFS_P(br);
06620       fprintf(f, "<backref:%"PRIxPTR">", (intptr_t)node);
06621       for (i = 0; i < br->back_num; i++) {
06622         if (i > 0) fputs(", ", f);
06623         fprintf(f, "%d", p[i]);
06624       }
06625     }
06626     break;
06627 
06628 #ifdef USE_SUBEXP_CALL
06629   case NT_CALL:
06630     {
06631       CallNode* cn = NCALL(node);
06632       fprintf(f, "<call:%"PRIxPTR">", (intptr_t)node);
06633       p_string(f, cn->name_end - cn->name, cn->name);
06634     }
06635     break;
06636 #endif
06637 
06638   case NT_QTFR:
06639     fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t)node,
06640             NQTFR(node)->lower, NQTFR(node)->upper,
06641             (NQTFR(node)->greedy ? "" : "?"));
06642     print_indent_tree(f, NQTFR(node)->target, indent + add);
06643     break;
06644 
06645   case NT_ENCLOSE:
06646     fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t)node);
06647     switch (NENCLOSE(node)->type) {
06648     case ENCLOSE_OPTION:
06649       fprintf(f, "option:%d\n", NENCLOSE(node)->option);
06650       break;
06651     case ENCLOSE_MEMORY:
06652       fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
06653       break;
06654     case ENCLOSE_STOP_BACKTRACK:
06655       fprintf(f, "stop-bt");
06656       break;
06657     case ENCLOSE_CONDITION:
06658       fprintf(f, "condition:%d", NENCLOSE(node)->regnum);
06659       break;
06660 
06661     default:
06662       break;
06663     }
06664     fprintf(f, "\n");
06665     print_indent_tree(f, NENCLOSE(node)->target, indent + add);
06666     break;
06667 
06668   default:
06669     fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
06670     break;
06671   }
06672 
06673   if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
06674       type != NT_ENCLOSE)
06675     fprintf(f, "\n");
06676 
06677   if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add);
06678 
06679   fflush(f);
06680 }
06681 #endif /* ONIG_DEBUG */
06682 
06683 #ifdef ONIG_DEBUG_PARSE_TREE
06684 static void
06685 print_tree(FILE* f, Node* node)
06686 {
06687   print_indent_tree(f, node, 0);
06688 }
06689 #endif
06690