Ruby
2.0.0p247(2013-06-27revision41674)
|
00001 /********************************************************************** 00002 regexec.c - Onigmo (Oniguruma-mod) (regular expression library) 00003 **********************************************************************/ 00004 /*- 00005 * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> 00006 * Copyright (c) 2011-2013 K.Takata <kentkt AT csc DOT jp> 00007 * All rights reserved. 00008 * 00009 * Redistribution and use in source and binary forms, with or without 00010 * modification, are permitted provided that the following conditions 00011 * are met: 00012 * 1. Redistributions of source code must retain the above copyright 00013 * notice, this list of conditions and the following disclaimer. 00014 * 2. Redistributions in binary form must reproduce the above copyright 00015 * notice, this list of conditions and the following disclaimer in the 00016 * documentation and/or other materials provided with the distribution. 00017 * 00018 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 00019 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00020 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 00021 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 00022 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 00023 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 00024 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 00025 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00026 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 00027 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 00028 * SUCH DAMAGE. 00029 */ 00030 00031 #include "regint.h" 00032 00033 /* #define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ 00034 00035 #ifdef USE_CRNL_AS_LINE_TERMINATOR 00036 #define ONIGENC_IS_MBC_CRNL(enc,p,end) \ 00037 (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \ 00038 ONIGENC_MBC_TO_CODE(enc,(p+enclen(enc,p,end)),end) == 10) 00039 #define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \ 00040 is_mbc_newline_ex((enc),(p),(start),(end),(option),(check_prev)) 00041 static int 00042 is_mbc_newline_ex(OnigEncoding enc, const UChar *p, const UChar *start, 00043 const UChar *end, OnigOptionType option, int check_prev) 00044 { 00045 if (IS_NEWLINE_CRLF(option)) { 00046 if (ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0a) { 00047 if (check_prev) { 00048 const UChar *prev = onigenc_get_prev_char_head(enc, start, p, end); 00049 if ((prev != NULL) && ONIGENC_MBC_TO_CODE(enc, prev, end) == 0x0d) 00050 return 0; 00051 else 00052 return 1; 00053 } 00054 else 00055 return 1; 00056 } 00057 else { 00058 const UChar *pnext = p + enclen(enc, p, end); 00059 if (pnext < end && 00060 ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0d && 00061 ONIGENC_MBC_TO_CODE(enc, pnext, end) == 0x0a) 00062 return 1; 00063 if (ONIGENC_IS_MBC_NEWLINE(enc, p, end)) 00064 return 1; 00065 return 0; 00066 } 00067 } 00068 else { 00069 return ONIGENC_IS_MBC_NEWLINE(enc, p, end); 00070 } 00071 } 00072 #else /* USE_CRNL_AS_LINE_TERMINATOR */ 00073 #define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \ 00074 ONIGENC_IS_MBC_NEWLINE((enc), (p), (end)) 00075 #endif /* USE_CRNL_AS_LINE_TERMINATOR */ 00076 00077 #ifdef USE_CAPTURE_HISTORY 00078 static void history_tree_free(OnigCaptureTreeNode* node); 00079 00080 static void 00081 history_tree_clear(OnigCaptureTreeNode* node) 00082 { 00083 int i; 00084 00085 if (IS_NOT_NULL(node)) { 00086 for (i = 0; i < node->num_childs; i++) { 00087 if (IS_NOT_NULL(node->childs[i])) { 00088 history_tree_free(node->childs[i]); 00089 } 00090 } 00091 for (i = 0; i < node->allocated; i++) { 00092 node->childs[i] = (OnigCaptureTreeNode* )0; 00093 } 00094 node->num_childs = 0; 00095 node->beg = ONIG_REGION_NOTPOS; 00096 node->end = ONIG_REGION_NOTPOS; 00097 node->group = -1; 00098 xfree(node->childs); 00099 node->childs = (OnigCaptureTreeNode** )0; 00100 } 00101 } 00102 00103 static void 00104 history_tree_free(OnigCaptureTreeNode* node) 00105 { 00106 history_tree_clear(node); 00107 xfree(node); 00108 } 00109 00110 static void 00111 history_root_free(OnigRegion* r) 00112 { 00113 if (IS_NOT_NULL(r->history_root)) { 00114 history_tree_free(r->history_root); 00115 r->history_root = (OnigCaptureTreeNode* )0; 00116 } 00117 } 00118 00119 static OnigCaptureTreeNode* 00120 history_node_new(void) 00121 { 00122 OnigCaptureTreeNode* node; 00123 00124 node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode)); 00125 CHECK_NULL_RETURN(node); 00126 node->childs = (OnigCaptureTreeNode** )0; 00127 node->allocated = 0; 00128 node->num_childs = 0; 00129 node->group = -1; 00130 node->beg = ONIG_REGION_NOTPOS; 00131 node->end = ONIG_REGION_NOTPOS; 00132 00133 return node; 00134 } 00135 00136 static int 00137 history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child) 00138 { 00139 #define HISTORY_TREE_INIT_ALLOC_SIZE 8 00140 00141 if (parent->num_childs >= parent->allocated) { 00142 int n, i; 00143 00144 if (IS_NULL(parent->childs)) { 00145 n = HISTORY_TREE_INIT_ALLOC_SIZE; 00146 parent->childs = 00147 (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n); 00148 CHECK_NULL_RETURN_MEMERR(parent->childs); 00149 } 00150 else { 00151 OnigCaptureTreeNode** tmp; 00152 n = parent->allocated * 2; 00153 tmp = 00154 (OnigCaptureTreeNode** )xrealloc(parent->childs, 00155 sizeof(OnigCaptureTreeNode*) * n); 00156 if (tmp == 0) { 00157 history_tree_clear(parent); 00158 return ONIGERR_MEMORY; 00159 } 00160 parent->childs = tmp; 00161 } 00162 for (i = parent->allocated; i < n; i++) { 00163 parent->childs[i] = (OnigCaptureTreeNode* )0; 00164 } 00165 parent->allocated = n; 00166 } 00167 00168 parent->childs[parent->num_childs] = child; 00169 parent->num_childs++; 00170 return 0; 00171 } 00172 00173 static OnigCaptureTreeNode* 00174 history_tree_clone(OnigCaptureTreeNode* node) 00175 { 00176 int i, r; 00177 OnigCaptureTreeNode *clone, *child; 00178 00179 clone = history_node_new(); 00180 CHECK_NULL_RETURN(clone); 00181 00182 clone->beg = node->beg; 00183 clone->end = node->end; 00184 for (i = 0; i < node->num_childs; i++) { 00185 child = history_tree_clone(node->childs[i]); 00186 if (IS_NULL(child)) { 00187 history_tree_free(clone); 00188 return (OnigCaptureTreeNode* )0; 00189 } 00190 r = history_tree_add_child(clone, child); 00191 if (r != 0) { 00192 history_tree_free(child); 00193 history_tree_free(clone); 00194 return (OnigCaptureTreeNode* )0; 00195 } 00196 } 00197 00198 return clone; 00199 } 00200 00201 extern OnigCaptureTreeNode* 00202 onig_get_capture_tree(OnigRegion* region) 00203 { 00204 return region->history_root; 00205 } 00206 #endif /* USE_CAPTURE_HISTORY */ 00207 00208 extern void 00209 onig_region_clear(OnigRegion* region) 00210 { 00211 int i; 00212 00213 for (i = 0; i < region->num_regs; i++) { 00214 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; 00215 } 00216 #ifdef USE_CAPTURE_HISTORY 00217 history_root_free(region); 00218 #endif 00219 } 00220 00221 extern int 00222 onig_region_resize(OnigRegion* region, int n) 00223 { 00224 region->num_regs = n; 00225 00226 if (n < ONIG_NREGION) 00227 n = ONIG_NREGION; 00228 00229 if (region->allocated == 0) { 00230 region->beg = (OnigPosition* )xmalloc(n * sizeof(OnigPosition)); 00231 if (region->beg == 0) 00232 return ONIGERR_MEMORY; 00233 00234 region->end = (OnigPosition* )xmalloc(n * sizeof(OnigPosition)); 00235 if (region->end == 0) { 00236 xfree(region->beg); 00237 return ONIGERR_MEMORY; 00238 } 00239 00240 region->allocated = n; 00241 } 00242 else if (region->allocated < n) { 00243 OnigPosition *tmp; 00244 00245 region->allocated = 0; 00246 tmp = (OnigPosition* )xrealloc(region->beg, n * sizeof(OnigPosition)); 00247 if (tmp == 0) { 00248 xfree(region->beg); 00249 xfree(region->end); 00250 return ONIGERR_MEMORY; 00251 } 00252 region->beg = tmp; 00253 tmp = (OnigPosition* )xrealloc(region->end, n * sizeof(OnigPosition)); 00254 if (tmp == 0) { 00255 xfree(region->beg); 00256 xfree(region->end); 00257 return ONIGERR_MEMORY; 00258 } 00259 region->end = tmp; 00260 00261 region->allocated = n; 00262 } 00263 00264 return 0; 00265 } 00266 00267 static int 00268 onig_region_resize_clear(OnigRegion* region, int n) 00269 { 00270 int r; 00271 00272 r = onig_region_resize(region, n); 00273 if (r != 0) return r; 00274 onig_region_clear(region); 00275 return 0; 00276 } 00277 00278 extern int 00279 onig_region_set(OnigRegion* region, int at, int beg, int end) 00280 { 00281 if (at < 0) return ONIGERR_INVALID_ARGUMENT; 00282 00283 if (at >= region->allocated) { 00284 int r = onig_region_resize(region, at + 1); 00285 if (r < 0) return r; 00286 } 00287 00288 region->beg[at] = beg; 00289 region->end[at] = end; 00290 return 0; 00291 } 00292 00293 extern void 00294 onig_region_init(OnigRegion* region) 00295 { 00296 region->num_regs = 0; 00297 region->allocated = 0; 00298 region->beg = (OnigPosition* )0; 00299 region->end = (OnigPosition* )0; 00300 region->history_root = (OnigCaptureTreeNode* )0; 00301 } 00302 00303 extern OnigRegion* 00304 onig_region_new(void) 00305 { 00306 OnigRegion* r; 00307 00308 r = (OnigRegion* )xmalloc(sizeof(OnigRegion)); 00309 if (r) 00310 onig_region_init(r); 00311 return r; 00312 } 00313 00314 extern void 00315 onig_region_free(OnigRegion* r, int free_self) 00316 { 00317 if (r) { 00318 if (r->allocated > 0) { 00319 if (r->beg) xfree(r->beg); 00320 if (r->end) xfree(r->end); 00321 r->allocated = 0; 00322 } 00323 #ifdef USE_CAPTURE_HISTORY 00324 history_root_free(r); 00325 #endif 00326 if (free_self) xfree(r); 00327 } 00328 } 00329 00330 extern void 00331 onig_region_copy(OnigRegion* to, OnigRegion* from) 00332 { 00333 #define RREGC_SIZE (sizeof(int) * from->num_regs) 00334 int i, r; 00335 00336 if (to == from) return; 00337 00338 r = onig_region_resize(to, from->num_regs); 00339 if (r) return; 00340 00341 for (i = 0; i < from->num_regs; i++) { 00342 to->beg[i] = from->beg[i]; 00343 to->end[i] = from->end[i]; 00344 } 00345 to->num_regs = from->num_regs; 00346 00347 #ifdef USE_CAPTURE_HISTORY 00348 history_root_free(to); 00349 00350 if (IS_NOT_NULL(from->history_root)) { 00351 to->history_root = history_tree_clone(from->history_root); 00352 } 00353 #endif 00354 } 00355 00356 00358 #define INVALID_STACK_INDEX -1 00359 00360 /* stack type */ 00361 /* used by normal-POP */ 00362 #define STK_ALT 0x0001 00363 #define STK_LOOK_BEHIND_NOT 0x0002 00364 #define STK_POS_NOT 0x0003 00365 /* handled by normal-POP */ 00366 #define STK_MEM_START 0x0100 00367 #define STK_MEM_END 0x8200 00368 #define STK_REPEAT_INC 0x0300 00369 #define STK_STATE_CHECK_MARK 0x1000 00370 /* avoided by normal-POP */ 00371 #define STK_NULL_CHECK_START 0x3000 00372 #define STK_NULL_CHECK_END 0x5000 /* for recursive call */ 00373 #define STK_MEM_END_MARK 0x8400 00374 #define STK_POS 0x0500 /* used when POP-POS */ 00375 #define STK_STOP_BT 0x0600 /* mark for "(?>...)" */ 00376 #define STK_REPEAT 0x0700 00377 #define STK_CALL_FRAME 0x0800 00378 #define STK_RETURN 0x0900 00379 #define STK_VOID 0x0a00 /* for fill a blank */ 00380 00381 /* stack type check mask */ 00382 #define STK_MASK_POP_USED 0x00ff 00383 #define STK_MASK_TO_VOID_TARGET 0x10ff 00384 #define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */ 00385 00386 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE 00387 #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\ 00388 (msa).stack_p = (void* )0;\ 00389 (msa).options = (arg_option);\ 00390 (msa).region = (arg_region);\ 00391 (msa).start = (arg_start);\ 00392 (msa).gpos = (arg_gpos);\ 00393 (msa).best_len = ONIG_MISMATCH;\ 00394 } while(0) 00395 #else 00396 #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\ 00397 (msa).stack_p = (void* )0;\ 00398 (msa).options = (arg_option);\ 00399 (msa).region = (arg_region);\ 00400 (msa).start = (arg_start);\ 00401 (msa).gpos = (arg_gpos);\ 00402 } while(0) 00403 #endif 00404 00405 #ifdef USE_COMBINATION_EXPLOSION_CHECK 00406 00407 #define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16 00408 00409 #define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \ 00410 if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\ 00411 unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\ 00412 offset = ((offset) * (state_num)) >> 3;\ 00413 if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\ 00414 if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) {\ 00415 (msa).state_check_buff = (void* )xmalloc(size);\ 00416 CHECK_NULL_RETURN_MEMERR((msa).state_check_buff);\ 00417 }\ 00418 else \ 00419 (msa).state_check_buff = (void* )xalloca(size);\ 00420 xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \ 00421 (size_t )(size - (offset))); \ 00422 (msa).state_check_buff_size = size;\ 00423 }\ 00424 else {\ 00425 (msa).state_check_buff = (void* )0;\ 00426 (msa).state_check_buff_size = 0;\ 00427 }\ 00428 }\ 00429 else {\ 00430 (msa).state_check_buff = (void* )0;\ 00431 (msa).state_check_buff_size = 0;\ 00432 }\ 00433 } while(0) 00434 00435 #define MATCH_ARG_FREE(msa) do {\ 00436 if ((msa).stack_p) xfree((msa).stack_p);\ 00437 if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \ 00438 if ((msa).state_check_buff) xfree((msa).state_check_buff);\ 00439 }\ 00440 } while(0) 00441 #else /* USE_COMBINATION_EXPLOSION_CHECK */ 00442 #define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p) 00443 #endif /* USE_COMBINATION_EXPLOSION_CHECK */ 00444 00445 00446 00447 #define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\ 00448 if (msa->stack_p) {\ 00449 alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num));\ 00450 stk_alloc = (OnigStackType* )(msa->stack_p);\ 00451 stk_base = stk_alloc;\ 00452 stk = stk_base;\ 00453 stk_end = stk_base + msa->stack_n;\ 00454 }\ 00455 else {\ 00456 alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num)\ 00457 + sizeof(OnigStackType) * (stack_num));\ 00458 stk_alloc = (OnigStackType* )(alloc_addr + sizeof(OnigStackIndex) * (ptr_num));\ 00459 stk_base = stk_alloc;\ 00460 stk = stk_base;\ 00461 stk_end = stk_base + (stack_num);\ 00462 }\ 00463 } while(0) 00464 00465 #define STACK_SAVE do{\ 00466 if (stk_base != stk_alloc) {\ 00467 msa->stack_p = stk_base;\ 00468 msa->stack_n = stk_end - stk_base; /* TODO: check overflow */\ 00469 };\ 00470 } while(0) 00471 00472 static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE; 00473 00474 extern unsigned int 00475 onig_get_match_stack_limit_size(void) 00476 { 00477 return MatchStackLimitSize; 00478 } 00479 00480 extern int 00481 onig_set_match_stack_limit_size(unsigned int size) 00482 { 00483 MatchStackLimitSize = size; 00484 return 0; 00485 } 00486 00487 static int 00488 stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, 00489 OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa) 00490 { 00491 size_t n; 00492 OnigStackType *x, *stk_base, *stk_end, *stk; 00493 00494 stk_base = *arg_stk_base; 00495 stk_end = *arg_stk_end; 00496 stk = *arg_stk; 00497 00498 n = stk_end - stk_base; 00499 if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) { 00500 x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2); 00501 if (IS_NULL(x)) { 00502 STACK_SAVE; 00503 return ONIGERR_MEMORY; 00504 } 00505 xmemcpy(x, stk_base, n * sizeof(OnigStackType)); 00506 n *= 2; 00507 } 00508 else { 00509 unsigned int limit_size = MatchStackLimitSize; 00510 n *= 2; 00511 if (limit_size != 0 && n > limit_size) { 00512 if ((unsigned int )(stk_end - stk_base) == limit_size) 00513 return ONIGERR_MATCH_STACK_LIMIT_OVER; 00514 else 00515 n = limit_size; 00516 } 00517 x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n); 00518 if (IS_NULL(x)) { 00519 STACK_SAVE; 00520 return ONIGERR_MEMORY; 00521 } 00522 } 00523 *arg_stk = x + (stk - stk_base); 00524 *arg_stk_base = x; 00525 *arg_stk_end = x + n; 00526 return 0; 00527 } 00528 00529 #define STACK_ENSURE(n) do {\ 00530 if (stk_end - stk < (n)) {\ 00531 int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\ 00532 if (r != 0) { STACK_SAVE; return r; } \ 00533 }\ 00534 } while(0) 00535 00536 #define STACK_AT(index) (stk_base + (index)) 00537 #define GET_STACK_INDEX(stk) ((stk) - stk_base) 00538 00539 #define STACK_PUSH_TYPE(stack_type) do {\ 00540 STACK_ENSURE(1);\ 00541 stk->type = (stack_type);\ 00542 STACK_INC;\ 00543 } while(0) 00544 00545 #define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0) 00546 00547 #ifdef USE_COMBINATION_EXPLOSION_CHECK 00548 #define STATE_CHECK_POS(s,snum) \ 00549 (((s) - str) * num_comb_exp_check + ((snum) - 1)) 00550 #define STATE_CHECK_VAL(v,snum) do {\ 00551 if (state_check_buff != NULL) {\ 00552 int x = STATE_CHECK_POS(s,snum);\ 00553 (v) = state_check_buff[x/8] & (1<<(x%8));\ 00554 }\ 00555 else (v) = 0;\ 00556 } while(0) 00557 00558 00559 #define ELSE_IF_STATE_CHECK_MARK(stk) \ 00560 else if ((stk)->type == STK_STATE_CHECK_MARK) { \ 00561 int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\ 00562 state_check_buff[x/8] |= (1<<(x%8)); \ 00563 } 00564 00565 #define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\ 00566 STACK_ENSURE(1);\ 00567 stk->type = (stack_type);\ 00568 stk->u.state.pcode = (pat);\ 00569 stk->u.state.pstr = (s);\ 00570 stk->u.state.pstr_prev = (sprev);\ 00571 stk->u.state.state_check = 0;\ 00572 stk->u.state.pkeep = (keep);\ 00573 STACK_INC;\ 00574 } while(0) 00575 00576 #define STACK_PUSH_ENSURED(stack_type,pat) do {\ 00577 stk->type = (stack_type);\ 00578 stk->u.state.pcode = (pat);\ 00579 stk->u.state.state_check = 0;\ 00580 STACK_INC;\ 00581 } while(0) 00582 00583 #define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum,keep) do {\ 00584 STACK_ENSURE(1);\ 00585 stk->type = STK_ALT;\ 00586 stk->u.state.pcode = (pat);\ 00587 stk->u.state.pstr = (s);\ 00588 stk->u.state.pstr_prev = (sprev);\ 00589 stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\ 00590 stk->u.state.pkeep = (keep);\ 00591 STACK_INC;\ 00592 } while(0) 00593 00594 #define STACK_PUSH_STATE_CHECK(s,snum) do {\ 00595 if (state_check_buff != NULL) {\ 00596 STACK_ENSURE(1);\ 00597 stk->type = STK_STATE_CHECK_MARK;\ 00598 stk->u.state.pstr = (s);\ 00599 stk->u.state.state_check = (snum);\ 00600 STACK_INC;\ 00601 }\ 00602 } while(0) 00603 00604 #else /* USE_COMBINATION_EXPLOSION_CHECK */ 00605 00606 #define ELSE_IF_STATE_CHECK_MARK(stk) 00607 00608 #define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\ 00609 STACK_ENSURE(1);\ 00610 stk->type = (stack_type);\ 00611 stk->u.state.pcode = (pat);\ 00612 stk->u.state.pstr = (s);\ 00613 stk->u.state.pstr_prev = (sprev);\ 00614 stk->u.state.pkeep = (keep);\ 00615 STACK_INC;\ 00616 } while(0) 00617 00618 #define STACK_PUSH_ENSURED(stack_type,pat) do {\ 00619 stk->type = (stack_type);\ 00620 stk->u.state.pcode = (pat);\ 00621 STACK_INC;\ 00622 } while(0) 00623 #endif /* USE_COMBINATION_EXPLOSION_CHECK */ 00624 00625 #define STACK_PUSH_ALT(pat,s,sprev,keep) STACK_PUSH(STK_ALT,pat,s,sprev,keep) 00626 #define STACK_PUSH_POS(s,sprev,keep) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev,keep) 00627 #define STACK_PUSH_POS_NOT(pat,s,sprev,keep) STACK_PUSH(STK_POS_NOT,pat,s,sprev,keep) 00628 #define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT) 00629 #define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev,keep) \ 00630 STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev,keep) 00631 00632 #define STACK_PUSH_REPEAT(id, pat) do {\ 00633 STACK_ENSURE(1);\ 00634 stk->type = STK_REPEAT;\ 00635 stk->u.repeat.num = (id);\ 00636 stk->u.repeat.pcode = (pat);\ 00637 stk->u.repeat.count = 0;\ 00638 STACK_INC;\ 00639 } while(0) 00640 00641 #define STACK_PUSH_REPEAT_INC(sindex) do {\ 00642 STACK_ENSURE(1);\ 00643 stk->type = STK_REPEAT_INC;\ 00644 stk->u.repeat_inc.si = (sindex);\ 00645 STACK_INC;\ 00646 } while(0) 00647 00648 #define STACK_PUSH_MEM_START(mnum, s) do {\ 00649 STACK_ENSURE(1);\ 00650 stk->type = STK_MEM_START;\ 00651 stk->u.mem.num = (mnum);\ 00652 stk->u.mem.pstr = (s);\ 00653 stk->u.mem.start = mem_start_stk[mnum];\ 00654 stk->u.mem.end = mem_end_stk[mnum];\ 00655 mem_start_stk[mnum] = GET_STACK_INDEX(stk);\ 00656 mem_end_stk[mnum] = INVALID_STACK_INDEX;\ 00657 STACK_INC;\ 00658 } while(0) 00659 00660 #define STACK_PUSH_MEM_END(mnum, s) do {\ 00661 STACK_ENSURE(1);\ 00662 stk->type = STK_MEM_END;\ 00663 stk->u.mem.num = (mnum);\ 00664 stk->u.mem.pstr = (s);\ 00665 stk->u.mem.start = mem_start_stk[mnum];\ 00666 stk->u.mem.end = mem_end_stk[mnum];\ 00667 mem_end_stk[mnum] = GET_STACK_INDEX(stk);\ 00668 STACK_INC;\ 00669 } while(0) 00670 00671 #define STACK_PUSH_MEM_END_MARK(mnum) do {\ 00672 STACK_ENSURE(1);\ 00673 stk->type = STK_MEM_END_MARK;\ 00674 stk->u.mem.num = (mnum);\ 00675 STACK_INC;\ 00676 } while(0) 00677 00678 #define STACK_GET_MEM_START(mnum, k) do {\ 00679 int level = 0;\ 00680 k = stk;\ 00681 while (k > stk_base) {\ 00682 k--;\ 00683 if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \ 00684 && k->u.mem.num == (mnum)) {\ 00685 level++;\ 00686 }\ 00687 else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\ 00688 if (level == 0) break;\ 00689 level--;\ 00690 }\ 00691 }\ 00692 } while(0) 00693 00694 #define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\ 00695 int level = 0;\ 00696 while (k < stk) {\ 00697 if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\ 00698 if (level == 0) (start) = k->u.mem.pstr;\ 00699 level++;\ 00700 }\ 00701 else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\ 00702 level--;\ 00703 if (level == 0) {\ 00704 (end) = k->u.mem.pstr;\ 00705 break;\ 00706 }\ 00707 }\ 00708 k++;\ 00709 }\ 00710 } while(0) 00711 00712 #define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\ 00713 STACK_ENSURE(1);\ 00714 stk->type = STK_NULL_CHECK_START;\ 00715 stk->u.null_check.num = (cnum);\ 00716 stk->u.null_check.pstr = (s);\ 00717 STACK_INC;\ 00718 } while(0) 00719 00720 #define STACK_PUSH_NULL_CHECK_END(cnum) do {\ 00721 STACK_ENSURE(1);\ 00722 stk->type = STK_NULL_CHECK_END;\ 00723 stk->u.null_check.num = (cnum);\ 00724 STACK_INC;\ 00725 } while(0) 00726 00727 #define STACK_PUSH_CALL_FRAME(pat) do {\ 00728 STACK_ENSURE(1);\ 00729 stk->type = STK_CALL_FRAME;\ 00730 stk->u.call_frame.ret_addr = (pat);\ 00731 STACK_INC;\ 00732 } while(0) 00733 00734 #define STACK_PUSH_RETURN do {\ 00735 STACK_ENSURE(1);\ 00736 stk->type = STK_RETURN;\ 00737 STACK_INC;\ 00738 } while(0) 00739 00740 00741 #ifdef ONIG_DEBUG 00742 #define STACK_BASE_CHECK(p, at) \ 00743 if ((p) < stk_base) {\ 00744 fprintf(stderr, "at %s\n", at);\ 00745 goto stack_error;\ 00746 } 00747 #else 00748 #define STACK_BASE_CHECK(p, at) 00749 #endif 00750 00751 #define STACK_POP_ONE do {\ 00752 stk--;\ 00753 STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \ 00754 } while(0) 00755 00756 #define STACK_POP do {\ 00757 switch (pop_level) {\ 00758 case STACK_POP_LEVEL_FREE:\ 00759 while (1) {\ 00760 stk--;\ 00761 STACK_BASE_CHECK(stk, "STACK_POP"); \ 00762 if ((stk->type & STK_MASK_POP_USED) != 0) break;\ 00763 ELSE_IF_STATE_CHECK_MARK(stk);\ 00764 }\ 00765 break;\ 00766 case STACK_POP_LEVEL_MEM_START:\ 00767 while (1) {\ 00768 stk--;\ 00769 STACK_BASE_CHECK(stk, "STACK_POP 2"); \ 00770 if ((stk->type & STK_MASK_POP_USED) != 0) break;\ 00771 else if (stk->type == STK_MEM_START) {\ 00772 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ 00773 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ 00774 }\ 00775 ELSE_IF_STATE_CHECK_MARK(stk);\ 00776 }\ 00777 break;\ 00778 default:\ 00779 while (1) {\ 00780 stk--;\ 00781 STACK_BASE_CHECK(stk, "STACK_POP 3"); \ 00782 if ((stk->type & STK_MASK_POP_USED) != 0) break;\ 00783 else if (stk->type == STK_MEM_START) {\ 00784 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ 00785 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ 00786 }\ 00787 else if (stk->type == STK_REPEAT_INC) {\ 00788 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ 00789 }\ 00790 else if (stk->type == STK_MEM_END) {\ 00791 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ 00792 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ 00793 }\ 00794 ELSE_IF_STATE_CHECK_MARK(stk);\ 00795 }\ 00796 break;\ 00797 }\ 00798 } while(0) 00799 00800 #define STACK_POP_TIL_POS_NOT do {\ 00801 while (1) {\ 00802 stk--;\ 00803 STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \ 00804 if (stk->type == STK_POS_NOT) break;\ 00805 else if (stk->type == STK_MEM_START) {\ 00806 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ 00807 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ 00808 }\ 00809 else if (stk->type == STK_REPEAT_INC) {\ 00810 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ 00811 }\ 00812 else if (stk->type == STK_MEM_END) {\ 00813 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ 00814 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ 00815 }\ 00816 ELSE_IF_STATE_CHECK_MARK(stk);\ 00817 }\ 00818 } while(0) 00819 00820 #define STACK_POP_TIL_LOOK_BEHIND_NOT do {\ 00821 while (1) {\ 00822 stk--;\ 00823 STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \ 00824 if (stk->type == STK_LOOK_BEHIND_NOT) break;\ 00825 else if (stk->type == STK_MEM_START) {\ 00826 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ 00827 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ 00828 }\ 00829 else if (stk->type == STK_REPEAT_INC) {\ 00830 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ 00831 }\ 00832 else if (stk->type == STK_MEM_END) {\ 00833 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ 00834 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ 00835 }\ 00836 ELSE_IF_STATE_CHECK_MARK(stk);\ 00837 }\ 00838 } while(0) 00839 00840 #define STACK_POS_END(k) do {\ 00841 k = stk;\ 00842 while (1) {\ 00843 k--;\ 00844 STACK_BASE_CHECK(k, "STACK_POS_END"); \ 00845 if (IS_TO_VOID_TARGET(k)) {\ 00846 k->type = STK_VOID;\ 00847 }\ 00848 else if (k->type == STK_POS) {\ 00849 k->type = STK_VOID;\ 00850 break;\ 00851 }\ 00852 }\ 00853 } while(0) 00854 00855 #define STACK_STOP_BT_END do {\ 00856 OnigStackType *k = stk;\ 00857 while (1) {\ 00858 k--;\ 00859 STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \ 00860 if (IS_TO_VOID_TARGET(k)) {\ 00861 k->type = STK_VOID;\ 00862 }\ 00863 else if (k->type == STK_STOP_BT) {\ 00864 k->type = STK_VOID;\ 00865 break;\ 00866 }\ 00867 }\ 00868 } while(0) 00869 00870 #define STACK_NULL_CHECK(isnull,id,s) do {\ 00871 OnigStackType* k = stk;\ 00872 while (1) {\ 00873 k--;\ 00874 STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \ 00875 if (k->type == STK_NULL_CHECK_START) {\ 00876 if (k->u.null_check.num == (id)) {\ 00877 (isnull) = (k->u.null_check.pstr == (s));\ 00878 break;\ 00879 }\ 00880 }\ 00881 }\ 00882 } while(0) 00883 00884 #define STACK_NULL_CHECK_REC(isnull,id,s) do {\ 00885 int level = 0;\ 00886 OnigStackType* k = stk;\ 00887 while (1) {\ 00888 k--;\ 00889 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \ 00890 if (k->type == STK_NULL_CHECK_START) {\ 00891 if (k->u.null_check.num == (id)) {\ 00892 if (level == 0) {\ 00893 (isnull) = (k->u.null_check.pstr == (s));\ 00894 break;\ 00895 }\ 00896 else level--;\ 00897 }\ 00898 }\ 00899 else if (k->type == STK_NULL_CHECK_END) {\ 00900 level++;\ 00901 }\ 00902 }\ 00903 } while(0) 00904 00905 #define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\ 00906 OnigStackType* k = stk;\ 00907 while (1) {\ 00908 k--;\ 00909 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \ 00910 if (k->type == STK_NULL_CHECK_START) {\ 00911 if (k->u.null_check.num == (id)) {\ 00912 if (k->u.null_check.pstr != (s)) {\ 00913 (isnull) = 0;\ 00914 break;\ 00915 }\ 00916 else {\ 00917 UChar* endp;\ 00918 (isnull) = 1;\ 00919 while (k < stk) {\ 00920 if (k->type == STK_MEM_START) {\ 00921 if (k->u.mem.end == INVALID_STACK_INDEX) {\ 00922 (isnull) = 0; break;\ 00923 }\ 00924 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\ 00925 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ 00926 else\ 00927 endp = (UChar* )k->u.mem.end;\ 00928 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\ 00929 (isnull) = 0; break;\ 00930 }\ 00931 else if (endp != s) {\ 00932 (isnull) = -1; /* empty, but position changed */ \ 00933 }\ 00934 }\ 00935 k++;\ 00936 }\ 00937 break;\ 00938 }\ 00939 }\ 00940 }\ 00941 }\ 00942 } while(0) 00943 00944 #define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\ 00945 int level = 0;\ 00946 OnigStackType* k = stk;\ 00947 while (1) {\ 00948 k--;\ 00949 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \ 00950 if (k->type == STK_NULL_CHECK_START) {\ 00951 if (k->u.null_check.num == (id)) {\ 00952 if (level == 0) {\ 00953 if (k->u.null_check.pstr != (s)) {\ 00954 (isnull) = 0;\ 00955 break;\ 00956 }\ 00957 else {\ 00958 UChar* endp;\ 00959 (isnull) = 1;\ 00960 while (k < stk) {\ 00961 if (k->type == STK_MEM_START) {\ 00962 if (k->u.mem.end == INVALID_STACK_INDEX) {\ 00963 (isnull) = 0; break;\ 00964 }\ 00965 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\ 00966 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ 00967 else\ 00968 endp = (UChar* )k->u.mem.end;\ 00969 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\ 00970 (isnull) = 0; break;\ 00971 }\ 00972 else if (endp != s) {\ 00973 (isnull) = -1; /* empty, but position changed */ \ 00974 }\ 00975 }\ 00976 k++;\ 00977 }\ 00978 break;\ 00979 }\ 00980 }\ 00981 else {\ 00982 level--;\ 00983 }\ 00984 }\ 00985 }\ 00986 else if (k->type == STK_NULL_CHECK_END) {\ 00987 if (k->u.null_check.num == (id)) level++;\ 00988 }\ 00989 }\ 00990 } while(0) 00991 00992 #define STACK_GET_REPEAT(id, k) do {\ 00993 int level = 0;\ 00994 k = stk;\ 00995 while (1) {\ 00996 k--;\ 00997 STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \ 00998 if (k->type == STK_REPEAT) {\ 00999 if (level == 0) {\ 01000 if (k->u.repeat.num == (id)) {\ 01001 break;\ 01002 }\ 01003 }\ 01004 }\ 01005 else if (k->type == STK_CALL_FRAME) level--;\ 01006 else if (k->type == STK_RETURN) level++;\ 01007 }\ 01008 } while(0) 01009 01010 #define STACK_RETURN(addr) do {\ 01011 int level = 0;\ 01012 OnigStackType* k = stk;\ 01013 while (1) {\ 01014 k--;\ 01015 STACK_BASE_CHECK(k, "STACK_RETURN"); \ 01016 if (k->type == STK_CALL_FRAME) {\ 01017 if (level == 0) {\ 01018 (addr) = k->u.call_frame.ret_addr;\ 01019 break;\ 01020 }\ 01021 else level--;\ 01022 }\ 01023 else if (k->type == STK_RETURN)\ 01024 level++;\ 01025 }\ 01026 } while(0) 01027 01028 01029 #define STRING_CMP(s1,s2,len) do {\ 01030 while (len-- > 0) {\ 01031 if (*s1++ != *s2++) goto fail;\ 01032 }\ 01033 } while(0) 01034 01035 #define STRING_CMP_IC(case_fold_flag,s1,ps2,len,text_end) do {\ 01036 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \ 01037 goto fail; \ 01038 } while(0) 01039 01040 static int string_cmp_ic(OnigEncoding enc, int case_fold_flag, 01041 UChar* s1, UChar** ps2, OnigDistance mblen, const UChar* text_end) 01042 { 01043 UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN]; 01044 UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN]; 01045 UChar *p1, *p2, *end1, *s2; 01046 int len1, len2; 01047 01048 s2 = *ps2; 01049 end1 = s1 + mblen; 01050 while (s1 < end1) { 01051 len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, text_end, buf1); 01052 len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, text_end, buf2); 01053 if (len1 != len2) return 0; 01054 p1 = buf1; 01055 p2 = buf2; 01056 while (len1-- > 0) { 01057 if (*p1 != *p2) return 0; 01058 p1++; 01059 p2++; 01060 } 01061 } 01062 01063 *ps2 = s2; 01064 return 1; 01065 } 01066 01067 #define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\ 01068 is_fail = 0;\ 01069 while (len-- > 0) {\ 01070 if (*s1++ != *s2++) {\ 01071 is_fail = 1; break;\ 01072 }\ 01073 }\ 01074 } while(0) 01075 01076 #define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,text_end,is_fail) do {\ 01077 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \ 01078 is_fail = 1; \ 01079 else \ 01080 is_fail = 0; \ 01081 } while(0) 01082 01083 01084 #define IS_EMPTY_STR (str == end) 01085 #define ON_STR_BEGIN(s) ((s) == str) 01086 #define ON_STR_END(s) ((s) == end) 01087 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE 01088 #define DATA_ENSURE_CHECK1 (s < right_range) 01089 #define DATA_ENSURE_CHECK(n) (s + (n) <= right_range) 01090 #define DATA_ENSURE(n) if (s + (n) > right_range) goto fail 01091 #else 01092 #define DATA_ENSURE_CHECK1 (s < end) 01093 #define DATA_ENSURE_CHECK(n) (s + (n) <= end) 01094 #define DATA_ENSURE(n) if (s + (n) > end) goto fail 01095 #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ 01096 01097 01098 #ifdef USE_CAPTURE_HISTORY 01099 static int 01100 make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp, 01101 OnigStackType* stk_top, UChar* str, regex_t* reg) 01102 { 01103 int n, r; 01104 OnigCaptureTreeNode* child; 01105 OnigStackType* k = *kp; 01106 01107 while (k < stk_top) { 01108 if (k->type == STK_MEM_START) { 01109 n = k->u.mem.num; 01110 if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP && 01111 BIT_STATUS_AT(reg->capture_history, n) != 0) { 01112 child = history_node_new(); 01113 CHECK_NULL_RETURN_MEMERR(child); 01114 child->group = n; 01115 child->beg = k->u.mem.pstr - str; 01116 r = history_tree_add_child(node, child); 01117 if (r != 0) { 01118 history_tree_free(child); 01119 return r; 01120 } 01121 *kp = (k + 1); 01122 r = make_capture_history_tree(child, kp, stk_top, str, reg); 01123 if (r != 0) return r; 01124 01125 k = *kp; 01126 child->end = k->u.mem.pstr - str; 01127 } 01128 } 01129 else if (k->type == STK_MEM_END) { 01130 if (k->u.mem.num == node->group) { 01131 node->end = k->u.mem.pstr - str; 01132 *kp = k; 01133 return 0; 01134 } 01135 } 01136 k++; 01137 } 01138 01139 return 1; /* 1: root node ending. */ 01140 } 01141 #endif /* USE_CAPTURE_HISTORY */ 01142 01143 #ifdef USE_BACKREF_WITH_LEVEL 01144 static int mem_is_in_memp(int mem, int num, UChar* memp) 01145 { 01146 int i; 01147 MemNumType m; 01148 01149 for (i = 0; i < num; i++) { 01150 GET_MEMNUM_INC(m, memp); 01151 if (mem == (int )m) return 1; 01152 } 01153 return 0; 01154 } 01155 01156 static int backref_match_at_nested_level(regex_t* reg 01157 , OnigStackType* top, OnigStackType* stk_base 01158 , int ignore_case, int case_fold_flag 01159 , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send) 01160 { 01161 UChar *ss, *p, *pstart, *pend = NULL_UCHARP; 01162 int level; 01163 OnigStackType* k; 01164 01165 level = 0; 01166 k = top; 01167 k--; 01168 while (k >= stk_base) { 01169 if (k->type == STK_CALL_FRAME) { 01170 level--; 01171 } 01172 else if (k->type == STK_RETURN) { 01173 level++; 01174 } 01175 else if (level == nest) { 01176 if (k->type == STK_MEM_START) { 01177 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { 01178 pstart = k->u.mem.pstr; 01179 if (pend != NULL_UCHARP) { 01180 if (pend - pstart > send - *s) return 0; /* or goto next_mem; */ 01181 p = pstart; 01182 ss = *s; 01183 01184 if (ignore_case != 0) { 01185 if (string_cmp_ic(reg->enc, case_fold_flag, 01186 pstart, &ss, pend - pstart, send) == 0) 01187 return 0; /* or goto next_mem; */ 01188 } 01189 else { 01190 while (p < pend) { 01191 if (*p++ != *ss++) return 0; /* or goto next_mem; */ 01192 } 01193 } 01194 01195 *s = ss; 01196 return 1; 01197 } 01198 } 01199 } 01200 else if (k->type == STK_MEM_END) { 01201 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { 01202 pend = k->u.mem.pstr; 01203 } 01204 } 01205 } 01206 k--; 01207 } 01208 01209 return 0; 01210 } 01211 #endif /* USE_BACKREF_WITH_LEVEL */ 01212 01213 01214 #ifdef ONIG_DEBUG_STATISTICS 01215 01216 #define USE_TIMEOFDAY 01217 01218 #ifdef USE_TIMEOFDAY 01219 #ifdef HAVE_SYS_TIME_H 01220 #include <sys/time.h> 01221 #endif 01222 #ifdef HAVE_UNISTD_H 01223 #include <unistd.h> 01224 #endif 01225 static struct timeval ts, te; 01226 #define GETTIME(t) gettimeofday(&(t), (struct timezone* )0) 01227 #define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \ 01228 (((te).tv_sec - (ts).tv_sec)*1000000)) 01229 #else /* USE_TIMEOFDAY */ 01230 #ifdef HAVE_SYS_TIMES_H 01231 #include <sys/times.h> 01232 #endif 01233 static struct tms ts, te; 01234 #define GETTIME(t) times(&(t)) 01235 #define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime) 01236 #endif /* USE_TIMEOFDAY */ 01237 01238 static int OpCounter[256]; 01239 static int OpPrevCounter[256]; 01240 static unsigned long OpTime[256]; 01241 static int OpCurr = OP_FINISH; 01242 static int OpPrevTarget = OP_FAIL; 01243 static int MaxStackDepth = 0; 01244 01245 #define MOP_IN(opcode) do {\ 01246 if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\ 01247 OpCurr = opcode;\ 01248 OpCounter[opcode]++;\ 01249 GETTIME(ts);\ 01250 } while(0) 01251 01252 #define MOP_OUT do {\ 01253 GETTIME(te);\ 01254 OpTime[OpCurr] += TIMEDIFF(te, ts);\ 01255 } while(0) 01256 01257 extern void 01258 onig_statistics_init(void) 01259 { 01260 int i; 01261 for (i = 0; i < 256; i++) { 01262 OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0; 01263 } 01264 MaxStackDepth = 0; 01265 } 01266 01267 extern void 01268 onig_print_statistics(FILE* f) 01269 { 01270 int i; 01271 fprintf(f, " count prev time\n"); 01272 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { 01273 fprintf(f, "%8d: %8d: %10ld: %s\n", 01274 OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name); 01275 } 01276 fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth); 01277 } 01278 01279 #define STACK_INC do {\ 01280 stk++;\ 01281 if (stk - stk_base > MaxStackDepth) \ 01282 MaxStackDepth = stk - stk_base;\ 01283 } while(0) 01284 01285 #else /* ONIG_DEBUG_STATISTICS */ 01286 #define STACK_INC stk++ 01287 01288 #define MOP_IN(opcode) 01289 #define MOP_OUT 01290 #endif /* ONIG_DEBUG_STATISTICS */ 01291 01292 01293 01294 /* matching region of POSIX API */ 01295 typedef int regoff_t; 01296 01297 typedef struct { 01298 regoff_t rm_so; 01299 regoff_t rm_eo; 01300 } posix_regmatch_t; 01301 01302 void onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp, 01303 OnigEncoding enc); 01304 01305 /* match data(str - end) from position (sstart). */ 01306 /* if sstart == str then set sprev to NULL. */ 01307 static OnigPosition 01308 match_at(regex_t* reg, const UChar* str, const UChar* end, 01309 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE 01310 const UChar* right_range, 01311 #endif 01312 const UChar* sstart, UChar* sprev, OnigMatchArg* msa) 01313 { 01314 static const UChar FinishCode[] = { OP_FINISH }; 01315 01316 int i, num_mem, pop_level; 01317 ptrdiff_t n, best_len; 01318 LengthType tlen, tlen2; 01319 MemNumType mem; 01320 RelAddrType addr; 01321 OnigOptionType option = reg->options; 01322 OnigEncoding encode = reg->enc; 01323 OnigCaseFoldType case_fold_flag = reg->case_fold_flag; 01324 UChar *s, *q, *sbegin; 01325 UChar *p = reg->p; 01326 UChar *pkeep; 01327 char *alloca_base; 01328 OnigStackType *stk_alloc, *stk_base, *stk, *stk_end; 01329 OnigStackType *stkp; /* used as any purpose. */ 01330 OnigStackIndex si; 01331 OnigStackIndex *repeat_stk; 01332 OnigStackIndex *mem_start_stk, *mem_end_stk; 01333 #ifdef USE_COMBINATION_EXPLOSION_CHECK 01334 int scv; 01335 unsigned char* state_check_buff = msa->state_check_buff; 01336 int num_comb_exp_check = reg->num_comb_exp_check; 01337 #endif 01338 01339 #ifdef USE_SUBEXP_CALL 01340 /* Stack #0 is used to store the pattern itself and used for (?R), \g<0>, etc. */ 01341 n = reg->num_repeat + (reg->num_mem + 1) * 2; 01342 01343 STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE); 01344 pop_level = reg->stack_pop_level; 01345 num_mem = reg->num_mem; 01346 repeat_stk = (OnigStackIndex* )alloca_base; 01347 01348 mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat); 01349 mem_end_stk = mem_start_stk + (num_mem + 1); 01350 for (i = 0; i <= num_mem; i++) { 01351 mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX; 01352 } 01353 #else /* USE_SUBEXP_CALL */ 01354 /* Stack #0 not is used. */ 01355 n = reg->num_repeat + reg->num_mem * 2; 01356 01357 STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE); 01358 pop_level = reg->stack_pop_level; 01359 num_mem = reg->num_mem; 01360 repeat_stk = (OnigStackIndex* )alloca_base; 01361 01362 mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat); 01363 mem_end_stk = mem_start_stk + num_mem; 01364 mem_start_stk--; /* for index start from 1, 01365 mem_start_stk[1]..mem_start_stk[num_mem] */ 01366 mem_end_stk--; /* for index start from 1, 01367 mem_end_stk[1]..mem_end_stk[num_mem] */ 01368 for (i = 1; i <= num_mem; i++) { 01369 mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX; 01370 } 01371 #endif /* USE_SUBEXP_CALL */ 01372 01373 #ifdef ONIG_DEBUG_MATCH 01374 fprintf(stderr, "match_at: str: %"PRIdPTR" (%p), end: %"PRIdPTR" (%p), start: %"PRIdPTR" (%p), sprev: %"PRIdPTR" (%p)\n", 01375 (intptr_t)str, str, (intptr_t)end, end, (intptr_t)sstart, sstart, (intptr_t)sprev, sprev); 01376 fprintf(stderr, "size: %d, start offset: %d\n", 01377 (int )(end - str), (int )(sstart - str)); 01378 #endif 01379 01380 STACK_PUSH_ENSURED(STK_ALT, (UChar *)FinishCode); /* bottom stack */ 01381 best_len = ONIG_MISMATCH; 01382 s = (UChar* )sstart; 01383 pkeep = (UChar* )sstart; 01384 while (1) { 01385 #ifdef ONIG_DEBUG_MATCH 01386 if (s) { 01387 UChar *q, *bp, buf[50]; 01388 int len; 01389 fprintf(stderr, "%4d> \"", (int )(s - str)); 01390 bp = buf; 01391 if (*p != OP_FINISH) { /* s may not be a valid pointer if OP_FINISH. */ 01392 for (i = 0, q = s; i < 7 && q < end; i++) { 01393 len = enclen(encode, q, end); 01394 while (len-- > 0) *bp++ = *q++; 01395 } 01396 } 01397 if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; } 01398 else { xmemcpy(bp, "\"", 1); bp += 1; } 01399 *bp = 0; 01400 fputs((char* )buf, stderr); 01401 for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); 01402 onig_print_compiled_byte_code(stderr, p, p + strlen((char *)p), NULL, encode); 01403 fprintf(stderr, "\n"); 01404 } 01405 #endif 01406 01407 sbegin = s; 01408 switch (*p++) { 01409 case OP_END: MOP_IN(OP_END); 01410 n = s - sstart; 01411 if (n > best_len) { 01412 OnigRegion* region; 01413 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE 01414 if (IS_FIND_LONGEST(option)) { 01415 if (n > msa->best_len) { 01416 msa->best_len = n; 01417 msa->best_s = (UChar* )sstart; 01418 } 01419 else 01420 goto end_best_len; 01421 } 01422 #endif 01423 best_len = n; 01424 region = msa->region; 01425 if (region) { 01426 #ifdef USE_POSIX_API_REGION_OPTION 01427 if (IS_POSIX_REGION(msa->options)) { 01428 posix_regmatch_t* rmt = (posix_regmatch_t* )region; 01429 01430 rmt[0].rm_so = (regoff_t )(((pkeep > s) ? s : pkeep) - str); 01431 rmt[0].rm_eo = (regoff_t )(s - str); 01432 for (i = 1; i <= num_mem; i++) { 01433 if (mem_end_stk[i] != INVALID_STACK_INDEX) { 01434 if (BIT_STATUS_AT(reg->bt_mem_start, i)) 01435 rmt[i].rm_so = (regoff_t )(STACK_AT(mem_start_stk[i])->u.mem.pstr - str); 01436 else 01437 rmt[i].rm_so = (regoff_t )((UChar* )((void* )(mem_start_stk[i])) - str); 01438 01439 rmt[i].rm_eo = (regoff_t )((BIT_STATUS_AT(reg->bt_mem_end, i) 01440 ? STACK_AT(mem_end_stk[i])->u.mem.pstr 01441 : (UChar* )((void* )mem_end_stk[i])) - str); 01442 } 01443 else { 01444 rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS; 01445 } 01446 } 01447 } 01448 else { 01449 #endif /* USE_POSIX_API_REGION_OPTION */ 01450 region->beg[0] = ((pkeep > s) ? s : pkeep) - str; 01451 region->end[0] = s - str; 01452 for (i = 1; i <= num_mem; i++) { 01453 if (mem_end_stk[i] != INVALID_STACK_INDEX) { 01454 if (BIT_STATUS_AT(reg->bt_mem_start, i)) 01455 region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str; 01456 else 01457 region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str; 01458 01459 region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i) 01460 ? STACK_AT(mem_end_stk[i])->u.mem.pstr 01461 : (UChar* )((void* )mem_end_stk[i])) - str; 01462 } 01463 else { 01464 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; 01465 } 01466 } 01467 01468 #ifdef USE_CAPTURE_HISTORY 01469 if (reg->capture_history != 0) { 01470 int r; 01471 OnigCaptureTreeNode* node; 01472 01473 if (IS_NULL(region->history_root)) { 01474 region->history_root = node = history_node_new(); 01475 CHECK_NULL_RETURN_MEMERR(node); 01476 } 01477 else { 01478 node = region->history_root; 01479 history_tree_clear(node); 01480 } 01481 01482 node->group = 0; 01483 node->beg = ((pkeep > s) ? s : pkeep) - str; 01484 node->end = s - str; 01485 01486 stkp = stk_base; 01487 r = make_capture_history_tree(region->history_root, &stkp, 01488 stk, (UChar* )str, reg); 01489 if (r < 0) { 01490 best_len = r; /* error code */ 01491 goto finish; 01492 } 01493 } 01494 #endif /* USE_CAPTURE_HISTORY */ 01495 #ifdef USE_POSIX_API_REGION_OPTION 01496 } /* else IS_POSIX_REGION() */ 01497 #endif 01498 } /* if (region) */ 01499 } /* n > best_len */ 01500 01501 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE 01502 end_best_len: 01503 #endif 01504 MOP_OUT; 01505 01506 if (IS_FIND_CONDITION(option)) { 01507 if (IS_FIND_NOT_EMPTY(option) && s == sstart) { 01508 best_len = ONIG_MISMATCH; 01509 goto fail; /* for retry */ 01510 } 01511 if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) { 01512 goto fail; /* for retry */ 01513 } 01514 } 01515 01516 /* default behavior: return first-matching result. */ 01517 goto finish; 01518 break; 01519 01520 case OP_EXACT1: MOP_IN(OP_EXACT1); 01521 #if 0 01522 DATA_ENSURE(1); 01523 if (*p != *s) goto fail; 01524 p++; s++; 01525 #endif 01526 if (*p != *s++) goto fail; 01527 DATA_ENSURE(0); 01528 p++; 01529 MOP_OUT; 01530 break; 01531 01532 case OP_EXACT1_IC: MOP_IN(OP_EXACT1_IC); 01533 { 01534 int len; 01535 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; 01536 01537 DATA_ENSURE(1); 01538 len = ONIGENC_MBC_CASE_FOLD(encode, 01539 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ 01540 case_fold_flag, 01541 &s, end, lowbuf); 01542 DATA_ENSURE(0); 01543 q = lowbuf; 01544 while (len-- > 0) { 01545 if (*p != *q) { 01546 goto fail; 01547 } 01548 p++; q++; 01549 } 01550 } 01551 MOP_OUT; 01552 break; 01553 01554 case OP_EXACT2: MOP_IN(OP_EXACT2); 01555 DATA_ENSURE(2); 01556 if (*p != *s) goto fail; 01557 p++; s++; 01558 if (*p != *s) goto fail; 01559 sprev = s; 01560 p++; s++; 01561 MOP_OUT; 01562 continue; 01563 break; 01564 01565 case OP_EXACT3: MOP_IN(OP_EXACT3); 01566 DATA_ENSURE(3); 01567 if (*p != *s) goto fail; 01568 p++; s++; 01569 if (*p != *s) goto fail; 01570 p++; s++; 01571 if (*p != *s) goto fail; 01572 sprev = s; 01573 p++; s++; 01574 MOP_OUT; 01575 continue; 01576 break; 01577 01578 case OP_EXACT4: MOP_IN(OP_EXACT4); 01579 DATA_ENSURE(4); 01580 if (*p != *s) goto fail; 01581 p++; s++; 01582 if (*p != *s) goto fail; 01583 p++; s++; 01584 if (*p != *s) goto fail; 01585 p++; s++; 01586 if (*p != *s) goto fail; 01587 sprev = s; 01588 p++; s++; 01589 MOP_OUT; 01590 continue; 01591 break; 01592 01593 case OP_EXACT5: MOP_IN(OP_EXACT5); 01594 DATA_ENSURE(5); 01595 if (*p != *s) goto fail; 01596 p++; s++; 01597 if (*p != *s) goto fail; 01598 p++; s++; 01599 if (*p != *s) goto fail; 01600 p++; s++; 01601 if (*p != *s) goto fail; 01602 p++; s++; 01603 if (*p != *s) goto fail; 01604 sprev = s; 01605 p++; s++; 01606 MOP_OUT; 01607 continue; 01608 break; 01609 01610 case OP_EXACTN: MOP_IN(OP_EXACTN); 01611 GET_LENGTH_INC(tlen, p); 01612 DATA_ENSURE(tlen); 01613 while (tlen-- > 0) { 01614 if (*p++ != *s++) goto fail; 01615 } 01616 sprev = s - 1; 01617 MOP_OUT; 01618 continue; 01619 break; 01620 01621 case OP_EXACTN_IC: MOP_IN(OP_EXACTN_IC); 01622 { 01623 int len; 01624 UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; 01625 01626 GET_LENGTH_INC(tlen, p); 01627 endp = p + tlen; 01628 01629 while (p < endp) { 01630 sprev = s; 01631 DATA_ENSURE(1); 01632 len = ONIGENC_MBC_CASE_FOLD(encode, 01633 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ 01634 case_fold_flag, 01635 &s, end, lowbuf); 01636 DATA_ENSURE(0); 01637 q = lowbuf; 01638 while (len-- > 0) { 01639 if (*p != *q) goto fail; 01640 p++; q++; 01641 } 01642 } 01643 } 01644 01645 MOP_OUT; 01646 continue; 01647 break; 01648 01649 case OP_EXACTMB2N1: MOP_IN(OP_EXACTMB2N1); 01650 DATA_ENSURE(2); 01651 if (*p != *s) goto fail; 01652 p++; s++; 01653 if (*p != *s) goto fail; 01654 p++; s++; 01655 MOP_OUT; 01656 break; 01657 01658 case OP_EXACTMB2N2: MOP_IN(OP_EXACTMB2N2); 01659 DATA_ENSURE(4); 01660 if (*p != *s) goto fail; 01661 p++; s++; 01662 if (*p != *s) goto fail; 01663 p++; s++; 01664 sprev = s; 01665 if (*p != *s) goto fail; 01666 p++; s++; 01667 if (*p != *s) goto fail; 01668 p++; s++; 01669 MOP_OUT; 01670 continue; 01671 break; 01672 01673 case OP_EXACTMB2N3: MOP_IN(OP_EXACTMB2N3); 01674 DATA_ENSURE(6); 01675 if (*p != *s) goto fail; 01676 p++; s++; 01677 if (*p != *s) goto fail; 01678 p++; s++; 01679 if (*p != *s) goto fail; 01680 p++; s++; 01681 if (*p != *s) goto fail; 01682 p++; s++; 01683 sprev = s; 01684 if (*p != *s) goto fail; 01685 p++; s++; 01686 if (*p != *s) goto fail; 01687 p++; s++; 01688 MOP_OUT; 01689 continue; 01690 break; 01691 01692 case OP_EXACTMB2N: MOP_IN(OP_EXACTMB2N); 01693 GET_LENGTH_INC(tlen, p); 01694 DATA_ENSURE(tlen * 2); 01695 while (tlen-- > 0) { 01696 if (*p != *s) goto fail; 01697 p++; s++; 01698 if (*p != *s) goto fail; 01699 p++; s++; 01700 } 01701 sprev = s - 2; 01702 MOP_OUT; 01703 continue; 01704 break; 01705 01706 case OP_EXACTMB3N: MOP_IN(OP_EXACTMB3N); 01707 GET_LENGTH_INC(tlen, p); 01708 DATA_ENSURE(tlen * 3); 01709 while (tlen-- > 0) { 01710 if (*p != *s) goto fail; 01711 p++; s++; 01712 if (*p != *s) goto fail; 01713 p++; s++; 01714 if (*p != *s) goto fail; 01715 p++; s++; 01716 } 01717 sprev = s - 3; 01718 MOP_OUT; 01719 continue; 01720 break; 01721 01722 case OP_EXACTMBN: MOP_IN(OP_EXACTMBN); 01723 GET_LENGTH_INC(tlen, p); /* mb-len */ 01724 GET_LENGTH_INC(tlen2, p); /* string len */ 01725 tlen2 *= tlen; 01726 DATA_ENSURE(tlen2); 01727 while (tlen2-- > 0) { 01728 if (*p != *s) goto fail; 01729 p++; s++; 01730 } 01731 sprev = s - tlen; 01732 MOP_OUT; 01733 continue; 01734 break; 01735 01736 case OP_CCLASS: MOP_IN(OP_CCLASS); 01737 DATA_ENSURE(1); 01738 if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail; 01739 p += SIZE_BITSET; 01740 s += enclen(encode, s, end); /* OP_CCLASS can match mb-code. \D, \S */ 01741 MOP_OUT; 01742 break; 01743 01744 case OP_CCLASS_MB: MOP_IN(OP_CCLASS_MB); 01745 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) goto fail; 01746 01747 cclass_mb: 01748 GET_LENGTH_INC(tlen, p); 01749 { 01750 OnigCodePoint code; 01751 UChar *ss; 01752 int mb_len; 01753 01754 DATA_ENSURE(1); 01755 mb_len = enclen(encode, s, end); 01756 DATA_ENSURE(mb_len); 01757 ss = s; 01758 s += mb_len; 01759 code = ONIGENC_MBC_TO_CODE(encode, ss, s); 01760 01761 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS 01762 if (! onig_is_in_code_range(p, code)) goto fail; 01763 #else 01764 q = p; 01765 ALIGNMENT_RIGHT(q); 01766 if (! onig_is_in_code_range(q, code)) goto fail; 01767 #endif 01768 } 01769 p += tlen; 01770 MOP_OUT; 01771 break; 01772 01773 case OP_CCLASS_MIX: MOP_IN(OP_CCLASS_MIX); 01774 DATA_ENSURE(1); 01775 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) { 01776 p += SIZE_BITSET; 01777 goto cclass_mb; 01778 } 01779 else { 01780 if (BITSET_AT(((BitSetRef )p), *s) == 0) 01781 goto fail; 01782 01783 p += SIZE_BITSET; 01784 GET_LENGTH_INC(tlen, p); 01785 p += tlen; 01786 s++; 01787 } 01788 MOP_OUT; 01789 break; 01790 01791 case OP_CCLASS_NOT: MOP_IN(OP_CCLASS_NOT); 01792 DATA_ENSURE(1); 01793 if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail; 01794 p += SIZE_BITSET; 01795 s += enclen(encode, s, end); 01796 MOP_OUT; 01797 break; 01798 01799 case OP_CCLASS_MB_NOT: MOP_IN(OP_CCLASS_MB_NOT); 01800 DATA_ENSURE(1); 01801 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) { 01802 s++; 01803 GET_LENGTH_INC(tlen, p); 01804 p += tlen; 01805 goto cc_mb_not_success; 01806 } 01807 01808 cclass_mb_not: 01809 GET_LENGTH_INC(tlen, p); 01810 { 01811 OnigCodePoint code; 01812 UChar *ss; 01813 int mb_len = enclen(encode, s, end); 01814 01815 if (! DATA_ENSURE_CHECK(mb_len)) { 01816 DATA_ENSURE(1); 01817 s = (UChar* )end; 01818 p += tlen; 01819 goto cc_mb_not_success; 01820 } 01821 01822 ss = s; 01823 s += mb_len; 01824 code = ONIGENC_MBC_TO_CODE(encode, ss, s); 01825 01826 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS 01827 if (onig_is_in_code_range(p, code)) goto fail; 01828 #else 01829 q = p; 01830 ALIGNMENT_RIGHT(q); 01831 if (onig_is_in_code_range(q, code)) goto fail; 01832 #endif 01833 } 01834 p += tlen; 01835 01836 cc_mb_not_success: 01837 MOP_OUT; 01838 break; 01839 01840 case OP_CCLASS_MIX_NOT: MOP_IN(OP_CCLASS_MIX_NOT); 01841 DATA_ENSURE(1); 01842 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) { 01843 p += SIZE_BITSET; 01844 goto cclass_mb_not; 01845 } 01846 else { 01847 if (BITSET_AT(((BitSetRef )p), *s) != 0) 01848 goto fail; 01849 01850 p += SIZE_BITSET; 01851 GET_LENGTH_INC(tlen, p); 01852 p += tlen; 01853 s++; 01854 } 01855 MOP_OUT; 01856 break; 01857 01858 case OP_CCLASS_NODE: MOP_IN(OP_CCLASS_NODE); 01859 { 01860 OnigCodePoint code; 01861 void *node; 01862 int mb_len; 01863 UChar *ss; 01864 01865 DATA_ENSURE(1); 01866 GET_POINTER_INC(node, p); 01867 mb_len = enclen(encode, s, end); 01868 ss = s; 01869 s += mb_len; 01870 DATA_ENSURE(0); 01871 code = ONIGENC_MBC_TO_CODE(encode, ss, s); 01872 if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail; 01873 } 01874 MOP_OUT; 01875 break; 01876 01877 case OP_ANYCHAR: MOP_IN(OP_ANYCHAR); 01878 DATA_ENSURE(1); 01879 n = enclen(encode, s, end); 01880 DATA_ENSURE(n); 01881 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail; 01882 s += n; 01883 MOP_OUT; 01884 break; 01885 01886 case OP_ANYCHAR_ML: MOP_IN(OP_ANYCHAR_ML); 01887 DATA_ENSURE(1); 01888 n = enclen(encode, s, end); 01889 DATA_ENSURE(n); 01890 s += n; 01891 MOP_OUT; 01892 break; 01893 01894 case OP_ANYCHAR_STAR: MOP_IN(OP_ANYCHAR_STAR); 01895 while (DATA_ENSURE_CHECK1) { 01896 STACK_PUSH_ALT(p, s, sprev, pkeep); 01897 n = enclen(encode, s, end); 01898 DATA_ENSURE(n); 01899 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail; 01900 sprev = s; 01901 s += n; 01902 } 01903 MOP_OUT; 01904 break; 01905 01906 case OP_ANYCHAR_ML_STAR: MOP_IN(OP_ANYCHAR_ML_STAR); 01907 while (DATA_ENSURE_CHECK1) { 01908 STACK_PUSH_ALT(p, s, sprev, pkeep); 01909 n = enclen(encode, s, end); 01910 if (n > 1) { 01911 DATA_ENSURE(n); 01912 sprev = s; 01913 s += n; 01914 } 01915 else { 01916 sprev = s; 01917 s++; 01918 } 01919 } 01920 MOP_OUT; 01921 break; 01922 01923 case OP_ANYCHAR_STAR_PEEK_NEXT: MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); 01924 while (DATA_ENSURE_CHECK1) { 01925 if (*p == *s) { 01926 STACK_PUSH_ALT(p + 1, s, sprev, pkeep); 01927 } 01928 n = enclen(encode, s, end); 01929 DATA_ENSURE(n); 01930 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail; 01931 sprev = s; 01932 s += n; 01933 } 01934 p++; 01935 MOP_OUT; 01936 break; 01937 01938 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); 01939 while (DATA_ENSURE_CHECK1) { 01940 if (*p == *s) { 01941 STACK_PUSH_ALT(p + 1, s, sprev, pkeep); 01942 } 01943 n = enclen(encode, s, end); 01944 if (n > 1) { 01945 DATA_ENSURE(n); 01946 sprev = s; 01947 s += n; 01948 } 01949 else { 01950 sprev = s; 01951 s++; 01952 } 01953 } 01954 p++; 01955 MOP_OUT; 01956 break; 01957 01958 #ifdef USE_COMBINATION_EXPLOSION_CHECK 01959 case OP_STATE_CHECK_ANYCHAR_STAR: MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR); 01960 GET_STATE_CHECK_NUM_INC(mem, p); 01961 while (DATA_ENSURE_CHECK1) { 01962 STATE_CHECK_VAL(scv, mem); 01963 if (scv) goto fail; 01964 01965 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep); 01966 n = enclen(encode, s, end); 01967 DATA_ENSURE(n); 01968 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail; 01969 sprev = s; 01970 s += n; 01971 } 01972 MOP_OUT; 01973 break; 01974 01975 case OP_STATE_CHECK_ANYCHAR_ML_STAR: 01976 MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR); 01977 01978 GET_STATE_CHECK_NUM_INC(mem, p); 01979 while (DATA_ENSURE_CHECK1) { 01980 STATE_CHECK_VAL(scv, mem); 01981 if (scv) goto fail; 01982 01983 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep); 01984 n = enclen(encode, s, end); 01985 if (n > 1) { 01986 DATA_ENSURE(n); 01987 sprev = s; 01988 s += n; 01989 } 01990 else { 01991 sprev = s; 01992 s++; 01993 } 01994 } 01995 MOP_OUT; 01996 break; 01997 #endif /* USE_COMBINATION_EXPLOSION_CHECK */ 01998 01999 case OP_WORD: MOP_IN(OP_WORD); 02000 DATA_ENSURE(1); 02001 if (! ONIGENC_IS_MBC_WORD(encode, s, end)) 02002 goto fail; 02003 02004 s += enclen(encode, s, end); 02005 MOP_OUT; 02006 break; 02007 02008 case OP_ASCII_WORD: MOP_IN(OP_ASCII_WORD); 02009 DATA_ENSURE(1); 02010 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) 02011 goto fail; 02012 02013 s += enclen(encode, s, end); 02014 MOP_OUT; 02015 break; 02016 02017 case OP_NOT_WORD: MOP_IN(OP_NOT_WORD); 02018 DATA_ENSURE(1); 02019 if (ONIGENC_IS_MBC_WORD(encode, s, end)) 02020 goto fail; 02021 02022 s += enclen(encode, s, end); 02023 MOP_OUT; 02024 break; 02025 02026 case OP_NOT_ASCII_WORD: MOP_IN(OP_NOT_ASCII_WORD); 02027 DATA_ENSURE(1); 02028 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) 02029 goto fail; 02030 02031 s += enclen(encode, s, end); 02032 MOP_OUT; 02033 break; 02034 02035 case OP_WORD_BOUND: MOP_IN(OP_WORD_BOUND); 02036 if (ON_STR_BEGIN(s)) { 02037 DATA_ENSURE(1); 02038 if (! ONIGENC_IS_MBC_WORD(encode, s, end)) 02039 goto fail; 02040 } 02041 else if (ON_STR_END(s)) { 02042 if (! ONIGENC_IS_MBC_WORD(encode, sprev, end)) 02043 goto fail; 02044 } 02045 else { 02046 if (ONIGENC_IS_MBC_WORD(encode, s, end) 02047 == ONIGENC_IS_MBC_WORD(encode, sprev, end)) 02048 goto fail; 02049 } 02050 MOP_OUT; 02051 continue; 02052 break; 02053 02054 case OP_ASCII_WORD_BOUND: MOP_IN(OP_ASCII_WORD_BOUND); 02055 if (ON_STR_BEGIN(s)) { 02056 DATA_ENSURE(1); 02057 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) 02058 goto fail; 02059 } 02060 else if (ON_STR_END(s)) { 02061 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) 02062 goto fail; 02063 } 02064 else { 02065 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end) 02066 == ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) 02067 goto fail; 02068 } 02069 MOP_OUT; 02070 continue; 02071 break; 02072 02073 case OP_NOT_WORD_BOUND: MOP_IN(OP_NOT_WORD_BOUND); 02074 if (ON_STR_BEGIN(s)) { 02075 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) 02076 goto fail; 02077 } 02078 else if (ON_STR_END(s)) { 02079 if (ONIGENC_IS_MBC_WORD(encode, sprev, end)) 02080 goto fail; 02081 } 02082 else { 02083 if (ONIGENC_IS_MBC_WORD(encode, s, end) 02084 != ONIGENC_IS_MBC_WORD(encode, sprev, end)) 02085 goto fail; 02086 } 02087 MOP_OUT; 02088 continue; 02089 break; 02090 02091 case OP_NOT_ASCII_WORD_BOUND: MOP_IN(OP_NOT_ASCII_WORD_BOUND); 02092 if (ON_STR_BEGIN(s)) { 02093 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) 02094 goto fail; 02095 } 02096 else if (ON_STR_END(s)) { 02097 if (ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) 02098 goto fail; 02099 } 02100 else { 02101 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end) 02102 != ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) 02103 goto fail; 02104 } 02105 MOP_OUT; 02106 continue; 02107 break; 02108 02109 #ifdef USE_WORD_BEGIN_END 02110 case OP_WORD_BEGIN: MOP_IN(OP_WORD_BEGIN); 02111 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) { 02112 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) { 02113 MOP_OUT; 02114 continue; 02115 } 02116 } 02117 goto fail; 02118 break; 02119 02120 case OP_ASCII_WORD_BEGIN: MOP_IN(OP_ASCII_WORD_BEGIN); 02121 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) { 02122 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) { 02123 MOP_OUT; 02124 continue; 02125 } 02126 } 02127 goto fail; 02128 break; 02129 02130 case OP_WORD_END: MOP_IN(OP_WORD_END); 02131 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) { 02132 if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) { 02133 MOP_OUT; 02134 continue; 02135 } 02136 } 02137 goto fail; 02138 break; 02139 02140 case OP_ASCII_WORD_END: MOP_IN(OP_ASCII_WORD_END); 02141 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) { 02142 if (ON_STR_END(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) { 02143 MOP_OUT; 02144 continue; 02145 } 02146 } 02147 goto fail; 02148 break; 02149 #endif 02150 02151 case OP_BEGIN_BUF: MOP_IN(OP_BEGIN_BUF); 02152 if (! ON_STR_BEGIN(s)) goto fail; 02153 02154 MOP_OUT; 02155 continue; 02156 break; 02157 02158 case OP_END_BUF: MOP_IN(OP_END_BUF); 02159 if (! ON_STR_END(s)) goto fail; 02160 02161 MOP_OUT; 02162 continue; 02163 break; 02164 02165 case OP_BEGIN_LINE: MOP_IN(OP_BEGIN_LINE); 02166 op_begin_line: 02167 if (ON_STR_BEGIN(s)) { 02168 if (IS_NOTBOL(msa->options)) goto fail; 02169 MOP_OUT; 02170 continue; 02171 } 02172 else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) 02173 #ifdef USE_CRNL_AS_LINE_TERMINATOR 02174 && !(IS_NEWLINE_CRLF(option) 02175 && ONIGENC_IS_MBC_CRNL(encode, sprev, end)) 02176 #endif 02177 && !ON_STR_END(s)) { 02178 MOP_OUT; 02179 continue; 02180 } 02181 goto fail; 02182 break; 02183 02184 case OP_END_LINE: MOP_IN(OP_END_LINE); 02185 if (ON_STR_END(s)) { 02186 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE 02187 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) { 02188 #endif 02189 if (IS_NOTEOL(msa->options)) goto fail; 02190 MOP_OUT; 02191 continue; 02192 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE 02193 } 02194 #endif 02195 } 02196 else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) { 02197 MOP_OUT; 02198 continue; 02199 } 02200 goto fail; 02201 break; 02202 02203 case OP_SEMI_END_BUF: MOP_IN(OP_SEMI_END_BUF); 02204 if (ON_STR_END(s)) { 02205 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE 02206 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) { 02207 #endif 02208 if (IS_NOTEOL(msa->options)) goto fail; 02209 MOP_OUT; 02210 continue; 02211 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE 02212 } 02213 #endif 02214 } 02215 else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) { 02216 UChar* ss = s + enclen(encode, s, end); 02217 if (ON_STR_END(ss)) { 02218 MOP_OUT; 02219 continue; 02220 } 02221 #ifdef USE_CRNL_AS_LINE_TERMINATOR 02222 else if (IS_NEWLINE_CRLF(option) 02223 && ONIGENC_IS_MBC_CRNL(encode, s, end)) { 02224 ss += enclen(encode, ss, end); 02225 if (ON_STR_END(ss)) { 02226 MOP_OUT; 02227 continue; 02228 } 02229 } 02230 #endif 02231 } 02232 goto fail; 02233 break; 02234 02235 case OP_BEGIN_POSITION: MOP_IN(OP_BEGIN_POSITION); 02236 if (s != msa->gpos) 02237 goto fail; 02238 02239 MOP_OUT; 02240 continue; 02241 break; 02242 02243 case OP_BEGIN_POS_OR_LINE: MOP_IN(OP_BEGIN_POS_OR_LINE); 02244 if (s != msa->gpos) 02245 goto op_begin_line; 02246 02247 MOP_OUT; 02248 continue; 02249 break; 02250 02251 case OP_MEMORY_START_PUSH: MOP_IN(OP_MEMORY_START_PUSH); 02252 GET_MEMNUM_INC(mem, p); 02253 STACK_PUSH_MEM_START(mem, s); 02254 MOP_OUT; 02255 continue; 02256 break; 02257 02258 case OP_MEMORY_START: MOP_IN(OP_MEMORY_START); 02259 GET_MEMNUM_INC(mem, p); 02260 mem_start_stk[mem] = (OnigStackIndex )((void* )s); 02261 MOP_OUT; 02262 continue; 02263 break; 02264 02265 case OP_MEMORY_END_PUSH: MOP_IN(OP_MEMORY_END_PUSH); 02266 GET_MEMNUM_INC(mem, p); 02267 STACK_PUSH_MEM_END(mem, s); 02268 MOP_OUT; 02269 continue; 02270 break; 02271 02272 case OP_MEMORY_END: MOP_IN(OP_MEMORY_END); 02273 GET_MEMNUM_INC(mem, p); 02274 mem_end_stk[mem] = (OnigStackIndex )((void* )s); 02275 MOP_OUT; 02276 continue; 02277 break; 02278 02279 case OP_KEEP: MOP_IN(OP_KEEP); 02280 pkeep = s; 02281 MOP_OUT; 02282 continue; 02283 break; 02284 02285 #ifdef USE_SUBEXP_CALL 02286 case OP_MEMORY_END_PUSH_REC: MOP_IN(OP_MEMORY_END_PUSH_REC); 02287 GET_MEMNUM_INC(mem, p); 02288 STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */ 02289 STACK_PUSH_MEM_END(mem, s); 02290 mem_start_stk[mem] = GET_STACK_INDEX(stkp); 02291 MOP_OUT; 02292 continue; 02293 break; 02294 02295 case OP_MEMORY_END_REC: MOP_IN(OP_MEMORY_END_REC); 02296 GET_MEMNUM_INC(mem, p); 02297 mem_end_stk[mem] = (OnigStackIndex )((void* )s); 02298 STACK_GET_MEM_START(mem, stkp); 02299 02300 if (BIT_STATUS_AT(reg->bt_mem_start, mem)) 02301 mem_start_stk[mem] = GET_STACK_INDEX(stkp); 02302 else 02303 mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr); 02304 02305 STACK_PUSH_MEM_END_MARK(mem); 02306 MOP_OUT; 02307 continue; 02308 break; 02309 #endif 02310 02311 case OP_BACKREF1: MOP_IN(OP_BACKREF1); 02312 mem = 1; 02313 goto backref; 02314 break; 02315 02316 case OP_BACKREF2: MOP_IN(OP_BACKREF2); 02317 mem = 2; 02318 goto backref; 02319 break; 02320 02321 case OP_BACKREFN: MOP_IN(OP_BACKREFN); 02322 GET_MEMNUM_INC(mem, p); 02323 backref: 02324 { 02325 int len; 02326 UChar *pstart, *pend; 02327 02328 /* if you want to remove following line, 02329 you should check in parse and compile time. */ 02330 if (mem > num_mem) goto fail; 02331 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; 02332 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; 02333 02334 if (BIT_STATUS_AT(reg->bt_mem_start, mem)) 02335 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; 02336 else 02337 pstart = (UChar* )((void* )mem_start_stk[mem]); 02338 02339 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) 02340 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr 02341 : (UChar* )((void* )mem_end_stk[mem])); 02342 n = pend - pstart; 02343 DATA_ENSURE(n); 02344 sprev = s; 02345 STRING_CMP(pstart, s, n); 02346 while (sprev + (len = enclen(encode, sprev, end)) < s) 02347 sprev += len; 02348 02349 MOP_OUT; 02350 continue; 02351 } 02352 break; 02353 02354 case OP_BACKREFN_IC: MOP_IN(OP_BACKREFN_IC); 02355 GET_MEMNUM_INC(mem, p); 02356 { 02357 int len; 02358 UChar *pstart, *pend; 02359 02360 /* if you want to remove following line, 02361 you should check in parse and compile time. */ 02362 if (mem > num_mem) goto fail; 02363 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; 02364 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; 02365 02366 if (BIT_STATUS_AT(reg->bt_mem_start, mem)) 02367 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; 02368 else 02369 pstart = (UChar* )((void* )mem_start_stk[mem]); 02370 02371 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) 02372 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr 02373 : (UChar* )((void* )mem_end_stk[mem])); 02374 n = pend - pstart; 02375 DATA_ENSURE(n); 02376 sprev = s; 02377 STRING_CMP_IC(case_fold_flag, pstart, &s, (int)n, end); 02378 while (sprev + (len = enclen(encode, sprev, end)) < s) 02379 sprev += len; 02380 02381 MOP_OUT; 02382 continue; 02383 } 02384 break; 02385 02386 case OP_BACKREF_MULTI: MOP_IN(OP_BACKREF_MULTI); 02387 { 02388 int len, is_fail; 02389 UChar *pstart, *pend, *swork; 02390 02391 GET_LENGTH_INC(tlen, p); 02392 for (i = 0; i < tlen; i++) { 02393 GET_MEMNUM_INC(mem, p); 02394 02395 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; 02396 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; 02397 02398 if (BIT_STATUS_AT(reg->bt_mem_start, mem)) 02399 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; 02400 else 02401 pstart = (UChar* )((void* )mem_start_stk[mem]); 02402 02403 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) 02404 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr 02405 : (UChar* )((void* )mem_end_stk[mem])); 02406 n = pend - pstart; 02407 DATA_ENSURE(n); 02408 sprev = s; 02409 swork = s; 02410 STRING_CMP_VALUE(pstart, swork, n, is_fail); 02411 if (is_fail) continue; 02412 s = swork; 02413 while (sprev + (len = enclen(encode, sprev, end)) < s) 02414 sprev += len; 02415 02416 p += (SIZE_MEMNUM * (tlen - i - 1)); 02417 break; /* success */ 02418 } 02419 if (i == tlen) goto fail; 02420 MOP_OUT; 02421 continue; 02422 } 02423 break; 02424 02425 case OP_BACKREF_MULTI_IC: MOP_IN(OP_BACKREF_MULTI_IC); 02426 { 02427 int len, is_fail; 02428 UChar *pstart, *pend, *swork; 02429 02430 GET_LENGTH_INC(tlen, p); 02431 for (i = 0; i < tlen; i++) { 02432 GET_MEMNUM_INC(mem, p); 02433 02434 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; 02435 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; 02436 02437 if (BIT_STATUS_AT(reg->bt_mem_start, mem)) 02438 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; 02439 else 02440 pstart = (UChar* )((void* )mem_start_stk[mem]); 02441 02442 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) 02443 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr 02444 : (UChar* )((void* )mem_end_stk[mem])); 02445 n = pend - pstart; 02446 DATA_ENSURE(n); 02447 sprev = s; 02448 swork = s; 02449 STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, end, is_fail); 02450 if (is_fail) continue; 02451 s = swork; 02452 while (sprev + (len = enclen(encode, sprev, end)) < s) 02453 sprev += len; 02454 02455 p += (SIZE_MEMNUM * (tlen - i - 1)); 02456 break; /* success */ 02457 } 02458 if (i == tlen) goto fail; 02459 MOP_OUT; 02460 continue; 02461 } 02462 break; 02463 02464 #ifdef USE_BACKREF_WITH_LEVEL 02465 case OP_BACKREF_WITH_LEVEL: 02466 { 02467 int len; 02468 OnigOptionType ic; 02469 LengthType level; 02470 02471 GET_OPTION_INC(ic, p); 02472 GET_LENGTH_INC(level, p); 02473 GET_LENGTH_INC(tlen, p); 02474 02475 sprev = s; 02476 if (backref_match_at_nested_level(reg, stk, stk_base, ic 02477 , case_fold_flag, (int )level, (int )tlen, p, &s, end)) { 02478 while (sprev + (len = enclen(encode, sprev, end)) < s) 02479 sprev += len; 02480 02481 p += (SIZE_MEMNUM * tlen); 02482 } 02483 else 02484 goto fail; 02485 02486 MOP_OUT; 02487 continue; 02488 } 02489 02490 break; 02491 #endif 02492 02493 #if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */ 02494 case OP_SET_OPTION_PUSH: MOP_IN(OP_SET_OPTION_PUSH); 02495 GET_OPTION_INC(option, p); 02496 STACK_PUSH_ALT(p, s, sprev, pkeep); 02497 p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL; 02498 MOP_OUT; 02499 continue; 02500 break; 02501 02502 case OP_SET_OPTION: MOP_IN(OP_SET_OPTION); 02503 GET_OPTION_INC(option, p); 02504 MOP_OUT; 02505 continue; 02506 break; 02507 #endif 02508 02509 case OP_NULL_CHECK_START: MOP_IN(OP_NULL_CHECK_START); 02510 GET_MEMNUM_INC(mem, p); /* mem: null check id */ 02511 STACK_PUSH_NULL_CHECK_START(mem, s); 02512 MOP_OUT; 02513 continue; 02514 break; 02515 02516 case OP_NULL_CHECK_END: MOP_IN(OP_NULL_CHECK_END); 02517 { 02518 int isnull; 02519 02520 GET_MEMNUM_INC(mem, p); /* mem: null check id */ 02521 STACK_NULL_CHECK(isnull, mem, s); 02522 if (isnull) { 02523 #ifdef ONIG_DEBUG_MATCH 02524 fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%"PRIdPTR" (%p)\n", 02525 (int )mem, (intptr_t )s, s); 02526 #endif 02527 null_check_found: 02528 /* empty loop founded, skip next instruction */ 02529 switch (*p++) { 02530 case OP_JUMP: 02531 case OP_PUSH: 02532 p += SIZE_RELADDR; 02533 break; 02534 case OP_REPEAT_INC: 02535 case OP_REPEAT_INC_NG: 02536 case OP_REPEAT_INC_SG: 02537 case OP_REPEAT_INC_NG_SG: 02538 p += SIZE_MEMNUM; 02539 break; 02540 default: 02541 goto unexpected_bytecode_error; 02542 break; 02543 } 02544 } 02545 } 02546 MOP_OUT; 02547 continue; 02548 break; 02549 02550 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT 02551 case OP_NULL_CHECK_END_MEMST: MOP_IN(OP_NULL_CHECK_END_MEMST); 02552 { 02553 int isnull; 02554 02555 GET_MEMNUM_INC(mem, p); /* mem: null check id */ 02556 STACK_NULL_CHECK_MEMST(isnull, mem, s, reg); 02557 if (isnull) { 02558 #ifdef ONIG_DEBUG_MATCH 02559 fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%"PRIdPTR" (%p)\n", 02560 (int )mem, (intptr_t )s, s); 02561 #endif 02562 if (isnull == -1) goto fail; 02563 goto null_check_found; 02564 } 02565 } 02566 MOP_OUT; 02567 continue; 02568 break; 02569 #endif 02570 02571 #ifdef USE_SUBEXP_CALL 02572 case OP_NULL_CHECK_END_MEMST_PUSH: 02573 MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH); 02574 { 02575 int isnull; 02576 02577 GET_MEMNUM_INC(mem, p); /* mem: null check id */ 02578 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT 02579 STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg); 02580 #else 02581 STACK_NULL_CHECK_REC(isnull, mem, s); 02582 #endif 02583 if (isnull) { 02584 #ifdef ONIG_DEBUG_MATCH 02585 fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%"PRIdPTR" (%p)\n", 02586 (int )mem, (intptr_t )s, s); 02587 #endif 02588 if (isnull == -1) goto fail; 02589 goto null_check_found; 02590 } 02591 else { 02592 STACK_PUSH_NULL_CHECK_END(mem); 02593 } 02594 } 02595 MOP_OUT; 02596 continue; 02597 break; 02598 #endif 02599 02600 case OP_JUMP: MOP_IN(OP_JUMP); 02601 GET_RELADDR_INC(addr, p); 02602 p += addr; 02603 MOP_OUT; 02604 CHECK_INTERRUPT_IN_MATCH_AT; 02605 continue; 02606 break; 02607 02608 case OP_PUSH: MOP_IN(OP_PUSH); 02609 GET_RELADDR_INC(addr, p); 02610 STACK_PUSH_ALT(p + addr, s, sprev, pkeep); 02611 MOP_OUT; 02612 continue; 02613 break; 02614 02615 #ifdef USE_COMBINATION_EXPLOSION_CHECK 02616 case OP_STATE_CHECK_PUSH: MOP_IN(OP_STATE_CHECK_PUSH); 02617 GET_STATE_CHECK_NUM_INC(mem, p); 02618 STATE_CHECK_VAL(scv, mem); 02619 if (scv) goto fail; 02620 02621 GET_RELADDR_INC(addr, p); 02622 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep); 02623 MOP_OUT; 02624 continue; 02625 break; 02626 02627 case OP_STATE_CHECK_PUSH_OR_JUMP: MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP); 02628 GET_STATE_CHECK_NUM_INC(mem, p); 02629 GET_RELADDR_INC(addr, p); 02630 STATE_CHECK_VAL(scv, mem); 02631 if (scv) { 02632 p += addr; 02633 } 02634 else { 02635 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep); 02636 } 02637 MOP_OUT; 02638 continue; 02639 break; 02640 02641 case OP_STATE_CHECK: MOP_IN(OP_STATE_CHECK); 02642 GET_STATE_CHECK_NUM_INC(mem, p); 02643 STATE_CHECK_VAL(scv, mem); 02644 if (scv) goto fail; 02645 02646 STACK_PUSH_STATE_CHECK(s, mem); 02647 MOP_OUT; 02648 continue; 02649 break; 02650 #endif /* USE_COMBINATION_EXPLOSION_CHECK */ 02651 02652 case OP_POP: MOP_IN(OP_POP); 02653 STACK_POP_ONE; 02654 MOP_OUT; 02655 continue; 02656 break; 02657 02658 case OP_PUSH_OR_JUMP_EXACT1: MOP_IN(OP_PUSH_OR_JUMP_EXACT1); 02659 GET_RELADDR_INC(addr, p); 02660 if (*p == *s && DATA_ENSURE_CHECK1) { 02661 p++; 02662 STACK_PUSH_ALT(p + addr, s, sprev, pkeep); 02663 MOP_OUT; 02664 continue; 02665 } 02666 p += (addr + 1); 02667 MOP_OUT; 02668 continue; 02669 break; 02670 02671 case OP_PUSH_IF_PEEK_NEXT: MOP_IN(OP_PUSH_IF_PEEK_NEXT); 02672 GET_RELADDR_INC(addr, p); 02673 if (*p == *s) { 02674 p++; 02675 STACK_PUSH_ALT(p + addr, s, sprev, pkeep); 02676 MOP_OUT; 02677 continue; 02678 } 02679 p++; 02680 MOP_OUT; 02681 continue; 02682 break; 02683 02684 case OP_REPEAT: MOP_IN(OP_REPEAT); 02685 { 02686 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ 02687 GET_RELADDR_INC(addr, p); 02688 02689 STACK_ENSURE(1); 02690 repeat_stk[mem] = GET_STACK_INDEX(stk); 02691 STACK_PUSH_REPEAT(mem, p); 02692 02693 if (reg->repeat_range[mem].lower == 0) { 02694 STACK_PUSH_ALT(p + addr, s, sprev, pkeep); 02695 } 02696 } 02697 MOP_OUT; 02698 continue; 02699 break; 02700 02701 case OP_REPEAT_NG: MOP_IN(OP_REPEAT_NG); 02702 { 02703 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ 02704 GET_RELADDR_INC(addr, p); 02705 02706 STACK_ENSURE(1); 02707 repeat_stk[mem] = GET_STACK_INDEX(stk); 02708 STACK_PUSH_REPEAT(mem, p); 02709 02710 if (reg->repeat_range[mem].lower == 0) { 02711 STACK_PUSH_ALT(p, s, sprev, pkeep); 02712 p += addr; 02713 } 02714 } 02715 MOP_OUT; 02716 continue; 02717 break; 02718 02719 case OP_REPEAT_INC: MOP_IN(OP_REPEAT_INC); 02720 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ 02721 si = repeat_stk[mem]; 02722 stkp = STACK_AT(si); 02723 02724 repeat_inc: 02725 stkp->u.repeat.count++; 02726 if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) { 02727 /* end of repeat. Nothing to do. */ 02728 } 02729 else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { 02730 STACK_PUSH_ALT(p, s, sprev, pkeep); 02731 p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */ 02732 } 02733 else { 02734 p = stkp->u.repeat.pcode; 02735 } 02736 STACK_PUSH_REPEAT_INC(si); 02737 MOP_OUT; 02738 CHECK_INTERRUPT_IN_MATCH_AT; 02739 continue; 02740 break; 02741 02742 case OP_REPEAT_INC_SG: MOP_IN(OP_REPEAT_INC_SG); 02743 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ 02744 STACK_GET_REPEAT(mem, stkp); 02745 si = GET_STACK_INDEX(stkp); 02746 goto repeat_inc; 02747 break; 02748 02749 case OP_REPEAT_INC_NG: MOP_IN(OP_REPEAT_INC_NG); 02750 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ 02751 si = repeat_stk[mem]; 02752 stkp = STACK_AT(si); 02753 02754 repeat_inc_ng: 02755 stkp->u.repeat.count++; 02756 if (stkp->u.repeat.count < reg->repeat_range[mem].upper) { 02757 if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { 02758 UChar* pcode = stkp->u.repeat.pcode; 02759 02760 STACK_PUSH_REPEAT_INC(si); 02761 STACK_PUSH_ALT(pcode, s, sprev, pkeep); 02762 } 02763 else { 02764 p = stkp->u.repeat.pcode; 02765 STACK_PUSH_REPEAT_INC(si); 02766 } 02767 } 02768 else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) { 02769 STACK_PUSH_REPEAT_INC(si); 02770 } 02771 MOP_OUT; 02772 CHECK_INTERRUPT_IN_MATCH_AT; 02773 continue; 02774 break; 02775 02776 case OP_REPEAT_INC_NG_SG: MOP_IN(OP_REPEAT_INC_NG_SG); 02777 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ 02778 STACK_GET_REPEAT(mem, stkp); 02779 si = GET_STACK_INDEX(stkp); 02780 goto repeat_inc_ng; 02781 break; 02782 02783 case OP_PUSH_POS: MOP_IN(OP_PUSH_POS); 02784 STACK_PUSH_POS(s, sprev, pkeep); 02785 MOP_OUT; 02786 continue; 02787 break; 02788 02789 case OP_POP_POS: MOP_IN(OP_POP_POS); 02790 { 02791 STACK_POS_END(stkp); 02792 s = stkp->u.state.pstr; 02793 sprev = stkp->u.state.pstr_prev; 02794 } 02795 MOP_OUT; 02796 continue; 02797 break; 02798 02799 case OP_PUSH_POS_NOT: MOP_IN(OP_PUSH_POS_NOT); 02800 GET_RELADDR_INC(addr, p); 02801 STACK_PUSH_POS_NOT(p + addr, s, sprev, pkeep); 02802 MOP_OUT; 02803 continue; 02804 break; 02805 02806 case OP_FAIL_POS: MOP_IN(OP_FAIL_POS); 02807 STACK_POP_TIL_POS_NOT; 02808 goto fail; 02809 break; 02810 02811 case OP_PUSH_STOP_BT: MOP_IN(OP_PUSH_STOP_BT); 02812 STACK_PUSH_STOP_BT; 02813 MOP_OUT; 02814 continue; 02815 break; 02816 02817 case OP_POP_STOP_BT: MOP_IN(OP_POP_STOP_BT); 02818 STACK_STOP_BT_END; 02819 MOP_OUT; 02820 continue; 02821 break; 02822 02823 case OP_LOOK_BEHIND: MOP_IN(OP_LOOK_BEHIND); 02824 GET_LENGTH_INC(tlen, p); 02825 s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen); 02826 if (IS_NULL(s)) goto fail; 02827 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end); 02828 MOP_OUT; 02829 continue; 02830 break; 02831 02832 case OP_PUSH_LOOK_BEHIND_NOT: MOP_IN(OP_PUSH_LOOK_BEHIND_NOT); 02833 GET_RELADDR_INC(addr, p); 02834 GET_LENGTH_INC(tlen, p); 02835 q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen); 02836 if (IS_NULL(q)) { 02837 /* too short case -> success. ex. /(?<!XXX)a/.match("a") 02838 If you want to change to fail, replace following line. */ 02839 p += addr; 02840 /* goto fail; */ 02841 } 02842 else { 02843 STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev, pkeep); 02844 s = q; 02845 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end); 02846 } 02847 MOP_OUT; 02848 continue; 02849 break; 02850 02851 case OP_FAIL_LOOK_BEHIND_NOT: MOP_IN(OP_FAIL_LOOK_BEHIND_NOT); 02852 STACK_POP_TIL_LOOK_BEHIND_NOT; 02853 goto fail; 02854 break; 02855 02856 #ifdef USE_SUBEXP_CALL 02857 case OP_CALL: MOP_IN(OP_CALL); 02858 GET_ABSADDR_INC(addr, p); 02859 STACK_PUSH_CALL_FRAME(p); 02860 p = reg->p + addr; 02861 MOP_OUT; 02862 continue; 02863 break; 02864 02865 case OP_RETURN: MOP_IN(OP_RETURN); 02866 STACK_RETURN(p); 02867 STACK_PUSH_RETURN; 02868 MOP_OUT; 02869 continue; 02870 break; 02871 #endif 02872 02873 case OP_CONDITION: MOP_IN(OP_CONDITION); 02874 GET_MEMNUM_INC(mem, p); 02875 GET_RELADDR_INC(addr, p); 02876 if ((mem > num_mem) || 02877 (mem_end_stk[mem] == INVALID_STACK_INDEX) || 02878 (mem_start_stk[mem] == INVALID_STACK_INDEX)) { 02879 p += addr; 02880 } 02881 MOP_OUT; 02882 continue; 02883 break; 02884 02885 case OP_FINISH: 02886 goto finish; 02887 break; 02888 02889 fail: 02890 MOP_OUT; 02891 /* fall */ 02892 case OP_FAIL: MOP_IN(OP_FAIL); 02893 STACK_POP; 02894 p = stk->u.state.pcode; 02895 s = stk->u.state.pstr; 02896 sprev = stk->u.state.pstr_prev; 02897 pkeep = stk->u.state.pkeep; 02898 02899 #ifdef USE_COMBINATION_EXPLOSION_CHECK 02900 if (stk->u.state.state_check != 0) { 02901 stk->type = STK_STATE_CHECK_MARK; 02902 stk++; 02903 } 02904 #endif 02905 02906 MOP_OUT; 02907 continue; 02908 break; 02909 02910 default: 02911 goto bytecode_error; 02912 02913 } /* end of switch */ 02914 sprev = sbegin; 02915 } /* end of while(1) */ 02916 02917 finish: 02918 STACK_SAVE; 02919 return best_len; 02920 02921 #ifdef ONIG_DEBUG 02922 stack_error: 02923 STACK_SAVE; 02924 return ONIGERR_STACK_BUG; 02925 #endif 02926 02927 bytecode_error: 02928 STACK_SAVE; 02929 return ONIGERR_UNDEFINED_BYTECODE; 02930 02931 unexpected_bytecode_error: 02932 STACK_SAVE; 02933 return ONIGERR_UNEXPECTED_BYTECODE; 02934 } 02935 02936 02937 static UChar* 02938 slow_search(OnigEncoding enc, UChar* target, UChar* target_end, 02939 const UChar* text, const UChar* text_end, UChar* text_range) 02940 { 02941 UChar *t, *p, *s, *end; 02942 02943 end = (UChar* )text_end; 02944 end -= target_end - target - 1; 02945 if (end > text_range) 02946 end = text_range; 02947 02948 s = (UChar* )text; 02949 02950 if (enc->max_enc_len == enc->min_enc_len) { 02951 int n = enc->max_enc_len; 02952 02953 while (s < end) { 02954 if (*s == *target) { 02955 p = s + 1; 02956 t = target + 1; 02957 if (target_end == t || memcmp(t, p, target_end - t) == 0) 02958 return s; 02959 } 02960 s += n; 02961 } 02962 return (UChar* )NULL; 02963 } 02964 while (s < end) { 02965 if (*s == *target) { 02966 p = s + 1; 02967 t = target + 1; 02968 if (target_end == t || memcmp(t, p, target_end - t) == 0) 02969 return s; 02970 } 02971 s += enclen(enc, s, text_end); 02972 } 02973 02974 return (UChar* )NULL; 02975 } 02976 02977 static int 02978 str_lower_case_match(OnigEncoding enc, int case_fold_flag, 02979 const UChar* t, const UChar* tend, 02980 const UChar* p, const UChar* end) 02981 { 02982 int lowlen; 02983 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; 02984 02985 while (t < tend) { 02986 lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf); 02987 q = lowbuf; 02988 while (lowlen > 0) { 02989 if (*t++ != *q++) return 0; 02990 lowlen--; 02991 } 02992 } 02993 02994 return 1; 02995 } 02996 02997 static UChar* 02998 slow_search_ic(OnigEncoding enc, int case_fold_flag, 02999 UChar* target, UChar* target_end, 03000 const UChar* text, const UChar* text_end, UChar* text_range) 03001 { 03002 UChar *s, *end; 03003 03004 end = (UChar* )text_end; 03005 end -= target_end - target - 1; 03006 if (end > text_range) 03007 end = text_range; 03008 03009 s = (UChar* )text; 03010 03011 while (s < end) { 03012 if (str_lower_case_match(enc, case_fold_flag, target, target_end, 03013 s, text_end)) 03014 return s; 03015 03016 s += enclen(enc, s, text_end); 03017 } 03018 03019 return (UChar* )NULL; 03020 } 03021 03022 static UChar* 03023 slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end, 03024 const UChar* text, const UChar* adjust_text, 03025 const UChar* text_end, const UChar* text_start) 03026 { 03027 UChar *t, *p, *s; 03028 03029 s = (UChar* )text_end; 03030 s -= (target_end - target); 03031 if (s > text_start) 03032 s = (UChar* )text_start; 03033 else 03034 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end); 03035 03036 while (s >= text) { 03037 if (*s == *target) { 03038 p = s + 1; 03039 t = target + 1; 03040 while (t < target_end) { 03041 if (*t != *p++) 03042 break; 03043 t++; 03044 } 03045 if (t == target_end) 03046 return s; 03047 } 03048 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end); 03049 } 03050 03051 return (UChar* )NULL; 03052 } 03053 03054 static UChar* 03055 slow_search_backward_ic(OnigEncoding enc, int case_fold_flag, 03056 UChar* target, UChar* target_end, 03057 const UChar* text, const UChar* adjust_text, 03058 const UChar* text_end, const UChar* text_start) 03059 { 03060 UChar *s; 03061 03062 s = (UChar* )text_end; 03063 s -= (target_end - target); 03064 if (s > text_start) 03065 s = (UChar* )text_start; 03066 else 03067 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end); 03068 03069 while (s >= text) { 03070 if (str_lower_case_match(enc, case_fold_flag, 03071 target, target_end, s, text_end)) 03072 return s; 03073 03074 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end); 03075 } 03076 03077 return (UChar* )NULL; 03078 } 03079 03080 #ifndef USE_SUNDAY_QUICK_SEARCH 03081 /* Boyer-Moore-Horspool search applied to a multibyte string */ 03082 static UChar* 03083 bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, 03084 const UChar* text, const UChar* text_end, 03085 const UChar* text_range) 03086 { 03087 const UChar *s, *se, *t, *p, *end; 03088 const UChar *tail; 03089 ptrdiff_t skip, tlen1; 03090 03091 #ifdef ONIG_DEBUG_SEARCH 03092 fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n", 03093 text, text, text_end, text_end, text_range, text_range); 03094 #endif 03095 03096 tail = target_end - 1; 03097 tlen1 = tail - target; 03098 end = text_range; 03099 if (end + tlen1 > text_end) 03100 end = text_end - tlen1; 03101 03102 s = text; 03103 03104 if (IS_NULL(reg->int_map)) { 03105 while (s < end) { 03106 p = se = s + tlen1; 03107 t = tail; 03108 while (*p == *t) { 03109 if (t == target) return (UChar* )s; 03110 p--; t--; 03111 } 03112 skip = reg->map[*se]; 03113 t = s; 03114 do { 03115 s += enclen(reg->enc, s, end); 03116 } while ((s - t) < skip && s < end); 03117 } 03118 } 03119 else { 03120 while (s < end) { 03121 p = se = s + tlen1; 03122 t = tail; 03123 while (*p == *t) { 03124 if (t == target) return (UChar* )s; 03125 p--; t--; 03126 } 03127 skip = reg->int_map[*se]; 03128 t = s; 03129 do { 03130 s += enclen(reg->enc, s, end); 03131 } while ((s - t) < skip && s < end); 03132 } 03133 } 03134 03135 return (UChar* )NULL; 03136 } 03137 03138 /* Boyer-Moore-Horspool search */ 03139 static UChar* 03140 bm_search(regex_t* reg, const UChar* target, const UChar* target_end, 03141 const UChar* text, const UChar* text_end, const UChar* text_range) 03142 { 03143 const UChar *s, *t, *p, *end; 03144 const UChar *tail; 03145 03146 #ifdef ONIG_DEBUG_SEARCH 03147 fprintf(stderr, "bm_search: text: %"PRIuPTR", text_end: %"PRIuPTR", text_range: %"PRIuPTR"\n", 03148 text, text_end, text_range); 03149 #endif 03150 03151 end = text_range + (target_end - target) - 1; 03152 if (end > text_end) 03153 end = text_end; 03154 03155 tail = target_end - 1; 03156 s = text + (target_end - target) - 1; 03157 if (IS_NULL(reg->int_map)) { 03158 while (s < end) { 03159 p = s; 03160 t = tail; 03161 #ifdef ONIG_DEBUG_SEARCH 03162 fprintf(stderr, "bm_search_loop: pos: %d %s\n", 03163 (int)(s - text), s); 03164 #endif 03165 while (*p == *t) { 03166 if (t == target) return (UChar* )p; 03167 p--; t--; 03168 } 03169 s += reg->map[*s]; 03170 } 03171 } 03172 else { /* see int_map[] */ 03173 while (s < end) { 03174 p = s; 03175 t = tail; 03176 while (*p == *t) { 03177 if (t == target) return (UChar* )p; 03178 p--; t--; 03179 } 03180 s += reg->int_map[*s]; 03181 } 03182 } 03183 return (UChar* )NULL; 03184 } 03185 03186 /* Boyer-Moore-Horspool search applied to a multibyte string (ignore case) */ 03187 static UChar* 03188 bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end, 03189 const UChar* text, const UChar* text_end, 03190 const UChar* text_range) 03191 { 03192 const UChar *s, *se, *t, *end; 03193 const UChar *tail; 03194 ptrdiff_t skip, tlen1; 03195 OnigEncoding enc = reg->enc; 03196 int case_fold_flag = reg->case_fold_flag; 03197 03198 #ifdef ONIG_DEBUG_SEARCH 03199 fprintf(stderr, "bm_search_notrev_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n", 03200 (int )text, text, (int )text_end, text_end, (int )text_range, text_range); 03201 #endif 03202 03203 tail = target_end - 1; 03204 tlen1 = tail - target; 03205 end = text_range; 03206 if (end + tlen1 > text_end) 03207 end = text_end - tlen1; 03208 03209 s = text; 03210 03211 if (IS_NULL(reg->int_map)) { 03212 while (s < end) { 03213 se = s + tlen1; 03214 if (str_lower_case_match(enc, case_fold_flag, target, target_end, 03215 s, se + 1)) 03216 return (UChar* )s; 03217 skip = reg->map[*se]; 03218 t = s; 03219 do { 03220 s += enclen(reg->enc, s, end); 03221 } while ((s - t) < skip && s < end); 03222 } 03223 } 03224 else { 03225 while (s < end) { 03226 se = s + tlen1; 03227 if (str_lower_case_match(enc, case_fold_flag, target, target_end, 03228 s, se + 1)) 03229 return (UChar* )s; 03230 skip = reg->int_map[*se]; 03231 t = s; 03232 do { 03233 s += enclen(reg->enc, s, end); 03234 } while ((s - t) < skip && s < end); 03235 } 03236 } 03237 03238 return (UChar* )NULL; 03239 } 03240 03241 /* Boyer-Moore-Horspool search (ignore case) */ 03242 static UChar* 03243 bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end, 03244 const UChar* text, const UChar* text_end, const UChar* text_range) 03245 { 03246 const UChar *s, *p, *end; 03247 const UChar *tail; 03248 OnigEncoding enc = reg->enc; 03249 int case_fold_flag = reg->case_fold_flag; 03250 03251 #ifdef ONIG_DEBUG_SEARCH 03252 fprintf(stderr, "bm_search_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n", 03253 (int )text, text, (int )text_end, text_end, (int )text_range, text_range); 03254 #endif 03255 03256 end = text_range + (target_end - target) - 1; 03257 if (end > text_end) 03258 end = text_end; 03259 03260 tail = target_end - 1; 03261 s = text + (target_end - target) - 1; 03262 if (IS_NULL(reg->int_map)) { 03263 while (s < end) { 03264 p = s - (target_end - target) + 1; 03265 if (str_lower_case_match(enc, case_fold_flag, target, target_end, 03266 p, s + 1)) 03267 return (UChar* )p; 03268 s += reg->map[*s]; 03269 } 03270 } 03271 else { /* see int_map[] */ 03272 while (s < end) { 03273 p = s - (target_end - target) + 1; 03274 if (str_lower_case_match(enc, case_fold_flag, target, target_end, 03275 p, s + 1)) 03276 return (UChar* )p; 03277 s += reg->int_map[*s]; 03278 } 03279 } 03280 return (UChar* )NULL; 03281 } 03282 03283 #else /* USE_SUNDAY_QUICK_SEARCH */ 03284 03285 /* Sunday's quick search applied to a multibyte string */ 03286 static UChar* 03287 bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, 03288 const UChar* text, const UChar* text_end, 03289 const UChar* text_range) 03290 { 03291 const UChar *s, *se, *t, *p, *end; 03292 const UChar *tail; 03293 ptrdiff_t skip, tlen1; 03294 OnigEncoding enc = reg->enc; 03295 03296 #ifdef ONIG_DEBUG_SEARCH 03297 fprintf(stderr, "bm_search_notrev: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n", 03298 (int )text, text, (int )text_end, text_end, (int )text_range, text_range); 03299 #endif 03300 03301 tail = target_end - 1; 03302 tlen1 = tail - target; 03303 end = text_range; 03304 if (end + tlen1 > text_end) 03305 end = text_end - tlen1; 03306 03307 s = text; 03308 03309 if (IS_NULL(reg->int_map)) { 03310 while (s < end) { 03311 p = se = s + tlen1; 03312 t = tail; 03313 while (*p == *t) { 03314 if (t == target) return (UChar* )s; 03315 p--; t--; 03316 } 03317 if (s + 1 >= end) break; 03318 skip = reg->map[se[1]]; 03319 t = s; 03320 do { 03321 s += enclen(enc, s, end); 03322 } while ((s - t) < skip && s < end); 03323 } 03324 } 03325 else { 03326 while (s < end) { 03327 p = se = s + tlen1; 03328 t = tail; 03329 while (*p == *t) { 03330 if (t == target) return (UChar* )s; 03331 p--; t--; 03332 } 03333 if (s + 1 >= end) break; 03334 skip = reg->int_map[se[1]]; 03335 t = s; 03336 do { 03337 s += enclen(enc, s, end); 03338 } while ((s - t) < skip && s < end); 03339 } 03340 } 03341 03342 return (UChar* )NULL; 03343 } 03344 03345 /* Sunday's quick search */ 03346 static UChar* 03347 bm_search(regex_t* reg, const UChar* target, const UChar* target_end, 03348 const UChar* text, const UChar* text_end, const UChar* text_range) 03349 { 03350 const UChar *s, *t, *p, *end; 03351 const UChar *tail; 03352 ptrdiff_t tlen1; 03353 03354 tail = target_end - 1; 03355 tlen1 = tail - target; 03356 end = text_range + tlen1; 03357 if (end > text_end) 03358 end = text_end; 03359 03360 s = text + tlen1; 03361 if (IS_NULL(reg->int_map)) { 03362 while (s < end) { 03363 p = s; 03364 t = tail; 03365 while (*p == *t) { 03366 if (t == target) return (UChar* )p; 03367 p--; t--; 03368 } 03369 if (s + 1 >= end) break; 03370 s += reg->map[s[1]]; 03371 } 03372 } 03373 else { /* see int_map[] */ 03374 while (s < end) { 03375 p = s; 03376 t = tail; 03377 while (*p == *t) { 03378 if (t == target) return (UChar* )p; 03379 p--; t--; 03380 } 03381 if (s + 1 >= end) break; 03382 s += reg->int_map[s[1]]; 03383 } 03384 } 03385 return (UChar* )NULL; 03386 } 03387 03388 /* Sunday's quick search applied to a multibyte string (ignore case) */ 03389 static UChar* 03390 bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end, 03391 const UChar* text, const UChar* text_end, 03392 const UChar* text_range) 03393 { 03394 const UChar *s, *se, *t, *end; 03395 const UChar *tail; 03396 ptrdiff_t skip, tlen1; 03397 OnigEncoding enc = reg->enc; 03398 int case_fold_flag = reg->case_fold_flag; 03399 03400 #ifdef ONIG_DEBUG_SEARCH 03401 fprintf(stderr, "bm_search_notrev_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n", 03402 (int )text, text, (int )text_end, text_end, (int )text_range, text_range); 03403 #endif 03404 03405 tail = target_end - 1; 03406 tlen1 = tail - target; 03407 end = text_range; 03408 if (end + tlen1 > text_end) 03409 end = text_end - tlen1; 03410 03411 s = text; 03412 03413 if (IS_NULL(reg->int_map)) { 03414 while (s < end) { 03415 se = s + tlen1; 03416 if (str_lower_case_match(enc, case_fold_flag, target, target_end, 03417 s, se + 1)) 03418 return (UChar* )s; 03419 if (s + 1 >= end) break; 03420 skip = reg->map[se[1]]; 03421 t = s; 03422 do { 03423 s += enclen(enc, s, end); 03424 } while ((s - t) < skip && s < end); 03425 } 03426 } 03427 else { 03428 while (s < end) { 03429 se = s + tlen1; 03430 if (str_lower_case_match(enc, case_fold_flag, target, target_end, 03431 s, se + 1)) 03432 return (UChar* )s; 03433 if (s + 1 >= end) break; 03434 skip = reg->int_map[se[1]]; 03435 t = s; 03436 do { 03437 s += enclen(enc, s, end); 03438 } while ((s - t) < skip && s < end); 03439 } 03440 } 03441 03442 return (UChar* )NULL; 03443 } 03444 03445 /* Sunday's quick search (ignore case) */ 03446 static UChar* 03447 bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end, 03448 const UChar* text, const UChar* text_end, const UChar* text_range) 03449 { 03450 const UChar *s, *p, *end; 03451 const UChar *tail; 03452 ptrdiff_t tlen1; 03453 OnigEncoding enc = reg->enc; 03454 int case_fold_flag = reg->case_fold_flag; 03455 03456 #ifdef ONIG_DEBUG_SEARCH 03457 fprintf(stderr, "bm_search_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n", 03458 (int )text, text, (int )text_end, text_end, (int )text_range, text_range); 03459 #endif 03460 03461 tail = target_end - 1; 03462 tlen1 = tail - target; 03463 end = text_range + tlen1; 03464 if (end > text_end) 03465 end = text_end; 03466 03467 s = text + tlen1; 03468 if (IS_NULL(reg->int_map)) { 03469 while (s < end) { 03470 p = s - tlen1; 03471 if (str_lower_case_match(enc, case_fold_flag, target, target_end, 03472 p, s + 1)) 03473 return (UChar* )p; 03474 if (s + 1 >= end) break; 03475 s += reg->map[s[1]]; 03476 } 03477 } 03478 else { /* see int_map[] */ 03479 while (s < end) { 03480 p = s - tlen1; 03481 if (str_lower_case_match(enc, case_fold_flag, target, target_end, 03482 p, s + 1)) 03483 return (UChar* )p; 03484 if (s + 1 >= end) break; 03485 s += reg->int_map[s[1]]; 03486 } 03487 } 03488 return (UChar* )NULL; 03489 } 03490 #endif /* USE_SUNDAY_QUICK_SEARCH */ 03491 03492 static int 03493 set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, 03494 int** skip) 03495 { 03496 int i, len; 03497 03498 if (IS_NULL(*skip)) { 03499 *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); 03500 if (IS_NULL(*skip)) return ONIGERR_MEMORY; 03501 } 03502 03503 len = (int )(end - s); 03504 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) 03505 (*skip)[i] = len; 03506 03507 for (i = len - 1; i > 0; i--) 03508 (*skip)[s[i]] = i; 03509 03510 return 0; 03511 } 03512 03513 static UChar* 03514 bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end, 03515 const UChar* text, const UChar* adjust_text, 03516 const UChar* text_end, const UChar* text_start) 03517 { 03518 const UChar *s, *t, *p; 03519 03520 s = text_end - (target_end - target); 03521 if (text_start < s) 03522 s = text_start; 03523 else 03524 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end); 03525 03526 while (s >= text) { 03527 p = s; 03528 t = target; 03529 while (t < target_end && *p == *t) { 03530 p++; t++; 03531 } 03532 if (t == target_end) 03533 return (UChar* )s; 03534 03535 s -= reg->int_map_backward[*s]; 03536 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end); 03537 } 03538 03539 return (UChar* )NULL; 03540 } 03541 03542 static UChar* 03543 map_search(OnigEncoding enc, UChar map[], 03544 const UChar* text, const UChar* text_range, const UChar* text_end) 03545 { 03546 const UChar *s = text; 03547 03548 while (s < text_range) { 03549 if (map[*s]) return (UChar* )s; 03550 03551 s += enclen(enc, s, text_end); 03552 } 03553 return (UChar* )NULL; 03554 } 03555 03556 static UChar* 03557 map_search_backward(OnigEncoding enc, UChar map[], 03558 const UChar* text, const UChar* adjust_text, 03559 const UChar* text_start, const UChar* text_end) 03560 { 03561 const UChar *s = text_start; 03562 03563 while (s >= text) { 03564 if (map[*s]) return (UChar* )s; 03565 03566 s = onigenc_get_prev_char_head(enc, adjust_text, s, text_end); 03567 } 03568 return (UChar* )NULL; 03569 } 03570 03571 extern OnigPosition 03572 onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region, 03573 OnigOptionType option) 03574 { 03575 ptrdiff_t r; 03576 UChar *prev; 03577 OnigMatchArg msa; 03578 03579 #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) 03580 start: 03581 THREAD_ATOMIC_START; 03582 if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) { 03583 ONIG_STATE_INC(reg); 03584 if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { 03585 onig_chain_reduce(reg); 03586 ONIG_STATE_INC(reg); 03587 } 03588 } 03589 else { 03590 int n; 03591 03592 THREAD_ATOMIC_END; 03593 n = 0; 03594 while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) { 03595 if (++n > THREAD_PASS_LIMIT_COUNT) 03596 return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; 03597 THREAD_PASS; 03598 } 03599 goto start; 03600 } 03601 THREAD_ATOMIC_END; 03602 #endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ 03603 03604 MATCH_ARG_INIT(msa, option, region, at, at); 03605 #ifdef USE_COMBINATION_EXPLOSION_CHECK 03606 { 03607 int offset = at - str; 03608 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check); 03609 } 03610 #endif 03611 03612 if (region 03613 #ifdef USE_POSIX_API_REGION_OPTION 03614 && !IS_POSIX_REGION(option) 03615 #endif 03616 ) { 03617 r = onig_region_resize_clear(region, reg->num_mem + 1); 03618 } 03619 else 03620 r = 0; 03621 03622 if (r == 0) { 03623 prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at, end); 03624 r = match_at(reg, str, end, 03625 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE 03626 end, 03627 #endif 03628 at, prev, &msa); 03629 } 03630 03631 MATCH_ARG_FREE(msa); 03632 ONIG_STATE_DEC_THREAD(reg); 03633 return r; 03634 } 03635 03636 static int 03637 forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, 03638 UChar* range, UChar** low, UChar** high, UChar** low_prev) 03639 { 03640 UChar *p, *pprev = (UChar* )NULL; 03641 03642 #ifdef ONIG_DEBUG_SEARCH 03643 fprintf(stderr, "forward_search_range: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), s: %"PRIuPTR" (%p), range: %"PRIuPTR" (%p)\n", 03644 str, str, end, end, s, s, range, range); 03645 #endif 03646 03647 p = s; 03648 if (reg->dmin > 0) { 03649 if (ONIGENC_IS_SINGLEBYTE(reg->enc)) { 03650 p += reg->dmin; 03651 } 03652 else { 03653 UChar *q = p + reg->dmin; 03654 while (p < q) p += enclen(reg->enc, p, end); 03655 } 03656 } 03657 03658 retry: 03659 switch (reg->optimize) { 03660 case ONIG_OPTIMIZE_EXACT: 03661 p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range); 03662 break; 03663 case ONIG_OPTIMIZE_EXACT_IC: 03664 p = slow_search_ic(reg->enc, reg->case_fold_flag, 03665 reg->exact, reg->exact_end, p, end, range); 03666 break; 03667 03668 case ONIG_OPTIMIZE_EXACT_BM: 03669 p = bm_search(reg, reg->exact, reg->exact_end, p, end, range); 03670 break; 03671 03672 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: 03673 p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range); 03674 break; 03675 03676 case ONIG_OPTIMIZE_EXACT_BM_IC: 03677 p = bm_search_ic(reg, reg->exact, reg->exact_end, p, end, range); 03678 break; 03679 03680 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC: 03681 p = bm_search_notrev_ic(reg, reg->exact, reg->exact_end, p, end, range); 03682 break; 03683 03684 case ONIG_OPTIMIZE_MAP: 03685 p = map_search(reg->enc, reg->map, p, range, end); 03686 break; 03687 } 03688 03689 if (p && p < range) { 03690 if (p - reg->dmin < s) { 03691 retry_gate: 03692 pprev = p; 03693 p += enclen(reg->enc, p, end); 03694 goto retry; 03695 } 03696 03697 if (reg->sub_anchor) { 03698 UChar* prev; 03699 03700 switch (reg->sub_anchor) { 03701 case ANCHOR_BEGIN_LINE: 03702 if (!ON_STR_BEGIN(p)) { 03703 prev = onigenc_get_prev_char_head(reg->enc, 03704 (pprev ? pprev : str), p, end); 03705 if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)) 03706 goto retry_gate; 03707 } 03708 break; 03709 03710 case ANCHOR_END_LINE: 03711 if (ON_STR_END(p)) { 03712 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE 03713 prev = (UChar* )onigenc_get_prev_char_head(reg->enc, 03714 (pprev ? pprev : str), p); 03715 if (prev && ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1)) 03716 goto retry_gate; 03717 #endif 03718 } 03719 else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1)) 03720 goto retry_gate; 03721 break; 03722 } 03723 } 03724 03725 if (reg->dmax == 0) { 03726 *low = p; 03727 if (low_prev) { 03728 if (*low > s) 03729 *low_prev = onigenc_get_prev_char_head(reg->enc, s, p, end); 03730 else 03731 *low_prev = onigenc_get_prev_char_head(reg->enc, 03732 (pprev ? pprev : str), p, end); 03733 } 03734 } 03735 else { 03736 if (reg->dmax != ONIG_INFINITE_DISTANCE) { 03737 *low = p - reg->dmax; 03738 if (*low > s) { 03739 *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s, 03740 *low, end, (const UChar** )low_prev); 03741 if (low_prev && IS_NULL(*low_prev)) 03742 *low_prev = onigenc_get_prev_char_head(reg->enc, 03743 (pprev ? pprev : s), *low, end); 03744 } 03745 else { 03746 if (low_prev) 03747 *low_prev = onigenc_get_prev_char_head(reg->enc, 03748 (pprev ? pprev : str), *low, end); 03749 } 03750 } 03751 } 03752 /* no needs to adjust *high, *high is used as range check only */ 03753 *high = p - reg->dmin; 03754 03755 #ifdef ONIG_DEBUG_SEARCH 03756 fprintf(stderr, 03757 "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n", 03758 (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax); 03759 #endif 03760 return 1; /* success */ 03761 } 03762 03763 return 0; /* fail */ 03764 } 03765 03766 static int set_bm_backward_skip P_((UChar* s, UChar* end, OnigEncoding enc, 03767 int** skip)); 03768 03769 #define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100 03770 03771 static int 03772 backward_search_range(regex_t* reg, const UChar* str, const UChar* end, 03773 UChar* s, const UChar* range, UChar* adjrange, 03774 UChar** low, UChar** high) 03775 { 03776 int r; 03777 UChar *p; 03778 03779 range += reg->dmin; 03780 p = s; 03781 03782 retry: 03783 switch (reg->optimize) { 03784 case ONIG_OPTIMIZE_EXACT: 03785 exact_method: 03786 p = slow_search_backward(reg->enc, reg->exact, reg->exact_end, 03787 range, adjrange, end, p); 03788 break; 03789 03790 case ONIG_OPTIMIZE_EXACT_IC: 03791 case ONIG_OPTIMIZE_EXACT_BM_IC: 03792 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC: 03793 p = slow_search_backward_ic(reg->enc, reg->case_fold_flag, 03794 reg->exact, reg->exact_end, 03795 range, adjrange, end, p); 03796 break; 03797 03798 case ONIG_OPTIMIZE_EXACT_BM: 03799 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: 03800 if (IS_NULL(reg->int_map_backward)) { 03801 if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD) 03802 goto exact_method; 03803 03804 r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc, 03805 &(reg->int_map_backward)); 03806 if (r) return r; 03807 } 03808 p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange, 03809 end, p); 03810 break; 03811 03812 case ONIG_OPTIMIZE_MAP: 03813 p = map_search_backward(reg->enc, reg->map, range, adjrange, p, end); 03814 break; 03815 } 03816 03817 if (p) { 03818 if (reg->sub_anchor) { 03819 UChar* prev; 03820 03821 switch (reg->sub_anchor) { 03822 case ANCHOR_BEGIN_LINE: 03823 if (!ON_STR_BEGIN(p)) { 03824 prev = onigenc_get_prev_char_head(reg->enc, str, p, end); 03825 if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)) { 03826 p = prev; 03827 goto retry; 03828 } 03829 } 03830 break; 03831 03832 case ANCHOR_END_LINE: 03833 if (ON_STR_END(p)) { 03834 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE 03835 prev = onigenc_get_prev_char_head(reg->enc, adjrange, p); 03836 if (IS_NULL(prev)) goto fail; 03837 if (ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1)) { 03838 p = prev; 03839 goto retry; 03840 } 03841 #endif 03842 } 03843 else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1)) { 03844 p = onigenc_get_prev_char_head(reg->enc, adjrange, p, end); 03845 if (IS_NULL(p)) goto fail; 03846 goto retry; 03847 } 03848 break; 03849 } 03850 } 03851 03852 /* no needs to adjust *high, *high is used as range check only */ 03853 if (reg->dmax != ONIG_INFINITE_DISTANCE) { 03854 *low = p - reg->dmax; 03855 *high = p - reg->dmin; 03856 *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high, end); 03857 } 03858 03859 #ifdef ONIG_DEBUG_SEARCH 03860 fprintf(stderr, "backward_search_range: low: %d, high: %d\n", 03861 (int )(*low - str), (int )(*high - str)); 03862 #endif 03863 return 1; /* success */ 03864 } 03865 03866 fail: 03867 #ifdef ONIG_DEBUG_SEARCH 03868 fprintf(stderr, "backward_search_range: fail.\n"); 03869 #endif 03870 return 0; /* fail */ 03871 } 03872 03873 03874 extern OnigPosition 03875 onig_search(regex_t* reg, const UChar* str, const UChar* end, 03876 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option) 03877 { 03878 return onig_search_gpos(reg, str, end, start, start, range, region, option); 03879 } 03880 03881 extern OnigPosition 03882 onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end, 03883 const UChar* global_pos, 03884 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option) 03885 { 03886 ptrdiff_t r; 03887 UChar *s, *prev; 03888 OnigMatchArg msa; 03889 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE 03890 const UChar *orig_start = start; 03891 const UChar *orig_range = range; 03892 #endif 03893 03894 #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) 03895 start: 03896 THREAD_ATOMIC_START; 03897 if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) { 03898 ONIG_STATE_INC(reg); 03899 if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { 03900 onig_chain_reduce(reg); 03901 ONIG_STATE_INC(reg); 03902 } 03903 } 03904 else { 03905 int n; 03906 03907 THREAD_ATOMIC_END; 03908 n = 0; 03909 while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) { 03910 if (++n > THREAD_PASS_LIMIT_COUNT) 03911 return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; 03912 THREAD_PASS; 03913 } 03914 goto start; 03915 } 03916 THREAD_ATOMIC_END; 03917 #endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ 03918 03919 #ifdef ONIG_DEBUG_SEARCH 03920 fprintf(stderr, 03921 "onig_search (entry point): str: %"PRIuPTR" (%p), end: %"PRIuPTR", start: %"PRIuPTR", range: %"PRIuPTR"\n", 03922 str, str, end - str, start - str, range - str); 03923 #endif 03924 03925 if (region 03926 #ifdef USE_POSIX_API_REGION_OPTION 03927 && !IS_POSIX_REGION(option) 03928 #endif 03929 ) { 03930 r = onig_region_resize_clear(region, reg->num_mem + 1); 03931 if (r) goto finish_no_msa; 03932 } 03933 03934 if (start > end || start < str) goto mismatch_no_msa; 03935 03936 03937 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE 03938 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE 03939 #define MATCH_AND_RETURN_CHECK(upper_range) \ 03940 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ 03941 if (r != ONIG_MISMATCH) {\ 03942 if (r >= 0) {\ 03943 if (! IS_FIND_LONGEST(reg->options)) {\ 03944 goto match;\ 03945 }\ 03946 }\ 03947 else goto finish; /* error */ \ 03948 } 03949 #else 03950 #define MATCH_AND_RETURN_CHECK(upper_range) \ 03951 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ 03952 if (r != ONIG_MISMATCH) {\ 03953 if (r >= 0) {\ 03954 goto match;\ 03955 }\ 03956 else goto finish; /* error */ \ 03957 } 03958 #endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ 03959 #else 03960 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE 03961 #define MATCH_AND_RETURN_CHECK(none) \ 03962 r = match_at(reg, str, end, s, prev, &msa);\ 03963 if (r != ONIG_MISMATCH) {\ 03964 if (r >= 0) {\ 03965 if (! IS_FIND_LONGEST(reg->options)) {\ 03966 goto match;\ 03967 }\ 03968 }\ 03969 else goto finish; /* error */ \ 03970 } 03971 #else 03972 #define MATCH_AND_RETURN_CHECK(none) \ 03973 r = match_at(reg, str, end, s, prev, &msa);\ 03974 if (r != ONIG_MISMATCH) {\ 03975 if (r >= 0) {\ 03976 goto match;\ 03977 }\ 03978 else goto finish; /* error */ \ 03979 } 03980 #endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ 03981 #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ 03982 03983 03984 /* anchor optimize: resume search range */ 03985 if (reg->anchor != 0 && str < end) { 03986 UChar *min_semi_end, *max_semi_end; 03987 03988 if (reg->anchor & ANCHOR_BEGIN_POSITION) { 03989 /* search start-position only */ 03990 begin_position: 03991 if (range > start) 03992 range = start + 1; 03993 else 03994 range = start; 03995 } 03996 else if (reg->anchor & ANCHOR_BEGIN_BUF) { 03997 /* search str-position only */ 03998 if (range > start) { 03999 if (start != str) goto mismatch_no_msa; 04000 range = str + 1; 04001 } 04002 else { 04003 if (range <= str) { 04004 start = str; 04005 range = str; 04006 } 04007 else 04008 goto mismatch_no_msa; 04009 } 04010 } 04011 else if (reg->anchor & ANCHOR_END_BUF) { 04012 min_semi_end = max_semi_end = (UChar* )end; 04013 04014 end_buf: 04015 if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin) 04016 goto mismatch_no_msa; 04017 04018 if (range > start) { 04019 if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) { 04020 start = min_semi_end - reg->anchor_dmax; 04021 if (start < end) 04022 start = onigenc_get_right_adjust_char_head(reg->enc, str, start, end); 04023 } 04024 if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) { 04025 range = max_semi_end - reg->anchor_dmin + 1; 04026 } 04027 04028 if (start > range) goto mismatch_no_msa; 04029 /* If start == range, match with empty at end. 04030 Backward search is used. */ 04031 } 04032 else { 04033 if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) { 04034 range = min_semi_end - reg->anchor_dmax; 04035 } 04036 if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) { 04037 start = max_semi_end - reg->anchor_dmin; 04038 start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start, end); 04039 } 04040 if (range > start) goto mismatch_no_msa; 04041 } 04042 } 04043 else if (reg->anchor & ANCHOR_SEMI_END_BUF) { 04044 UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, end, 1); 04045 04046 max_semi_end = (UChar* )end; 04047 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) { 04048 min_semi_end = pre_end; 04049 04050 #ifdef USE_CRNL_AS_LINE_TERMINATOR 04051 pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, end, 1); 04052 if (IS_NOT_NULL(pre_end) && 04053 IS_NEWLINE_CRLF(reg->options) && 04054 ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) { 04055 min_semi_end = pre_end; 04056 } 04057 #endif 04058 if (min_semi_end > str && start <= min_semi_end) { 04059 goto end_buf; 04060 } 04061 } 04062 else { 04063 min_semi_end = (UChar* )end; 04064 goto end_buf; 04065 } 04066 } 04067 else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) { 04068 if (! (reg->anchor & ANCHOR_LOOK_BEHIND)) { 04069 goto begin_position; 04070 } 04071 } 04072 } 04073 else if (str == end) { /* empty string */ 04074 static const UChar address_for_empty_string[] = ""; 04075 04076 #ifdef ONIG_DEBUG_SEARCH 04077 fprintf(stderr, "onig_search: empty string.\n"); 04078 #endif 04079 04080 if (reg->threshold_len == 0) { 04081 start = end = str = address_for_empty_string; 04082 s = (UChar* )start; 04083 prev = (UChar* )NULL; 04084 04085 MATCH_ARG_INIT(msa, option, region, start, start); 04086 #ifdef USE_COMBINATION_EXPLOSION_CHECK 04087 msa.state_check_buff = (void* )0; 04088 msa.state_check_buff_size = 0; /* NO NEED, for valgrind */ 04089 #endif 04090 MATCH_AND_RETURN_CHECK(end); 04091 goto mismatch; 04092 } 04093 goto mismatch_no_msa; 04094 } 04095 04096 #ifdef ONIG_DEBUG_SEARCH 04097 fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n", 04098 (int )(end - str), (int )(start - str), (int )(range - str)); 04099 #endif 04100 04101 MATCH_ARG_INIT(msa, option, region, start, global_pos); 04102 #ifdef USE_COMBINATION_EXPLOSION_CHECK 04103 { 04104 int offset = (MIN(start, range) - str); 04105 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check); 04106 } 04107 #endif 04108 04109 s = (UChar* )start; 04110 if (range > start) { /* forward search */ 04111 if (s > str) 04112 prev = onigenc_get_prev_char_head(reg->enc, str, s, end); 04113 else 04114 prev = (UChar* )NULL; 04115 04116 if (reg->optimize != ONIG_OPTIMIZE_NONE) { 04117 UChar *sch_range, *low, *high, *low_prev; 04118 04119 sch_range = (UChar* )range; 04120 if (reg->dmax != 0) { 04121 if (reg->dmax == ONIG_INFINITE_DISTANCE) 04122 sch_range = (UChar* )end; 04123 else { 04124 sch_range += reg->dmax; 04125 if (sch_range > end) sch_range = (UChar* )end; 04126 } 04127 } 04128 04129 if ((end - start) < reg->threshold_len) 04130 goto mismatch; 04131 04132 if (reg->dmax != ONIG_INFINITE_DISTANCE) { 04133 do { 04134 if (! forward_search_range(reg, str, end, s, sch_range, 04135 &low, &high, &low_prev)) goto mismatch; 04136 if (s < low) { 04137 s = low; 04138 prev = low_prev; 04139 } 04140 while (s <= high) { 04141 MATCH_AND_RETURN_CHECK(orig_range); 04142 prev = s; 04143 s += enclen(reg->enc, s, end); 04144 } 04145 } while (s < range); 04146 goto mismatch; 04147 } 04148 else { /* check only. */ 04149 if (! forward_search_range(reg, str, end, s, sch_range, 04150 &low, &high, (UChar** )NULL)) goto mismatch; 04151 04152 if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) { 04153 do { 04154 if ((reg->anchor & ANCHOR_BEGIN_POSITION) == 0) 04155 msa.gpos = s; /* move \G position */ 04156 MATCH_AND_RETURN_CHECK(orig_range); 04157 prev = s; 04158 s += enclen(reg->enc, s, end); 04159 04160 if ((reg->anchor & ANCHOR_LOOK_BEHIND) == 0) { 04161 while (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0) 04162 && s < range) { 04163 prev = s; 04164 s += enclen(reg->enc, s, end); 04165 } 04166 } 04167 } while (s < range); 04168 goto mismatch; 04169 } 04170 } 04171 } 04172 04173 do { 04174 MATCH_AND_RETURN_CHECK(orig_range); 04175 prev = s; 04176 s += enclen(reg->enc, s, end); 04177 } while (s < range); 04178 04179 if (s == range) { /* because empty match with /$/. */ 04180 MATCH_AND_RETURN_CHECK(orig_range); 04181 } 04182 } 04183 else { /* backward search */ 04184 #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE 04185 if (orig_start < end) 04186 orig_start += enclen(reg->enc, orig_start, end); /* is upper range */ 04187 #endif 04188 04189 if (reg->optimize != ONIG_OPTIMIZE_NONE) { 04190 UChar *low, *high, *adjrange, *sch_start; 04191 04192 if (range < end) 04193 adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range, end); 04194 else 04195 adjrange = (UChar* )end; 04196 04197 if (reg->dmax != ONIG_INFINITE_DISTANCE && 04198 (end - range) >= reg->threshold_len) { 04199 do { 04200 sch_start = s + reg->dmax; 04201 if (sch_start > end) sch_start = (UChar* )end; 04202 if (backward_search_range(reg, str, end, sch_start, range, adjrange, 04203 &low, &high) <= 0) 04204 goto mismatch; 04205 04206 if (s > high) 04207 s = high; 04208 04209 while (s >= low) { 04210 prev = onigenc_get_prev_char_head(reg->enc, str, s, end); 04211 MATCH_AND_RETURN_CHECK(orig_start); 04212 s = prev; 04213 } 04214 } while (s >= range); 04215 goto mismatch; 04216 } 04217 else { /* check only. */ 04218 if ((end - range) < reg->threshold_len) goto mismatch; 04219 04220 sch_start = s; 04221 if (reg->dmax != 0) { 04222 if (reg->dmax == ONIG_INFINITE_DISTANCE) 04223 sch_start = (UChar* )end; 04224 else { 04225 sch_start += reg->dmax; 04226 if (sch_start > end) sch_start = (UChar* )end; 04227 else 04228 sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, 04229 start, sch_start, end); 04230 } 04231 } 04232 if (backward_search_range(reg, str, end, sch_start, range, adjrange, 04233 &low, &high) <= 0) goto mismatch; 04234 } 04235 } 04236 04237 do { 04238 prev = onigenc_get_prev_char_head(reg->enc, str, s, end); 04239 MATCH_AND_RETURN_CHECK(orig_start); 04240 s = prev; 04241 } while (s >= range); 04242 } 04243 04244 mismatch: 04245 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE 04246 if (IS_FIND_LONGEST(reg->options)) { 04247 if (msa.best_len >= 0) { 04248 s = msa.best_s; 04249 goto match; 04250 } 04251 } 04252 #endif 04253 r = ONIG_MISMATCH; 04254 04255 finish: 04256 MATCH_ARG_FREE(msa); 04257 ONIG_STATE_DEC_THREAD(reg); 04258 04259 /* If result is mismatch and no FIND_NOT_EMPTY option, 04260 then the region is not set in match_at(). */ 04261 if (IS_FIND_NOT_EMPTY(reg->options) && region 04262 #ifdef USE_POSIX_API_REGION_OPTION 04263 && !IS_POSIX_REGION(option) 04264 #endif 04265 ) { 04266 onig_region_clear(region); 04267 } 04268 04269 #ifdef ONIG_DEBUG 04270 if (r != ONIG_MISMATCH) 04271 fprintf(stderr, "onig_search: error %d\n", r); 04272 #endif 04273 return r; 04274 04275 mismatch_no_msa: 04276 r = ONIG_MISMATCH; 04277 finish_no_msa: 04278 ONIG_STATE_DEC_THREAD(reg); 04279 #ifdef ONIG_DEBUG 04280 if (r != ONIG_MISMATCH) 04281 fprintf(stderr, "onig_search: error %d\n", r); 04282 #endif 04283 return r; 04284 04285 match: 04286 ONIG_STATE_DEC_THREAD(reg); 04287 MATCH_ARG_FREE(msa); 04288 return s - str; 04289 } 04290 04291 extern OnigEncoding 04292 onig_get_encoding(regex_t* reg) 04293 { 04294 return reg->enc; 04295 } 04296 04297 extern OnigOptionType 04298 onig_get_options(regex_t* reg) 04299 { 04300 return reg->options; 04301 } 04302 04303 extern OnigCaseFoldType 04304 onig_get_case_fold_flag(regex_t* reg) 04305 { 04306 return reg->case_fold_flag; 04307 } 04308 04309 extern const OnigSyntaxType* 04310 onig_get_syntax(regex_t* reg) 04311 { 04312 return reg->syntax; 04313 } 04314 04315 extern int 04316 onig_number_of_captures(regex_t* reg) 04317 { 04318 return reg->num_mem; 04319 } 04320 04321 extern int 04322 onig_number_of_capture_histories(regex_t* reg) 04323 { 04324 #ifdef USE_CAPTURE_HISTORY 04325 int i, n; 04326 04327 n = 0; 04328 for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) { 04329 if (BIT_STATUS_AT(reg->capture_history, i) != 0) 04330 n++; 04331 } 04332 return n; 04333 #else 04334 return 0; 04335 #endif 04336 } 04337 04338 extern void 04339 onig_copy_encoding(OnigEncoding to, OnigEncoding from) 04340 { 04341 *to = *from; 04342 } 04343 04344