Ruby 2.0.0p247 (2013-06-27 revision 41674)
regparse.c
Go to the documentation of this file.
00001 /**********************************************************************
00002   regparse.c -  Onigmo (Oniguruma-mod) (regular expression library)
00003 **********************************************************************/
00004 /*-
00005  * Copyright (c) 2002-2008  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
00006  * Copyright (c) 2011-2013  K.Takata  <kentkt AT csc DOT jp>
00007  * All rights reserved.
00008  *
00009  * Redistribution and use in source and binary forms, with or without
00010  * modification, are permitted provided that the following conditions
00011  * are met:
00012  * 1. Redistributions of source code must retain the above copyright
00013  *    notice, this list of conditions and the following disclaimer.
00014  * 2. Redistributions in binary form must reproduce the above copyright
00015  *    notice, this list of conditions and the following disclaimer in the
00016  *    documentation and/or other materials provided with the distribution.
00017  *
00018  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
00019  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00020  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00021  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
00022  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00023  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00024  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00025  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00026  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00027  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00028  * SUCH DAMAGE.
00029  */
00030 
00031 #include "regparse.h"
00032 
00033 #define WARN_BUFSIZE    256
00034 
00035 #define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
00036 
00037 
/*
 * Syntax description used for Ruby regular expressions.
 *
 * The initializer supplies four flag words followed by a table of
 * meta characters.  NOTE(review): the member names of OnigSyntaxType are
 * not visible here; the flag words presumably map to the operator set,
 * the extended operator set, the behavior flags and the default options,
 * in that order -- confirm against the type declaration.
 */
const OnigSyntaxType OnigSyntaxRuby = {
  /* GNU regex operators plus the listed escapes, with the
     ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END bit masked out */
  (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
     ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
     ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
     ONIG_SYN_OP_ESC_C_CONTROL )
   & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
  /* second flag word: ONIG_SYN_OP2_* bits */
  , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
      ONIG_SYN_OP2_OPTION_RUBY |
      ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
      ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
      ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY  |
      ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
      ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
      ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
      ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
      ONIG_SYN_OP2_ESC_H_XDIGIT |
      ONIG_SYN_OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER |
      ONIG_SYN_OP2_QMARK_LPAREN_CONDITION |
      ONIG_SYN_OP2_ESC_CAPITAL_R_LINEBREAK |
      ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP )
  /* third flag word: GNU regex behavior plus ONIG_SYN_* behavior/warning bits */
  , ( SYN_GNU_REGEX_BV |
      ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
      ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
      ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
      ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
      ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
      ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
      ONIG_SYN_WARN_CC_DUP |
      ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
  /* fourth flag word: ONIG_OPTION_* bits */
  , ( ONIG_OPTION_ASCII_RANGE | ONIG_OPTION_POSIX_BRACKET_ALL_RANGE |
      ONIG_OPTION_WORD_BOUND_ALL_RANGE )
  ,
  /* meta character table: only the escape character is active */
  {
      (OnigCodePoint )'\\'                       /* esc */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
  }
};
00079 
/* Syntax used by default; initialised to ONIG_SYNTAX_RUBY. */
const OnigSyntaxType*  OnigDefaultSyntax = ONIG_SYNTAX_RUBY;

/* Warning callback that ignores its message. */
extern void onig_null_warn(const char* s ARG_UNUSED) { }

/* Current warning callback: DEFAULT_WARN_FUNCTION when the build defines
   it, otherwise the do-nothing onig_null_warn. */
#ifdef DEFAULT_WARN_FUNCTION
static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;
#else
static OnigWarnFunc onig_warn = onig_null_warn;
#endif

/* Current "verbose" warning callback, selected the same way from
   DEFAULT_VERB_WARN_FUNCTION. */
#ifdef DEFAULT_VERB_WARN_FUNCTION
static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION;
#else
static OnigWarnFunc onig_verb_warn = onig_null_warn;
#endif
00095 
00096 extern void onig_set_warn_func(OnigWarnFunc f)
00097 {
00098   onig_warn = f;
00099 }
00100 
00101 extern void onig_set_verb_warn_func(OnigWarnFunc f)
00102 {
00103   onig_verb_warn = f;
00104 }
00105 
00106 static void CC_DUP_WARN(ScanEnv *env);
00107 
00108 static void
00109 bbuf_free(BBuf* bbuf)
00110 {
00111   if (IS_NOT_NULL(bbuf)) {
00112     if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p);
00113     xfree(bbuf);
00114   }
00115 }
00116 
00117 static int
00118 bbuf_clone(BBuf** rto, BBuf* from)
00119 {
00120   int r;
00121   BBuf *to;
00122 
00123   *rto = to = (BBuf* )xmalloc(sizeof(BBuf));
00124   CHECK_NULL_RETURN_MEMERR(to);
00125   r = BBUF_INIT(to, from->alloc);
00126   if (r != 0) return r;
00127   to->used = from->used;
00128   xmemcpy(to->p, from->p, from->used);
00129   return 0;
00130 }
00131 
/* Convert a relative back-reference number into an absolute one, counted
   from the group that follows the groups seen so far ((env)->num_mem). */
#define BACKREF_REL_TO_ABS(rel_no, env) \
  ((env)->num_mem + 1 + (rel_no))

/* Clear (negative != 0) or set (negative == 0) the bits f in v.
   The whole expansion is parenthesized so the macro stays a single
   expression when it is used inside a larger one. */
#define ONOFF(v,f,negative)    ((negative) ? ((v) &= ~(f)) : ((v) |= (f)))
00136 
/* First code point handed to the multi-byte range: 0 when the encoding's
   minimum character length exceeds one byte, otherwise 0x80. */
#define MBCODE_START_POS(enc) \
  (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)

/* Add [MBCODE_START_POS(enc), ONIG_LAST_CODE_POINT] to pbuf.
   Relies on a variable named `env` being in scope at the call site. */
#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \
  add_code_range_to_buf(pbuf, env, MBCODE_START_POS(enc), ONIG_LAST_CODE_POINT)

/* For encodings that are not single-byte, add the whole multi-byte range
   to mbuf.  Relies on a variable `r` in scope and returns from the
   enclosing function when the add fails. */
#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\
  if (! ONIGENC_IS_SINGLEBYTE(enc)) {\
    r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\
    if (r) return r;\
  }\
} while (0)


/* Set bit pos in bs, calling CC_DUP_WARN(env) first when the bit is
   already set.  Relies on a variable named `env` being in scope. */
#define BITSET_SET_BIT_CHKDUP(bs, pos) do { \
  if (BITSET_AT(bs, pos)) CC_DUP_WARN(env); \
  BS_ROOM(bs, pos) |= BS_BIT(pos); \
} while (0)

/* Store 1 in `empty` when every word of bs is zero, otherwise 0.
   Declares its own `i`, so `empty` must not be an expression using `i`. */
#define BITSET_IS_EMPTY(bs,empty) do {\
  int i;\
  empty = 1;\
  for (i = 0; i < BITSET_SIZE; i++) {\
    if ((bs)[i] != 0) {\
      empty = 0; break;\
    }\
  }\
} while (0)
00165 
00166 static void
00167 bitset_set_range(ScanEnv *env, BitSetRef bs, int from, int to)
00168 {
00169   int i;
00170   for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) {
00171     BITSET_SET_BIT_CHKDUP(bs, i);
00172   }
00173 }
00174 
#if 0
/* Disabled: would set every bit of bs. */
static void
bitset_set_all(BitSetRef bs)
{
  int i;
  for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~((Bits )0); }
}
#endif
00183 
00184 static void
00185 bitset_invert(BitSetRef bs)
00186 {
00187   int i;
00188   for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~(bs[i]); }
00189 }
00190 
00191 static void
00192 bitset_invert_to(BitSetRef from, BitSetRef to)
00193 {
00194   int i;
00195   for (i = 0; i < BITSET_SIZE; i++) { to[i] = ~(from[i]); }
00196 }
00197 
00198 static void
00199 bitset_and(BitSetRef dest, BitSetRef bs)
00200 {
00201   int i;
00202   for (i = 0; i < BITSET_SIZE; i++) { dest[i] &= bs[i]; }
00203 }
00204 
00205 static void
00206 bitset_or(BitSetRef dest, BitSetRef bs)
00207 {
00208   int i;
00209   for (i = 0; i < BITSET_SIZE; i++) { dest[i] |= bs[i]; }
00210 }
00211 
00212 static void
00213 bitset_copy(BitSetRef dest, BitSetRef bs)
00214 {
00215   int i;
00216   for (i = 0; i < BITSET_SIZE; i++) { dest[i] = bs[i]; }
00217 }
00218 
00219 extern int
00220 onig_strncmp(const UChar* s1, const UChar* s2, int n)
00221 {
00222   int x;
00223 
00224   while (n-- > 0) {
00225     x = *s2++ - *s1++;
00226     if (x) return x;
00227   }
00228   return 0;
00229 }
00230 
00231 extern void
00232 onig_strcpy(UChar* dest, const UChar* src, const UChar* end)
00233 {
00234   ptrdiff_t len = end - src;
00235   if (len > 0) {
00236     xmemcpy(dest, src, len);
00237     dest[len] = (UChar )0;
00238   }
00239 }
00240 
00241 #ifdef USE_NAMED_GROUP
00242 static UChar*
00243 strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)
00244 {
00245   ptrdiff_t slen;
00246   int term_len, i;
00247   UChar *r;
00248 
00249   slen = end - s;
00250   term_len = ONIGENC_MBC_MINLEN(enc);
00251 
00252   r = (UChar* )xmalloc(slen + term_len);
00253   CHECK_NULL_RETURN(r);
00254   xmemcpy(r, s, slen);
00255 
00256   for (i = 0; i < term_len; i++)
00257     r[slen + i] = (UChar )0;
00258 
00259   return r;
00260 }
00261 #endif
00262 
/* scan pattern methods */
/* All of the macros below operate on variables named `p`, `end` and `enc`
   that must be in scope at the point of use; PUNFETCH, PINC and PFETCH
   additionally use `pfetch_prev`, which PFETCH_READY declares. */

/* value PPEEK yields once the end of input has been reached */
#define PEND_VALUE   0

#ifdef __GNUC__
/* get rid of Wunused-but-set-variable and Wuninitialized */
#define PFETCH_READY  UChar* pfetch_prev = NULL; (void)pfetch_prev
#else
#define PFETCH_READY  UChar* pfetch_prev
#endif
/* 1 when p has reached end, otherwise 0 */
#define PEND         (p < end ?  0 : 1)
/* step back to the position saved by the last PINC/PFETCH */
#define PUNFETCH     p = pfetch_prev
/* skip one character, remembering where it started */
#define PINC       do { \
  pfetch_prev = p; \
  p += enclen(enc, p, end); \
} while (0)
/* read one character into c and advance past it */
#define PFETCH(c)  do { \
  c = ((enc->max_enc_len == 1) ? *p : ONIGENC_MBC_TO_CODE(enc, p, end)); \
  pfetch_prev = p; \
  p += enclen(enc, p, end); \
} while (0)

/* code point at p without advancing, or PEND_VALUE at end of input */
#define PPEEK        (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)
/* the argument is parenthesized so that the cast applies to the whole
   expression that was passed in, not only to its first operand */
#define PPEEK_IS(c)  (PPEEK == (OnigCodePoint )(c))
00286 
00287 static UChar*
00288 strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,
00289               size_t capa)
00290 {
00291   UChar* r;
00292 
00293   if (dest)
00294     r = (UChar* )xrealloc(dest, capa + 1);
00295   else
00296     r = (UChar* )xmalloc(capa + 1);
00297 
00298   CHECK_NULL_RETURN(r);
00299   onig_strcpy(r + (dest_end - dest), src, src_end);
00300   return r;
00301 }
00302 
00303 /* dest on static area */
00304 static UChar*
00305 strcat_capa_from_static(UChar* dest, UChar* dest_end,
00306                         const UChar* src, const UChar* src_end, size_t capa)
00307 {
00308   UChar* r;
00309 
00310   r = (UChar* )xmalloc(capa + 1);
00311   CHECK_NULL_RETURN(r);
00312   onig_strcpy(r, dest, dest_end);
00313   onig_strcpy(r + (dest_end - dest), src, src_end);
00314   return r;
00315 }
00316 
00317 
00318 #ifdef USE_ST_LIBRARY
00319 
00320 #include "ruby/st.h"
00321 
/* Hash key describing the byte range [s, end); the bytes are referenced,
   not owned. */
typedef struct {
  const UChar* s;    /* first byte of the key */
  const UChar* end;  /* one past the last byte of the key */
} st_str_end_key;
00326 
00327 static int
00328 str_end_cmp(st_data_t xp, st_data_t yp)
00329 {
00330   const st_str_end_key *x, *y;
00331   const UChar *p, *q;
00332   int c;
00333 
00334   x = (const st_str_end_key *)xp;
00335   y = (const st_str_end_key *)yp;
00336   if ((x->end - x->s) != (y->end - y->s))
00337     return 1;
00338 
00339   p = x->s;
00340   q = y->s;
00341   while (p < x->end) {
00342     c = (int )*p - (int )*q;
00343     if (c != 0) return c;
00344 
00345     p++; q++;
00346   }
00347 
00348   return 0;
00349 }
00350 
00351 static st_index_t
00352 str_end_hash(st_data_t xp)
00353 {
00354   const st_str_end_key *x = (const st_str_end_key *)xp;
00355   const UChar *p;
00356   st_index_t val = 0;
00357 
00358   p = x->s;
00359   while (p < x->end) {
00360     val = val * 997 + (int )*p++;
00361   }
00362 
00363   return val + (val >> 5);
00364 }
00365 
00366 extern hash_table_type*
00367 onig_st_init_strend_table_with_size(st_index_t size)
00368 {
00369   static const struct st_hash_type hashType = {
00370     str_end_cmp,
00371     str_end_hash,
00372   };
00373 
00374   return (hash_table_type* )
00375            onig_st_init_table_with_size(&hashType, size);
00376 }
00377 
00378 extern int
00379 onig_st_lookup_strend(hash_table_type* table, const UChar* str_key,
00380                       const UChar* end_key, hash_data_type *value)
00381 {
00382   st_str_end_key key;
00383 
00384   key.s   = (UChar* )str_key;
00385   key.end = (UChar* )end_key;
00386 
00387   return onig_st_lookup(table, (st_data_t )(&key), value);
00388 }
00389 
00390 extern int
00391 onig_st_insert_strend(hash_table_type* table, const UChar* str_key,
00392                       const UChar* end_key, hash_data_type value)
00393 {
00394   st_str_end_key* key;
00395   int result;
00396 
00397   key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key));
00398   key->s   = (UChar* )str_key;
00399   key->end = (UChar* )end_key;
00400   result = onig_st_insert(table, (st_data_t )key, value);
00401   if (result) {
00402     xfree(key);
00403   }
00404   return result;
00405 }
00406 
00407 #endif /* USE_ST_LIBRARY */
00408 
00409 
00410 #ifdef USE_NAMED_GROUP
00411 
/* initial capacity of NameEntry.back_refs */
#define INIT_NAME_BACKREFS_ALLOC_NUM   8

/* One group name and the group numbers defined under it. */
typedef struct {
  UChar* name;       /* owned copy of the name */
  size_t name_len;   /* byte length */
  int    back_num;   /* number of backrefs */
  int    back_alloc; /* capacity of back_refs */
  int    back_ref1;  /* the group number while back_num == 1 */
  int*   back_refs;  /* all group numbers once back_num > 1 */
} NameEntry;
00422 
00423 #ifdef USE_ST_LIBRARY
00424 
/* With the st library the name table is a plain st hash table. */
typedef st_table  NameTable;
typedef st_data_t HashDataType;   /* 1.6 st.h doesn't define st_data_t type */
00427 
00428 #ifdef ONIG_DEBUG
00429 static int
00430 i_print_name_entry(UChar* key, NameEntry* e, void* arg)
00431 {
00432   int i;
00433   FILE* fp = (FILE* )arg;
00434 
00435   fprintf(fp, "%s: ", e->name);
00436   if (e->back_num == 0)
00437     fputs("-", fp);
00438   else if (e->back_num == 1)
00439     fprintf(fp, "%d", e->back_ref1);
00440   else {
00441     for (i = 0; i < e->back_num; i++) {
00442       if (i > 0) fprintf(fp, ", ");
00443       fprintf(fp, "%d", e->back_refs[i]);
00444     }
00445   }
00446   fputs("\n", fp);
00447   return ST_CONTINUE;
00448 }
00449 
00450 extern int
00451 onig_print_names(FILE* fp, regex_t* reg)
00452 {
00453   NameTable* t = (NameTable* )reg->name_table;
00454 
00455   if (IS_NOT_NULL(t)) {
00456     fprintf(fp, "name table\n");
00457     onig_st_foreach(t, i_print_name_entry, (HashDataType )fp);
00458     fputs("\n", fp);
00459   }
00460   return 0;
00461 }
00462 #endif /* ONIG_DEBUG */
00463 
00464 static int
00465 i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED)
00466 {
00467   xfree(e->name);
00468   if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
00469   xfree(key);
00470   xfree(e);
00471   return ST_DELETE;
00472 }
00473 
00474 static int
00475 names_clear(regex_t* reg)
00476 {
00477   NameTable* t = (NameTable* )reg->name_table;
00478 
00479   if (IS_NOT_NULL(t)) {
00480     onig_st_foreach(t, i_free_name_entry, 0);
00481   }
00482   return 0;
00483 }
00484 
00485 extern int
00486 onig_names_free(regex_t* reg)
00487 {
00488   int r;
00489   NameTable* t;
00490 
00491   r = names_clear(reg);
00492   if (r) return r;
00493 
00494   t = (NameTable* )reg->name_table;
00495   if (IS_NOT_NULL(t)) onig_st_free_table(t);
00496   reg->name_table = (void* )NULL;
00497   return 0;
00498 }
00499 
00500 static NameEntry*
00501 name_find(regex_t* reg, const UChar* name, const UChar* name_end)
00502 {
00503   NameEntry* e;
00504   NameTable* t = (NameTable* )reg->name_table;
00505 
00506   e = (NameEntry* )NULL;
00507   if (IS_NOT_NULL(t)) {
00508     onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));
00509   }
00510   return e;
00511 }
00512 
/* State passed from onig_foreach_name() to the i_names() callback. */
typedef struct {
  int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*); /* user callback */
  regex_t* reg;      /* regex whose names are being visited */
  void* arg;         /* opaque argument forwarded to func */
  int ret;           /* first non-zero result of func, else 0 */
  OnigEncoding enc;  /* set from reg->enc by onig_foreach_name() */
} INamesArg;
00520 
00521 static int
00522 i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg)
00523 {
00524   int r = (*(arg->func))(e->name,
00525                          e->name + e->name_len,
00526                          e->back_num,
00527                          (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
00528                          arg->reg, arg->arg);
00529   if (r != 0) {
00530     arg->ret = r;
00531     return ST_STOP;
00532   }
00533   return ST_CONTINUE;
00534 }
00535 
00536 extern int
00537 onig_foreach_name(regex_t* reg,
00538   int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
00539 {
00540   INamesArg narg;
00541   NameTable* t = (NameTable* )reg->name_table;
00542 
00543   narg.ret = 0;
00544   if (IS_NOT_NULL(t)) {
00545     narg.func = func;
00546     narg.reg  = reg;
00547     narg.arg  = arg;
00548     narg.enc  = reg->enc; /* should be pattern encoding. */
00549     onig_st_foreach(t, i_names, (HashDataType )&narg);
00550   }
00551   return narg.ret;
00552 }
00553 
00554 static int
00555 i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map)
00556 {
00557   int i;
00558 
00559   if (e->back_num > 1) {
00560     for (i = 0; i < e->back_num; i++) {
00561       e->back_refs[i] = map[e->back_refs[i]].new_val;
00562     }
00563   }
00564   else if (e->back_num == 1) {
00565     e->back_ref1 = map[e->back_ref1].new_val;
00566   }
00567 
00568   return ST_CONTINUE;
00569 }
00570 
00571 extern int
00572 onig_renumber_name_table(regex_t* reg, GroupNumRemap* map)
00573 {
00574   NameTable* t = (NameTable* )reg->name_table;
00575 
00576   if (IS_NOT_NULL(t)) {
00577     onig_st_foreach(t, i_renumber_name, (HashDataType )map);
00578   }
00579   return 0;
00580 }
00581 
00582 
00583 extern int
00584 onig_number_of_names(regex_t* reg)
00585 {
00586   NameTable* t = (NameTable* )reg->name_table;
00587 
00588   if (IS_NOT_NULL(t))
00589     return (int )t->num_entries;
00590   else
00591     return 0;
00592 }
00593 
00594 #else  /* USE_ST_LIBRARY */
00595 
/* initial number of slots allocated for NameTable.e */
#define INIT_NAMES_ALLOC_NUM    8

/* Name table used without the st library: a growable array of entries. */
typedef struct {
  NameEntry* e;      /* entry array */
  int        num;    /* slots in use */
  int        alloc;  /* slots allocated */
} NameTable;
00603 
00604 #ifdef ONIG_DEBUG
00605 extern int
00606 onig_print_names(FILE* fp, regex_t* reg)
00607 {
00608   int i, j;
00609   NameEntry* e;
00610   NameTable* t = (NameTable* )reg->name_table;
00611 
00612   if (IS_NOT_NULL(t) && t->num > 0) {
00613     fprintf(fp, "name table\n");
00614     for (i = 0; i < t->num; i++) {
00615       e = &(t->e[i]);
00616       fprintf(fp, "%s: ", e->name);
00617       if (e->back_num == 0) {
00618         fputs("-", fp);
00619       }
00620       else if (e->back_num == 1) {
00621         fprintf(fp, "%d", e->back_ref1);
00622       }
00623       else {
00624         for (j = 0; j < e->back_num; j++) {
00625           if (j > 0) fprintf(fp, ", ");
00626           fprintf(fp, "%d", e->back_refs[j]);
00627         }
00628       }
00629       fputs("\n", fp);
00630     }
00631     fputs("\n", fp);
00632   }
00633   return 0;
00634 }
00635 #endif
00636 
00637 static int
00638 names_clear(regex_t* reg)
00639 {
00640   int i;
00641   NameEntry* e;
00642   NameTable* t = (NameTable* )reg->name_table;
00643 
00644   if (IS_NOT_NULL(t)) {
00645     for (i = 0; i < t->num; i++) {
00646       e = &(t->e[i]);
00647       if (IS_NOT_NULL(e->name)) {
00648         xfree(e->name);
00649         e->name       = NULL;
00650         e->name_len   = 0;
00651         e->back_num   = 0;
00652         e->back_alloc = 0;
00653         if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
00654         e->back_refs = (int* )NULL;
00655       }
00656     }
00657     if (IS_NOT_NULL(t->e)) {
00658       xfree(t->e);
00659       t->e = NULL;
00660     }
00661     t->num = 0;
00662   }
00663   return 0;
00664 }
00665 
00666 extern int
00667 onig_names_free(regex_t* reg)
00668 {
00669   int r;
00670   NameTable* t;
00671 
00672   r = names_clear(reg);
00673   if (r) return r;
00674 
00675   t = (NameTable* )reg->name_table;
00676   if (IS_NOT_NULL(t)) xfree(t);
00677   reg->name_table = NULL;
00678   return 0;
00679 }
00680 
00681 static NameEntry*
00682 name_find(regex_t* reg, const UChar* name, const UChar* name_end)
00683 {
00684   int i, len;
00685   NameEntry* e;
00686   NameTable* t = (NameTable* )reg->name_table;
00687 
00688   if (IS_NOT_NULL(t)) {
00689     len = name_end - name;
00690     for (i = 0; i < t->num; i++) {
00691       e = &(t->e[i]);
00692       if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)
00693         return e;
00694     }
00695   }
00696   return (NameEntry* )NULL;
00697 }
00698 
00699 extern int
00700 onig_foreach_name(regex_t* reg,
00701   int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
00702 {
00703   int i, r;
00704   NameEntry* e;
00705   NameTable* t = (NameTable* )reg->name_table;
00706 
00707   if (IS_NOT_NULL(t)) {
00708     for (i = 0; i < t->num; i++) {
00709       e = &(t->e[i]);
00710       r = (*func)(e->name, e->name + e->name_len, e->back_num,
00711                   (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
00712                   reg, arg);
00713       if (r != 0) return r;
00714     }
00715   }
00716   return 0;
00717 }
00718 
00719 extern int
00720 onig_number_of_names(regex_t* reg)
00721 {
00722   NameTable* t = (NameTable* )reg->name_table;
00723 
00724   if (IS_NOT_NULL(t))
00725     return t->num;
00726   else
00727     return 0;
00728 }
00729 
00730 #endif /* else USE_ST_LIBRARY */
00731 
00732 static int
00733 name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
00734 {
00735   int alloc;
00736   NameEntry* e;
00737   NameTable* t = (NameTable* )reg->name_table;
00738 
00739   if (name_end - name <= 0)
00740     return ONIGERR_EMPTY_GROUP_NAME;
00741 
00742   e = name_find(reg, name, name_end);
00743   if (IS_NULL(e)) {
00744 #ifdef USE_ST_LIBRARY
00745     if (IS_NULL(t)) {
00746       t = onig_st_init_strend_table_with_size(5);
00747       reg->name_table = (void* )t;
00748     }
00749     e = (NameEntry* )xmalloc(sizeof(NameEntry));
00750     CHECK_NULL_RETURN_MEMERR(e);
00751 
00752     e->name = strdup_with_null(reg->enc, name, name_end);
00753     if (IS_NULL(e->name)) {
00754       xfree(e);
00755       return ONIGERR_MEMORY;
00756     }
00757     onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),
00758                           (HashDataType )e);
00759 
00760     e->name_len   = name_end - name;
00761     e->back_num   = 0;
00762     e->back_alloc = 0;
00763     e->back_refs  = (int* )NULL;
00764 
00765 #else
00766 
00767     if (IS_NULL(t)) {
00768       alloc = INIT_NAMES_ALLOC_NUM;
00769       t = (NameTable* )xmalloc(sizeof(NameTable));
00770       CHECK_NULL_RETURN_MEMERR(t);
00771       t->e     = NULL;
00772       t->alloc = 0;
00773       t->num   = 0;
00774 
00775       t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc);
00776       if (IS_NULL(t->e)) {
00777         xfree(t);
00778         return ONIGERR_MEMORY;
00779       }
00780       t->alloc = alloc;
00781       reg->name_table = t;
00782       goto clear;
00783     }
00784     else if (t->num == t->alloc) {
00785       int i;
00786       NameEntry* p;
00787 
00788       alloc = t->alloc * 2;
00789       p = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc);
00790       CHECK_NULL_RETURN_MEMERR(p);
00791       t->e = p;
00792       t->alloc = alloc;
00793 
00794     clear:
00795       for (i = t->num; i < t->alloc; i++) {
00796         t->e[i].name       = NULL;
00797         t->e[i].name_len   = 0;
00798         t->e[i].back_num   = 0;
00799         t->e[i].back_alloc = 0;
00800         t->e[i].back_refs  = (int* )NULL;
00801       }
00802     }
00803     e = &(t->e[t->num]);
00804     t->num++;
00805     e->name = strdup_with_null(reg->enc, name, name_end);
00806     if (IS_NULL(e->name)) return ONIGERR_MEMORY;
00807     e->name_len = name_end - name;
00808 #endif
00809   }
00810 
00811   if (e->back_num >= 1 &&
00812       ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) {
00813     onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,
00814                                     name, name_end);
00815     return ONIGERR_MULTIPLEX_DEFINED_NAME;
00816   }
00817 
00818   e->back_num++;
00819   if (e->back_num == 1) {
00820     e->back_ref1 = backref;
00821   }
00822   else {
00823     if (e->back_num == 2) {
00824       alloc = INIT_NAME_BACKREFS_ALLOC_NUM;
00825       e->back_refs = (int* )xmalloc(sizeof(int) * alloc);
00826       CHECK_NULL_RETURN_MEMERR(e->back_refs);
00827       e->back_alloc = alloc;
00828       e->back_refs[0] = e->back_ref1;
00829       e->back_refs[1] = backref;
00830     }
00831     else {
00832       if (e->back_num > e->back_alloc) {
00833         int* p;
00834         alloc = e->back_alloc * 2;
00835         p = (int* )xrealloc(e->back_refs, sizeof(int) * alloc);
00836         CHECK_NULL_RETURN_MEMERR(p);
00837         e->back_refs = p;
00838         e->back_alloc = alloc;
00839       }
00840       e->back_refs[e->back_num - 1] = backref;
00841     }
00842   }
00843 
00844   return 0;
00845 }
00846 
00847 extern int
00848 onig_name_to_group_numbers(regex_t* reg, const UChar* name,
00849                            const UChar* name_end, int** nums)
00850 {
00851   NameEntry* e = name_find(reg, name, name_end);
00852 
00853   if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE;
00854 
00855   switch (e->back_num) {
00856   case 0:
00857     *nums = 0;
00858     break;
00859   case 1:
00860     *nums = &(e->back_ref1);
00861     break;
00862   default:
00863     *nums = e->back_refs;
00864     break;
00865   }
00866   return e->back_num;
00867 }
00868 
00869 extern int
00870 onig_name_to_backref_number(regex_t* reg, const UChar* name,
00871                             const UChar* name_end, OnigRegion *region)
00872 {
00873   int i, n, *nums;
00874 
00875   n = onig_name_to_group_numbers(reg, name, name_end, &nums);
00876   if (n < 0)
00877     return n;
00878   else if (n == 0)
00879     return ONIGERR_PARSER_BUG;
00880   else if (n == 1)
00881     return nums[0];
00882   else {
00883     if (IS_NOT_NULL(region)) {
00884       for (i = n - 1; i >= 0; i--) {
00885         if (region->beg[nums[i]] != ONIG_REGION_NOTPOS)
00886           return nums[i];
00887       }
00888     }
00889     return nums[n - 1];
00890   }
00891 }
00892 
00893 #else /* USE_NAMED_GROUP */
00894 
00895 extern int
00896 onig_name_to_group_numbers(regex_t* reg, const UChar* name,
00897                            const UChar* name_end, int** nums)
00898 {
00899   return ONIG_NO_SUPPORT_CONFIG;
00900 }
00901 
00902 extern int
00903 onig_name_to_backref_number(regex_t* reg, const UChar* name,
00904                             const UChar* name_end, OnigRegion* region)
00905 {
00906   return ONIG_NO_SUPPORT_CONFIG;
00907 }
00908 
00909 extern int
00910 onig_foreach_name(regex_t* reg,
00911   int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
00912 {
00913   return ONIG_NO_SUPPORT_CONFIG;
00914 }
00915 
00916 extern int
00917 onig_number_of_names(regex_t* reg)
00918 {
00919   return 0;
00920 }
00921 #endif /* else USE_NAMED_GROUP */
00922 
00923 extern int
00924 onig_noname_group_capture_is_active(regex_t* reg)
00925 {
00926   if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP))
00927     return 0;
00928 
00929 #ifdef USE_NAMED_GROUP
00930   if (onig_number_of_names(reg) > 0 &&
00931       IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
00932       !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
00933     return 0;
00934   }
00935 #endif
00936 
00937   return 1;
00938 }
00939 
00940 
00941 #define INIT_SCANENV_MEMNODES_ALLOC_SIZE   16
00942 
00943 static void
00944 scan_env_clear(ScanEnv* env)
00945 {
00946   int i;
00947 
00948   BIT_STATUS_CLEAR(env->capture_history);
00949   BIT_STATUS_CLEAR(env->bt_mem_start);
00950   BIT_STATUS_CLEAR(env->bt_mem_end);
00951   BIT_STATUS_CLEAR(env->backrefed_mem);
00952   env->error      = (UChar* )NULL;
00953   env->error_end  = (UChar* )NULL;
00954   env->num_call   = 0;
00955   env->num_mem    = 0;
00956 #ifdef USE_NAMED_GROUP
00957   env->num_named  = 0;
00958 #endif
00959   env->mem_alloc         = 0;
00960   env->mem_nodes_dynamic = (Node** )NULL;
00961 
00962   for (i = 0; i < SCANENV_MEMNODES_SIZE; i++)
00963     env->mem_nodes_static[i] = NULL_NODE;
00964 
00965 #ifdef USE_COMBINATION_EXPLOSION_CHECK
00966   env->num_comb_exp_check  = 0;
00967   env->comb_exp_max_regnum = 0;
00968   env->curr_max_regnum     = 0;
00969   env->has_recursion       = 0;
00970 #endif
00971   env->warnings_flag       = 0;
00972 }
00973 
00974 static int
00975 scan_env_add_mem_entry(ScanEnv* env)
00976 {
00977   int i, need, alloc;
00978   Node** p;
00979 
00980   need = env->num_mem + 1;
00981   if (need >= SCANENV_MEMNODES_SIZE) {
00982     if (env->mem_alloc <= need) {
00983       if (IS_NULL(env->mem_nodes_dynamic)) {
00984         alloc = INIT_SCANENV_MEMNODES_ALLOC_SIZE;
00985         p = (Node** )xmalloc(sizeof(Node*) * alloc);
00986         xmemcpy(p, env->mem_nodes_static,
00987                 sizeof(Node*) * SCANENV_MEMNODES_SIZE);
00988       }
00989       else {
00990         alloc = env->mem_alloc * 2;
00991         p = (Node** )xrealloc(env->mem_nodes_dynamic, sizeof(Node*) * alloc);
00992       }
00993       CHECK_NULL_RETURN_MEMERR(p);
00994 
00995       for (i = env->num_mem + 1; i < alloc; i++)
00996         p[i] = NULL_NODE;
00997 
00998       env->mem_nodes_dynamic = p;
00999       env->mem_alloc = alloc;
01000     }
01001   }
01002 
01003   env->num_mem++;
01004   return env->num_mem;
01005 }
01006 
01007 static int
01008 scan_env_set_mem_node(ScanEnv* env, int num, Node* node)
01009 {
01010   if (env->num_mem >= num)
01011     SCANENV_MEM_NODES(env)[num] = node;
01012   else
01013     return ONIGERR_PARSER_BUG;
01014   return 0;
01015 }
01016 
01017 
01018 #ifdef USE_PARSE_TREE_NODE_RECYCLE
/* Link overlaid on a released Node while it waits on the recycle list. */
typedef struct _FreeNode {
  struct _FreeNode* next;
} FreeNode;

/* Head of the list of released nodes available for reuse by node_new(). */
static FreeNode* FreeNodeList = (FreeNode* )NULL;
01024 #endif
01025 
/*
 * Release a parse tree node together with everything it owns.
 *
 * The head of a list/alternation cell is freed recursively, while the
 * tail is handled by jumping back to `start`, so a long chain does not
 * deepen the call stack.  With USE_PARSE_TREE_NODE_RECYCLE the node memory
 * is pushed onto FreeNodeList instead of being handed to xfree().
 * A NULL node is ignored.
 */
extern void
onig_node_free(Node* node)
{
 start:
  if (IS_NULL(node)) return ;

  switch (NTYPE(node)) {
  case NT_STR:
    /* free the string storage only when it is separately allocated,
       i.e. it is not the node's embedded buffer */
    if (NSTR(node)->capa != 0 &&
        IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {
      xfree(NSTR(node)->s);
    }
    break;

  case NT_LIST:
  case NT_ALT:
    onig_node_free(NCAR(node));
    {
      /* remember the tail before the cell itself is released */
      Node* next_node = NCDR(node);

#ifdef USE_PARSE_TREE_NODE_RECYCLE
      {
        FreeNode* n = (FreeNode* )node;

        THREAD_ATOMIC_START;
        n->next = FreeNodeList;
        FreeNodeList = n;
        THREAD_ATOMIC_END;
      }
#else
      xfree(node);
#endif
      node = next_node;
      goto start;
    }
    break;

  case NT_CCLASS:
    {
      CClassNode* cc = NCCLASS(node);

      /* a class flagged as shared is left completely untouched */
      if (IS_NCCLASS_SHARE(cc)) return ;
      if (cc->mbuf)
        bbuf_free(cc->mbuf);
    }
    break;

  case NT_QTFR:
    if (NQTFR(node)->target)
      onig_node_free(NQTFR(node)->target);
    break;

  case NT_ENCLOSE:
    if (NENCLOSE(node)->target)
      onig_node_free(NENCLOSE(node)->target);
    break;

  case NT_BREF:
    if (IS_NOT_NULL(NBREF(node)->back_dynamic))
      xfree(NBREF(node)->back_dynamic);
    break;

  case NT_ANCHOR:
    if (NANCHOR(node)->target)
      onig_node_free(NANCHOR(node)->target);
    break;
  }

  /* finally release (or recycle) the node itself */
#ifdef USE_PARSE_TREE_NODE_RECYCLE
  {
    FreeNode* n = (FreeNode* )node;

    THREAD_ATOMIC_START;
    n->next = FreeNodeList;
    FreeNodeList = n;
    THREAD_ATOMIC_END;
  }
#else
  xfree(node);
#endif
}
01107 
#ifdef USE_PARSE_TREE_NODE_RECYCLE
/*
 * Hand every node currently parked on FreeNodeList to xfree(), leaving the
 * list empty.  Always returns 0.  The THREAD_ATOMIC_* bracket is commented
 * out here, so no locking is performed by this function.
 */
extern int
onig_free_node_list(void)
{
  FreeNode* head;

  /* THREAD_ATOMIC_START; */
  for (head = FreeNodeList; IS_NOT_NULL(head); head = FreeNodeList) {
    FreeNodeList = head->next;
    xfree(head);
  }
  /* THREAD_ATOMIC_END; */
  return 0;
}
#endif
01124 
01125 static Node*
01126 node_new(void)
01127 {
01128   Node* node;
01129 
01130 #ifdef USE_PARSE_TREE_NODE_RECYCLE
01131   THREAD_ATOMIC_START;
01132   if (IS_NOT_NULL(FreeNodeList)) {
01133     node = (Node* )FreeNodeList;
01134     FreeNodeList = FreeNodeList->next;
01135     THREAD_ATOMIC_END;
01136     return node;
01137   }
01138   THREAD_ATOMIC_END;
01139 #endif
01140 
01141   node = (Node* )xmalloc(sizeof(Node));
01142   /* xmemset(node, 0, sizeof(Node)); */
01143   return node;
01144 }
01145 
01146 
01147 static void
01148 initialize_cclass(CClassNode* cc)
01149 {
01150   BITSET_CLEAR(cc->bs);
01151   /* cc->base.flags = 0; */
01152   cc->flags = 0;
01153   cc->mbuf  = NULL;
01154 }
01155 
01156 static Node*
01157 node_new_cclass(void)
01158 {
01159   Node* node = node_new();
01160   CHECK_NULL_RETURN(node);
01161 
01162   SET_NTYPE(node, NT_CCLASS);
01163   initialize_cclass(NCCLASS(node));
01164   return node;
01165 }
01166 
01167 static Node*
01168 node_new_cclass_by_codepoint_range(int not, OnigCodePoint sb_out,
01169                                    const OnigCodePoint ranges[])
01170 {
01171   int n, i;
01172   CClassNode* cc;
01173   OnigCodePoint j;
01174 
01175   Node* node = node_new_cclass();
01176   CHECK_NULL_RETURN(node);
01177 
01178   cc = NCCLASS(node);
01179   if (not != 0) NCCLASS_SET_NOT(cc);
01180 
01181   BITSET_CLEAR(cc->bs);
01182   if (sb_out > 0 && IS_NOT_NULL(ranges)) {
01183     n = ONIGENC_CODE_RANGE_NUM(ranges);
01184     for (i = 0; i < n; i++) {
01185       for (j  = ONIGENC_CODE_RANGE_FROM(ranges, i);
01186            j <= (OnigCodePoint )ONIGENC_CODE_RANGE_TO(ranges, i); j++) {
01187         if (j >= sb_out) goto sb_end;
01188 
01189         BITSET_SET_BIT(cc->bs, j);
01190       }
01191     }
01192   }
01193 
01194  sb_end:
01195   if (IS_NULL(ranges)) {
01196   is_null:
01197     cc->mbuf = NULL;
01198   }
01199   else {
01200     BBuf* bbuf;
01201 
01202     n = ONIGENC_CODE_RANGE_NUM(ranges);
01203     if (n == 0) goto is_null;
01204 
01205     bbuf = (BBuf* )xmalloc(sizeof(BBuf));
01206     CHECK_NULL_RETURN(bbuf);
01207     bbuf->alloc = n + 1;
01208     bbuf->used  = n + 1;
01209     bbuf->p     = (UChar* )((void* )ranges);
01210 
01211     cc->mbuf = bbuf;
01212   }
01213 
01214   return node;
01215 }
01216 
01217 static Node*
01218 node_new_ctype(int type, int not, int ascii_range)
01219 {
01220   Node* node = node_new();
01221   CHECK_NULL_RETURN(node);
01222 
01223   SET_NTYPE(node, NT_CTYPE);
01224   NCTYPE(node)->ctype = type;
01225   NCTYPE(node)->not   = not;
01226   NCTYPE(node)->ascii_range = ascii_range;
01227   return node;
01228 }
01229 
01230 static Node*
01231 node_new_anychar(void)
01232 {
01233   Node* node = node_new();
01234   CHECK_NULL_RETURN(node);
01235 
01236   SET_NTYPE(node, NT_CANY);
01237   return node;
01238 }
01239 
01240 static Node*
01241 node_new_list(Node* left, Node* right)
01242 {
01243   Node* node = node_new();
01244   CHECK_NULL_RETURN(node);
01245 
01246   SET_NTYPE(node, NT_LIST);
01247   NCAR(node)  = left;
01248   NCDR(node) = right;
01249   return node;
01250 }
01251 
01252 extern Node*
01253 onig_node_new_list(Node* left, Node* right)
01254 {
01255   return node_new_list(left, right);
01256 }
01257 
01258 extern Node*
01259 onig_node_list_add(Node* list, Node* x)
01260 {
01261   Node *n;
01262 
01263   n = onig_node_new_list(x, NULL);
01264   if (IS_NULL(n)) return NULL_NODE;
01265 
01266   if (IS_NOT_NULL(list)) {
01267     while (IS_NOT_NULL(NCDR(list)))
01268       list = NCDR(list);
01269 
01270     NCDR(list) = n;
01271   }
01272 
01273   return n;
01274 }
01275 
01276 extern Node*
01277 onig_node_new_alt(Node* left, Node* right)
01278 {
01279   Node* node = node_new();
01280   CHECK_NULL_RETURN(node);
01281 
01282   SET_NTYPE(node, NT_ALT);
01283   NCAR(node)  = left;
01284   NCDR(node) = right;
01285   return node;
01286 }
01287 
01288 extern Node*
01289 onig_node_new_anchor(int type)
01290 {
01291   Node* node = node_new();
01292   CHECK_NULL_RETURN(node);
01293 
01294   SET_NTYPE(node, NT_ANCHOR);
01295   NANCHOR(node)->type     = type;
01296   NANCHOR(node)->target   = NULL;
01297   NANCHOR(node)->char_len = -1;
01298   NANCHOR(node)->ascii_range = 0;
01299   return node;
01300 }
01301 
01302 static Node*
01303 node_new_backref(int back_num, int* backrefs, int by_name,
01304 #ifdef USE_BACKREF_WITH_LEVEL
01305                  int exist_level, int nest_level,
01306 #endif
01307                  ScanEnv* env)
01308 {
01309   int i;
01310   Node* node = node_new();
01311 
01312   CHECK_NULL_RETURN(node);
01313 
01314   SET_NTYPE(node, NT_BREF);
01315   NBREF(node)->state    = 0;
01316   NBREF(node)->back_num = back_num;
01317   NBREF(node)->back_dynamic = (int* )NULL;
01318   if (by_name != 0)
01319     NBREF(node)->state |= NST_NAME_REF;
01320 
01321 #ifdef USE_BACKREF_WITH_LEVEL
01322   if (exist_level != 0) {
01323     NBREF(node)->state |= NST_NEST_LEVEL;
01324     NBREF(node)->nest_level  = nest_level;
01325   }
01326 #endif
01327 
01328   for (i = 0; i < back_num; i++) {
01329     if (backrefs[i] <= env->num_mem &&
01330         IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) {
01331       NBREF(node)->state |= NST_RECURSION;   /* /...(\1).../ */
01332       break;
01333     }
01334   }
01335 
01336   if (back_num <= NODE_BACKREFS_SIZE) {
01337     for (i = 0; i < back_num; i++)
01338       NBREF(node)->back_static[i] = backrefs[i];
01339   }
01340   else {
01341     int* p = (int* )xmalloc(sizeof(int) * back_num);
01342     if (IS_NULL(p)) {
01343       onig_node_free(node);
01344       return NULL;
01345     }
01346     NBREF(node)->back_dynamic = p;
01347     for (i = 0; i < back_num; i++)
01348       p[i] = backrefs[i];
01349   }
01350   return node;
01351 }
01352 
#ifdef USE_SUBEXP_CALL
/*
 * Create an NT_CALL node.  The name pointers are stored as given (the name
 * is not copied); the target starts out as NULL_NODE and the state as 0.
 * Returns NULL when node_new() fails.
 */
static Node*
node_new_call(UChar* name, UChar* name_end, int gnum)
{
  Node* call = node_new();

  CHECK_NULL_RETURN(call);

  SET_NTYPE(call, NT_CALL);
  NCALL(call)->group_num = gnum;  /* call by number if gnum != 0 */
  NCALL(call)->name_end  = name_end;
  NCALL(call)->name      = name;
  NCALL(call)->target    = NULL_NODE;
  NCALL(call)->state     = 0;
  return call;
}
#endif
01369 
01370 static Node*
01371 node_new_quantifier(int lower, int upper, int by_number)
01372 {
01373   Node* node = node_new();
01374   CHECK_NULL_RETURN(node);
01375 
01376   SET_NTYPE(node, NT_QTFR);
01377   NQTFR(node)->state  = 0;
01378   NQTFR(node)->target = NULL;
01379   NQTFR(node)->lower  = lower;
01380   NQTFR(node)->upper  = upper;
01381   NQTFR(node)->greedy = 1;
01382   NQTFR(node)->target_empty_info = NQ_TARGET_ISNOT_EMPTY;
01383   NQTFR(node)->head_exact        = NULL_NODE;
01384   NQTFR(node)->next_head_exact   = NULL_NODE;
01385   NQTFR(node)->is_refered        = 0;
01386   if (by_number != 0)
01387     NQTFR(node)->state |= NST_BY_NUMBER;
01388 
01389 #ifdef USE_COMBINATION_EXPLOSION_CHECK
01390   NQTFR(node)->comb_exp_check_num = 0;
01391 #endif
01392 
01393   return node;
01394 }
01395 
01396 static Node*
01397 node_new_enclose(int type)
01398 {
01399   Node* node = node_new();
01400   CHECK_NULL_RETURN(node);
01401 
01402   SET_NTYPE(node, NT_ENCLOSE);
01403   NENCLOSE(node)->type      = type;
01404   NENCLOSE(node)->state     =  0;
01405   NENCLOSE(node)->regnum    =  0;
01406   NENCLOSE(node)->option    =  0;
01407   NENCLOSE(node)->target    = NULL;
01408   NENCLOSE(node)->call_addr = -1;
01409   NENCLOSE(node)->opt_count =  0;
01410   return node;
01411 }
01412 
01413 extern Node*
01414 onig_node_new_enclose(int type)
01415 {
01416   return node_new_enclose(type);
01417 }
01418 
01419 static Node*
01420 node_new_enclose_memory(OnigOptionType option, int is_named)
01421 {
01422   Node* node = node_new_enclose(ENCLOSE_MEMORY);
01423   CHECK_NULL_RETURN(node);
01424   if (is_named != 0)
01425     SET_ENCLOSE_STATUS(node, NST_NAMED_GROUP);
01426 
01427 #ifdef USE_SUBEXP_CALL
01428   NENCLOSE(node)->option = option;
01429 #endif
01430   return node;
01431 }
01432 
01433 static Node*
01434 node_new_option(OnigOptionType option)
01435 {
01436   Node* node = node_new_enclose(ENCLOSE_OPTION);
01437   CHECK_NULL_RETURN(node);
01438   NENCLOSE(node)->option = option;
01439   return node;
01440 }
01441 
01442 extern int
01443 onig_node_str_cat(Node* node, const UChar* s, const UChar* end)
01444 {
01445   ptrdiff_t addlen = end - s;
01446 
01447   if (addlen > 0) {
01448     ptrdiff_t len  = NSTR(node)->end - NSTR(node)->s;
01449 
01450     if (NSTR(node)->capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) {
01451       UChar* p;
01452       ptrdiff_t capa = len + addlen + NODE_STR_MARGIN;
01453 
01454       if (capa <= NSTR(node)->capa) {
01455         onig_strcpy(NSTR(node)->s + len, s, end);
01456       }
01457       else {
01458         if (NSTR(node)->s == NSTR(node)->buf)
01459           p = strcat_capa_from_static(NSTR(node)->s, NSTR(node)->end,
01460                                       s, end, capa);
01461         else
01462           p = strcat_capa(NSTR(node)->s, NSTR(node)->end, s, end, capa);
01463 
01464         CHECK_NULL_RETURN_MEMERR(p);
01465         NSTR(node)->s    = p;
01466         NSTR(node)->capa = (int )capa;
01467       }
01468     }
01469     else {
01470       onig_strcpy(NSTR(node)->s + len, s, end);
01471     }
01472     NSTR(node)->end = NSTR(node)->s + len + addlen;
01473   }
01474 
01475   return 0;
01476 }
01477 
01478 extern int
01479 onig_node_str_set(Node* node, const UChar* s, const UChar* end)
01480 {
01481   onig_node_str_clear(node);
01482   return onig_node_str_cat(node, s, end);
01483 }
01484 
01485 static int
01486 node_str_cat_char(Node* node, UChar c)
01487 {
01488   UChar s[1];
01489 
01490   s[0] = c;
01491   return onig_node_str_cat(node, s, s + 1);
01492 }
01493 
01494 static int
01495 node_str_cat_codepoint(Node* node, OnigEncoding enc, OnigCodePoint c)
01496 {
01497   UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
01498   int num = ONIGENC_CODE_TO_MBC(enc, c, buf);
01499   if (num < 0) return num;
01500   return onig_node_str_cat(node, buf, buf + num);
01501 }
01502 
01503 extern void
01504 onig_node_conv_to_str_node(Node* node, int flag)
01505 {
01506   SET_NTYPE(node, NT_STR);
01507   NSTR(node)->flag = flag;
01508   NSTR(node)->capa = 0;
01509   NSTR(node)->s    = NSTR(node)->buf;
01510   NSTR(node)->end  = NSTR(node)->buf;
01511 }
01512 
01513 extern void
01514 onig_node_str_clear(Node* node)
01515 {
01516   if (NSTR(node)->capa != 0 &&
01517       IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {
01518     xfree(NSTR(node)->s);
01519   }
01520 
01521   NSTR(node)->capa = 0;
01522   NSTR(node)->flag = 0;
01523   NSTR(node)->s    = NSTR(node)->buf;
01524   NSTR(node)->end  = NSTR(node)->buf;
01525 }
01526 
01527 static Node*
01528 node_new_str(const UChar* s, const UChar* end)
01529 {
01530   Node* node = node_new();
01531   CHECK_NULL_RETURN(node);
01532 
01533   SET_NTYPE(node, NT_STR);
01534   NSTR(node)->capa = 0;
01535   NSTR(node)->flag = 0;
01536   NSTR(node)->s    = NSTR(node)->buf;
01537   NSTR(node)->end  = NSTR(node)->buf;
01538   if (onig_node_str_cat(node, s, end)) {
01539     onig_node_free(node);
01540     return NULL;
01541   }
01542   return node;
01543 }
01544 
01545 extern Node*
01546 onig_node_new_str(const UChar* s, const UChar* end)
01547 {
01548   return node_new_str(s, end);
01549 }
01550 
01551 static Node*
01552 node_new_str_raw(UChar* s, UChar* end)
01553 {
01554   Node* node = node_new_str(s, end);
01555   if (IS_NOT_NULL(node))
01556     NSTRING_SET_RAW(node);
01557   return node;
01558 }
01559 
01560 static Node*
01561 node_new_empty(void)
01562 {
01563   return node_new_str(NULL, NULL);
01564 }
01565 
01566 static Node*
01567 node_new_str_raw_char(UChar c)
01568 {
01569   UChar p[1];
01570 
01571   p[0] = c;
01572   return node_new_str_raw(p, p + 1);
01573 }
01574 
01575 static Node*
01576 str_node_split_last_char(StrNode* sn, OnigEncoding enc)
01577 {
01578   const UChar *p;
01579   Node* n = NULL_NODE;
01580 
01581   if (sn->end > sn->s) {
01582     p = onigenc_get_prev_char_head(enc, sn->s, sn->end, sn->end);
01583     if (p && p > sn->s) { /* can be split. */
01584       n = node_new_str(p, sn->end);
01585       if (IS_NOT_NULL(n) && (sn->flag & NSTR_RAW) != 0)
01586         NSTRING_SET_RAW(n);
01587       sn->end = (UChar* )p;
01588     }
01589   }
01590   return n;
01591 }
01592 
01593 static int
01594 str_node_can_be_split(StrNode* sn, OnigEncoding enc)
01595 {
01596   if (sn->end > sn->s) {
01597     return ((enclen(enc, sn->s, sn->end) < sn->end - sn->s)  ?  1 : 0);
01598   }
01599   return 0;
01600 }
01601 
#ifdef USE_PAD_TO_SHORT_BYTE_CHAR
/*
 * Shift the content of `sn` right by `num` bytes and fill the first `num`
 * bytes with `val`; sn->end is advanced by `num`.
 *
 * Always returns 0.  (The function is declared to return int; it used to
 * fall off its end without returning a value.)
 *
 * NOTE(review): no bounds check is made here -- the current content is
 * staged in a NODE_STR_BUF_SIZE temporary and written back at sn->s + num,
 * so the caller presumably guarantees both fit; confirm against callers.
 */
static int
node_str_head_pad(StrNode* sn, int num, UChar val)
{
  UChar buf[NODE_STR_BUF_SIZE];
  int i, len;

  len = sn->end - sn->s;
  /* stage the old content so the shifted copy cannot overlap its source */
  onig_strcpy(buf, sn->s, sn->end);
  onig_strcpy(&(sn->s[num]), buf, buf + len);
  sn->end += num;

  for (i = 0; i < num; i++) {
    sn->s[i] = val;
  }
  return 0;
}
#endif
01619 
01620 extern int
01621 onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)
01622 {
01623   unsigned int num, val;
01624   OnigCodePoint c;
01625   UChar* p = *src;
01626   PFETCH_READY;
01627 
01628   num = 0;
01629   while (!PEND) {
01630     PFETCH(c);
01631     if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
01632       val = (unsigned int )DIGITVAL(c);
01633       if ((INT_MAX_LIMIT - val) / 10UL < num)
01634         return -1;  /* overflow */
01635 
01636       num = num * 10 + val;
01637     }
01638     else {
01639       PUNFETCH;
01640       break;
01641     }
01642   }
01643   *src = p;
01644   return num;
01645 }
01646 
01647 static int
01648 scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int minlen,
01649                                  int maxlen, OnigEncoding enc)
01650 {
01651   OnigCodePoint c;
01652   unsigned int num, val;
01653   int restlen;
01654   UChar* p = *src;
01655   PFETCH_READY;
01656 
01657   restlen = maxlen - minlen;
01658   num = 0;
01659   while (!PEND && maxlen-- != 0) {
01660     PFETCH(c);
01661     if (ONIGENC_IS_CODE_XDIGIT(enc, c)) {
01662       val = (unsigned int )XDIGITVAL(enc,c);
01663       if ((INT_MAX_LIMIT - val) / 16UL < num)
01664         return -1;  /* overflow */
01665 
01666       num = (num << 4) + XDIGITVAL(enc,c);
01667     }
01668     else {
01669       PUNFETCH;
01670       break;
01671     }
01672   }
01673   if (maxlen > restlen)
01674     return -2;  /* not enough digits */
01675   *src = p;
01676   return num;
01677 }
01678 
01679 static int
01680 scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen,
01681                            OnigEncoding enc)
01682 {
01683   OnigCodePoint c;
01684   unsigned int num, val;
01685   UChar* p = *src;
01686   PFETCH_READY;
01687 
01688   num = 0;
01689   while (!PEND && maxlen-- != 0) {
01690     PFETCH(c);
01691     if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8') {
01692       val = ODIGITVAL(c);
01693       if ((INT_MAX_LIMIT - val) / 8UL < num)
01694         return -1;  /* overflow */
01695 
01696       num = (num << 3) + val;
01697     }
01698     else {
01699       PUNFETCH;
01700       break;
01701     }
01702   }
01703   *src = p;
01704   return num;
01705 }
01706 
01707 
/* Store one code point into `bbuf` at offset `pos` by passing the address of
 * `code` and SIZE_CODE_POINT to BBUF_WRITE.  Because its address is taken,
 * `code` must be an lvalue (a variable, not a literal).  Callers in this file
 * pass offsets that are multiples of SIZE_CODE_POINT. */
01708 #define BBUF_WRITE_CODE_POINT(bbuf,pos,code) \
01709     BBUF_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT)
01710 
01711 /* data format:
01712      [n][from-1][to-1][from-2][to-2] ... [from-n][to-n]
01713      (all data size is OnigCodePoint)
01714  */
01715 static int
01716 new_code_range(BBuf** pbuf)
01717 {
01718 #define INIT_MULTI_BYTE_RANGE_SIZE  (SIZE_CODE_POINT * 5)
01719   int r;
01720   OnigCodePoint n;
01721   BBuf* bbuf;
01722 
01723   bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));
01724   CHECK_NULL_RETURN_MEMERR(*pbuf);
01725   r = BBUF_INIT(*pbuf, INIT_MULTI_BYTE_RANGE_SIZE);
01726   if (r) return r;
01727 
01728   n = 0;
01729   BBUF_WRITE_CODE_POINT(bbuf, 0, n);
01730   return 0;
01731 }
01732 
/*
 * Merge the code point range [from, to] into the range buffer *pbuf.
 *
 * If from > to the two bounds are swapped first.  When *pbuf is NULL a new
 * buffer is created with new_code_range().  The buffer holds a count n
 * followed by n (from, to) pairs; the two binary searches below assume
 * those pairs are kept in ascending order.  Existing pairs that overlap or
 * directly adjoin the new range (indexes low .. high-1) are replaced by a
 * single merged pair.
 *
 * When `checkdup` is non-zero and the new range overlaps an existing pair,
 * CC_DUP_WARN(env) is invoked.
 *
 * Returns 0 on success, the error from new_code_range(), or
 * ONIGERR_TOO_MANY_MULTI_BYTE_RANGES when the pair count would exceed
 * ONIG_MAX_MULTI_BYTE_RANGES_NUM.  NOTE(review): the BBUF_* macros used
 * for moving/growing/writing may contain their own error returns; their
 * definitions are not visible here.
 */
01733 static int
01734 add_code_range_to_buf0(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to,
01735         int checkdup)
01736 {
01737   int r, inc_n, pos;
01738   OnigCodePoint low, high, bound, x;
01739   OnigCodePoint n, *data;
01740   BBuf* bbuf;
01741 
  /* normalize so that from <= to (n only serves as the swap temporary) */
01742   if (from > to) {
01743     n = from; from = to; to = n;
01744   }
01745 
  /* fetch (or create) the buffer and read the current pair count into n */
01746   if (IS_NULL(*pbuf)) {
01747     r = new_code_range(pbuf);
01748     if (r) return r;
01749     bbuf = *pbuf;
01750     n = 0;
01751   }
01752   else {
01753     bbuf = *pbuf;
01754     GET_CODE_POINT(n, bbuf->p);
01755   }
  /* data[] addresses the pairs: skip the leading count element */
01756   data = (OnigCodePoint* )(bbuf->p);
01757   data++;
01758 
  /* binary search for low: the first pair whose upper bound is >= from - 1.
     With from == 0 the search is skipped and low stays 0 (from - 1 would
     wrap around for an unsigned 0). */
01759   bound = (from == 0) ? 0 : n;
01760   for (low = 0; low < bound; ) {
01761     x = (low + bound) >> 1;
01762     if (from - 1 > data[x*2 + 1])
01763       low = x + 1;
01764     else
01765       bound = x;
01766   }
01767 
  /* binary search for high, starting at low: the first pair whose lower
     bound is > to + 1.  With to == ONIG_LAST_CODE_POINT high is set to n
     directly and the loop body never runs. */
01768   high = (to == ONIG_LAST_CODE_POINT) ? n : low;
01769   for (bound = n; high < bound; ) {
01770     x = (high + bound) >> 1;
01771     if (to + 1 >= data[x*2])
01772       high = x + 1;
01773     else
01774       bound = x;
01775   }
01776   /* data[(low-1)*2+1] << from <= data[low*2]
01777    * data[(high-1)*2+1] <= to << data[high*2]
01778    */
01779 
  /* net change of the pair count: one pair is written, (high - low) pairs
     are absorbed */
01780   inc_n = low + 1 - high;
01781   if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM)
01782     return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES;
01783 
  /* at least one existing pair is absorbed: optionally warn about the
     duplicate, then widen [from, to] to cover the first and last of them */
01784   if (inc_n != 1) {
01785     if (checkdup && from <= data[low*2+1]
01786         && (data[low*2] <= from || data[low*2+1] <= to))
01787       CC_DUP_WARN(env);
01788     if (from > data[low*2])
01789       from = data[low*2];
01790     if (to < data[(high - 1)*2 + 1])
01791       to = data[(high - 1)*2 + 1];
01792   }
01793 
  /* move the pairs from index `high` onward so that they start right
     after slot `low`: to the right when the count grows (and there is a
     tail to move), to the left when it shrinks */
01794   if (inc_n != 0) {
01795     int from_pos = SIZE_CODE_POINT * (1 + high * 2);
01796     int to_pos   = SIZE_CODE_POINT * (1 + (low + 1) * 2);
01797 
01798     if (inc_n > 0) {
01799       if (high < n) {
01800         int size = (n - high) * 2 * SIZE_CODE_POINT;
01801         BBUF_MOVE_RIGHT(bbuf, from_pos, to_pos, size);
01802       }
01803     }
01804     else {
01805       BBUF_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);
01806     }
01807   }
01808 
  /* write the merged pair into slot `low`, then store the updated count */
01809   pos = SIZE_CODE_POINT * (1 + low * 2);
01810   BBUF_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);
01811   BBUF_WRITE_CODE_POINT(bbuf, pos, from);
01812   BBUF_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);
01813   n += inc_n;
01814   BBUF_WRITE_CODE_POINT(bbuf, 0, n);
01815 
01816   return 0;
01817 }
01818 
01819 static int
01820 add_code_range_to_buf(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)
01821 {
01822   return add_code_range_to_buf0(pbuf, env, from, to, 1);
01823 }
01824 
01825 static int
01826 add_code_range0(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to, int checkdup)
01827 {
01828   if (from > to) {
01829     if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
01830       return 0;
01831     else
01832       return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
01833   }
01834 
01835   return add_code_range_to_buf0(pbuf, env, from, to, checkdup);
01836 }
01837 
01838 static int
01839 add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)
01840 {
01841     return add_code_range0(pbuf, env, from, to, 1);
01842 }
01843 
01844 static int
01845 not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf, ScanEnv* env)
01846 {
01847   int r, i, n;
01848   OnigCodePoint pre, from, *data, to = 0;
01849 
01850   *pbuf = (BBuf* )NULL;
01851   if (IS_NULL(bbuf)) {
01852   set_all:
01853     return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
01854   }
01855 
01856   data = (OnigCodePoint* )(bbuf->p);
01857   GET_CODE_POINT(n, data);
01858   data++;
01859   if (n <= 0) goto set_all;
01860 
01861   r = 0;
01862   pre = MBCODE_START_POS(enc);
01863   for (i = 0; i < n; i++) {
01864     from = data[i*2];
01865     to   = data[i*2+1];
01866     if (pre <= from - 1) {
01867       r = add_code_range_to_buf(pbuf, env, pre, from - 1);
01868       if (r != 0) return r;
01869     }
01870     if (to == ONIG_LAST_CODE_POINT) break;
01871     pre = to + 1;
01872   }
01873   if (to < ONIG_LAST_CODE_POINT) {
01874     r = add_code_range_to_buf(pbuf, env, to + 1, ONIG_LAST_CODE_POINT);
01875   }
01876   return r;
01877 }
01878 
/* Exchange two (buffer, negation flag) pairs in place: afterwards
 * bbuf1/not1 hold what bbuf2/not2 held before and vice versa.  All four
 * arguments are assigned to, so they must be lvalues; the macro uses a
 * BBuf* and an int temporary and is wrapped in do { } while (0) so it can
 * be used as a single statement. */
01879 #define SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2) do {\
01880   BBuf *tbuf; \
01881   int  tnot; \
01882   tnot = not1;  not1  = not2;  not2  = tnot; \
01883   tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; \
01884 } while (0)
01885 
01886 static int
01887 or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,
01888                   BBuf* bbuf2, int not2, BBuf** pbuf, ScanEnv* env)
01889 {
01890   int r;
01891   OnigCodePoint i, n1, *data1;
01892   OnigCodePoint from, to;
01893 
01894   *pbuf = (BBuf* )NULL;
01895   if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) {
01896     if (not1 != 0 || not2 != 0)
01897       return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
01898     return 0;
01899   }
01900 
01901   r = 0;
01902   if (IS_NULL(bbuf2))
01903     SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
01904 
01905   if (IS_NULL(bbuf1)) {
01906     if (not1 != 0) {
01907       return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
01908     }
01909     else {
01910       if (not2 == 0) {
01911         return bbuf_clone(pbuf, bbuf2);
01912       }
01913       else {
01914         return not_code_range_buf(enc, bbuf2, pbuf, env);
01915       }
01916     }
01917   }
01918 
01919   if (not1 != 0)
01920     SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
01921 
01922   data1 = (OnigCodePoint* )(bbuf1->p);
01923   GET_CODE_POINT(n1, data1);
01924   data1++;
01925 
01926   if (not2 == 0 && not1 == 0) { /* 1 OR 2 */
01927     r = bbuf_clone(pbuf, bbuf2);
01928   }
01929   else if (not1 == 0) { /* 1 OR (not 2) */
01930     r = not_code_range_buf(enc, bbuf2, pbuf, env);
01931   }
01932   if (r != 0) return r;
01933 
01934   for (i = 0; i < n1; i++) {
01935     from = data1[i*2];
01936     to   = data1[i*2+1];
01937     r = add_code_range_to_buf(pbuf, env, from, to);
01938     if (r != 0) return r;
01939   }
01940   return 0;
01941 }
01942 
01943 static int
01944 and_code_range1(BBuf** pbuf, ScanEnv* env, OnigCodePoint from1, OnigCodePoint to1,
01945                 OnigCodePoint* data, int n)
01946 {
01947   int i, r;
01948   OnigCodePoint from2, to2;
01949 
01950   for (i = 0; i < n; i++) {
01951     from2 = data[i*2];
01952     to2   = data[i*2+1];
01953     if (from2 < from1) {
01954       if (to2 < from1) continue;
01955       else {
01956         from1 = to2 + 1;
01957       }
01958     }
01959     else if (from2 <= to1) {
01960       if (to2 < to1) {
01961         if (from1 <= from2 - 1) {
01962           r = add_code_range_to_buf(pbuf, env, from1, from2-1);
01963           if (r != 0) return r;
01964         }
01965         from1 = to2 + 1;
01966       }
01967       else {
01968         to1 = from2 - 1;
01969       }
01970     }
01971     else {
01972       from1 = from2;
01973     }
01974     if (from1 > to1) break;
01975   }
01976   if (from1 <= to1) {
01977     r = add_code_range_to_buf(pbuf, env, from1, to1);
01978     if (r != 0) return r;
01979   }
01980   return 0;
01981 }
01982 
01983 static int
01984 and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf, ScanEnv* env)
01985 {
01986   int r;
01987   OnigCodePoint i, j, n1, n2, *data1, *data2;
01988   OnigCodePoint from, to, from1, to1, from2, to2;
01989 
01990   *pbuf = (BBuf* )NULL;
01991   if (IS_NULL(bbuf1)) {
01992     if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */
01993       return bbuf_clone(pbuf, bbuf2);
01994     return 0;
01995   }
01996   else if (IS_NULL(bbuf2)) {
01997     if (not2 != 0)
01998       return bbuf_clone(pbuf, bbuf1);
01999     return 0;
02000   }
02001 
02002   if (not1 != 0)
02003     SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
02004 
02005   data1 = (OnigCodePoint* )(bbuf1->p);
02006   data2 = (OnigCodePoint* )(bbuf2->p);
02007   GET_CODE_POINT(n1, data1);
02008   GET_CODE_POINT(n2, data2);
02009   data1++;
02010   data2++;
02011 
02012   if (not2 == 0 && not1 == 0) { /* 1 AND 2 */
02013     for (i = 0; i < n1; i++) {
02014       from1 = data1[i*2];
02015       to1   = data1[i*2+1];
02016       for (j = 0; j < n2; j++) {
02017         from2 = data2[j*2];
02018         to2   = data2[j*2+1];
02019         if (from2 > to1) break;
02020         if (to2 < from1) continue;
02021         from = MAX(from1, from2);
02022         to   = MIN(to1, to2);
02023         r = add_code_range_to_buf(pbuf, env, from, to);
02024         if (r != 0) return r;
02025       }
02026     }
02027   }
02028   else if (not1 == 0) { /* 1 AND (not 2) */
02029     for (i = 0; i < n1; i++) {
02030       from1 = data1[i*2];
02031       to1   = data1[i*2+1];
02032       r = and_code_range1(pbuf, env, from1, to1, data2, n2);
02033       if (r != 0) return r;
02034     }
02035   }
02036 
02037   return 0;
02038 }
02039 
02040 static int
02041 and_cclass(CClassNode* dest, CClassNode* cc, ScanEnv* env)
02042 {
02043   OnigEncoding enc = env->enc;
02044   int r, not1, not2;
02045   BBuf *buf1, *buf2, *pbuf = 0;
02046   BitSetRef bsr1, bsr2;
02047   BitSet bs1, bs2;
02048 
02049   not1 = IS_NCCLASS_NOT(dest);
02050   bsr1 = dest->bs;
02051   buf1 = dest->mbuf;
02052   not2 = IS_NCCLASS_NOT(cc);
02053   bsr2 = cc->bs;
02054   buf2 = cc->mbuf;
02055 
02056   if (not1 != 0) {
02057     bitset_invert_to(bsr1, bs1);
02058     bsr1 = bs1;
02059   }
02060   if (not2 != 0) {
02061     bitset_invert_to(bsr2, bs2);
02062     bsr2 = bs2;
02063   }
02064   bitset_and(bsr1, bsr2);
02065   if (bsr1 != dest->bs) {
02066     bitset_copy(dest->bs, bsr1);
02067     bsr1 = dest->bs;
02068   }
02069   if (not1 != 0) {
02070     bitset_invert(dest->bs);
02071   }
02072 
02073   if (! ONIGENC_IS_SINGLEBYTE(enc)) {
02074     if (not1 != 0 && not2 != 0) {
02075       r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf, env);
02076     }
02077     else {
02078       r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf, env);
02079       if (r == 0 && not1 != 0) {
02080         BBuf *tbuf = 0;
02081         r = not_code_range_buf(enc, pbuf, &tbuf, env);
02082         bbuf_free(pbuf);
02083         pbuf = tbuf;
02084       }
02085     }
02086     if (r != 0) {
02087       bbuf_free(pbuf);
02088       return r;
02089     }
02090 
02091     dest->mbuf = pbuf;
02092     bbuf_free(buf1);
02093     return r;
02094   }
02095   return 0;
02096 }
02097 
02098 static int
02099 or_cclass(CClassNode* dest, CClassNode* cc, ScanEnv* env)
02100 {
02101   OnigEncoding enc = env->enc;
02102   int r, not1, not2;
02103   BBuf *buf1, *buf2, *pbuf = 0;
02104   BitSetRef bsr1, bsr2;
02105   BitSet bs1, bs2;
02106 
02107   not1 = IS_NCCLASS_NOT(dest);
02108   bsr1 = dest->bs;
02109   buf1 = dest->mbuf;
02110   not2 = IS_NCCLASS_NOT(cc);
02111   bsr2 = cc->bs;
02112   buf2 = cc->mbuf;
02113 
02114   if (not1 != 0) {
02115     bitset_invert_to(bsr1, bs1);
02116     bsr1 = bs1;
02117   }
02118   if (not2 != 0) {
02119     bitset_invert_to(bsr2, bs2);
02120     bsr2 = bs2;
02121   }
02122   bitset_or(bsr1, bsr2);
02123   if (bsr1 != dest->bs) {
02124     bitset_copy(dest->bs, bsr1);
02125     bsr1 = dest->bs;
02126   }
02127   if (not1 != 0) {
02128     bitset_invert(dest->bs);
02129   }
02130 
02131   if (! ONIGENC_IS_SINGLEBYTE(enc)) {
02132     if (not1 != 0 && not2 != 0) {
02133       r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf, env);
02134     }
02135     else {
02136       r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf, env);
02137       if (r == 0 && not1 != 0) {
02138         BBuf *tbuf = 0;
02139         r = not_code_range_buf(enc, pbuf, &tbuf, env);
02140         bbuf_free(pbuf);
02141         pbuf = tbuf;
02142       }
02143     }
02144     if (r != 0) {
02145       bbuf_free(pbuf);
02146       return r;
02147     }
02148 
02149     dest->mbuf = pbuf;
02150     bbuf_free(buf1);
02151     return r;
02152   }
02153   else
02154     return 0;
02155 }
02156 
02157 static void UNKNOWN_ESC_WARN(ScanEnv *env, int c);
02158 
02159 static int
02160 conv_backslash_value(int c, ScanEnv* env)
02161 {
02162   if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) {
02163     switch (c) {
02164     case 'n': return '\n';
02165     case 't': return '\t';
02166     case 'r': return '\r';
02167     case 'f': return '\f';
02168     case 'a': return '\007';
02169     case 'b': return '\010';
02170     case 'e': return '\033';
02171     case 'v':
02172       if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB))
02173         return '\v';
02174       break;
02175 
02176     default:
02177       if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'))
02178           UNKNOWN_ESC_WARN(env, c);
02179       break;
02180     }
02181   }
02182   return c;
02183 }
02184 
02185 #ifdef USE_NO_INVALID_QUANTIFIER
02186 #define is_invalid_quantifier_target(node) 0
02187 #else
02188 static int
02189 is_invalid_quantifier_target(Node* node)
02190 {
02191   switch (NTYPE(node)) {
02192   case NT_ANCHOR:
02193     return 1;
02194     break;
02195 
02196   case NT_ENCLOSE:
02197     /* allow enclosed elements */
02198     /* return is_invalid_quantifier_target(NENCLOSE(node)->target); */
02199     break;
02200 
02201   case NT_LIST:
02202     do {
02203       if (! is_invalid_quantifier_target(NCAR(node))) return 0;
02204     } while (IS_NOT_NULL(node = NCDR(node)));
02205     return 0;
02206     break;
02207 
02208   case NT_ALT:
02209     do {
02210       if (is_invalid_quantifier_target(NCAR(node))) return 1;
02211     } while (IS_NOT_NULL(node = NCDR(node)));
02212     break;
02213 
02214   default:
02215     break;
02216   }
02217   return 0;
02218 }
02219 #endif
02220 
02221 /* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */
02222 static int
02223 popular_quantifier_num(QtfrNode* q)
02224 {
02225   if (q->greedy) {
02226     if (q->lower == 0) {
02227       if (q->upper == 1) return 0;
02228       else if (IS_REPEAT_INFINITE(q->upper)) return 1;
02229     }
02230     else if (q->lower == 1) {
02231       if (IS_REPEAT_INFINITE(q->upper)) return 2;
02232     }
02233   }
02234   else {
02235     if (q->lower == 0) {
02236       if (q->upper == 1) return 3;
02237       else if (IS_REPEAT_INFINITE(q->upper)) return 4;
02238     }
02239     else if (q->lower == 1) {
02240       if (IS_REPEAT_INFINITE(q->upper)) return 5;
02241     }
02242   }
02243   return -1;
02244 }
02245 
02246 
/* Ways in which two directly nested quantifiers can be reduced. */
enum ReduceType {
  RQ_ASIS = 0, /* as is */
  RQ_DEL  = 1, /* delete parent */
  RQ_A    = 2, /* to '*'    */
  RQ_AQ   = 3, /* to '*?'   */
  RQ_QQ   = 4, /* to '??'   */
  RQ_P_QQ = 5, /* to '+)??' */
  RQ_PQ_Q = 6  /* to '+?)?' */
};
02256 
/* Reduction to apply when one common quantifier is nested directly inside
 * another.  onig_reduce_nested_quantifier() reads it as
 * ReduceTypeTable[cnum][pnum]: the row is the inner (child) quantifier and
 * the column the outer (parent) one, both numbered by
 * popular_quantifier_num() as ?:0, *:1, +:2, ??:3, *?:4, +?:5.
 * The trailing comment on each row names the child quantifier. */
02257 static enum ReduceType const ReduceTypeTable[6][6] = {
02258   {RQ_DEL,  RQ_A,    RQ_A,   RQ_QQ,   RQ_AQ,   RQ_ASIS}, /* '?'  */
02259   {RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL},  /* '*'  */
02260   {RQ_A,    RQ_A,    RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL},  /* '+'  */
02261   {RQ_DEL,  RQ_AQ,   RQ_AQ,  RQ_DEL,  RQ_AQ,   RQ_AQ},   /* '??' */
02262   {RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_DEL,  RQ_DEL,  RQ_DEL},  /* '*?' */
02263   {RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ,   RQ_AQ,   RQ_DEL}   /* '+?' */
02264 };
02265 
02266 extern void
02267 onig_reduce_nested_quantifier(Node* pnode, Node* cnode)
02268 {
02269   int pnum, cnum;
02270   QtfrNode *p, *c;
02271 
02272   p = NQTFR(pnode);
02273   c = NQTFR(cnode);
02274   pnum = popular_quantifier_num(p);
02275   cnum = popular_quantifier_num(c);
02276   if (pnum < 0 || cnum < 0) return ;
02277 
02278   switch (ReduceTypeTable[cnum][pnum]) {
02279   case RQ_DEL:
02280     *pnode = *cnode;
02281     break;
02282   case RQ_A:
02283     p->target = c->target;
02284     p->lower  = 0;  p->upper = REPEAT_INFINITE;  p->greedy = 1;
02285     break;
02286   case RQ_AQ:
02287     p->target = c->target;
02288     p->lower  = 0;  p->upper = REPEAT_INFINITE;  p->greedy = 0;
02289     break;
02290   case RQ_QQ:
02291     p->target = c->target;
02292     p->lower  = 0;  p->upper = 1;  p->greedy = 0;
02293     break;
02294   case RQ_P_QQ:
02295     p->target = cnode;
02296     p->lower  = 0;  p->upper = 1;  p->greedy = 0;
02297     c->lower  = 1;  c->upper = REPEAT_INFINITE;  c->greedy = 1;
02298     return ;
02299     break;
02300   case RQ_PQ_Q:
02301     p->target = cnode;
02302     p->lower  = 0;  p->upper = 1;  p->greedy = 1;
02303     c->lower  = 1;  c->upper = REPEAT_INFINITE;  c->greedy = 0;
02304     return ;
02305     break;
02306   case RQ_ASIS:
02307     p->target = cnode;
02308     return ;
02309     break;
02310   }
02311 
02312   c->target = NULL_NODE;
02313   onig_node_free(cnode);
02314 }
02315 
02316 
/* Token kinds produced by the pattern tokenizers in this file. */
enum TokenSyms {
  TK_EOT      = 0,             /* end of token */
  TK_RAW_BYTE = 1,             /* e.g. \xHH or an octal escape */
  TK_CHAR,                     /* initial type of a token inside [...] */
  TK_STRING,                   /* initial type of a token outside [...] */
  TK_CODE_POINT,               /* e.g. \x{H..} or \uHHHH */
  TK_ANYCHAR,
  TK_CHAR_TYPE,                /* \w \W \d \D \s \S \h \H */
  TK_BACKREF,
  TK_CALL,
  TK_ANCHOR,
  TK_OP_REPEAT,
  TK_INTERVAL,                 /* see fetch_range_quantifier() */
  TK_ANYCHAR_ANYTIME,          /* SQL '%' == .* */
  TK_ALT,
  TK_SUBEXP_OPEN,
  TK_SUBEXP_CLOSE,
  TK_CC_OPEN,
  TK_QUOTE_OPEN,
  TK_CHAR_PROPERTY,            /* \p{...}, \P{...} */
  TK_LINEBREAK,
  TK_EXTENDED_GRAPHEME_CLUSTER,
  TK_KEEP,

  /* in cc */
  TK_CC_CLOSE,                 /* ]  */
  TK_CC_RANGE,                 /* -  */
  TK_POSIX_BRACKET_OPEN,       /* [: */
  TK_CC_AND,                   /* && */
  TK_CC_CC_OPEN                /* [  */
};
02347 
02348 typedef struct {
02349   enum TokenSyms type;
02350   int escaped;
02351   int base;   /* is number: 8, 16 (used in [....]) */
02352   UChar* backp;
02353   union {
02354     UChar* s;
02355     int   c;
02356     OnigCodePoint code;
02357     struct {
02358       int subtype;
02359       int ascii_range;
02360     } anchor;
02361     struct {
02362       int lower;
02363       int upper;
02364       int greedy;
02365       int possessive;
02366     } repeat;
02367     struct {
02368       int  num;
02369       int  ref1;
02370       int* refs;
02371       int  by_name;
02372 #ifdef USE_BACKREF_WITH_LEVEL
02373       int  exist_level;
02374       int  level;   /* \k<name+n> */
02375 #endif
02376     } backref;
02377     struct {
02378       UChar* name;
02379       UChar* name_end;
02380       int    gnum;
02381       int    rel;
02382     } call;
02383     struct {
02384       int ctype;
02385       int not;
02386     } prop;
02387   } u;
02388 } OnigToken;
02389 
02390 
02391 static int
02392 fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)
02393 {
02394   int low, up, syn_allow, non_low = 0;
02395   int r = 0;
02396   OnigCodePoint c;
02397   OnigEncoding enc = env->enc;
02398   UChar* p = *src;
02399   PFETCH_READY;
02400 
02401   syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL);
02402 
02403   if (PEND) {
02404     if (syn_allow)
02405       return 1;  /* "....{" : OK! */
02406     else
02407       return ONIGERR_END_PATTERN_AT_LEFT_BRACE;  /* "....{" syntax error */
02408   }
02409 
02410   if (! syn_allow) {
02411     c = PPEEK;
02412     if (c == ')' || c == '(' || c == '|') {
02413       return ONIGERR_END_PATTERN_AT_LEFT_BRACE;
02414     }
02415   }
02416 
02417   low = onig_scan_unsigned_number(&p, end, env->enc);
02418   if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
02419   if (low > ONIG_MAX_REPEAT_NUM)
02420     return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
02421 
02422   if (p == *src) { /* can't read low */
02423     if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) {
02424       /* allow {,n} as {0,n} */
02425       low = 0;
02426       non_low = 1;
02427     }
02428     else
02429       goto invalid;
02430   }
02431 
02432   if (PEND) goto invalid;
02433   PFETCH(c);
02434   if (c == ',') {
02435     UChar* prev = p;
02436     up = onig_scan_unsigned_number(&p, end, env->enc);
02437     if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
02438     if (up > ONIG_MAX_REPEAT_NUM)
02439       return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
02440 
02441     if (p == prev) {
02442       if (non_low != 0)
02443         goto invalid;
02444       up = REPEAT_INFINITE;  /* {n,} : {n,infinite} */
02445     }
02446   }
02447   else {
02448     if (non_low != 0)
02449       goto invalid;
02450 
02451     PUNFETCH;
02452     up = low;  /* {n} : exact n times */
02453     r = 2;     /* fixed */
02454   }
02455 
02456   if (PEND) goto invalid;
02457   PFETCH(c);
02458   if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) {
02459     if (c != MC_ESC(env->syntax)) goto invalid;
02460     PFETCH(c);
02461   }
02462   if (c != '}') goto invalid;
02463 
02464   if (!IS_REPEAT_INFINITE(up) && low > up) {
02465     return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE;
02466   }
02467 
02468   tok->type = TK_INTERVAL;
02469   tok->u.repeat.lower = low;
02470   tok->u.repeat.upper = up;
02471   *src = p;
02472   return r; /* 0: normal {n,m}, 2: fixed {n} */
02473 
02474  invalid:
02475   if (syn_allow)
02476     return 1;  /* OK */
02477   else
02478     return ONIGERR_INVALID_REPEAT_RANGE_PATTERN;
02479 }
02480 
02481 /* \M-, \C-, \c, or \... */
02482 static int
02483 fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
02484 {
02485   int v;
02486   OnigCodePoint c;
02487   OnigEncoding enc = env->enc;
02488   UChar* p = *src;
02489   PFETCH_READY;
02490 
02491   if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
02492 
02493   PFETCH(c);
02494   switch (c) {
02495   case 'M':
02496     if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) {
02497       if (PEND) return ONIGERR_END_PATTERN_AT_META;
02498       PFETCH(c);
02499       if (c != '-') return ONIGERR_META_CODE_SYNTAX;
02500       if (PEND) return ONIGERR_END_PATTERN_AT_META;
02501       PFETCH(c);
02502       if (c == MC_ESC(env->syntax)) {
02503         v = fetch_escaped_value(&p, end, env);
02504         if (v < 0) return v;
02505         c = (OnigCodePoint )v;
02506       }
02507       c = ((c & 0xff) | 0x80);
02508     }
02509     else
02510       goto backslash;
02511     break;
02512 
02513   case 'C':
02514     if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) {
02515       if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
02516       PFETCH(c);
02517       if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX;
02518       goto control;
02519     }
02520     else
02521       goto backslash;
02522 
02523   case 'c':
02524     if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) {
02525     control:
02526       if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
02527       PFETCH(c);
02528       if (c == '?') {
02529         c = 0177;
02530       }
02531       else {
02532         if (c == MC_ESC(env->syntax)) {
02533           v = fetch_escaped_value(&p, end, env);
02534           if (v < 0) return v;
02535           c = (OnigCodePoint )v;
02536         }
02537         c &= 0x9f;
02538       }
02539       break;
02540     }
02541     /* fall through */
02542 
02543   default:
02544     {
02545     backslash:
02546       c = conv_backslash_value(c, env);
02547     }
02548     break;
02549   }
02550 
02551   *src = p;
02552   return c;
02553 }
02554 
02555 static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env);
02556 
02557 static OnigCodePoint
02558 get_name_end_code_point(OnigCodePoint start)
02559 {
02560   switch (start) {
02561   case '<':  return (OnigCodePoint )'>'; break;
02562   case '\'': return (OnigCodePoint )'\''; break;
02563   case '(':  return (OnigCodePoint )')'; break;
02564   case '{':  return (OnigCodePoint )'}'; break;
02565   default:
02566     break;
02567   }
02568 
02569   return (OnigCodePoint )0;
02570 }
02571 
02572 #ifdef USE_NAMED_GROUP
02573 #ifdef USE_BACKREF_WITH_LEVEL
02574 /*
02575    \k<name+n>, \k<name-n>
02576    \k<num+n>,  \k<num-n>
02577    \k<-num+n>, \k<-num-n>
02578 */
02579 static int
02580 fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
02581                       UChar** rname_end, ScanEnv* env,
02582                       int* rback_num, int* rlevel)
02583 {
02584   int r, sign, is_num, exist_level;
02585   OnigCodePoint end_code;
02586   OnigCodePoint c = 0;
02587   OnigEncoding enc = env->enc;
02588   UChar *name_end;
02589   UChar *pnum_head;
02590   UChar *p = *src;
02591   PFETCH_READY;
02592 
02593   *rback_num = 0;
02594   is_num = exist_level = 0;
02595   sign = 1;
02596   pnum_head = *src;
02597 
02598   end_code = get_name_end_code_point(start_code);
02599 
02600   name_end = end;
02601   r = 0;
02602   if (PEND) {
02603     return ONIGERR_EMPTY_GROUP_NAME;
02604   }
02605   else {
02606     PFETCH(c);
02607     if (c == end_code)
02608       return ONIGERR_EMPTY_GROUP_NAME;
02609 
02610     if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
02611       is_num = 1;
02612     }
02613     else if (c == '-') {
02614       is_num = 2;
02615       sign = -1;
02616       pnum_head = p;
02617     }
02618     else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
02619       r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
02620     }
02621   }
02622 
02623   while (!PEND) {
02624     name_end = p;
02625     PFETCH(c);
02626     if (c == end_code || c == ')' || c == '+' || c == '-') {
02627       if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME;
02628       break;
02629     }
02630 
02631     if (is_num != 0) {
02632       if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
02633         is_num = 1;
02634       }
02635       else {
02636         r = ONIGERR_INVALID_GROUP_NAME;
02637         is_num = 0;
02638       }
02639     }
02640     else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
02641       r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
02642     }
02643   }
02644 
02645   if (r == 0 && c != end_code) {
02646     if (c == '+' || c == '-') {
02647       int level;
02648       int flag = (c == '-' ? -1 : 1);
02649 
02650       PFETCH(c);
02651       if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto err;
02652       PUNFETCH;
02653       level = onig_scan_unsigned_number(&p, end, enc);
02654       if (level < 0) return ONIGERR_TOO_BIG_NUMBER;
02655       *rlevel = (level * flag);
02656       exist_level = 1;
02657 
02658       PFETCH(c);
02659       if (c == end_code)
02660         goto end;
02661     }
02662 
02663   err:
02664     r = ONIGERR_INVALID_GROUP_NAME;
02665     name_end = end;
02666   }
02667 
02668  end:
02669   if (r == 0) {
02670     if (is_num != 0) {
02671       *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
02672       if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
02673       else if (*rback_num == 0) goto err;
02674 
02675       *rback_num *= sign;
02676     }
02677 
02678     *rname_end = name_end;
02679     *src = p;
02680     return (exist_level ? 1 : 0);
02681   }
02682   else {
02683     onig_scan_env_set_error_string(env, r, *src, name_end);
02684     return r;
02685   }
02686 }
02687 #endif /* USE_BACKREF_WITH_LEVEL */
02688 
02689 /*
02690   ref: 0 -> define name    (don't allow number name)
02691        1 -> reference name (allow number name)
02692 */
02693 static int
02694 fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
02695            UChar** rname_end, ScanEnv* env, int* rback_num, int ref)
02696 {
02697   int r, is_num, sign;
02698   OnigCodePoint end_code;
02699   OnigCodePoint c = 0;
02700   OnigEncoding enc = env->enc;
02701   UChar *name_end;
02702   UChar *pnum_head;
02703   UChar *p = *src;
02704   PFETCH_READY;
02705 
02706   *rback_num = 0;
02707 
02708   end_code = get_name_end_code_point(start_code);
02709 
02710   name_end = end;
02711   pnum_head = *src;
02712   r = 0;
02713   is_num = 0;
02714   sign = 1;
02715   if (PEND) {
02716     return ONIGERR_EMPTY_GROUP_NAME;
02717   }
02718   else {
02719     PFETCH(c);
02720     if (c == end_code)
02721       return ONIGERR_EMPTY_GROUP_NAME;
02722 
02723     if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
02724       if (ref == 1)
02725         is_num = 1;
02726       else {
02727         r = ONIGERR_INVALID_GROUP_NAME;
02728         is_num = 0;
02729       }
02730     }
02731     else if (c == '-') {
02732       if (ref == 1) {
02733         is_num = 2;
02734         sign = -1;
02735         pnum_head = p;
02736       }
02737       else {
02738         r = ONIGERR_INVALID_GROUP_NAME;
02739         is_num = 0;
02740       }
02741     }
02742     else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
02743       r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
02744     }
02745   }
02746 
02747   if (r == 0) {
02748     while (!PEND) {
02749       name_end = p;
02750       PFETCH(c);
02751       if (c == end_code || c == ')') {
02752         if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME;
02753         break;
02754       }
02755 
02756       if (is_num != 0) {
02757         if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
02758           is_num = 1;
02759         }
02760         else {
02761           if (!ONIGENC_IS_CODE_WORD(enc, c))
02762             r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
02763           else
02764             r = ONIGERR_INVALID_GROUP_NAME;
02765 
02766           is_num = 0;
02767         }
02768       }
02769       else {
02770         if (!ONIGENC_IS_CODE_WORD(enc, c)) {
02771           r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
02772         }
02773       }
02774     }
02775 
02776     if (c != end_code) {
02777       r = ONIGERR_INVALID_GROUP_NAME;
02778       name_end = end;
02779     }
02780 
02781     if (is_num != 0) {
02782       *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
02783       if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
02784       else if (*rback_num == 0) {
02785         r = ONIGERR_INVALID_GROUP_NAME;
02786         goto err;
02787       }
02788 
02789       *rback_num *= sign;
02790     }
02791 
02792     *rname_end = name_end;
02793     *src = p;
02794     return 0;
02795   }
02796   else {
02797     while (!PEND) {
02798       name_end = p;
02799       PFETCH(c);
02800       if (c == end_code || c == ')')
02801         break;
02802     }
02803     if (PEND)
02804       name_end = end;
02805 
02806   err:
02807     onig_scan_env_set_error_string(env, r, *src, name_end);
02808     return r;
02809   }
02810 }
02811 #else
02812 static int
02813 fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
02814            UChar** rname_end, ScanEnv* env, int* rback_num, int ref)
02815 {
02816   int r, is_num, sign;
02817   OnigCodePoint end_code;
02818   OnigCodePoint c = 0;
02819   UChar *name_end;
02820   OnigEncoding enc = env->enc;
02821   UChar *pnum_head;
02822   UChar *p = *src;
02823   PFETCH_READY;
02824 
02825   *rback_num = 0;
02826 
02827   end_code = get_name_end_code_point(start_code);
02828 
02829   *rname_end = name_end = end;
02830   r = 0;
02831   pnum_head = *src;
02832   is_num = 0;
02833   sign = 1;
02834 
02835   if (PEND) {
02836     return ONIGERR_EMPTY_GROUP_NAME;
02837   }
02838   else {
02839     PFETCH(c);
02840     if (c == end_code)
02841       return ONIGERR_EMPTY_GROUP_NAME;
02842 
02843     if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
02844       is_num = 1;
02845     }
02846     else if (c == '-') {
02847       is_num = 2;
02848       sign = -1;
02849       pnum_head = p;
02850     }
02851     else {
02852       r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
02853     }
02854   }
02855 
02856   while (!PEND) {
02857     name_end = p;
02858 
02859     PFETCH(c);
02860     if (c == end_code || c == ')') break;
02861     if (! ONIGENC_IS_CODE_DIGIT(enc, c))
02862       r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
02863   }
02864   if (r == 0 && c != end_code) {
02865     r = ONIGERR_INVALID_GROUP_NAME;
02866     name_end = end;
02867   }
02868 
02869   if (r == 0) {
02870     *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
02871     if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
02872     else if (*rback_num == 0) {
02873       r = ONIGERR_INVALID_GROUP_NAME;
02874       goto err;
02875     }
02876     *rback_num *= sign;
02877 
02878     *rname_end = name_end;
02879     *src = p;
02880     return 0;
02881   }
02882   else {
02883   err:
02884     onig_scan_env_set_error_string(env, r, *src, name_end);
02885     return r;
02886   }
02887 }
02888 #endif /* USE_NAMED_GROUP */
02889 
02890 void onig_vsnprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc,
02891                            UChar* pat, UChar* pat_end, const UChar *fmt, va_list args);
02892 
02893 static void
02894 onig_syntax_warn(ScanEnv *env, const char *fmt, ...)
02895 {
02896     va_list args;
02897     UChar buf[WARN_BUFSIZE];
02898     va_start(args, fmt);
02899     onig_vsnprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
02900                 env->pattern, env->pattern_end,
02901                 (const UChar *)fmt, args);
02902     va_end(args);
02903     if (env->sourcefile == NULL)
02904       rb_warn("%s", (char *)buf);
02905     else
02906       rb_compile_warn(env->sourcefile, env->sourceline, "%s", (char *)buf);
02907 }
02908 
02909 static void
02910 CC_ESC_WARN(ScanEnv *env, UChar *c)
02911 {
02912   if (onig_warn == onig_null_warn) return ;
02913 
02914   if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) &&
02915       IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) {
02916     onig_syntax_warn(env, "character class has '%s' without escape", c);
02917   }
02918 }
02919 
02920 static void
02921 CLOSE_BRACKET_WITHOUT_ESC_WARN(ScanEnv* env, UChar* c)
02922 {
02923   if (onig_warn == onig_null_warn) return ;
02924 
02925   if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) {
02926     onig_syntax_warn(env, "regular expression has '%s' without escape", c);
02927   }
02928 }
02929 
02930 static void
02931 CC_DUP_WARN(ScanEnv *env)
02932 {
02933   if (onig_warn == onig_null_warn || !RTEST(ruby_verbose)) return ;
02934 
02935   if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_DUP) &&
02936       !(env->warnings_flag & ONIG_SYN_WARN_CC_DUP)) {
02937     env->warnings_flag |= ONIG_SYN_WARN_CC_DUP;
02938     onig_syntax_warn(env, "character class has duplicated range");
02939   }
02940 }
02941 
02942 static void
02943 UNKNOWN_ESC_WARN(ScanEnv *env, int c)
02944 {
02945   if (onig_warn == onig_null_warn || !RTEST(ruby_verbose)) return ;
02946   onig_syntax_warn(env, "Unknown escape \\%c is ignored", c);
02947 }
02948 
02949 static UChar*
02950 find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,
02951                   UChar **next, OnigEncoding enc)
02952 {
02953   int i;
02954   OnigCodePoint x;
02955   UChar *q;
02956   UChar *p = from;
02957 
02958   while (p < to) {
02959     x = ONIGENC_MBC_TO_CODE(enc, p, to);
02960     q = p + enclen(enc, p, to);
02961     if (x == s[0]) {
02962       for (i = 1; i < n && q < to; i++) {
02963         x = ONIGENC_MBC_TO_CODE(enc, q, to);
02964         if (x != s[i]) break;
02965         q += enclen(enc, q, to);
02966       }
02967       if (i >= n) {
02968         if (IS_NOT_NULL(next))
02969           *next = q;
02970         return p;
02971       }
02972     }
02973     p = q;
02974   }
02975   return NULL_UCHARP;
02976 }
02977 
02978 static int
02979 str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,
02980                  OnigCodePoint bad, OnigEncoding enc, const OnigSyntaxType* syn)
02981 {
02982   int i, in_esc;
02983   OnigCodePoint x;
02984   UChar *q;
02985   UChar *p = from;
02986 
02987   in_esc = 0;
02988   while (p < to) {
02989     if (in_esc) {
02990       in_esc = 0;
02991       p += enclen(enc, p, to);
02992     }
02993     else {
02994       x = ONIGENC_MBC_TO_CODE(enc, p, to);
02995       q = p + enclen(enc, p, to);
02996       if (x == s[0]) {
02997         for (i = 1; i < n && q < to; i++) {
02998           x = ONIGENC_MBC_TO_CODE(enc, q, to);
02999           if (x != s[i]) break;
03000           q += enclen(enc, q, to);
03001         }
03002         if (i >= n) return 1;
03003         p += enclen(enc, p, to);
03004       }
03005       else {
03006         x = ONIGENC_MBC_TO_CODE(enc, p, to);
03007         if (x == bad) return 0;
03008         else if (x == MC_ESC(syn)) in_esc = 1;
03009         p = q;
03010       }
03011     }
03012   }
03013   return 0;
03014 }
03015 
03016 static int
03017 fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
03018 {
03019   int num;
03020   OnigCodePoint c, c2;
03021   const OnigSyntaxType* syn = env->syntax;
03022   OnigEncoding enc = env->enc;
03023   UChar* prev;
03024   UChar* p = *src;
03025   PFETCH_READY;
03026 
03027   if (PEND) {
03028     tok->type = TK_EOT;
03029     return tok->type;
03030   }
03031 
03032   PFETCH(c);
03033   tok->type = TK_CHAR;
03034   tok->base = 0;
03035   tok->u.c  = c;
03036   tok->escaped = 0;
03037 
03038   if (c == ']') {
03039     tok->type = TK_CC_CLOSE;
03040   }
03041   else if (c == '-') {
03042     tok->type = TK_CC_RANGE;
03043   }
03044   else if (c == MC_ESC(syn)) {
03045     if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC))
03046       goto end;
03047 
03048     if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
03049 
03050     PFETCH(c);
03051     tok->escaped = 1;
03052     tok->u.c = c;
03053     switch (c) {
03054     case 'w':
03055       tok->type = TK_CHAR_TYPE;
03056       tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
03057       tok->u.prop.not   = 0;
03058       break;
03059     case 'W':
03060       tok->type = TK_CHAR_TYPE;
03061       tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
03062       tok->u.prop.not   = 1;
03063       break;
03064     case 'd':
03065       tok->type = TK_CHAR_TYPE;
03066       tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
03067       tok->u.prop.not   = 0;
03068       break;
03069     case 'D':
03070       tok->type = TK_CHAR_TYPE;
03071       tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
03072       tok->u.prop.not   = 1;
03073       break;
03074     case 's':
03075       tok->type = TK_CHAR_TYPE;
03076       tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
03077       tok->u.prop.not   = 0;
03078       break;
03079     case 'S':
03080       tok->type = TK_CHAR_TYPE;
03081       tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
03082       tok->u.prop.not   = 1;
03083       break;
03084     case 'h':
03085       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
03086       tok->type = TK_CHAR_TYPE;
03087       tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
03088       tok->u.prop.not   = 0;
03089       break;
03090     case 'H':
03091       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
03092       tok->type = TK_CHAR_TYPE;
03093       tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
03094       tok->u.prop.not   = 1;
03095       break;
03096 
03097     case 'p':
03098     case 'P':
03099       c2 = PPEEK;
03100       if (c2 == '{' &&
03101           IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
03102         PINC;
03103         tok->type = TK_CHAR_PROPERTY;
03104         tok->u.prop.not = (c == 'P' ? 1 : 0);
03105 
03106         if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
03107           PFETCH(c2);
03108           if (c2 == '^') {
03109             tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
03110           }
03111           else
03112             PUNFETCH;
03113         }
03114       }
03115       else {
03116         onig_syntax_warn(env, "invalid Unicode Property \\%c", c);
03117       }
03118       break;
03119 
03120     case 'x':
03121       if (PEND) break;
03122 
03123       prev = p;
03124       if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
03125         PINC;
03126         num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);
03127         if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
03128         if (!PEND) {
03129           c2 = PPEEK;
03130           if (ONIGENC_IS_CODE_XDIGIT(enc, c2))
03131             return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
03132         }
03133 
03134         if (p > prev + enclen(enc, prev, end) && !PEND && (PPEEK_IS('}'))) {
03135           PINC;
03136           tok->type   = TK_CODE_POINT;
03137           tok->base   = 16;
03138           tok->u.code = (OnigCodePoint )num;
03139         }
03140         else {
03141           /* can't read nothing or invalid format */
03142           p = prev;
03143         }
03144       }
03145       else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
03146         num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc);
03147         if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
03148         if (p == prev) {  /* can't read nothing. */
03149           num = 0; /* but, it's not error */
03150         }
03151         tok->type = TK_RAW_BYTE;
03152         tok->base = 16;
03153         tok->u.c  = num;
03154       }
03155       break;
03156 
03157     case 'u':
03158       if (PEND) break;
03159 
03160       prev = p;
03161       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
03162         num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc);
03163         if (num < -1) return ONIGERR_TOO_SHORT_DIGITS;
03164         else if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
03165         if (p == prev) {  /* can't read nothing. */
03166           num = 0; /* but, it's not error */
03167         }
03168         tok->type   = TK_CODE_POINT;
03169         tok->base   = 16;
03170         tok->u.code = (OnigCodePoint )num;
03171       }
03172       break;
03173 
03174     case '0':
03175     case '1': case '2': case '3': case '4': case '5': case '6': case '7':
03176       if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
03177         PUNFETCH;
03178         prev = p;
03179         num = scan_unsigned_octal_number(&p, end, 3, enc);
03180         if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
03181         if (p == prev) {  /* can't read nothing. */
03182           num = 0; /* but, it's not error */
03183         }
03184         tok->type = TK_RAW_BYTE;
03185         tok->base = 8;
03186         tok->u.c  = num;
03187       }
03188       break;
03189 
03190     default:
03191       PUNFETCH;
03192       num = fetch_escaped_value(&p, end, env);
03193       if (num < 0) return num;
03194       if (tok->u.c != num) {
03195         tok->u.code = (OnigCodePoint )num;
03196         tok->type   = TK_CODE_POINT;
03197       }
03198       break;
03199     }
03200   }
03201   else if (c == '[') {
03202     if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) {
03203       OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' };
03204       tok->backp = p; /* point at '[' is read */
03205       PINC;
03206       if (str_exist_check_with_esc(send, 2, p, end,
03207                                    (OnigCodePoint )']', enc, syn)) {
03208         tok->type = TK_POSIX_BRACKET_OPEN;
03209       }
03210       else {
03211         PUNFETCH;
03212         goto cc_in_cc;
03213       }
03214     }
03215     else {
03216     cc_in_cc:
03217       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) {
03218         tok->type = TK_CC_CC_OPEN;
03219       }
03220       else {
03221         CC_ESC_WARN(env, (UChar* )"[");
03222       }
03223     }
03224   }
03225   else if (c == '&') {
03226     if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) &&
03227         !PEND && (PPEEK_IS('&'))) {
03228       PINC;
03229       tok->type = TK_CC_AND;
03230     }
03231   }
03232 
03233  end:
03234   *src = p;
03235   return tok->type;
03236 }
03237 
03238 #ifdef USE_NAMED_GROUP
03239 static int
03240 fetch_named_backref_token(OnigCodePoint c, OnigToken* tok, UChar** src,
03241                           UChar* end, ScanEnv* env)
03242 {
03243   int r, num;
03244   const OnigSyntaxType* syn = env->syntax;
03245   UChar* prev;
03246   UChar* p = *src;
03247   UChar* name_end;
03248   int* backs;
03249   int back_num;
03250 
03251   prev = p;
03252 
03253 #ifdef USE_BACKREF_WITH_LEVEL
03254   name_end = NULL_UCHARP; /* no need. escape gcc warning. */
03255   r = fetch_name_with_level(c, &p, end, &name_end,
03256                             env, &back_num, &tok->u.backref.level);
03257   if (r == 1) tok->u.backref.exist_level = 1;
03258   else        tok->u.backref.exist_level = 0;
03259 #else
03260   r = fetch_name(&p, end, &name_end, env, &back_num, 1);
03261 #endif
03262   if (r < 0) return r;
03263 
03264   if (back_num != 0) {
03265     if (back_num < 0) {
03266       back_num = BACKREF_REL_TO_ABS(back_num, env);
03267       if (back_num <= 0)
03268         return ONIGERR_INVALID_BACKREF;
03269     }
03270 
03271     if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
03272       if (back_num > env->num_mem ||
03273           IS_NULL(SCANENV_MEM_NODES(env)[back_num]))
03274         return ONIGERR_INVALID_BACKREF;
03275     }
03276     tok->type = TK_BACKREF;
03277     tok->u.backref.by_name = 0;
03278     tok->u.backref.num  = 1;
03279     tok->u.backref.ref1 = back_num;
03280   }
03281   else {
03282     num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);
03283     if (num <= 0) {
03284       onig_scan_env_set_error_string(env,
03285                      ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);
03286       return ONIGERR_UNDEFINED_NAME_REFERENCE;
03287     }
03288     if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
03289       int i;
03290       for (i = 0; i < num; i++) {
03291         if (backs[i] > env->num_mem ||
03292             IS_NULL(SCANENV_MEM_NODES(env)[backs[i]]))
03293           return ONIGERR_INVALID_BACKREF;
03294       }
03295     }
03296 
03297     tok->type = TK_BACKREF;
03298     tok->u.backref.by_name = 1;
03299     if (num == 1) {
03300       tok->u.backref.num  = 1;
03301       tok->u.backref.ref1 = backs[0];
03302     }
03303     else {
03304       tok->u.backref.num  = num;
03305       tok->u.backref.refs = backs;
03306     }
03307   }
03308   *src = p;
03309   return 0;
03310 }
03311 #endif
03312 
03313 static int
03314 fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
03315 {
03316   int r, num;
03317   OnigCodePoint c;
03318   OnigEncoding enc = env->enc;
03319   const OnigSyntaxType* syn = env->syntax;
03320   UChar* prev;
03321   UChar* p = *src;
03322   PFETCH_READY;
03323 
03324  start:
03325   if (PEND) {
03326     tok->type = TK_EOT;
03327     return tok->type;
03328   }
03329 
03330   tok->type  = TK_STRING;
03331   tok->base  = 0;
03332   tok->backp = p;
03333 
03334   PFETCH(c);
03335   if (IS_MC_ESC_CODE(c, syn)) {
03336     if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
03337 
03338     tok->backp = p;
03339     PFETCH(c);
03340 
03341     tok->u.c = c;
03342     tok->escaped = 1;
03343     switch (c) {
03344     case '*':
03345       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break;
03346       tok->type = TK_OP_REPEAT;
03347       tok->u.repeat.lower = 0;
03348       tok->u.repeat.upper = REPEAT_INFINITE;
03349       goto greedy_check;
03350       break;
03351 
03352     case '+':
03353       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break;
03354       tok->type = TK_OP_REPEAT;
03355       tok->u.repeat.lower = 1;
03356       tok->u.repeat.upper = REPEAT_INFINITE;
03357       goto greedy_check;
03358       break;
03359 
03360     case '?':
03361       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break;
03362       tok->type = TK_OP_REPEAT;
03363       tok->u.repeat.lower = 0;
03364       tok->u.repeat.upper = 1;
03365     greedy_check:
03366       if (!PEND && PPEEK_IS('?') &&
03367           IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) {
03368         PFETCH(c);
03369         tok->u.repeat.greedy     = 0;
03370         tok->u.repeat.possessive = 0;
03371       }
03372       else {
03373       possessive_check:
03374         if (!PEND && PPEEK_IS('+') &&
03375             ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) &&
03376               tok->type != TK_INTERVAL)  ||
03377              (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) &&
03378               tok->type == TK_INTERVAL))) {
03379           PFETCH(c);
03380           tok->u.repeat.greedy     = 1;
03381           tok->u.repeat.possessive = 1;
03382         }
03383         else {
03384           tok->u.repeat.greedy     = 1;
03385           tok->u.repeat.possessive = 0;
03386         }
03387       }
03388       break;
03389 
03390     case '{':
03391       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break;
03392       r = fetch_range_quantifier(&p, end, tok, env);
03393       if (r < 0) return r;  /* error */
03394       if (r == 0) goto greedy_check;
03395       else if (r == 2) { /* {n} */
03396         if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
03397           goto possessive_check;
03398 
03399         goto greedy_check;
03400       }
03401       /* r == 1 : normal char */
03402       break;
03403 
03404     case '|':
03405       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break;
03406       tok->type = TK_ALT;
03407       break;
03408 
03409     case '(':
03410       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
03411       tok->type = TK_SUBEXP_OPEN;
03412       break;
03413 
03414     case ')':
03415       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
03416       tok->type = TK_SUBEXP_CLOSE;
03417       break;
03418 
03419     case 'w':
03420       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
03421       tok->type = TK_CHAR_TYPE;
03422       tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
03423       tok->u.prop.not   = 0;
03424       break;
03425 
03426     case 'W':
03427       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
03428       tok->type = TK_CHAR_TYPE;
03429       tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
03430       tok->u.prop.not   = 1;
03431       break;
03432 
03433     case 'b':
03434       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
03435       tok->type = TK_ANCHOR;
03436       tok->u.anchor.subtype = ANCHOR_WORD_BOUND;
03437       tok->u.anchor.ascii_range = IS_ASCII_RANGE(env->option)
03438                 && ! IS_WORD_BOUND_ALL_RANGE(env->option);
03439       break;
03440 
03441     case 'B':
03442       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
03443       tok->type = TK_ANCHOR;
03444       tok->u.anchor.subtype = ANCHOR_NOT_WORD_BOUND;
03445       tok->u.anchor.ascii_range = IS_ASCII_RANGE(env->option)
03446                 && ! IS_WORD_BOUND_ALL_RANGE(env->option);
03447       break;
03448 
03449 #ifdef USE_WORD_BEGIN_END
03450     case '<':
03451       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
03452       tok->type = TK_ANCHOR;
03453       tok->u.anchor.subtype = ANCHOR_WORD_BEGIN;
03454       tok->u.anchor.ascii_range = IS_ASCII_RANGE(env->option);
03455       break;
03456 
03457     case '>':
03458       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
03459       tok->type = TK_ANCHOR;
03460       tok->u.anchor.subtype = ANCHOR_WORD_END;
03461       tok->u.anchor.ascii_range = IS_ASCII_RANGE(env->option);
03462       break;
03463 #endif
03464 
03465     case 's':
03466       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
03467       tok->type = TK_CHAR_TYPE;
03468       tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
03469       tok->u.prop.not   = 0;
03470       break;
03471 
03472     case 'S':
03473       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
03474       tok->type = TK_CHAR_TYPE;
03475       tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
03476       tok->u.prop.not   = 1;
03477       break;
03478 
03479     case 'd':
03480       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
03481       tok->type = TK_CHAR_TYPE;
03482       tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
03483       tok->u.prop.not   = 0;
03484       break;
03485 
03486     case 'D':
03487       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
03488       tok->type = TK_CHAR_TYPE;
03489       tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
03490       tok->u.prop.not   = 1;
03491       break;
03492 
03493     case 'h':
03494       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
03495       tok->type = TK_CHAR_TYPE;
03496       tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
03497       tok->u.prop.not   = 0;
03498       break;
03499 
03500     case 'H':
03501       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
03502       tok->type = TK_CHAR_TYPE;
03503       tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
03504       tok->u.prop.not   = 1;
03505       break;
03506 
03507     case 'A':
03508       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
03509     begin_buf:
03510       tok->type = TK_ANCHOR;
03511       tok->u.anchor.subtype = ANCHOR_BEGIN_BUF;
03512       break;
03513 
03514     case 'Z':
03515       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
03516       tok->type = TK_ANCHOR;
03517       tok->u.anchor.subtype = ANCHOR_SEMI_END_BUF;
03518       break;
03519 
03520     case 'z':
03521       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
03522     end_buf:
03523       tok->type = TK_ANCHOR;
03524       tok->u.anchor.subtype = ANCHOR_END_BUF;
03525       break;
03526 
03527     case 'G':
03528       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break;
03529       tok->type = TK_ANCHOR;
03530       tok->u.anchor.subtype = ANCHOR_BEGIN_POSITION;
03531       break;
03532 
03533     case '`':
03534       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
03535       goto begin_buf;
03536       break;
03537 
03538     case '\'':
03539       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
03540       goto end_buf;
03541       break;
03542 
03543     case 'x':
03544       if (PEND) break;
03545 
03546       prev = p;
03547       if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
03548         PINC;
03549         num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);
03550         if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
03551         if (!PEND) {
03552           if (ONIGENC_IS_CODE_XDIGIT(enc, PPEEK))
03553             return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
03554         }
03555 
03556         if ((p > prev + enclen(enc, prev, end)) && !PEND && PPEEK_IS('}')) {
03557           PINC;
03558           tok->type   = TK_CODE_POINT;
03559           tok->u.code = (OnigCodePoint )num;
03560         }
03561         else {
03562           /* can't read nothing or invalid format */
03563           p = prev;
03564         }
03565       }
03566       else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
03567         num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc);
03568         if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
03569         if (p == prev) {  /* can't read nothing. */
03570           num = 0; /* but, it's not error */
03571         }
03572         tok->type = TK_RAW_BYTE;
03573         tok->base = 16;
03574         tok->u.c  = num;
03575       }
03576       break;
03577 
03578     case 'u':
03579       if (PEND) break;
03580 
03581       prev = p;
03582       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
03583         num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc);
03584         if (num < -1) return ONIGERR_TOO_SHORT_DIGITS;
03585         else if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
03586         if (p == prev) {  /* can't read nothing. */
03587           num = 0; /* but, it's not error */
03588         }
03589         tok->type   = TK_CODE_POINT;
03590         tok->base   = 16;
03591         tok->u.code = (OnigCodePoint )num;
03592       }
03593       break;
03594 
03595     case '1': case '2': case '3': case '4':
03596     case '5': case '6': case '7': case '8': case '9':
03597       PUNFETCH;
03598       prev = p;
03599       num = onig_scan_unsigned_number(&p, end, enc);
03600       if (num < 0 || num > ONIG_MAX_BACKREF_NUM) {
03601         goto skip_backref;
03602       }
03603 
03604       if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) &&
03605           (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */
03606         if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
03607           if (num > env->num_mem || IS_NULL(SCANENV_MEM_NODES(env)[num]))
03608             return ONIGERR_INVALID_BACKREF;
03609         }
03610 
03611         tok->type = TK_BACKREF;
03612         tok->u.backref.num     = 1;
03613         tok->u.backref.ref1    = num;
03614         tok->u.backref.by_name = 0;
03615 #ifdef USE_BACKREF_WITH_LEVEL
03616         tok->u.backref.exist_level = 0;
03617 #endif
03618         break;
03619       }
03620 
03621     skip_backref:
03622       if (c == '8' || c == '9') {
03623         /* normal char */
03624         p = prev; PINC;
03625         break;
03626       }
03627 
03628       p = prev;
03629       /* fall through */
03630     case '0':
03631       if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
03632         prev = p;
03633         num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc);
03634         if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
03635         if (p == prev) {  /* can't read nothing. */
03636           num = 0; /* but, it's not error */
03637         }
03638         tok->type = TK_RAW_BYTE;
03639         tok->base = 8;
03640         tok->u.c  = num;
03641       }
03642       else if (c != '0') {
03643         PINC;
03644       }
03645       break;
03646 
03647 #ifdef USE_NAMED_GROUP
03648     case 'k':
03649       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) {
03650         PFETCH(c);
03651         if (c == '<' || c == '\'') {
03652           r = fetch_named_backref_token(c, tok, &p, end, env);
03653           if (r < 0) return r;
03654         }
03655         else {
03656           PUNFETCH;
03657           onig_syntax_warn(env, "invalid back reference");
03658         }
03659       }
03660       break;
03661 #endif
03662 
03663 #if defined(USE_SUBEXP_CALL) || defined(USE_NAMED_GROUP)
03664     case 'g':
03665 #ifdef USE_NAMED_GROUP
03666       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_BRACE_BACKREF)) {
03667         PFETCH(c);
03668         if (c == '{') {
03669           r = fetch_named_backref_token(c, tok, &p, end, env);
03670           if (r < 0) return r;
03671         }
03672         else
03673           PUNFETCH;
03674       }
03675 #endif
03676 #ifdef USE_SUBEXP_CALL
03677       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) {
03678         PFETCH(c);
03679         if (c == '<' || c == '\'') {
03680           int gnum = -1, rel = 0;
03681           UChar* name_end;
03682           OnigCodePoint cnext;
03683 
03684           cnext = PPEEK;
03685           if (cnext == '0') {
03686             PINC;
03687             if (PPEEK_IS(get_name_end_code_point(c))) {  /* \g<0>, \g'0' */
03688               PINC;
03689               name_end = p;
03690               gnum = 0;
03691             }
03692           }
03693           else if (cnext == '+') {
03694             PINC;
03695             rel = 1;
03696           }
03697           prev = p;
03698           if (gnum < 0) {
03699             r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &gnum, 1);
03700             if (r < 0) return r;
03701           }
03702 
03703           tok->type = TK_CALL;
03704           tok->u.call.name     = prev;
03705           tok->u.call.name_end = name_end;
03706           tok->u.call.gnum     = gnum;
03707           tok->u.call.rel      = rel;
03708         }
03709         else {
03710           onig_syntax_warn(env, "invalid subexp call");
03711           PUNFETCH;
03712         }
03713       }
03714 #endif
03715       break;
03716 #endif
03717 
03718     case 'Q':
03719       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)) {
03720         tok->type = TK_QUOTE_OPEN;
03721       }
03722       break;
03723 
03724     case 'p':
03725     case 'P':
03726       if (PPEEK_IS('{') &&
03727           IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
03728         PINC;
03729         tok->type = TK_CHAR_PROPERTY;
03730         tok->u.prop.not = (c == 'P' ? 1 : 0);
03731 
03732         if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
03733           PFETCH(c);
03734           if (c == '^') {
03735             tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
03736           }
03737           else
03738             PUNFETCH;
03739         }
03740       }
03741       else {
03742         onig_syntax_warn(env, "invalid Unicode Property \\%c", c);
03743       }
03744       break;
03745 
03746     case 'R':
03747       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_R_LINEBREAK)) {
03748         tok->type = TK_LINEBREAK;
03749       }
03750       break;
03751 
03752     case 'X':
03753       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER)) {
03754         tok->type = TK_EXTENDED_GRAPHEME_CLUSTER;
03755       }
03756       break;
03757 
03758     case 'K':
03759       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP)) {
03760         tok->type = TK_KEEP;
03761       }
03762       break;
03763 
03764     default:
03765       PUNFETCH;
03766       num = fetch_escaped_value(&p, end, env);
03767       if (num < 0) return num;
03768       /* set_raw: */
03769       if (tok->u.c != num) {
03770         tok->type = TK_CODE_POINT;
03771         tok->u.code = (OnigCodePoint )num;
03772       }
03773       else { /* string */
03774         p = tok->backp + enclen(enc, tok->backp, end);
03775       }
03776       break;
03777     }
03778   }
03779   else {
03780     tok->u.c = c;
03781     tok->escaped = 0;
03782 
03783 #ifdef USE_VARIABLE_META_CHARS
03784     if ((c != ONIG_INEFFECTIVE_META_CHAR) &&
03785         IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) {
03786       if (c == MC_ANYCHAR(syn))
03787         goto any_char;
03788       else if (c == MC_ANYTIME(syn))
03789         goto anytime;
03790       else if (c == MC_ZERO_OR_ONE_TIME(syn))
03791         goto zero_or_one_time;
03792       else if (c == MC_ONE_OR_MORE_TIME(syn))
03793         goto one_or_more_time;
03794       else if (c == MC_ANYCHAR_ANYTIME(syn)) {
03795         tok->type = TK_ANYCHAR_ANYTIME;
03796         goto out;
03797       }
03798     }
03799 #endif
03800 
03801     switch (c) {
03802     case '.':
03803       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break;
03804 #ifdef USE_VARIABLE_META_CHARS
03805     any_char:
03806 #endif
03807       tok->type = TK_ANYCHAR;
03808       break;
03809 
03810     case '*':
03811       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break;
03812 #ifdef USE_VARIABLE_META_CHARS
03813     anytime:
03814 #endif
03815       tok->type = TK_OP_REPEAT;
03816       tok->u.repeat.lower = 0;
03817       tok->u.repeat.upper = REPEAT_INFINITE;
03818       goto greedy_check;
03819       break;
03820 
03821     case '+':
03822       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break;
03823 #ifdef USE_VARIABLE_META_CHARS
03824     one_or_more_time:
03825 #endif
03826       tok->type = TK_OP_REPEAT;
03827       tok->u.repeat.lower = 1;
03828       tok->u.repeat.upper = REPEAT_INFINITE;
03829       goto greedy_check;
03830       break;
03831 
03832     case '?':
03833       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break;
03834 #ifdef USE_VARIABLE_META_CHARS
03835     zero_or_one_time:
03836 #endif
03837       tok->type = TK_OP_REPEAT;
03838       tok->u.repeat.lower = 0;
03839       tok->u.repeat.upper = 1;
03840       goto greedy_check;
03841       break;
03842 
03843     case '{':
03844       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;
03845       r = fetch_range_quantifier(&p, end, tok, env);
03846       if (r < 0) return r;  /* error */
03847       if (r == 0) goto greedy_check;
03848       else if (r == 2) { /* {n} */
03849         if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
03850           goto possessive_check;
03851 
03852         goto greedy_check;
03853       }
03854       /* r == 1 : normal char */
03855       break;
03856 
03857     case '|':
03858       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break;
03859       tok->type = TK_ALT;
03860       break;
03861 
03862     case '(':
03863       if (PPEEK_IS('?') &&
03864           IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
03865         PINC;
03866         if (PPEEK_IS('#')) {
03867           PFETCH(c);
03868           while (1) {
03869             if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
03870             PFETCH(c);
03871             if (c == MC_ESC(syn)) {
03872               if (!PEND) PFETCH(c);
03873             }
03874             else {
03875               if (c == ')') break;
03876             }
03877           }
03878           goto start;
03879         }
03880 #ifdef USE_PERL_SUBEXP_CALL
03881         /* (?&name), (?n), (?R), (?0), (?+n), (?-n) */
03882         c = PPEEK;
03883         if ((c == '&' || c == 'R' || ONIGENC_IS_CODE_DIGIT(enc, c)) &&
03884             IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_SUBEXP_CALL)) {
03885           /* (?&name), (?n), (?R), (?0) */
03886           int gnum;
03887           UChar *name;
03888           UChar *name_end;
03889 
03890           if (c == 'R' || c == '0') {
03891             PINC;   /* skip 'R' / '0' */
03892             if (!PPEEK_IS(')')) return ONIGERR_INVALID_GROUP_NAME;
03893             PINC;   /* skip ')' */
03894             name_end = name = p;
03895             gnum = 0;
03896           }
03897           else {
03898             int numref = 1;
03899             if (c == '&') {     /* (?&name) */
03900               PINC;
03901               numref = 0;       /* don't allow number name */
03902             }
03903             name = p;
03904             r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum, numref);
03905             if (r < 0) return r;
03906           }
03907 
03908           tok->type = TK_CALL;
03909           tok->u.call.name     = name;
03910           tok->u.call.name_end = name_end;
03911           tok->u.call.gnum     = gnum;
03912           tok->u.call.rel      = 0;
03913           break;
03914         }
03915         else if ((c == '-' || c == '+') &&
03916             IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_SUBEXP_CALL)) {
03917           /* (?+n), (?-n) */
03918           int gnum;
03919           UChar *name;
03920           UChar *name_end;
03921           OnigCodePoint cnext;
03922           PFETCH_READY;
03923 
03924           PINC;     /* skip '-' / '+' */
03925           cnext = PPEEK;
03926           if (ONIGENC_IS_CODE_DIGIT(enc, cnext)) {
03927             if (c == '-') PUNFETCH;
03928             name = p;
03929             r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum, 1);
03930             if (r < 0) return r;
03931 
03932             tok->type = TK_CALL;
03933             tok->u.call.name     = name;
03934             tok->u.call.name_end = name_end;
03935             tok->u.call.gnum     = gnum;
03936             tok->u.call.rel      = 1;
03937             break;
03938           }
03939         }
03940 #endif /* USE_PERL_SUBEXP_CALL */
03941 #ifdef USE_CAPITAL_P_NAMED_GROUP
03942         if (PPEEK_IS('P') &&
03943             IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_CAPITAL_P_NAMED_GROUP)) {
03944           int gnum;
03945           UChar *name;
03946           UChar *name_end;
03947           PFETCH_READY;
03948 
03949           PINC;     /* skip 'P' */
03950           PFETCH(c);
03951           if (c == '=') {       /* (?P=name): backref */
03952             r = fetch_named_backref_token((OnigCodePoint )'(', tok, &p, end, env);
03953             if (r < 0) return r;
03954             break;
03955           }
03956           else if (c == '>') {  /* (?P>name): subexp call */
03957             name = p;
03958             r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum, 0);
03959             if (r < 0) return r;
03960 
03961             tok->type = TK_CALL;
03962             tok->u.call.name     = name;
03963             tok->u.call.name_end = name_end;
03964             tok->u.call.gnum     = gnum;
03965             tok->u.call.rel      = 0;
03966             break;
03967           }
03968           PUNFETCH;
03969         }
03970 #endif /* USE_CAPITAL_P_NAMED_GROUP */
03971         PUNFETCH;
03972       }
03973 
03974       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
03975       tok->type = TK_SUBEXP_OPEN;
03976       break;
03977 
03978     case ')':
03979       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
03980       tok->type = TK_SUBEXP_CLOSE;
03981       break;
03982 
03983     case '^':
03984       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
03985       tok->type = TK_ANCHOR;
03986       tok->u.anchor.subtype = (IS_SINGLELINE(env->option)
03987                                ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE);
03988       break;
03989 
03990     case '$':
03991       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
03992       tok->type = TK_ANCHOR;
03993       tok->u.anchor.subtype = (IS_SINGLELINE(env->option)
03994                                ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE);
03995       break;
03996 
03997     case '[':
03998       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break;
03999       tok->type = TK_CC_OPEN;
04000       break;
04001 
04002     case ']':
04003       if (*src > env->pattern)   /* /].../ is allowed. */
04004         CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]");
04005       break;
04006 
04007     case '#':
04008       if (IS_EXTEND(env->option)) {
04009         while (!PEND) {
04010           PFETCH(c);
04011           if (ONIGENC_IS_CODE_NEWLINE(enc, c))
04012             break;
04013         }
04014         goto start;
04015         break;
04016       }
04017       break;
04018 
04019     case ' ': case '\t': case '\n': case '\r': case '\f':
04020       if (IS_EXTEND(env->option))
04021         goto start;
04022       break;
04023 
04024     default:
04025       /* string */
04026       break;
04027     }
04028   }
04029 
04030 #ifdef USE_VARIABLE_META_CHARS
04031  out:
04032 #endif
04033   *src = p;
04034   return tok->type;
04035 }
04036 
04037 static int
04038 add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,
04039                          ScanEnv* env,
04040                          OnigCodePoint sb_out, const OnigCodePoint mbr[])
04041 {
04042   int i, r;
04043   OnigCodePoint j;
04044 
04045   int n = ONIGENC_CODE_RANGE_NUM(mbr);
04046 
04047   if (not == 0) {
04048     for (i = 0; i < n; i++) {
04049       for (j  = ONIGENC_CODE_RANGE_FROM(mbr, i);
04050            j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {
04051         if (j >= sb_out) {
04052           if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {
04053             r = add_code_range_to_buf(&(cc->mbuf), env, j,
04054                                       ONIGENC_CODE_RANGE_TO(mbr, i));
04055             if (r != 0) return r;
04056             i++;
04057           }
04058 
04059           goto sb_end;
04060         }
04061         BITSET_SET_BIT_CHKDUP(cc->bs, j);
04062       }
04063     }
04064 
04065   sb_end:
04066     for ( ; i < n; i++) {
04067       r = add_code_range_to_buf(&(cc->mbuf), env,
04068                                 ONIGENC_CODE_RANGE_FROM(mbr, i),
04069                                 ONIGENC_CODE_RANGE_TO(mbr, i));
04070       if (r != 0) return r;
04071     }
04072   }
04073   else {
04074     OnigCodePoint prev = 0;
04075 
04076     for (i = 0; i < n; i++) {
04077       for (j = prev;
04078            j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) {
04079         if (j >= sb_out) {
04080           goto sb_end2;
04081         }
04082         BITSET_SET_BIT_CHKDUP(cc->bs, j);
04083       }
04084       prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
04085     }
04086     for (j = prev; j < sb_out; j++) {
04087       BITSET_SET_BIT_CHKDUP(cc->bs, j);
04088     }
04089 
04090   sb_end2:
04091     prev = sb_out;
04092 
04093     for (i = 0; i < n; i++) {
04094       if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {
04095         r = add_code_range_to_buf(&(cc->mbuf), env, prev,
04096                                   ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);
04097         if (r != 0) return r;
04098       }
04099       prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
04100     }
04101     if (prev < 0x7fffffff) {
04102       r = add_code_range_to_buf(&(cc->mbuf), env, prev, 0x7fffffff);
04103       if (r != 0) return r;
04104     }
04105   }
04106 
04107   return 0;
04108 }
04109 
04110 static int
04111 add_ctype_to_cc(CClassNode* cc, int ctype, int not, int char_prop, ScanEnv* env)
04112 {
04113   int maxcode, ascii_range;
04114   int c, r;
04115   const OnigCodePoint *ranges;
04116   OnigCodePoint sb_out;
04117   OnigEncoding enc = env->enc;
04118   OnigOptionType option = env->option;
04119 
04120   ascii_range = IS_ASCII_RANGE(option) && (char_prop == 0);
04121 
04122   r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);
04123   if (r == 0) {
04124     if (ascii_range) {
04125       CClassNode ccwork;
04126       initialize_cclass(&ccwork);
04127       r = add_ctype_to_cc_by_range(&ccwork, ctype, not, env, sb_out,
04128                                    ranges);
04129       if (r == 0) {
04130         if (not) {
04131           r = add_code_range_to_buf0(&(ccwork.mbuf), env, 0x80, ONIG_LAST_CODE_POINT, FALSE);
04132         }
04133         else {
04134           CClassNode ccascii;
04135           initialize_cclass(&ccascii);
04136           if (ONIGENC_MBC_MINLEN(env->enc) > 1) {
04137             add_code_range(&(ccascii.mbuf), env, 0x00, 0x7F);
04138           }
04139           else {
04140             bitset_set_range(env, ccascii.bs, 0x00, 0x7F);
04141           }
04142           r = and_cclass(&ccwork, &ccascii, env);
04143           if (IS_NOT_NULL(ccascii.mbuf)) bbuf_free(ccascii.mbuf);
04144         }
04145         if (r == 0) {
04146           r = or_cclass(cc, &ccwork, env);
04147         }
04148         if (IS_NOT_NULL(ccwork.mbuf)) bbuf_free(ccwork.mbuf);
04149       }
04150     }
04151     else {
04152       r = add_ctype_to_cc_by_range(cc, ctype, not, env, sb_out, ranges);
04153     }
04154     return r;
04155   }
04156   else if (r != ONIG_NO_SUPPORT_CONFIG) {
04157     return r;
04158   }
04159 
04160   maxcode = ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
04161   r = 0;
04162   switch (ctype) {
04163   case ONIGENC_CTYPE_ALPHA:
04164   case ONIGENC_CTYPE_BLANK:
04165   case ONIGENC_CTYPE_CNTRL:
04166   case ONIGENC_CTYPE_DIGIT:
04167   case ONIGENC_CTYPE_LOWER:
04168   case ONIGENC_CTYPE_PUNCT:
04169   case ONIGENC_CTYPE_SPACE:
04170   case ONIGENC_CTYPE_UPPER:
04171   case ONIGENC_CTYPE_XDIGIT:
04172   case ONIGENC_CTYPE_ASCII:
04173   case ONIGENC_CTYPE_ALNUM:
04174     if (not != 0) {
04175       for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
04176         if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
04177           BITSET_SET_BIT_CHKDUP(cc->bs, c);
04178       }
04179       ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
04180     }
04181     else {
04182       for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
04183         if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
04184           BITSET_SET_BIT_CHKDUP(cc->bs, c);
04185       }
04186     }
04187     break;
04188 
04189   case ONIGENC_CTYPE_GRAPH:
04190   case ONIGENC_CTYPE_PRINT:
04191     if (not != 0) {
04192       for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
04193         if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)
04194             || c >= maxcode)
04195           BITSET_SET_BIT_CHKDUP(cc->bs, c);
04196       }
04197       if (ascii_range)
04198         ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
04199     }
04200     else {
04201       for (c = 0; c < maxcode; c++) {
04202         if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
04203           BITSET_SET_BIT_CHKDUP(cc->bs, c);
04204       }
04205       if (! ascii_range)
04206         ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
04207     }
04208     break;
04209 
04210   case ONIGENC_CTYPE_WORD:
04211     if (not == 0) {
04212       for (c = 0; c < maxcode; c++) {
04213         if (ONIGENC_IS_CODE_WORD(enc, c)) BITSET_SET_BIT_CHKDUP(cc->bs, c);
04214       }
04215       if (! ascii_range)
04216         ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
04217     }
04218     else {
04219       for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
04220         if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* check invalid code point */
04221             && (! ONIGENC_IS_CODE_WORD(enc, c) || c >= maxcode))
04222           BITSET_SET_BIT_CHKDUP(cc->bs, c);
04223       }
04224       if (ascii_range)
04225         ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
04226     }
04227     break;
04228 
04229   default:
04230     return ONIGERR_PARSER_BUG;
04231     break;
04232   }
04233 
04234   return r;
04235 }
04236 
04237 static int
04238 parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
04239 {
04240 #define POSIX_BRACKET_CHECK_LIMIT_LENGTH  20
04241 #define POSIX_BRACKET_NAME_MIN_LEN         4
04242 
04243   static const PosixBracketEntryType PBS[] = {
04244     { (UChar* )"alnum",  ONIGENC_CTYPE_ALNUM,  5 },
04245     { (UChar* )"alpha",  ONIGENC_CTYPE_ALPHA,  5 },
04246     { (UChar* )"blank",  ONIGENC_CTYPE_BLANK,  5 },
04247     { (UChar* )"cntrl",  ONIGENC_CTYPE_CNTRL,  5 },
04248     { (UChar* )"digit",  ONIGENC_CTYPE_DIGIT,  5 },
04249     { (UChar* )"graph",  ONIGENC_CTYPE_GRAPH,  5 },
04250     { (UChar* )"lower",  ONIGENC_CTYPE_LOWER,  5 },
04251     { (UChar* )"print",  ONIGENC_CTYPE_PRINT,  5 },
04252     { (UChar* )"punct",  ONIGENC_CTYPE_PUNCT,  5 },
04253     { (UChar* )"space",  ONIGENC_CTYPE_SPACE,  5 },
04254     { (UChar* )"upper",  ONIGENC_CTYPE_UPPER,  5 },
04255     { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 },
04256     { (UChar* )"ascii",  ONIGENC_CTYPE_ASCII,  5 },
04257     { (UChar* )"word",   ONIGENC_CTYPE_WORD,   4 },
04258     { (UChar* )NULL,     -1, 0 }
04259   };
04260 
04261   const PosixBracketEntryType *pb;
04262   int not, i, r;
04263   OnigCodePoint c;
04264   OnigEncoding enc = env->enc;
04265   UChar *p = *src;
04266   PFETCH_READY;
04267 
04268   if (PPEEK_IS('^')) {
04269     PINC;
04270     not = 1;
04271   }
04272   else
04273     not = 0;
04274 
04275   if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3)
04276     goto not_posix_bracket;
04277 
04278   for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
04279     if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {
04280       p = (UChar* )onigenc_step(enc, p, end, pb->len);
04281       if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)
04282         return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
04283 
04284       r = add_ctype_to_cc(cc, pb->ctype, not,
04285             IS_POSIX_BRACKET_ALL_RANGE(env->option),
04286             env);
04287       if (r != 0) return r;
04288 
04289       PINC; PINC;
04290       *src = p;
04291       return 0;
04292     }
04293   }
04294 
04295  not_posix_bracket:
04296   c = 0;
04297   i = 0;
04298   while (!PEND && ((c = PPEEK) != ':') && c != ']') {
04299     PINC;
04300     if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break;
04301   }
04302   if (c == ':' && ! PEND) {
04303     PINC;
04304     if (! PEND) {
04305       PFETCH(c);
04306       if (c == ']')
04307         return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
04308     }
04309   }
04310 
04311   return 1;  /* 1: is not POSIX bracket, but no error. */
04312 }
04313 
04314 static int
04315 fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)
04316 {
04317   int r;
04318   OnigCodePoint c;
04319   OnigEncoding enc = env->enc;
04320   UChar *prev, *start, *p = *src;
04321   PFETCH_READY;
04322 
04323   r = 0;
04324   start = prev = p;
04325 
04326   while (!PEND) {
04327     prev = p;
04328     PFETCH(c);
04329     if (c == '}') {
04330       r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev);
04331       if (r < 0) break;
04332 
04333       *src = p;
04334       return r;
04335     }
04336     else if (c == '(' || c == ')' || c == '{' || c == '|') {
04337       r = ONIGERR_INVALID_CHAR_PROPERTY_NAME;
04338       break;
04339     }
04340   }
04341 
04342   onig_scan_env_set_error_string(env, r, *src, prev);
04343   return r;
04344 }
04345 
04346 static int
04347 parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end,
04348                     ScanEnv* env)
04349 {
04350   int r, ctype;
04351   CClassNode* cc;
04352 
04353   ctype = fetch_char_property_to_ctype(src, end, env);
04354   if (ctype < 0) return ctype;
04355 
04356   *np = node_new_cclass();
04357   CHECK_NULL_RETURN_MEMERR(*np);
04358   cc = NCCLASS(*np);
04359   r = add_ctype_to_cc(cc, ctype, 0, 1, env);
04360   if (r != 0) return r;
04361   if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
04362 
04363   return 0;
04364 }
04365 
04366 
/* Parser state between two items of a character class. */
enum CCSTATE {
  CCS_VALUE    = 0,  /* previous item was a value or a class; a plain
                        value is still pending and gets committed when
                        the next item arrives */
  CCS_RANGE    = 1,  /* "value-" has been read; the next value ends
                        the range */
  CCS_COMPLETE = 2,  /* a range has just been closed */
  CCS_START    = 3   /* nothing read yet (class start or after "&&") */
};
04373 
/* Kind of the most recent item read inside a character class. */
enum CCVALTYPE {
  CCV_SB         = 0,  /* single-byte value; committed to the bitset */
  CCV_CODE_POINT = 1,  /* code point; committed to the code range
                          buffer (mbuf) */
  CCV_CLASS      = 2   /* a class item; nothing is left pending */
};
04379 
04380 static int
04381 next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,
04382                  enum CCSTATE* state, ScanEnv* env)
04383 {
04384   int r;
04385 
04386   if (*state == CCS_RANGE)
04387     return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE;
04388 
04389   if (*state == CCS_VALUE && *type != CCV_CLASS) {
04390     if (*type == CCV_SB)
04391       BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs));
04392     else if (*type == CCV_CODE_POINT) {
04393       r = add_code_range(&(cc->mbuf), env, *vs, *vs);
04394       if (r < 0) return r;
04395     }
04396   }
04397 
04398   *state = CCS_VALUE;
04399   *type  = CCV_CLASS;
04400   return 0;
04401 }
04402 
04403 static int
04404 next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
04405                int* vs_israw, int v_israw,
04406                enum CCVALTYPE intype, enum CCVALTYPE* type,
04407                enum CCSTATE* state, ScanEnv* env)
04408 {
04409   int r;
04410 
04411   switch (*state) {
04412   case CCS_VALUE:
04413     if (*type == CCV_SB)
04414       BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs));
04415     else if (*type == CCV_CODE_POINT) {
04416       r = add_code_range(&(cc->mbuf), env, *vs, *vs);
04417       if (r < 0) return r;
04418     }
04419     break;
04420 
04421   case CCS_RANGE:
04422     if (intype == *type) {
04423       if (intype == CCV_SB) {
04424         if (*vs > 0xff || v > 0xff)
04425           return ONIGERR_INVALID_CODE_POINT_VALUE;
04426 
04427         if (*vs > v) {
04428           if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
04429             goto ccs_range_end;
04430           else
04431             return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
04432         }
04433         bitset_set_range(env, cc->bs, (int )*vs, (int )v);
04434       }
04435       else {
04436         r = add_code_range(&(cc->mbuf), env, *vs, v);
04437         if (r < 0) return r;
04438       }
04439     }
04440     else {
04441 #if 0
04442       if (intype == CCV_CODE_POINT && *type == CCV_SB) {
04443 #endif
04444         if (*vs > v) {
04445           if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
04446             goto ccs_range_end;
04447           else
04448             return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
04449         }
04450         bitset_set_range(env, cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff));
04451         r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v);
04452         if (r < 0) return r;
04453 #if 0
04454       }
04455       else
04456         return ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE;
04457 #endif
04458     }
04459   ccs_range_end:
04460     *state = CCS_COMPLETE;
04461     break;
04462 
04463   case CCS_COMPLETE:
04464   case CCS_START:
04465     *state = CCS_VALUE;
04466     break;
04467 
04468   default:
04469     break;
04470   }
04471 
04472   *vs_israw = v_israw;
04473   *vs       = v;
04474   *type     = intype;
04475   return 0;
04476 }
04477 
04478 static int
04479 code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
04480                  ScanEnv* env)
04481 {
04482   int in_esc;
04483   OnigCodePoint code;
04484   OnigEncoding enc = env->enc;
04485   UChar* p = from;
04486   PFETCH_READY;
04487 
04488   in_esc = 0;
04489   while (! PEND) {
04490     if (ignore_escaped && in_esc) {
04491       in_esc = 0;
04492     }
04493     else {
04494       PFETCH(code);
04495       if (code == c) return 1;
04496       if (code == MC_ESC(env->syntax)) in_esc = 1;
04497     }
04498   }
04499   return 0;
04500 }
04501 
04502 static int
04503 parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
04504                  ScanEnv* env)
04505 {
04506   int r, neg, len, fetched, and_start;
04507   OnigCodePoint v, vs;
04508   UChar *p;
04509   Node* node;
04510   CClassNode *cc, *prev_cc;
04511   CClassNode work_cc;
04512 
04513   enum CCSTATE state;
04514   enum CCVALTYPE val_type, in_type;
04515   int val_israw, in_israw;
04516 
04517   prev_cc = (CClassNode* )NULL;
04518   *np = NULL_NODE;
04519   r = fetch_token_in_cc(tok, src, end, env);
04520   if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) {
04521     neg = 1;
04522     r = fetch_token_in_cc(tok, src, end, env);
04523   }
04524   else {
04525     neg = 0;
04526   }
04527 
04528   if (r < 0) return r;
04529   if (r == TK_CC_CLOSE) {
04530     if (! code_exist_check((OnigCodePoint )']',
04531                            *src, env->pattern_end, 1, env))
04532       return ONIGERR_EMPTY_CHAR_CLASS;
04533 
04534     CC_ESC_WARN(env, (UChar* )"]");
04535     r = tok->type = TK_CHAR;  /* allow []...] */
04536   }
04537 
04538   *np = node = node_new_cclass();
04539   CHECK_NULL_RETURN_MEMERR(node);
04540   cc = NCCLASS(node);
04541 
04542   and_start = 0;
04543   state = CCS_START;
04544   p = *src;
04545   while (r != TK_CC_CLOSE) {
04546     fetched = 0;
04547     switch (r) {
04548     case TK_CHAR:
04549       if ((tok->u.code >= SINGLE_BYTE_SIZE) ||
04550           (len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c)) > 1) {
04551         in_type = CCV_CODE_POINT;
04552       }
04553       else if (len < 0) {
04554         r = len;
04555         goto err;
04556       }
04557       else {
04558       sb_char:
04559         in_type = CCV_SB;
04560       }
04561       v = (OnigCodePoint )tok->u.c;
04562       in_israw = 0;
04563       goto val_entry2;
04564       break;
04565 
04566     case TK_RAW_BYTE:
04567       /* tok->base != 0 : octal or hexadec. */
04568       if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) {
04569         UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
04570         UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN;
04571         UChar* psave = p;
04572         int i, base = tok->base;
04573 
04574         buf[0] = (UChar )tok->u.c;
04575         for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {
04576           r = fetch_token_in_cc(tok, &p, end, env);
04577           if (r < 0) goto err;
04578           if (r != TK_RAW_BYTE || tok->base != base) {
04579             fetched = 1;
04580             break;
04581           }
04582           buf[i] = (UChar )tok->u.c;
04583         }
04584 
04585         if (i < ONIGENC_MBC_MINLEN(env->enc)) {
04586           r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
04587           goto err;
04588         }
04589 
04590         len = enclen(env->enc, buf, buf+i);
04591         if (i < len) {
04592           r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
04593           goto err;
04594         }
04595         else if (i > len) { /* fetch back */
04596           p = psave;
04597           for (i = 1; i < len; i++) {
04598             r = fetch_token_in_cc(tok, &p, end, env);
04599           }
04600           fetched = 0;
04601         }
04602 
04603         if (i == 1) {
04604           v = (OnigCodePoint )buf[0];
04605           goto raw_single;
04606         }
04607         else {
04608           v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe);
04609           in_type = CCV_CODE_POINT;
04610         }
04611       }
04612       else {
04613         v = (OnigCodePoint )tok->u.c;
04614       raw_single:
04615         in_type = CCV_SB;
04616       }
04617       in_israw = 1;
04618       goto val_entry2;
04619       break;
04620 
04621     case TK_CODE_POINT:
04622       v = tok->u.code;
04623       in_israw = 1;
04624     val_entry:
04625       len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);
04626       if (len < 0) {
04627         r = len;
04628         goto err;
04629       }
04630       in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT);
04631     val_entry2:
04632       r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type,
04633                          &state, env);
04634       if (r != 0) goto err;
04635       break;
04636 
04637     case TK_POSIX_BRACKET_OPEN:
04638       r = parse_posix_bracket(cc, &p, end, env);
04639       if (r < 0) goto err;
04640       if (r == 1) {  /* is not POSIX bracket */
04641         CC_ESC_WARN(env, (UChar* )"[");
04642         p = tok->backp;
04643         v = (OnigCodePoint )tok->u.c;
04644         in_israw = 0;
04645         goto val_entry;
04646       }
04647       goto next_class;
04648       break;
04649 
04650     case TK_CHAR_TYPE:
04651       r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, 0, env);
04652       if (r != 0) return r;
04653 
04654     next_class:
04655       r = next_state_class(cc, &vs, &val_type, &state, env);
04656       if (r != 0) goto err;
04657       break;
04658 
04659     case TK_CHAR_PROPERTY:
04660       {
04661         int ctype;
04662 
04663         ctype = fetch_char_property_to_ctype(&p, end, env);
04664         if (ctype < 0) return ctype;
04665         r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, 1, env);
04666         if (r != 0) return r;
04667         goto next_class;
04668       }
04669       break;
04670 
04671     case TK_CC_RANGE:
04672       if (state == CCS_VALUE) {
04673         r = fetch_token_in_cc(tok, &p, end, env);
04674         if (r < 0) goto err;
04675         fetched = 1;
04676         if (r == TK_CC_CLOSE) { /* allow [x-] */
04677         range_end_val:
04678           v = (OnigCodePoint )'-';
04679           in_israw = 0;
04680           goto val_entry;
04681         }
04682         else if (r == TK_CC_AND) {
04683           CC_ESC_WARN(env, (UChar* )"-");
04684           goto range_end_val;
04685         }
04686         state = CCS_RANGE;
04687       }
04688       else if (state == CCS_START) {
04689         /* [-xa] is allowed */
04690         v = (OnigCodePoint )tok->u.c;
04691         in_israw = 0;
04692 
04693         r = fetch_token_in_cc(tok, &p, end, env);
04694         if (r < 0) goto err;
04695         fetched = 1;
04696         /* [--x] or [a&&-x] is warned. */
04697         if (r == TK_CC_RANGE || and_start != 0)
04698           CC_ESC_WARN(env, (UChar* )"-");
04699 
04700         goto val_entry;
04701       }
04702       else if (state == CCS_RANGE) {
04703         CC_ESC_WARN(env, (UChar* )"-");
04704         goto sb_char;  /* [!--x] is allowed */
04705       }
04706       else { /* CCS_COMPLETE */
04707         r = fetch_token_in_cc(tok, &p, end, env);
04708         if (r < 0) goto err;
04709         fetched = 1;
04710         if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */
04711         else if (r == TK_CC_AND) {
04712           CC_ESC_WARN(env, (UChar* )"-");
04713           goto range_end_val;
04714         }
04715 
04716         if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {
04717           CC_ESC_WARN(env, (UChar* )"-");
04718           goto range_end_val;   /* [0-9-a] is allowed as [0-9\-a] */
04719         }
04720         r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
04721         goto err;
04722       }
04723       break;
04724 
04725     case TK_CC_CC_OPEN: /* [ */
04726       {
04727         Node *anode;
04728         CClassNode* acc;
04729 
04730         r = parse_char_class(&anode, tok, &p, end, env);
04731         if (r == 0) {
04732           acc = NCCLASS(anode);
04733           r = or_cclass(cc, acc, env);
04734         }
04735         onig_node_free(anode);
04736         if (r != 0) goto err;
04737       }
04738       break;
04739 
04740     case TK_CC_AND: /* && */
04741       {
04742         if (state == CCS_VALUE) {
04743           r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
04744                              &val_type, &state, env);
04745           if (r != 0) goto err;
04746         }
04747         /* initialize local variables */
04748         and_start = 1;
04749         state = CCS_START;
04750 
04751         if (IS_NOT_NULL(prev_cc)) {
04752           r = and_cclass(prev_cc, cc, env);
04753           if (r != 0) goto err;
04754           bbuf_free(cc->mbuf);
04755         }
04756         else {
04757           prev_cc = cc;
04758           cc = &work_cc;
04759         }
04760         initialize_cclass(cc);
04761       }
04762       break;
04763 
04764     case TK_EOT:
04765       r = ONIGERR_PREMATURE_END_OF_CHAR_CLASS;
04766       goto err;
04767       break;
04768     default:
04769       r = ONIGERR_PARSER_BUG;
04770       goto err;
04771       break;
04772     }
04773 
04774     if (fetched)
04775       r = tok->type;
04776     else {
04777       r = fetch_token_in_cc(tok, &p, end, env);
04778       if (r < 0) goto err;
04779     }
04780   }
04781 
04782   if (state == CCS_VALUE) {
04783     r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
04784                        &val_type, &state, env);
04785     if (r != 0) goto err;
04786   }
04787 
04788   if (IS_NOT_NULL(prev_cc)) {
04789     r = and_cclass(prev_cc, cc, env);
04790     if (r != 0) goto err;
04791     bbuf_free(cc->mbuf);
04792     cc = prev_cc;
04793   }
04794 
04795   if (neg != 0)
04796     NCCLASS_SET_NOT(cc);
04797   else
04798     NCCLASS_CLEAR_NOT(cc);
04799   if (IS_NCCLASS_NOT(cc) &&
04800       IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {
04801     int is_empty;
04802 
04803     is_empty = (IS_NULL(cc->mbuf) ? 1 : 0);
04804     if (is_empty != 0)
04805       BITSET_IS_EMPTY(cc->bs, is_empty);
04806 
04807     if (is_empty == 0) {
04808 #define NEWLINE_CODE    0x0a
04809 
04810       if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) {
04811         if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)
04812           BITSET_SET_BIT_CHKDUP(cc->bs, NEWLINE_CODE);
04813         else {
04814           r = add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);
04815           if (r < 0) goto err;
04816         }
04817       }
04818     }
04819   }
04820   *src = p;
04821   return 0;
04822 
04823  err:
04824   if (cc != NCCLASS(*np))
04825     bbuf_free(cc->mbuf);
04826   return r;
04827 }
04828 
04829 static int parse_subexp(Node** top, OnigToken* tok, int term,
04830                         UChar** src, UChar* end, ScanEnv* env);
04831 
04832 static int
04833 parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
04834               ScanEnv* env)
04835 {
04836   int r = 0, num;
04837   Node *target, *work1 = NULL, *work2 = NULL;
04838   OnigOptionType option;
04839   OnigCodePoint c;
04840   OnigEncoding enc = env->enc;
04841 
04842 #ifdef USE_NAMED_GROUP
04843   int list_capture;
04844 #endif
04845 
04846   UChar* p = *src;
04847   PFETCH_READY;
04848 
04849   *np = NULL;
04850   if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
04851 
04852   option = env->option;
04853   if (PPEEK_IS('?') &&
04854       IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
04855     PINC;
04856     if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
04857 
04858     PFETCH(c);
04859     switch (c) {
04860     case ':':   /* (?:...) grouping only */
04861     group:
04862       r = fetch_token(tok, &p, end, env);
04863       if (r < 0) return r;
04864       r = parse_subexp(np, tok, term, &p, end, env);
04865       if (r < 0) return r;
04866       *src = p;
04867       return 1; /* group */
04868       break;
04869 
04870     case '=':
04871       *np = onig_node_new_anchor(ANCHOR_PREC_READ);
04872       break;
04873     case '!':   /* preceding read */
04874       *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT);
04875       break;
04876     case '>':   /* (?>...) stop backtrack */
04877       *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
04878       break;
04879 
04880 #ifdef USE_NAMED_GROUP
04881     case '\'':
04882       if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
04883         goto named_group1;
04884       }
04885       else
04886         return ONIGERR_UNDEFINED_GROUP_OPTION;
04887       break;
04888 
04889 #ifdef USE_CAPITAL_P_NAMED_GROUP
04890     case 'P':   /* (?P<name>...) */
04891       if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_CAPITAL_P_NAMED_GROUP)) {
04892         PFETCH(c);
04893         if (c == '<') goto named_group1;
04894       }
04895       return ONIGERR_UNDEFINED_GROUP_OPTION;
04896       break;
04897 #endif
04898 #endif
04899 
04900     case '<':   /* look behind (?<=...), (?<!...) */
04901       PFETCH(c);
04902       if (c == '=')
04903         *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND);
04904       else if (c == '!')
04905         *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT);
04906 #ifdef USE_NAMED_GROUP
04907       else {    /* (?<name>...) */
04908         if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
04909           UChar *name;
04910           UChar *name_end;
04911 
04912           PUNFETCH;
04913           c = '<';
04914 
04915         named_group1:
04916           list_capture = 0;
04917 
04918         named_group2:
04919           name = p;
04920           r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num, 0);
04921           if (r < 0) return r;
04922 
04923           num = scan_env_add_mem_entry(env);
04924           if (num < 0) return num;
04925           if (list_capture != 0 && num >= (int )BIT_STATUS_BITS_NUM)
04926             return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
04927 
04928           r = name_add(env->reg, name, name_end, num, env);
04929           if (r != 0) return r;
04930           *np = node_new_enclose_memory(env->option, 1);
04931           CHECK_NULL_RETURN_MEMERR(*np);
04932           NENCLOSE(*np)->regnum = num;
04933           if (list_capture != 0)
04934             BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
04935           env->num_named++;
04936         }
04937         else {
04938           return ONIGERR_UNDEFINED_GROUP_OPTION;
04939         }
04940       }
04941 #else
04942       else {
04943         return ONIGERR_UNDEFINED_GROUP_OPTION;
04944       }
04945 #endif
04946       break;
04947 
04948     case '@':
04949       if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) {
04950 #ifdef USE_NAMED_GROUP
04951         if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
04952           PFETCH(c);
04953           if (c == '<' || c == '\'') {
04954             list_capture = 1;
04955             goto named_group2; /* (?@<name>...) */
04956           }
04957           PUNFETCH;
04958         }
04959 #endif
04960         *np = node_new_enclose_memory(env->option, 0);
04961         CHECK_NULL_RETURN_MEMERR(*np);
04962         num = scan_env_add_mem_entry(env);
04963         if (num < 0) {
04964           onig_node_free(*np);
04965           return num;
04966         }
04967         else if (num >= (int )BIT_STATUS_BITS_NUM) {
04968           onig_node_free(*np);
04969           return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
04970         }
04971         NENCLOSE(*np)->regnum = num;
04972         BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
04973       }
04974       else {
04975         return ONIGERR_UNDEFINED_GROUP_OPTION;
04976       }
04977       break;
04978 
04979     case '(':   /* conditional expression: (?(cond)yes), (?(cond)yes|no) */
04980       if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LPAREN_CONDITION)) {
04981         UChar *name = NULL;
04982         UChar *name_end;
04983         PFETCH(c);
04984         if (ONIGENC_IS_CODE_DIGIT(enc, c)) {     /* (n) */
04985           PUNFETCH;
04986           r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &num, 1);
04987           if (r < 0) return r;
04988           if (num < 0) {
04989             num = BACKREF_REL_TO_ABS(num, env);
04990             if (num <= 0)
04991               return ONIGERR_INVALID_BACKREF;
04992           }
04993           if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) {
04994             if (num > env->num_mem ||
04995                 IS_NULL(SCANENV_MEM_NODES(env)[num]))
04996             return ONIGERR_INVALID_BACKREF;
04997           }
04998         }
04999 #ifdef USE_NAMED_GROUP
05000         else if (c == '<' || c == '\'') {    /* (<name>), ('name') */
05001           int nums;
05002           int *backs;
05003 
05004           name = p;
05005           r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num, 0);
05006           if (r < 0) return r;
05007           PFETCH(c);
05008           if (c != ')') return ONIGERR_UNDEFINED_GROUP_OPTION;
05009 
05010           nums = onig_name_to_group_numbers(env->reg, name, name_end, &backs);
05011           if (nums <= 0) {
05012             onig_scan_env_set_error_string(env,
05013                      ONIGERR_UNDEFINED_NAME_REFERENCE, name, name_end);
05014             return ONIGERR_UNDEFINED_NAME_REFERENCE;
05015           }
05016           if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) {
05017             int i;
05018             for (i = 0; i < nums; i++) {
05019               if (backs[i] > env->num_mem ||
05020                   IS_NULL(SCANENV_MEM_NODES(env)[backs[i]]))
05021               return ONIGERR_INVALID_BACKREF;
05022             }
05023           }
05024           num = backs[0];       /* XXX: use left most named group as Perl */
05025         }
05026 #endif
05027         else
05028           return ONIGERR_INVALID_CONDITION_PATTERN;
05029         *np = node_new_enclose(ENCLOSE_CONDITION);
05030         CHECK_NULL_RETURN_MEMERR(*np);
05031         NENCLOSE(*np)->regnum = num;
05032         if (IS_NOT_NULL(name)) NENCLOSE(*np)->state |= NST_NAME_REF;
05033       }
05034       else
05035         return ONIGERR_UNDEFINED_GROUP_OPTION;
05036       break;
05037 
05038 #if 0
05039     case '|':   /* branch reset: (?|...) */
05040       if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_VBAR_BRANCH_RESET)) {
05041         /* TODO */
05042       }
05043       else
05044         return ONIGERR_UNDEFINED_GROUP_OPTION;
05045       break;
05046 #endif
05047 
05048     case '^':   /* loads default options */
05049       if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
05050         /* d-imsx */
05051         ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1);
05052         ONOFF(option, ONIG_OPTION_IGNORECASE, 1);
05053         ONOFF(option, ONIG_OPTION_SINGLELINE, 0);
05054         ONOFF(option, ONIG_OPTION_MULTILINE,  1);
05055         ONOFF(option, ONIG_OPTION_EXTEND, 1);
05056         PFETCH(c);
05057       }
05058 #if 0
05059       else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {
05060         /* d-imx */
05061         ONOFF(option, ONIG_OPTION_ASCII_RANGE, 0);
05062         ONOFF(option, ONIG_OPTION_POSIX_BRACKET_ALL_RANGE, 0);
05063         ONOFF(option, ONIG_OPTION_WORD_BOUND_ALL_RANGE, 0);
05064         ONOFF(option, ONIG_OPTION_IGNORECASE, 1);
05065         ONOFF(option, ONIG_OPTION_MULTILINE,  1);
05066         ONOFF(option, ONIG_OPTION_EXTEND, 1);
05067         PFETCH(c);
05068       }
05069 #endif
05070       else {
05071         return ONIGERR_UNDEFINED_GROUP_OPTION;
05072       }
05073       /* fall through */
05074 #ifdef USE_POSIXLINE_OPTION
05075     case 'p':
05076 #endif
05077     case '-': case 'i': case 'm': case 's': case 'x':
05078     case 'a': case 'd': case 'l': case 'u':
05079       {
05080         int neg = 0;
05081 
05082         while (1) {
05083           switch (c) {
05084           case ':':
05085           case ')':
05086           break;
05087 
05088           case '-':  neg = 1; break;
05089           case 'x':  ONOFF(option, ONIG_OPTION_EXTEND,     neg); break;
05090           case 'i':  ONOFF(option, ONIG_OPTION_IGNORECASE, neg); break;
05091           case 's':
05092             if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
05093               ONOFF(option, ONIG_OPTION_MULTILINE,  neg);
05094             }
05095             else
05096               return ONIGERR_UNDEFINED_GROUP_OPTION;
05097             break;
05098 
05099           case 'm':
05100             if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
05101               ONOFF(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));
05102             }
05103             else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {
05104               ONOFF(option, ONIG_OPTION_MULTILINE,  neg);
05105             }
05106             else
05107               return ONIGERR_UNDEFINED_GROUP_OPTION;
05108             break;
05109 #ifdef USE_POSIXLINE_OPTION
05110           case 'p':
05111             ONOFF(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg);
05112             break;
05113 #endif
05114 
05115           case 'a':     /* limits \d, \s, \w and POSIX brackets to ASCII range */
05116             if ((IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL) ||
05117                  IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) &&
05118                 (neg == 0)) {
05119               ONOFF(option, ONIG_OPTION_ASCII_RANGE, 0);
05120               ONOFF(option, ONIG_OPTION_POSIX_BRACKET_ALL_RANGE, 1);
05121               ONOFF(option, ONIG_OPTION_WORD_BOUND_ALL_RANGE, 1);
05122             }
05123             else
05124               return ONIGERR_UNDEFINED_GROUP_OPTION;
05125             break;
05126 
05127           case 'u':
05128             if ((IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL) ||
05129                  IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) &&
05130                 (neg == 0)) {
05131               ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1);
05132               ONOFF(option, ONIG_OPTION_POSIX_BRACKET_ALL_RANGE, 1);
05133               ONOFF(option, ONIG_OPTION_WORD_BOUND_ALL_RANGE, 1);
05134             }
05135             else
05136               return ONIGERR_UNDEFINED_GROUP_OPTION;
05137             break;
05138 
05139           case 'd':
05140             if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL) &&
05141                 (neg == 0)) {
05142               ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1);
05143             }
05144             else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY) &&
05145                 (neg == 0)) {
05146               ONOFF(option, ONIG_OPTION_ASCII_RANGE, 0);
05147               ONOFF(option, ONIG_OPTION_POSIX_BRACKET_ALL_RANGE, 0);
05148               ONOFF(option, ONIG_OPTION_WORD_BOUND_ALL_RANGE, 0);
05149             }
05150             else
05151               return ONIGERR_UNDEFINED_GROUP_OPTION;
05152             break;
05153 
05154           case 'l':
05155             if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL) && (neg == 0)) {
05156               ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1);
05157             }
05158             else
05159               return ONIGERR_UNDEFINED_GROUP_OPTION;
05160             break;
05161 
05162           default:
05163             return ONIGERR_UNDEFINED_GROUP_OPTION;
05164           }
05165 
05166           if (c == ')') {
05167             *np = node_new_option(option);
05168             CHECK_NULL_RETURN_MEMERR(*np);
05169             *src = p;
05170             return 2; /* option only */
05171           }
05172           else if (c == ':') {
05173             OnigOptionType prev = env->option;
05174 
05175             env->option     = option;
05176             r = fetch_token(tok, &p, end, env);
05177             if (r < 0) return r;
05178             r = parse_subexp(&target, tok, term, &p, end, env);
05179             env->option = prev;
05180             if (r < 0) return r;
05181             *np = node_new_option(option);
05182             CHECK_NULL_RETURN_MEMERR(*np);
05183             NENCLOSE(*np)->target = target;
05184             *src = p;
05185             return 0;
05186           }
05187 
05188           if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
05189           PFETCH(c);
05190         }
05191       }
05192       break;
05193 
05194     default:
05195       return ONIGERR_UNDEFINED_GROUP_OPTION;
05196     }
05197   }
05198   else {
05199     if (ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_DONT_CAPTURE_GROUP))
05200       goto group;
05201 
05202     *np = node_new_enclose_memory(env->option, 0);
05203     CHECK_NULL_RETURN_MEMERR(*np);
05204     num = scan_env_add_mem_entry(env);
05205     if (num < 0) return num;
05206     NENCLOSE(*np)->regnum = num;
05207   }
05208 
05209   CHECK_NULL_RETURN_MEMERR(*np);
05210   r = fetch_token(tok, &p, end, env);
05211   if (r < 0) return r;
05212   r = parse_subexp(&target, tok, term, &p, end, env);
05213   if (r < 0) {
05214     onig_node_free(target);
05215     return r;
05216   }
05217 
05218   if (NTYPE(*np) == NT_ANCHOR)
05219     NANCHOR(*np)->target = target;
05220   else {
05221     NENCLOSE(*np)->target = target;
05222     if (NENCLOSE(*np)->type == ENCLOSE_MEMORY) {
05223       /* Don't move this to previous of parse_subexp() */
05224       r = scan_env_set_mem_node(env, NENCLOSE(*np)->regnum, *np);
05225       if (r != 0) return r;
05226     }
05227     else if (NENCLOSE(*np)->type == ENCLOSE_CONDITION) {
05228       if (NTYPE(target) != NT_ALT) {
05229         /* convert (?(cond)yes) to (?(cond)yes|empty) */
05230         work1 = node_new_empty();
05231         if (IS_NULL(work1)) goto err;
05232         work2 = onig_node_new_alt(work1, NULL_NODE);
05233         if (IS_NULL(work2)) goto err;
05234         work1 = onig_node_new_alt(target, work2);
05235         if (IS_NULL(work1)) goto err;
05236         NENCLOSE(*np)->target = work1;
05237       }
05238     }
05239   }
05240 
05241   *src = p;
05242   return 0;
05243 
05244  err:
05245   onig_node_free(work1);
05246   onig_node_free(work2);
05247   onig_node_free(*np);
05248   *np = NULL;
05249   return ONIGERR_MEMORY;
05250 }
05251 
/* Spelling of each common quantifier; set_quantifier() indexes this
 * table with the result of popular_quantifier_num() when it builds a
 * warning message. */
static const char* const PopularQStr[] = {
  "?",
  "*",
  "+",
  "??",
  "*?",
  "+?"
};
05255 
/* Replacement text shown in the "nested repeat operator" warning;
 * set_quantifier() indexes this table with a ReduceTypeTable entry.
 * NOTE(review): the two leading empty strings presumably correspond to
 * RQ_ASIS and RQ_DEL, which set_quantifier() handles without using
 * this table -- confirm against the RQ_* definitions. */
static const char* const ReduceQStr[] = {
  "",
  "",
  "*",
  "*?",
  "??",
  "+ and ??",
  "+? and ?"
};
05259 
05260 static int
05261 set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
05262 {
05263   QtfrNode* qn;
05264 
05265   qn = NQTFR(qnode);
05266   if (qn->lower == 1 && qn->upper == 1) {
05267     return 1;
05268   }
05269 
05270   switch (NTYPE(target)) {
05271   case NT_STR:
05272     if (! group) {
05273       StrNode* sn = NSTR(target);
05274       if (str_node_can_be_split(sn, env->enc)) {
05275         Node* n = str_node_split_last_char(sn, env->enc);
05276         if (IS_NOT_NULL(n)) {
05277           qn->target = n;
05278           return 2;
05279         }
05280       }
05281     }
05282     break;
05283 
05284   case NT_QTFR:
05285     { /* check redundant double repeat. */
05286       /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */
05287       QtfrNode* qnt   = NQTFR(target);
05288       int nestq_num   = popular_quantifier_num(qn);
05289       int targetq_num = popular_quantifier_num(qnt);
05290 
05291 #ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
05292       if (!IS_QUANTIFIER_BY_NUMBER(qn) && !IS_QUANTIFIER_BY_NUMBER(qnt) &&
05293           IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
05294         UChar buf[WARN_BUFSIZE];
05295 
05296         switch (ReduceTypeTable[targetq_num][nestq_num]) {
05297         case RQ_ASIS:
05298           break;
05299 
05300         case RQ_DEL:
05301           if (onig_verb_warn != onig_null_warn) {
05302             onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
05303                                  env->pattern, env->pattern_end,
05304                                  (UChar* )"redundant nested repeat operator");
05305             (*onig_verb_warn)((char* )buf);
05306           }
05307           goto warn_exit;
05308           break;
05309 
05310         default:
05311           if (onig_verb_warn != onig_null_warn) {
05312             onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
05313                                        env->pattern, env->pattern_end,
05314             (UChar* )"nested repeat operator %s and %s was replaced with '%s'",
05315             PopularQStr[targetq_num], PopularQStr[nestq_num],
05316             ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
05317             (*onig_verb_warn)((char* )buf);
05318           }
05319           goto warn_exit;
05320           break;
05321         }
05322       }
05323 
05324     warn_exit:
05325 #endif
05326       if (targetq_num >= 0) {
05327         if (nestq_num >= 0) {
05328           onig_reduce_nested_quantifier(qnode, target);
05329           goto q_exit;
05330         }
05331         else if (targetq_num == 1 || targetq_num == 2) { /* * or + */
05332           /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */
05333           if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) {
05334             qn->upper = (qn->lower == 0 ? 1 : qn->lower);
05335           }
05336         }
05337       }
05338     }
05339     break;
05340 
05341   default:
05342     break;
05343   }
05344 
05345   qn->target = target;
05346  q_exit:
05347   return 0;
05348 }
05349 
05350 
05351 #ifdef USE_SHARED_CCLASS_TABLE
05352 
05353 #define THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS     8
05354 
05355 /* for ctype node hash table */
05356 
/* Lookup key for the shared character-class table.  parse_exp() fills it
   from the current encoding and the character-type token being parsed. */
typedef struct {
  OnigEncoding enc;   /* encoding the class was built for */
  int not;            /* the token's negation flag (tok->u.prop.not) */
  int type;           /* character type (tok->u.prop.ctype) */
} type_cclass_key;
05362 
05363 static int type_cclass_cmp(type_cclass_key* x, type_cclass_key* y)
05364 {
05365   if (x->type != y->type) return 1;
05366   if (x->enc  != y->enc)  return 1;
05367   if (x->not  != y->not)  return 1;
05368   return 0;
05369 }
05370 
05371 static st_index_t type_cclass_hash(type_cclass_key* key)
05372 {
05373   int i, val;
05374   UChar *p;
05375 
05376   val = 0;
05377 
05378   p = (UChar* )&(key->enc);
05379   for (i = 0; i < (int )sizeof(key->enc); i++) {
05380     val = val * 997 + (int )*p++;
05381   }
05382 
05383   p = (UChar* )(&key->type);
05384   for (i = 0; i < (int )sizeof(key->type); i++) {
05385     val = val * 997 + (int )*p++;
05386   }
05387 
05388   val += key->not;
05389   return val + (val >> 5);
05390 }
05391 
/* Comparison and hash callbacks passed to onig_st_init_table_with_size()
   when the shared character-class table is created in parse_exp(). */
static const struct st_hash_type type_type_cclass_hash = {
    type_cclass_cmp,
    type_cclass_hash,
};

/* Shared character-class nodes keyed by type_cclass_key.  Created lazily in
   parse_exp() and released by onig_free_shared_cclass_table(). */
static st_table* OnigTypeCClassTable;
05398 
05399 
05400 static int
05401 i_free_shared_class(type_cclass_key* key, Node* node, void* arg ARG_UNUSED)
05402 {
05403   if (IS_NOT_NULL(node)) {
05404     CClassNode* cc = NCCLASS(node);
05405     if (IS_NOT_NULL(cc->mbuf)) xfree(cc->mbuf);
05406     xfree(node);
05407   }
05408 
05409   if (IS_NOT_NULL(key)) xfree(key);
05410   return ST_DELETE;
05411 }
05412 
05413 extern int
05414 onig_free_shared_cclass_table(void)
05415 {
05416   THREAD_ATOMIC_START;
05417   if (IS_NOT_NULL(OnigTypeCClassTable)) {
05418     onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0);
05419     onig_st_free_table(OnigTypeCClassTable);
05420     OnigTypeCClassTable = NULL;
05421   }
05422   THREAD_ATOMIC_END;
05423 
05424   return 0;
05425 }
05426 
05427 #endif /* USE_SHARED_CCLASS_TABLE */
05428 
05429 
05430 #ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
05431 static int
05432 clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)
05433 {
05434   BBuf *tbuf;
05435   int r;
05436 
05437   if (IS_NCCLASS_NOT(cc)) {
05438     bitset_invert(cc->bs);
05439 
05440     if (! ONIGENC_IS_SINGLEBYTE(enc)) {
05441       r = not_code_range_buf(enc, cc->mbuf, &tbuf);
05442       if (r != 0) return r;
05443 
05444       bbuf_free(cc->mbuf);
05445       cc->mbuf = tbuf;
05446     }
05447 
05448     NCCLASS_CLEAR_NOT(cc);
05449   }
05450 
05451   return 0;
05452 }
05453 #endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
05454 
/* State handed to i_apply_case_fold() through its void* argument while a
   character class is expanded for case-insensitive matching. */
typedef struct {
  ScanEnv*    env;       /* scan environment; supplies the encoding */
  CClassNode* cc;        /* character class being extended in place */
  Node*       alt_root;  /* first alternative built for multi-char folds (NULL_NODE if none) */
  Node**      ptail;     /* slot where the next alternative is linked; starts at &alt_root */
} IApplyCaseFoldArg;
05461 
05462 static int
05463 i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],
05464                   int to_len, void* arg)
05465 {
05466   IApplyCaseFoldArg* iarg;
05467   ScanEnv* env;
05468   CClassNode* cc;
05469   BitSetRef bs;
05470 
05471   iarg = (IApplyCaseFoldArg* )arg;
05472   env = iarg->env;
05473   cc  = iarg->cc;
05474   bs = cc->bs;
05475 
05476   if (to_len == 1) {
05477     int is_in = onig_is_code_in_cc(env->enc, from, cc);
05478 #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
05479     if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||
05480         (is_in == 0 &&  IS_NCCLASS_NOT(cc))) {
05481       if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
05482         add_code_range0(&(cc->mbuf), env, *to, *to, 0);
05483       }
05484       else {
05485         BITSET_SET_BIT(bs, *to);
05486       }
05487     }
05488 #else
05489     if (is_in != 0) {
05490       if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
05491         if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);
05492         add_code_range0(&(cc->mbuf), env, *to, *to, 0);
05493       }
05494       else {
05495         if (IS_NCCLASS_NOT(cc)) {
05496           BITSET_CLEAR_BIT(bs, *to);
05497         }
05498         else
05499           BITSET_SET_BIT(bs, *to);
05500       }
05501     }
05502 #endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
05503   }
05504   else {
05505     int r, i, len;
05506     UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
05507     Node *snode = NULL_NODE;
05508 
05509     if (onig_is_code_in_cc(env->enc, from, cc)
05510 #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
05511         && !IS_NCCLASS_NOT(cc)
05512 #endif
05513         ) {
05514       for (i = 0; i < to_len; i++) {
05515         len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf);
05516         if (i == 0) {
05517           snode = onig_node_new_str(buf, buf + len);
05518           CHECK_NULL_RETURN_MEMERR(snode);
05519 
05520           /* char-class expanded multi-char only
05521              compare with string folded at match time. */
05522           NSTRING_SET_AMBIG(snode);
05523         }
05524         else {
05525           r = onig_node_str_cat(snode, buf, buf + len);
05526           if (r < 0) {
05527             onig_node_free(snode);
05528             return r;
05529           }
05530         }
05531       }
05532 
05533       *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE);
05534       CHECK_NULL_RETURN_MEMERR(*(iarg->ptail));
05535       iarg->ptail = &(NCDR((*(iarg->ptail))));
05536     }
05537   }
05538 
05539   return 0;
05540 }
05541 
05542 static int
05543 node_linebreak(Node** np, ScanEnv* env)
05544 {
05545   /* same as (?>\x0D\x0A|[\x0A-\x0D\x{85}\x{2028}\x{2029}]) */
05546   Node* left = NULL;
05547   Node* right = NULL;
05548   Node* target1 = NULL;
05549   Node* target2 = NULL;
05550   CClassNode* cc;
05551   int num1, num2;
05552   UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN * 2];
05553 
05554   /* \x0D\x0A */
05555   num1 = ONIGENC_CODE_TO_MBC(env->enc, 0x0D, buf);
05556   if (num1 < 0) return num1;
05557   num2 = ONIGENC_CODE_TO_MBC(env->enc, 0x0A, buf + num1);
05558   if (num2 < 0) return num2;
05559   left = node_new_str_raw(buf, buf + num1 + num2);
05560   if (IS_NULL(left)) goto err;
05561 
05562   /* [\x0A-\x0D] or [\x0A-\x0D\x{85}\x{2028}\x{2029}] */
05563   right = node_new_cclass();
05564   if (IS_NULL(right)) goto err;
05565   cc = NCCLASS(right);
05566   if (ONIGENC_MBC_MINLEN(env->enc) > 1) {
05567     add_code_range(&(cc->mbuf), env, 0x0A, 0x0D);
05568   }
05569   else {
05570     bitset_set_range(env, cc->bs, 0x0A, 0x0D);
05571   }
05572 
05573   /* TODO: move this block to enc/unicode.c */
05574   if (ONIGENC_IS_UNICODE(env->enc)) {
05575     /* UTF-8, UTF-16BE/LE, UTF-32BE/LE */
05576     add_code_range(&(cc->mbuf), env, 0x85, 0x85);
05577     add_code_range(&(cc->mbuf), env, 0x2028, 0x2029);
05578   }
05579 
05580   /* ...|... */
05581   target1 = onig_node_new_alt(right, NULL_NODE);
05582   if (IS_NULL(target1)) goto err;
05583   right = NULL;
05584   target2 = onig_node_new_alt(left, target1);
05585   if (IS_NULL(target2)) goto err;
05586   left = NULL;
05587   target1 = NULL;
05588 
05589   /* (?>...) */
05590   *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
05591   if (IS_NULL(*np)) goto err;
05592   NENCLOSE(*np)->target = target2;
05593   return ONIG_NORMAL;
05594 
05595  err:
05596   onig_node_free(left);
05597   onig_node_free(right);
05598   onig_node_free(target1);
05599   onig_node_free(target2);
05600   return ONIGERR_MEMORY;
05601 }
05602 
05603 static int
05604 node_extended_grapheme_cluster(Node** np, ScanEnv* env)
05605 {
05606   /* same as (?>\P{M}\p{M}*) */
05607   Node* np1 = NULL;
05608   Node* np2 = NULL;
05609   Node* qn = NULL;
05610   Node* list1 = NULL;
05611   Node* list2 = NULL;
05612   int r = 0;
05613 
05614 #ifdef USE_UNICODE_PROPERTIES
05615   if (ONIGENC_IS_UNICODE(env->enc)) {
05616     /* UTF-8, UTF-16BE/LE, UTF-32BE/LE */
05617     CClassNode* cc1;
05618     CClassNode* cc2;
05619     UChar* propname = (UChar* )"M";
05620     int ctype = env->enc->property_name_to_ctype(ONIG_ENCODING_ASCII,
05621         propname, propname + 1);
05622     if (ctype >= 0) {
05623       /* \P{M} */
05624       np1 = node_new_cclass();
05625       if (IS_NULL(np1)) goto err;
05626       cc1 = NCCLASS(np1);
05627       r = add_ctype_to_cc(cc1, ctype, 0, 1, env);
05628       if (r != 0) goto err;
05629       NCCLASS_SET_NOT(cc1);
05630 
05631       /* \p{M}* */
05632       np2 = node_new_cclass();
05633       if (IS_NULL(np2)) goto err;
05634       cc2 = NCCLASS(np2);
05635       r = add_ctype_to_cc(cc2, ctype, 0, 1, env);
05636       if (r != 0) goto err;
05637 
05638       qn = node_new_quantifier(0, REPEAT_INFINITE, 0);
05639       if (IS_NULL(qn)) goto err;
05640       NQTFR(qn)->target = np2;
05641       np2 = NULL;
05642 
05643       /* \P{M}\p{M}* */
05644       list2 = node_new_list(qn, NULL_NODE);
05645       if (IS_NULL(list2)) goto err;
05646       qn = NULL;
05647       list1 = node_new_list(np1, list2);
05648       if (IS_NULL(list1)) goto err;
05649       np1 = NULL;
05650       list2 = NULL;
05651 
05652       /* (?>...) */
05653       *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
05654       if (IS_NULL(*np)) goto err;
05655       NENCLOSE(*np)->target = list1;
05656       return ONIG_NORMAL;
05657     }
05658   }
05659 #endif /* USE_UNICODE_PROPERTIES */
05660   if (IS_NULL(*np)) {
05661     /* PerlSyntax: (?s:.), RubySyntax: (?m:.) */
05662     OnigOptionType option;
05663     np1 = node_new_anychar();
05664     if (IS_NULL(np1)) goto err;
05665 
05666     option = env->option;
05667     ONOFF(option, ONIG_OPTION_MULTILINE, 0);
05668     *np = node_new_option(option);
05669     if (IS_NULL(*np)) goto err;
05670     NENCLOSE(*np)->target = np1;
05671   }
05672   return ONIG_NORMAL;
05673 
05674  err:
05675   onig_node_free(np1);
05676   onig_node_free(np2);
05677   onig_node_free(qn);
05678   onig_node_free(list1);
05679   onig_node_free(list2);
05680   return (r == 0) ? ONIGERR_MEMORY : r;
05681 }
05682 
/* Count the set bits in the low 32 bits of `bits` by summing neighbouring
   fields whose width doubles on every pass (1, 2, 4, 8, 16 bits). */
static int
countbits(unsigned int bits)
{
  static const unsigned int field_mask[] = {
    0x55555555, 0x33333333, 0x0f0f0f0f, 0x00ff00ff, 0x0000ffff
  };
  unsigned int width = 1;
  int pass;

  for (pass = 0; pass < 5; pass++) {
    bits = (bits & field_mask[pass]) + ((bits >> width) & field_mask[pass]);
    width <<= 1;
  }
  return bits;
}
05692 
05693 static int
05694 is_onechar_cclass(CClassNode* cc, OnigCodePoint* code)
05695 {
05696   const OnigCodePoint not_found = ONIG_LAST_CODE_POINT;
05697   OnigCodePoint c = not_found;
05698   int i;
05699   BBuf *bbuf = cc->mbuf;
05700 
05701   if (IS_NCCLASS_NOT(cc)) return 0;
05702 
05703   /* check bbuf */
05704   if (IS_NOT_NULL(bbuf)) {
05705     OnigCodePoint n, *data;
05706     GET_CODE_POINT(n, bbuf->p);
05707     data = (OnigCodePoint* )(bbuf->p) + 1;
05708     if ((n == 1) && (data[0] == data[1])) {
05709       /* only one char found in the bbuf, save the code point. */
05710       c = data[0];
05711       if (((c < SINGLE_BYTE_SIZE) && BITSET_AT(cc->bs, c))) {
05712         /* skip if c is included in the bitset */
05713         c = not_found;
05714       }
05715     }
05716     else {
05717       return 0;  /* the bbuf contains multiple chars */
05718     }
05719   }
05720 
05721   /* check bitset */
05722   for (i = 0; i < BITSET_SIZE; i++) {
05723     Bits b1 = cc->bs[i];
05724     if (b1 != 0) {
05725       if (((b1 & (b1 - 1)) == 0) && (c == not_found)) {
05726         c = BITS_IN_ROOM * i + countbits(b1 - 1);
05727       } else {
05728         return 0;  /* the character class contains multiple chars */
05729       }
05730     }
05731   }
05732 
05733   if (c != not_found) {
05734     *code = c;
05735     return 1;
05736   }
05737 
05738   /* the character class contains no char. */
05739   return 0;
05740 }
05741 
05742 
05743 static int
05744 parse_exp(Node** np, OnigToken* tok, int term,
05745           UChar** src, UChar* end, ScanEnv* env)
05746 {
05747   int r, len, group = 0;
05748   Node* qn;
05749   Node** targetp;
05750 
05751   *np = NULL;
05752   if (tok->type == (enum TokenSyms )term)
05753     goto end_of_token;
05754 
05755   switch (tok->type) {
05756   case TK_ALT:
05757   case TK_EOT:
05758   end_of_token:
05759     *np = node_new_empty();
05760     return tok->type;
05761     break;
05762 
05763   case TK_SUBEXP_OPEN:
05764     r = parse_enclose(np, tok, TK_SUBEXP_CLOSE, src, end, env);
05765     if (r < 0) return r;
05766     if (r == 1) group = 1;
05767     else if (r == 2) { /* option only */
05768       Node* target;
05769       OnigOptionType prev = env->option;
05770 
05771       env->option = NENCLOSE(*np)->option;
05772       r = fetch_token(tok, src, end, env);
05773       if (r < 0) return r;
05774       r = parse_subexp(&target, tok, term, src, end, env);
05775       env->option = prev;
05776       if (r < 0) {
05777         onig_node_free(target);
05778         return r;
05779       }
05780       NENCLOSE(*np)->target = target;
05781       return tok->type;
05782     }
05783     break;
05784 
05785   case TK_SUBEXP_CLOSE:
05786     if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP))
05787       return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS;
05788 
05789     if (tok->escaped) goto tk_raw_byte;
05790     else goto tk_byte;
05791     break;
05792 
05793   case TK_LINEBREAK:
05794     r = node_linebreak(np, env);
05795     if (r < 0) return r;
05796     break;
05797 
05798   case TK_EXTENDED_GRAPHEME_CLUSTER:
05799     r = node_extended_grapheme_cluster(np, env);
05800     if (r < 0) return r;
05801     break;
05802 
05803   case TK_KEEP:
05804     *np = onig_node_new_anchor(ANCHOR_KEEP);
05805     CHECK_NULL_RETURN_MEMERR(*np);
05806     break;
05807 
05808   case TK_STRING:
05809   tk_byte:
05810     {
05811       *np = node_new_str(tok->backp, *src);
05812       CHECK_NULL_RETURN_MEMERR(*np);
05813 
05814     string_loop:
05815       while (1) {
05816         r = fetch_token(tok, src, end, env);
05817         if (r < 0) return r;
05818         if (r == TK_STRING) {
05819           r = onig_node_str_cat(*np, tok->backp, *src);
05820         }
05821 #ifndef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
05822         else if (r == TK_CODE_POINT) {
05823           r = node_str_cat_codepoint(*np, env->enc, tok->u.code);
05824         }
05825 #endif
05826         else {
05827           break;
05828         }
05829         if (r < 0) return r;
05830       }
05831 
05832     string_end:
05833       targetp = np;
05834       goto repeat;
05835     }
05836     break;
05837 
05838   case TK_RAW_BYTE:
05839   tk_raw_byte:
05840     {
05841       *np = node_new_str_raw_char((UChar )tok->u.c);
05842       CHECK_NULL_RETURN_MEMERR(*np);
05843       len = 1;
05844       while (1) {
05845         if (len >= ONIGENC_MBC_MINLEN(env->enc)) {
05846           if (len == enclen(env->enc, NSTR(*np)->s, NSTR(*np)->end)) {
05847             r = fetch_token(tok, src, end, env);
05848             NSTRING_CLEAR_RAW(*np);
05849             goto string_end;
05850           }
05851         }
05852 
05853         r = fetch_token(tok, src, end, env);
05854         if (r < 0) return r;
05855         if (r != TK_RAW_BYTE) {
05856           /* Don't use this, it is wrong for little endian encodings. */
05857 #ifdef USE_PAD_TO_SHORT_BYTE_CHAR
05858           int rem;
05859           if (len < ONIGENC_MBC_MINLEN(env->enc)) {
05860             rem = ONIGENC_MBC_MINLEN(env->enc) - len;
05861             (void )node_str_head_pad(NSTR(*np), rem, (UChar )0);
05862             if (len + rem == enclen(env->enc, NSTR(*np)->s)) {
05863               NSTRING_CLEAR_RAW(*np);
05864               goto string_end;
05865             }
05866           }
05867 #endif
05868           return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
05869         }
05870 
05871         r = node_str_cat_char(*np, (UChar )tok->u.c);
05872         if (r < 0) return r;
05873 
05874         len++;
05875       }
05876     }
05877     break;
05878 
05879   case TK_CODE_POINT:
05880     {
05881       *np = node_new_empty();
05882       CHECK_NULL_RETURN_MEMERR(*np);
05883       r = node_str_cat_codepoint(*np, env->enc, tok->u.code);
05884       if (r != 0) return r;
05885 #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
05886       NSTRING_SET_RAW(*np);
05887 #else
05888       goto string_loop;
05889 #endif
05890     }
05891     break;
05892 
05893   case TK_QUOTE_OPEN:
05894     {
05895       OnigCodePoint end_op[2];
05896       UChar *qstart, *qend, *nextp;
05897 
05898       end_op[0] = (OnigCodePoint )MC_ESC(env->syntax);
05899       end_op[1] = (OnigCodePoint )'E';
05900       qstart = *src;
05901       qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc);
05902       if (IS_NULL(qend)) {
05903         nextp = qend = end;
05904       }
05905       *np = node_new_str(qstart, qend);
05906       CHECK_NULL_RETURN_MEMERR(*np);
05907       *src = nextp;
05908     }
05909     break;
05910 
05911   case TK_CHAR_TYPE:
05912     {
05913       switch (tok->u.prop.ctype) {
05914       case ONIGENC_CTYPE_WORD:
05915         *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not,
05916                              IS_ASCII_RANGE(env->option));
05917         CHECK_NULL_RETURN_MEMERR(*np);
05918         break;
05919 
05920       case ONIGENC_CTYPE_SPACE:
05921       case ONIGENC_CTYPE_DIGIT:
05922       case ONIGENC_CTYPE_XDIGIT:
05923         {
05924           CClassNode* cc;
05925 
05926 #ifdef USE_SHARED_CCLASS_TABLE
05927           const OnigCodePoint *mbr;
05928           OnigCodePoint sb_out;
05929 
05930           r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, tok->u.prop.ctype,
05931                                            &sb_out, &mbr);
05932           if (r == 0 &&
05933               ! IS_ASCII_RANGE(env->option) &&
05934               ONIGENC_CODE_RANGE_NUM(mbr)
05935               >= THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS) {
05936             type_cclass_key  key;
05937             type_cclass_key* new_key;
05938 
05939             key.enc  = env->enc;
05940             key.not  = tok->u.prop.not;
05941             key.type = tok->u.prop.ctype;
05942 
05943             THREAD_ATOMIC_START;
05944 
05945             if (IS_NULL(OnigTypeCClassTable)) {
05946               OnigTypeCClassTable
05947                 = onig_st_init_table_with_size(&type_type_cclass_hash, 10);
05948               if (IS_NULL(OnigTypeCClassTable)) {
05949                 THREAD_ATOMIC_END;
05950                 return ONIGERR_MEMORY;
05951               }
05952             }
05953             else {
05954               if (onig_st_lookup(OnigTypeCClassTable, (st_data_t )&key,
05955                                  (st_data_t* )np)) {
05956                 THREAD_ATOMIC_END;
05957                 break;
05958               }
05959             }
05960 
05961             *np = node_new_cclass_by_codepoint_range(tok->u.prop.not,
05962                                                      sb_out, mbr);
05963             if (IS_NULL(*np)) {
05964               THREAD_ATOMIC_END;
05965               return ONIGERR_MEMORY;
05966             }
05967 
05968             cc = NCCLASS(*np);
05969             NCCLASS_SET_SHARE(cc);
05970             new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key));
05971             xmemcpy(new_key, &key, sizeof(type_cclass_key));
05972             onig_st_add_direct(OnigTypeCClassTable, (st_data_t )new_key,
05973                                (st_data_t )*np);
05974 
05975             THREAD_ATOMIC_END;
05976           }
05977           else {
05978 #endif
05979             *np = node_new_cclass();
05980             CHECK_NULL_RETURN_MEMERR(*np);
05981             cc = NCCLASS(*np);
05982             r = add_ctype_to_cc(cc, tok->u.prop.ctype, 0, 0, env);
05983             if (r != 0) return r;
05984             if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
05985 #ifdef USE_SHARED_CCLASS_TABLE
05986           }
05987 #endif
05988         }
05989         break;
05990 
05991       default:
05992         return ONIGERR_PARSER_BUG;
05993         break;
05994       }
05995     }
05996     break;
05997 
05998   case TK_CHAR_PROPERTY:
05999     r = parse_char_property(np, tok, src, end, env);
06000     if (r != 0) return r;
06001     break;
06002 
06003   case TK_CC_OPEN:
06004     {
06005       CClassNode* cc;
06006       OnigCodePoint code;
06007 
06008       r = parse_char_class(np, tok, src, end, env);
06009       if (r != 0) return r;
06010 
06011       cc = NCCLASS(*np);
06012       if (is_onechar_cclass(cc, &code)) {
06013         onig_node_free(*np);
06014         *np = node_new_empty();
06015         CHECK_NULL_RETURN_MEMERR(*np);
06016         r = node_str_cat_codepoint(*np, env->enc, code);
06017         if (r != 0) return r;
06018         goto string_loop;
06019       }
06020       if (IS_IGNORECASE(env->option)) {
06021         IApplyCaseFoldArg iarg;
06022 
06023         iarg.env      = env;
06024         iarg.cc       = cc;
06025         iarg.alt_root = NULL_NODE;
06026         iarg.ptail    = &(iarg.alt_root);
06027 
06028         r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag,
06029                                         i_apply_case_fold, &iarg);
06030         if (r != 0) {
06031           onig_node_free(iarg.alt_root);
06032           return r;
06033         }
06034         if (IS_NOT_NULL(iarg.alt_root)) {
06035           Node* work = onig_node_new_alt(*np, iarg.alt_root);
06036           if (IS_NULL(work)) {
06037             onig_node_free(iarg.alt_root);
06038             return ONIGERR_MEMORY;
06039           }
06040           *np = work;
06041         }
06042       }
06043     }
06044     break;
06045 
06046   case TK_ANYCHAR:
06047     *np = node_new_anychar();
06048     CHECK_NULL_RETURN_MEMERR(*np);
06049     break;
06050 
06051   case TK_ANYCHAR_ANYTIME:
06052     *np = node_new_anychar();
06053     CHECK_NULL_RETURN_MEMERR(*np);
06054     qn = node_new_quantifier(0, REPEAT_INFINITE, 0);
06055     CHECK_NULL_RETURN_MEMERR(qn);
06056     NQTFR(qn)->target = *np;
06057     *np = qn;
06058     break;
06059 
06060   case TK_BACKREF:
06061     len = tok->u.backref.num;
06062     *np = node_new_backref(len,
06063                    (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),
06064                            tok->u.backref.by_name,
06065 #ifdef USE_BACKREF_WITH_LEVEL
06066                            tok->u.backref.exist_level,
06067                            tok->u.backref.level,
06068 #endif
06069                            env);
06070     CHECK_NULL_RETURN_MEMERR(*np);
06071     break;
06072 
06073 #ifdef USE_SUBEXP_CALL
06074   case TK_CALL:
06075     {
06076       int gnum = tok->u.call.gnum;
06077 
06078       if (gnum < 0 || tok->u.call.rel != 0) {
06079         if (gnum > 0) gnum--;
06080         gnum = BACKREF_REL_TO_ABS(gnum, env);
06081         if (gnum <= 0)
06082           return ONIGERR_INVALID_BACKREF;
06083       }
06084       *np = node_new_call(tok->u.call.name, tok->u.call.name_end, gnum);
06085       CHECK_NULL_RETURN_MEMERR(*np);
06086       env->num_call++;
06087     }
06088     break;
06089 #endif
06090 
06091   case TK_ANCHOR:
06092     *np = onig_node_new_anchor(tok->u.anchor.subtype);
06093     CHECK_NULL_RETURN_MEMERR(*np);
06094     NANCHOR(*np)->ascii_range = tok->u.anchor.ascii_range;
06095     break;
06096 
06097   case TK_OP_REPEAT:
06098   case TK_INTERVAL:
06099     if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) {
06100       if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS))
06101         return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED;
06102       else
06103         *np = node_new_empty();
06104     }
06105     else {
06106       goto tk_byte;
06107     }
06108     break;
06109 
06110   default:
06111     return ONIGERR_PARSER_BUG;
06112     break;
06113   }
06114 
06115   {
06116     targetp = np;
06117 
06118   re_entry:
06119     r = fetch_token(tok, src, end, env);
06120     if (r < 0) return r;
06121 
06122   repeat:
06123     if (r == TK_OP_REPEAT || r == TK_INTERVAL) {
06124       if (is_invalid_quantifier_target(*targetp))
06125         return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;
06126 
06127       qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,
06128                                (r == TK_INTERVAL ? 1 : 0));
06129       CHECK_NULL_RETURN_MEMERR(qn);
06130       NQTFR(qn)->greedy = tok->u.repeat.greedy;
06131       r = set_quantifier(qn, *targetp, group, env);
06132       if (r < 0) {
06133         onig_node_free(qn);
06134         return r;
06135       }
06136 
06137       if (tok->u.repeat.possessive != 0) {
06138         Node* en;
06139         en = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
06140         if (IS_NULL(en)) {
06141           onig_node_free(qn);
06142           return ONIGERR_MEMORY;
06143         }
06144         NENCLOSE(en)->target = qn;
06145         qn = en;
06146       }
06147 
06148       if (r == 0) {
06149         *targetp = qn;
06150       }
06151       else if (r == 1) {
06152         onig_node_free(qn);
06153       }
06154       else if (r == 2) { /* split case: /abc+/ */
06155         Node *tmp;
06156 
06157         *targetp = node_new_list(*targetp, NULL);
06158         if (IS_NULL(*targetp)) {
06159           onig_node_free(qn);
06160           return ONIGERR_MEMORY;
06161         }
06162         tmp = NCDR(*targetp) = node_new_list(qn, NULL);
06163         if (IS_NULL(tmp)) {
06164           onig_node_free(qn);
06165           return ONIGERR_MEMORY;
06166         }
06167         targetp = &(NCAR(tmp));
06168       }
06169       goto re_entry;
06170     }
06171   }
06172 
06173   return r;
06174 }
06175 
06176 static int
06177 parse_branch(Node** top, OnigToken* tok, int term,
06178              UChar** src, UChar* end, ScanEnv* env)
06179 {
06180   int r;
06181   Node *node, **headp;
06182 
06183   *top = NULL;
06184   r = parse_exp(&node, tok, term, src, end, env);
06185   if (r < 0) {
06186     onig_node_free(node);
06187     return r;
06188   }
06189 
06190   if (r == TK_EOT || r == term || r == TK_ALT) {
06191     *top = node;
06192   }
06193   else {
06194     *top  = node_new_list(node, NULL);
06195     headp = &(NCDR(*top));
06196     while (r != TK_EOT && r != term && r != TK_ALT) {
06197       r = parse_exp(&node, tok, term, src, end, env);
06198       if (r < 0) {
06199         onig_node_free(node);
06200         return r;
06201       }
06202 
06203       if (NTYPE(node) == NT_LIST) {
06204         *headp = node;
06205         while (IS_NOT_NULL(NCDR(node))) node = NCDR(node);
06206         headp = &(NCDR(node));
06207       }
06208       else {
06209         *headp = node_new_list(node, NULL);
06210         headp = &(NCDR(*headp));
06211       }
06212     }
06213   }
06214 
06215   return r;
06216 }
06217 
06218 /* term_tok: TK_EOT or TK_SUBEXP_CLOSE */
06219 static int
06220 parse_subexp(Node** top, OnigToken* tok, int term,
06221              UChar** src, UChar* end, ScanEnv* env)
06222 {
06223   int r;
06224   Node *node, **headp;
06225 
06226   *top = NULL;
06227   r = parse_branch(&node, tok, term, src, end, env);
06228   if (r < 0) {
06229     onig_node_free(node);
06230     return r;
06231   }
06232 
06233   if (r == term) {
06234     *top = node;
06235   }
06236   else if (r == TK_ALT) {
06237     *top  = onig_node_new_alt(node, NULL);
06238     headp = &(NCDR(*top));
06239     while (r == TK_ALT) {
06240       r = fetch_token(tok, src, end, env);
06241       if (r < 0) return r;
06242       r = parse_branch(&node, tok, term, src, end, env);
06243       if (r < 0) {
06244         onig_node_free(node);
06245         return r;
06246       }
06247 
06248       *headp = onig_node_new_alt(node, NULL);
06249       headp = &(NCDR(*headp));
06250     }
06251 
06252     if (tok->type != (enum TokenSyms )term)
06253       goto err;
06254   }
06255   else {
06256     onig_node_free(node);
06257   err:
06258     if (term == TK_SUBEXP_CLOSE)
06259       return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
06260     else
06261       return ONIGERR_PARSER_BUG;
06262   }
06263 
06264   return r;
06265 }
06266 
06267 static int
06268 parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)
06269 {
06270   int r;
06271   OnigToken tok;
06272 
06273   r = fetch_token(&tok, src, end, env);
06274   if (r < 0) return r;
06275   r = parse_subexp(top, &tok, TK_EOT, src, end, env);
06276   if (r < 0) return r;
06277 
06278 #ifdef USE_SUBEXP_CALL
06279   if (env->num_call > 0) {
06280     /* Capture the pattern itself. It is used for (?R), (?0) and \g<0>. */
06281     const int num = 0;
06282     Node* np;
06283     np = node_new_enclose_memory(env->option, 0);
06284     CHECK_NULL_RETURN_MEMERR(np);
06285     NENCLOSE(np)->regnum = num;
06286     NENCLOSE(np)->target = *top;
06287     r = scan_env_set_mem_node(env, num, np);
06288     if (r != 0) return r;
06289     *top = np;
06290   }
06291 #endif
06292   return 0;
06293 }
06294 
06295 extern int
06296 onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end,
06297                      regex_t* reg, ScanEnv* env)
06298 {
06299   int r;
06300   UChar* p;
06301 
06302 #ifdef USE_NAMED_GROUP
06303   names_clear(reg);
06304 #endif
06305 
06306   scan_env_clear(env);
06307   env->option         = reg->options;
06308   env->case_fold_flag = reg->case_fold_flag;
06309   env->enc            = reg->enc;
06310   env->syntax         = reg->syntax;
06311   env->pattern        = (UChar* )pattern;
06312   env->pattern_end    = (UChar* )end;
06313   env->reg            = reg;
06314 
06315   *root = NULL;
06316   p = (UChar* )pattern;
06317   r = parse_regexp(root, &p, (UChar* )end, env);
06318   reg->num_mem = env->num_mem;
06319   return r;
06320 }
06321 
06322 extern void
06323 onig_scan_env_set_error_string(ScanEnv* env, int ecode ARG_UNUSED,
06324                                 UChar* arg, UChar* arg_end)
06325 {
06326   env->error     = arg;
06327   env->error_end = arg_end;
06328 }
06329