Ruby
2.0.0p247(2013-06-27revision41674)
|
00001 /* 00002 date_strptime.c: Coded by Tadayoshi Funaba 2011,2012 00003 */ 00004 00005 #include "ruby.h" 00006 #include "ruby/encoding.h" 00007 #include "ruby/re.h" 00008 #include <ctype.h> 00009 00010 static const char *day_names[] = { 00011 "Sunday", "Monday", "Tuesday", "Wednesday", 00012 "Thursday", "Friday", "Saturday", 00013 "Sun", "Mon", "Tue", "Wed", 00014 "Thu", "Fri", "Sat" 00015 }; 00016 00017 static const char *month_names[] = { 00018 "January", "February", "March", "April", 00019 "May", "June", "July", "August", "September", 00020 "October", "November", "December", 00021 "Jan", "Feb", "Mar", "Apr", "May", "Jun", 00022 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" 00023 }; 00024 00025 static const char *merid_names[] = { 00026 "am", "pm", 00027 "a.m.", "p.m." 00028 }; 00029 00030 static const char *extz_pats[] = { 00031 ":z", 00032 "::z", 00033 ":::z" 00034 }; 00035 00036 #define sizeof_array(o) (sizeof o / sizeof o[0]) 00037 00038 #define f_negate(x) rb_funcall(x, rb_intern("-@"), 0) 00039 #define f_add(x,y) rb_funcall(x, '+', 1, y) 00040 #define f_sub(x,y) rb_funcall(x, '-', 1, y) 00041 #define f_mul(x,y) rb_funcall(x, '*', 1, y) 00042 #define f_div(x,y) rb_funcall(x, '/', 1, y) 00043 #define f_idiv(x,y) rb_funcall(x, rb_intern("div"), 1, y) 00044 #define f_mod(x,y) rb_funcall(x, '%', 1, y) 00045 #define f_expt(x,y) rb_funcall(x, rb_intern("**"), 1, y) 00046 00047 #define f_lt_p(x,y) rb_funcall(x, '<', 1, y) 00048 #define f_gt_p(x,y) rb_funcall(x, '>', 1, y) 00049 #define f_le_p(x,y) rb_funcall(x, rb_intern("<="), 1, y) 00050 #define f_ge_p(x,y) rb_funcall(x, rb_intern(">="), 1, y) 00051 00052 #define f_match(r,s) rb_funcall(r, rb_intern("match"), 1, s) 00053 #define f_aref(o,i) rb_funcall(o, rb_intern("[]"), 1, i) 00054 #define f_end(o,i) rb_funcall(o, rb_intern("end"), 1, i) 00055 00056 #define issign(c) ((c) == '-' || (c) == '+') 00057 00058 static int 00059 num_pattern_p(const char *s) 00060 { 00061 if (isdigit((unsigned char)*s)) 00062 return 1; 00063 if (*s == '%') { 00064 s++; 00065 if (*s == 'E' || *s == 'O') 00066 s++; 00067 if (*s && 00068 (strchr("CDdeFGgHIjkLlMmNQRrSsTUuVvWwXxYy", *s) || 00069 isdigit((unsigned char)*s))) 00070 return 1; 00071 } 00072 return 0; 00073 } 00074 00075 #define NUM_PATTERN_P() num_pattern_p(&fmt[fi + 1]) 00076 00077 static long 00078 read_digits(const char *s, VALUE *n, size_t width) 00079 { 00080 size_t l; 00081 00082 l = strspn(s, "0123456789"); 00083 00084 if (l == 0) 00085 return 0; 00086 00087 if (width < l) 00088 l = width; 00089 00090 if ((4 * l * sizeof(char)) <= (sizeof(long)*CHAR_BIT)) { 00091 const char *os = s; 00092 long v; 00093 00094 v = 0; 00095 while ((size_t)(s - os) < l) { 00096 v *= 10; 00097 v += *s - '0'; 00098 s++; 00099 } 00100 if (os == s) 00101 return 0; 00102 *n = LONG2NUM(v); 00103 return l; 00104 } 00105 else { 00106 char *s2 = ALLOCA_N(char, l + 1); 00107 memcpy(s2, s, l); 00108 s2[l] = '\0'; 00109 *n = rb_cstr_to_inum(s2, 10, 0); 00110 return l; 00111 } 00112 } 00113 00114 #define set_hash(k,v) rb_hash_aset(hash, ID2SYM(rb_intern(k)), v) 00115 #define ref_hash(k) rb_hash_aref(hash, ID2SYM(rb_intern(k))) 00116 #define del_hash(k) rb_hash_delete(hash, ID2SYM(rb_intern(k))) 00117 00118 #define fail() \ 00119 { \ 00120 set_hash("_fail", Qtrue); \ 00121 return 0; \ 00122 } 00123 00124 #define fail_p() (!NIL_P(ref_hash("_fail"))) 00125 00126 #define READ_DIGITS(n,w) \ 00127 { \ 00128 size_t l; \ 00129 l = read_digits(&str[si], &n, w); \ 00130 if (l == 0) \ 00131 fail(); \ 00132 si += l; \ 00133 } 00134 00135 #define READ_DIGITS_MAX(n) READ_DIGITS(n, LONG_MAX) 00136 00137 static int 00138 valid_range_p(VALUE v, int a, int b) 00139 { 00140 if (FIXNUM_P(v)) { 00141 int vi = FIX2INT(v); 00142 return !(vi < a || vi > b); 00143 } 00144 return !(f_lt_p(v, INT2NUM(a)) || f_gt_p(v, INT2NUM(b))); 00145 } 00146 00147 #define recur(fmt) \ 00148 { \ 00149 size_t l; \ 00150 l = date__strptime_internal(&str[si], slen - si, \ 00151 fmt, sizeof fmt - 1, hash); \ 00152 if (fail_p()) \ 00153 return 0; \ 00154 si += l; \ 00155 } 00156 00157 VALUE date_zone_to_diff(VALUE); 00158 00159 static size_t 00160 date__strptime_internal(const char *str, size_t slen, 00161 const char *fmt, size_t flen, VALUE hash) 00162 { 00163 size_t si, fi; 00164 int c; 00165 00166 si = fi = 0; 00167 00168 while (fi < flen) { 00169 00170 switch (fmt[fi]) { 00171 case '%': 00172 00173 again: 00174 fi++; 00175 c = fmt[fi]; 00176 00177 switch (c) { 00178 case 'E': 00179 if (fmt[fi + 1] && strchr("cCxXyY", fmt[fi + 1])) 00180 goto again; 00181 fi--; 00182 goto ordinal; 00183 case 'O': 00184 if (fmt[fi + 1] && strchr("deHImMSuUVwWy", fmt[fi + 1])) 00185 goto again; 00186 fi--; 00187 goto ordinal; 00188 case ':': 00189 { 00190 int i; 00191 00192 for (i = 0; i < (int)sizeof_array(extz_pats); i++) 00193 if (strncmp(extz_pats[i], &fmt[fi], 00194 strlen(extz_pats[i])) == 0) { 00195 fi += i; 00196 goto again; 00197 } 00198 fail(); 00199 } 00200 00201 case 'A': 00202 case 'a': 00203 { 00204 int i; 00205 00206 for (i = 0; i < (int)sizeof_array(day_names); i++) { 00207 size_t l = strlen(day_names[i]); 00208 if (strncasecmp(day_names[i], &str[si], l) == 0) { 00209 si += l; 00210 set_hash("wday", INT2FIX(i % 7)); 00211 goto matched; 00212 } 00213 } 00214 fail(); 00215 } 00216 case 'B': 00217 case 'b': 00218 case 'h': 00219 { 00220 int i; 00221 00222 for (i = 0; i < (int)sizeof_array(month_names); i++) { 00223 size_t l = strlen(month_names[i]); 00224 if (strncasecmp(month_names[i], &str[si], l) == 0) { 00225 si += l; 00226 set_hash("mon", INT2FIX((i % 12) + 1)); 00227 goto matched; 00228 } 00229 } 00230 fail(); 00231 } 00232 00233 case 'C': 00234 { 00235 VALUE n; 00236 00237 if (NUM_PATTERN_P()) 00238 READ_DIGITS(n, 2) 00239 else 00240 READ_DIGITS_MAX(n) 00241 set_hash("_cent", n); 00242 goto matched; 00243 } 00244 00245 case 'c': 00246 recur("%a %b %e %H:%M:%S %Y"); 00247 goto matched; 00248 00249 case 'D': 00250 recur("%m/%d/%y"); 00251 goto matched; 00252 00253 case 'd': 00254 case 'e': 00255 { 00256 VALUE n; 00257 00258 if (str[si] == ' ') { 00259 si++; 00260 READ_DIGITS(n, 1); 00261 } else { 00262 READ_DIGITS(n, 2); 00263 } 00264 if (!valid_range_p(n, 1, 31)) 00265 fail(); 00266 set_hash("mday", n); 00267 goto matched; 00268 } 00269 00270 case 'F': 00271 recur("%Y-%m-%d"); 00272 goto matched; 00273 00274 case 'G': 00275 { 00276 VALUE n; 00277 00278 if (NUM_PATTERN_P()) 00279 READ_DIGITS(n, 4) 00280 else 00281 READ_DIGITS_MAX(n) 00282 set_hash("cwyear", n); 00283 goto matched; 00284 } 00285 00286 case 'g': 00287 { 00288 VALUE n; 00289 00290 READ_DIGITS(n, 2); 00291 if (!valid_range_p(n, 0, 99)) 00292 fail(); 00293 set_hash("cwyear",n); 00294 set_hash("_cent", 00295 INT2FIX(f_ge_p(n, INT2FIX(69)) ? 19 : 20)); 00296 goto matched; 00297 } 00298 00299 case 'H': 00300 case 'k': 00301 { 00302 VALUE n; 00303 00304 if (str[si] == ' ') { 00305 si++; 00306 READ_DIGITS(n, 1); 00307 } else { 00308 READ_DIGITS(n, 2); 00309 } 00310 if (!valid_range_p(n, 0, 24)) 00311 fail(); 00312 set_hash("hour", n); 00313 goto matched; 00314 } 00315 00316 case 'I': 00317 case 'l': 00318 { 00319 VALUE n; 00320 00321 if (str[si] == ' ') { 00322 si++; 00323 READ_DIGITS(n, 1); 00324 } else { 00325 READ_DIGITS(n, 2); 00326 } 00327 if (!valid_range_p(n, 1, 12)) 00328 fail(); 00329 set_hash("hour", n); 00330 goto matched; 00331 } 00332 00333 case 'j': 00334 { 00335 VALUE n; 00336 00337 READ_DIGITS(n, 3); 00338 if (!valid_range_p(n, 1, 366)) 00339 fail(); 00340 set_hash("yday", n); 00341 goto matched; 00342 } 00343 00344 case 'L': 00345 case 'N': 00346 { 00347 VALUE n; 00348 int sign = 1; 00349 size_t osi; 00350 00351 if (issign(str[si])) { 00352 if (str[si] == '-') 00353 sign = -1; 00354 si++; 00355 } 00356 osi = si; 00357 if (NUM_PATTERN_P()) 00358 READ_DIGITS(n, c == 'L' ? 3 : 9) 00359 else 00360 READ_DIGITS_MAX(n) 00361 if (sign == -1) 00362 n = f_negate(n); 00363 set_hash("sec_fraction", 00364 rb_rational_new2(n, 00365 f_expt(INT2FIX(10), 00366 ULONG2NUM(si - osi)))); 00367 goto matched; 00368 } 00369 00370 case 'M': 00371 { 00372 VALUE n; 00373 00374 READ_DIGITS(n, 2); 00375 if (!valid_range_p(n, 0, 59)) 00376 fail(); 00377 set_hash("min", n); 00378 goto matched; 00379 } 00380 00381 case 'm': 00382 { 00383 VALUE n; 00384 00385 READ_DIGITS(n, 2); 00386 if (!valid_range_p(n, 1, 12)) 00387 fail(); 00388 set_hash("mon", n); 00389 goto matched; 00390 } 00391 00392 case 'n': 00393 case 't': 00394 recur(" "); 00395 goto matched; 00396 00397 case 'P': 00398 case 'p': 00399 { 00400 int i; 00401 00402 for (i = 0; i < 4; i++) { 00403 size_t l = strlen(merid_names[i]); 00404 if (strncasecmp(merid_names[i], &str[si], l) == 0) { 00405 si += l; 00406 set_hash("_merid", INT2FIX((i % 2) == 0 ? 0 : 12)); 00407 goto matched; 00408 } 00409 } 00410 fail(); 00411 } 00412 00413 case 'Q': 00414 { 00415 VALUE n; 00416 int sign = 1; 00417 00418 if (str[si] == '-') { 00419 sign = -1; 00420 si++; 00421 } 00422 READ_DIGITS_MAX(n); 00423 if (sign == -1) 00424 n = f_negate(n); 00425 set_hash("seconds", 00426 rb_rational_new2(n, 00427 f_expt(INT2FIX(10), 00428 INT2FIX(3)))); 00429 goto matched; 00430 } 00431 00432 case 'R': 00433 recur("%H:%M"); 00434 goto matched; 00435 00436 case 'r': 00437 recur("%I:%M:%S %p"); 00438 goto matched; 00439 00440 case 'S': 00441 { 00442 VALUE n; 00443 00444 READ_DIGITS(n, 2); 00445 if (!valid_range_p(n, 0, 60)) 00446 fail(); 00447 set_hash("sec", n); 00448 goto matched; 00449 } 00450 00451 case 's': 00452 { 00453 VALUE n; 00454 int sign = 1; 00455 00456 if (str[si] == '-') { 00457 sign = -1; 00458 si++; 00459 } 00460 READ_DIGITS_MAX(n); 00461 if (sign == -1) 00462 n = f_negate(n); 00463 set_hash("seconds", n); 00464 goto matched; 00465 } 00466 00467 case 'T': 00468 recur("%H:%M:%S"); 00469 goto matched; 00470 00471 case 'U': 00472 case 'W': 00473 { 00474 VALUE n; 00475 00476 READ_DIGITS(n, 2); 00477 if (!valid_range_p(n, 0, 53)) 00478 fail(); 00479 set_hash(c == 'U' ? "wnum0" : "wnum1", n); 00480 goto matched; 00481 } 00482 00483 case 'u': 00484 { 00485 VALUE n; 00486 00487 READ_DIGITS(n, 1); 00488 if (!valid_range_p(n, 1, 7)) 00489 fail(); 00490 set_hash("cwday", n); 00491 goto matched; 00492 } 00493 00494 case 'V': 00495 { 00496 VALUE n; 00497 00498 READ_DIGITS(n, 2); 00499 if (!valid_range_p(n, 1, 53)) 00500 fail(); 00501 set_hash("cweek", n); 00502 goto matched; 00503 } 00504 00505 case 'v': 00506 recur("%e-%b-%Y"); 00507 goto matched; 00508 00509 case 'w': 00510 { 00511 VALUE n; 00512 00513 READ_DIGITS(n, 1); 00514 if (!valid_range_p(n, 0, 6)) 00515 fail(); 00516 set_hash("wday", n); 00517 goto matched; 00518 } 00519 00520 case 'X': 00521 recur("%H:%M:%S"); 00522 goto matched; 00523 00524 case 'x': 00525 recur("%m/%d/%y"); 00526 goto matched; 00527 00528 case 'Y': 00529 { 00530 VALUE n; 00531 int sign = 1; 00532 00533 if (issign(str[si])) { 00534 if (str[si] == '-') 00535 sign = -1; 00536 si++; 00537 } 00538 if (NUM_PATTERN_P()) 00539 READ_DIGITS(n, 4) 00540 else 00541 READ_DIGITS_MAX(n) 00542 if (sign == -1) 00543 n = f_negate(n); 00544 set_hash("year", n); 00545 goto matched; 00546 } 00547 00548 case 'y': 00549 { 00550 VALUE n; 00551 int sign = 1; 00552 00553 READ_DIGITS(n, 2); 00554 if (!valid_range_p(n, 0, 99)) 00555 fail(); 00556 if (sign == -1) 00557 n = f_negate(n); 00558 set_hash("year", n); 00559 set_hash("_cent", 00560 INT2FIX(f_ge_p(n, INT2FIX(69)) ? 19 : 20)); 00561 goto matched; 00562 } 00563 00564 case 'Z': 00565 case 'z': 00566 { 00567 static const char pat_source[] = 00568 "\\A(" 00569 "(?:gmt|utc?)?[-+]\\d+(?:[,.:]\\d+(?::\\d+)?)?" 00570 "|[[:alpha:].\\s]+(?:standard|daylight)\\s+time\\b" 00571 "|[[:alpha:]]+(?:\\s+dst)?\\b" 00572 ")"; 00573 static VALUE pat = Qnil; 00574 VALUE m, b; 00575 00576 if (NIL_P(pat)) { 00577 pat = rb_reg_new(pat_source, sizeof pat_source - 1, 00578 ONIG_OPTION_IGNORECASE); 00579 rb_gc_register_mark_object(pat); 00580 } 00581 00582 b = rb_backref_get(); 00583 rb_match_busy(b); 00584 m = f_match(pat, rb_usascii_str_new2(&str[si])); 00585 00586 if (!NIL_P(m)) { 00587 VALUE s, l, o; 00588 00589 s = rb_reg_nth_match(1, m); 00590 l = f_end(m, INT2FIX(0)); 00591 o = date_zone_to_diff(s); 00592 si += NUM2LONG(l); 00593 set_hash("zone", s); 00594 set_hash("offset", o); 00595 rb_backref_set(b); 00596 goto matched; 00597 } 00598 rb_backref_set(b); 00599 fail(); 00600 } 00601 00602 case '%': 00603 if (str[si] != '%') 00604 fail(); 00605 si++; 00606 goto matched; 00607 00608 case '+': 00609 recur("%a %b %e %H:%M:%S %Z %Y"); 00610 goto matched; 00611 00612 default: 00613 if (str[si] != '%') 00614 fail(); 00615 si++; 00616 if (fi < flen) 00617 if (str[si] != fmt[fi]) 00618 fail(); 00619 si++; 00620 goto matched; 00621 } 00622 case ' ': 00623 case '\t': 00624 case '\n': 00625 case '\v': 00626 case '\f': 00627 case '\r': 00628 while (isspace((unsigned char)str[si])) 00629 si++; 00630 fi++; 00631 break; 00632 default: 00633 ordinal: 00634 if (str[si] != fmt[fi]) 00635 fail(); 00636 si++; 00637 fi++; 00638 break; 00639 matched: 00640 fi++; 00641 break; 00642 } 00643 } 00644 00645 return si; 00646 } 00647 00648 VALUE 00649 date__strptime(const char *str, size_t slen, 00650 const char *fmt, size_t flen, VALUE hash) 00651 { 00652 size_t si; 00653 VALUE cent, merid; 00654 00655 si = date__strptime_internal(str, slen, fmt, flen, hash); 00656 00657 if (slen > si) { 00658 VALUE s; 00659 00660 s = rb_usascii_str_new(&str[si], slen - si); 00661 set_hash("leftover", s); 00662 } 00663 00664 if (fail_p()) 00665 return Qnil; 00666 00667 cent = ref_hash("_cent"); 00668 if (!NIL_P(cent)) { 00669 VALUE year; 00670 00671 year = ref_hash("cwyear"); 00672 if (!NIL_P(year)) 00673 set_hash("cwyear", f_add(year, f_mul(cent, INT2FIX(100)))); 00674 year = ref_hash("year"); 00675 if (!NIL_P(year)) 00676 set_hash("year", f_add(year, f_mul(cent, INT2FIX(100)))); 00677 del_hash("_cent"); 00678 } 00679 00680 merid = ref_hash("_merid"); 00681 if (!NIL_P(merid)) { 00682 VALUE hour; 00683 00684 hour = ref_hash("hour"); 00685 if (!NIL_P(hour)) { 00686 hour = f_mod(hour, INT2FIX(12)); 00687 set_hash("hour", f_add(hour, merid)); 00688 } 00689 del_hash("_merid"); 00690 } 00691 00692 return hash; 00693 } 00694 00695 /* 00696 Local variables: 00697 c-file-style: "ruby" 00698 End: 00699 */ 00700