Ruby 2.0.0p247 (2013-06-27 revision 41674)
ext/date/date_strptime.c
Go to the documentation of this file.
00001 /*
00002   date_strptime.c: Coded by Tadayoshi Funaba 2011,2012
00003 */
00004 
00005 #include "ruby.h"
00006 #include "ruby/encoding.h"
00007 #include "ruby/re.h"
00008 #include <ctype.h>
00009 
00010 static const char *day_names[] = {
00011     "Sunday", "Monday", "Tuesday", "Wednesday",
00012     "Thursday", "Friday", "Saturday",
00013     "Sun", "Mon", "Tue", "Wed",
00014     "Thu", "Fri", "Sat"
00015 };
00016 
00017 static const char *month_names[] = {
00018     "January", "February", "March", "April",
00019     "May", "June", "July", "August", "September",
00020     "October", "November", "December",
00021     "Jan", "Feb", "Mar", "Apr", "May", "Jun",
00022     "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
00023 };
00024 
00025 static const char *merid_names[] = {
00026     "am", "pm",
00027     "a.m.", "p.m."
00028 };
00029 
00030 static const char *extz_pats[] = {
00031     ":z",
00032     "::z",
00033     ":::z"
00034 };
00035 
00036 #define sizeof_array(o) (sizeof o / sizeof o[0])
00037 
00038 #define f_negate(x) rb_funcall(x, rb_intern("-@"), 0)
00039 #define f_add(x,y) rb_funcall(x, '+', 1, y)
00040 #define f_sub(x,y) rb_funcall(x, '-', 1, y)
00041 #define f_mul(x,y) rb_funcall(x, '*', 1, y)
00042 #define f_div(x,y) rb_funcall(x, '/', 1, y)
00043 #define f_idiv(x,y) rb_funcall(x, rb_intern("div"), 1, y)
00044 #define f_mod(x,y) rb_funcall(x, '%', 1, y)
00045 #define f_expt(x,y) rb_funcall(x, rb_intern("**"), 1, y)
00046 
00047 #define f_lt_p(x,y) rb_funcall(x, '<', 1, y)
00048 #define f_gt_p(x,y) rb_funcall(x, '>', 1, y)
00049 #define f_le_p(x,y) rb_funcall(x, rb_intern("<="), 1, y)
00050 #define f_ge_p(x,y) rb_funcall(x, rb_intern(">="), 1, y)
00051 
00052 #define f_match(r,s) rb_funcall(r, rb_intern("match"), 1, s)
00053 #define f_aref(o,i) rb_funcall(o, rb_intern("[]"), 1, i)
00054 #define f_end(o,i) rb_funcall(o, rb_intern("end"), 1, i)
00055 
00056 #define issign(c) ((c) == '-' || (c) == '+')
00057 
00058 static int
00059 num_pattern_p(const char *s)
00060 {
00061     if (isdigit((unsigned char)*s))
00062         return 1;
00063     if (*s == '%') {
00064         s++;
00065         if (*s == 'E' || *s == 'O')
00066             s++;
00067         if (*s &&
00068             (strchr("CDdeFGgHIjkLlMmNQRrSsTUuVvWwXxYy", *s) ||
00069              isdigit((unsigned char)*s)))
00070             return 1;
00071     }
00072     return 0;
00073 }
00074 
00075 #define NUM_PATTERN_P() num_pattern_p(&fmt[fi + 1])
00076 
00077 static long
00078 read_digits(const char *s, VALUE *n, size_t width)
00079 {
00080     size_t l;
00081 
00082     l = strspn(s, "0123456789");
00083 
00084     if (l == 0)
00085         return 0;
00086 
00087     if (width < l)
00088         l = width;
00089 
00090     if ((4 * l * sizeof(char)) <= (sizeof(long)*CHAR_BIT)) {
00091         const char *os = s;
00092         long v;
00093 
00094         v = 0;
00095         while ((size_t)(s - os) < l) {
00096             v *= 10;
00097             v += *s - '0';
00098             s++;
00099         }
00100         if (os == s)
00101             return 0;
00102         *n = LONG2NUM(v);
00103         return l;
00104     }
00105     else {
00106         char *s2 = ALLOCA_N(char, l + 1);
00107         memcpy(s2, s, l);
00108         s2[l] = '\0';
00109         *n = rb_cstr_to_inum(s2, 10, 0);
00110         return l;
00111     }
00112 }
00113 
/*
 * Accessors for the result hash.  Each one expects a variable named `hash`
 * to be in scope where it is expanded; the C string key k is interned and
 * wrapped as a Symbol before use.
 */
00114 #define set_hash(k,v) rb_hash_aset(hash, ID2SYM(rb_intern(k)), v)
00115 #define ref_hash(k) rb_hash_aref(hash, ID2SYM(rb_intern(k)))
00116 #define del_hash(k) rb_hash_delete(hash, ID2SYM(rb_intern(k)))
00117 
/*
 * fail(): flag the parse as failed by storing :_fail => true in `hash`,
 * then return 0 from the *enclosing* function.  Expands to a brace block,
 * not a do/while(0) statement.
 */
00118 #define fail() \
00119 { \
00120     set_hash("_fail", Qtrue); \
00121     return 0; \
00122 }
00123 
/* True once any fail() has run against `hash` (the :_fail entry is non-nil). */
00124 #define fail_p() (!NIL_P(ref_hash("_fail")))
00125 
/*
 * READ_DIGITS(n, w): read at most w digits at str[si] into n and advance
 * si past them; calls fail() when no digit is present.  Expects `str` and
 * `si` in scope.  Like fail(), this expands to a brace block, and callers
 * use it without a trailing semicolon between `if` and `else`, so the
 * shape of the expansion must not change.
 */
00126 #define READ_DIGITS(n,w) \
00127 { \
00128     size_t l; \
00129     l = read_digits(&str[si], &n, w); \
00130     if (l == 0) \
00131         fail(); \
00132     si += l; \
00133 }
00134 
/* READ_DIGITS with the width set to LONG_MAX, i.e. no practical limit. */
00135 #define READ_DIGITS_MAX(n) READ_DIGITS(n, LONG_MAX)
00136 
00137 static int
00138 valid_range_p(VALUE v, int a, int b)
00139 {
00140     if (FIXNUM_P(v)) {
00141         int vi = FIX2INT(v);
00142         return !(vi < a || vi > b);
00143     }
00144     return !(f_lt_p(v, INT2NUM(a)) || f_gt_p(v, INT2NUM(b)));
00145 }
00146 
/*
 * recur(fmt): parse the remaining input (&str[si], slen - si) with the
 * nested format fmt by calling date__strptime_internal, then advance si
 * by the amount that call consumed.  If the nested parse failed, return 0
 * from the enclosing function.
 *
 * The length is taken as `sizeof fmt - 1`, so fmt must be a string literal
 * (or char array), not a pointer.  Expects `str`, `si`, `slen` and `hash`
 * in scope; expands to a brace block.
 */
00147 #define recur(fmt) \
00148 { \
00149     size_t l; \
00150     l = date__strptime_internal(&str[si], slen - si, \
00151                                 fmt, sizeof fmt - 1, hash); \
00152     if (fail_p()) \
00153         return 0; \
00154     si += l; \
00155 }
00156 
/*
 * Not defined in this file.  The %Z / %z branch passes it the matched zone
 * text and stores the result under :offset.
 */
00157 VALUE date_zone_to_diff(VALUE);
00158 
00159 static size_t
00160 date__strptime_internal(const char *str, size_t slen,
00161                         const char *fmt, size_t flen, VALUE hash)
00162 {
00163     size_t si, fi;
00164     int c;
00165 
00166     si = fi = 0;
00167 
00168     while (fi < flen) {
00169 
00170         switch (fmt[fi]) {
00171           case '%':
00172 
00173           again:
00174             fi++;
00175             c = fmt[fi];
00176 
00177             switch (c) {
00178               case 'E':
00179                 if (fmt[fi + 1] && strchr("cCxXyY", fmt[fi + 1]))
00180                     goto again;
00181                 fi--;
00182                 goto ordinal;
00183               case 'O':
00184                 if (fmt[fi + 1] && strchr("deHImMSuUVwWy", fmt[fi + 1]))
00185                     goto again;
00186                 fi--;
00187                 goto ordinal;
00188               case ':':
00189                 {
00190                     int i;
00191 
00192                     for (i = 0; i < (int)sizeof_array(extz_pats); i++)
00193                         if (strncmp(extz_pats[i], &fmt[fi],
00194                                         strlen(extz_pats[i])) == 0) {
00195                             fi += i;
00196                             goto again;
00197                         }
00198                     fail();
00199                 }
00200 
00201               case 'A':
00202               case 'a':
00203                 {
00204                     int i;
00205 
00206                     for (i = 0; i < (int)sizeof_array(day_names); i++) {
00207                         size_t l = strlen(day_names[i]);
00208                         if (strncasecmp(day_names[i], &str[si], l) == 0) {
00209                             si += l;
00210                             set_hash("wday", INT2FIX(i % 7));
00211                             goto matched;
00212                         }
00213                     }
00214                     fail();
00215                 }
00216               case 'B':
00217               case 'b':
00218               case 'h':
00219                 {
00220                     int i;
00221 
00222                     for (i = 0; i < (int)sizeof_array(month_names); i++) {
00223                         size_t l = strlen(month_names[i]);
00224                         if (strncasecmp(month_names[i], &str[si], l) == 0) {
00225                             si += l;
00226                             set_hash("mon", INT2FIX((i % 12) + 1));
00227                             goto matched;
00228                         }
00229                     }
00230                     fail();
00231                 }
00232 
00233               case 'C':
00234                 {
00235                     VALUE n;
00236 
00237                     if (NUM_PATTERN_P())
00238                         READ_DIGITS(n, 2)
00239                     else
00240                         READ_DIGITS_MAX(n)
00241                     set_hash("_cent", n);
00242                     goto matched;
00243                 }
00244 
00245               case 'c':
00246                 recur("%a %b %e %H:%M:%S %Y");
00247                 goto matched;
00248 
00249               case 'D':
00250                 recur("%m/%d/%y");
00251                 goto matched;
00252 
00253               case 'd':
00254               case 'e':
00255                 {
00256                     VALUE n;
00257 
00258                     if (str[si] == ' ') {
00259                         si++;
00260                         READ_DIGITS(n, 1);
00261                     } else {
00262                         READ_DIGITS(n, 2);
00263                     }
00264                     if (!valid_range_p(n, 1, 31))
00265                         fail();
00266                     set_hash("mday", n);
00267                     goto matched;
00268                 }
00269 
00270               case 'F':
00271                 recur("%Y-%m-%d");
00272                 goto matched;
00273 
00274               case 'G':
00275                 {
00276                     VALUE n;
00277 
00278                     if (NUM_PATTERN_P())
00279                         READ_DIGITS(n, 4)
00280                     else
00281                         READ_DIGITS_MAX(n)
00282                     set_hash("cwyear", n);
00283                     goto matched;
00284                 }
00285 
00286               case 'g':
00287                 {
00288                     VALUE n;
00289 
00290                     READ_DIGITS(n, 2);
00291                     if (!valid_range_p(n, 0, 99))
00292                         fail();
00293                     set_hash("cwyear",n);
00294                     set_hash("_cent",
00295                              INT2FIX(f_ge_p(n, INT2FIX(69)) ? 19 : 20));
00296                     goto matched;
00297                 }
00298 
00299               case 'H':
00300               case 'k':
00301                 {
00302                     VALUE n;
00303 
00304                     if (str[si] == ' ') {
00305                         si++;
00306                         READ_DIGITS(n, 1);
00307                     } else {
00308                         READ_DIGITS(n, 2);
00309                     }
00310                     if (!valid_range_p(n, 0, 24))
00311                         fail();
00312                     set_hash("hour", n);
00313                     goto matched;
00314                 }
00315 
00316               case 'I':
00317               case 'l':
00318                 {
00319                     VALUE n;
00320 
00321                     if (str[si] == ' ') {
00322                         si++;
00323                         READ_DIGITS(n, 1);
00324                     } else {
00325                         READ_DIGITS(n, 2);
00326                     }
00327                     if (!valid_range_p(n, 1, 12))
00328                         fail();
00329                     set_hash("hour", n);
00330                     goto matched;
00331                 }
00332 
00333               case 'j':
00334                 {
00335                     VALUE n;
00336 
00337                     READ_DIGITS(n, 3);
00338                     if (!valid_range_p(n, 1, 366))
00339                         fail();
00340                     set_hash("yday", n);
00341                     goto matched;
00342                 }
00343 
00344               case 'L':
00345               case 'N':
00346                 {
00347                     VALUE n;
00348                     int sign = 1;
00349                     size_t osi;
00350 
00351                     if (issign(str[si])) {
00352                         if (str[si] == '-')
00353                             sign = -1;
00354                         si++;
00355                     }
00356                     osi = si;
00357                     if (NUM_PATTERN_P())
00358                         READ_DIGITS(n, c == 'L' ? 3 : 9)
00359                     else
00360                         READ_DIGITS_MAX(n)
00361                     if (sign == -1)
00362                         n = f_negate(n);
00363                     set_hash("sec_fraction",
00364                              rb_rational_new2(n,
00365                                               f_expt(INT2FIX(10),
00366                                                      ULONG2NUM(si - osi))));
00367                     goto matched;
00368                 }
00369 
00370               case 'M':
00371                 {
00372                     VALUE n;
00373 
00374                     READ_DIGITS(n, 2);
00375                     if (!valid_range_p(n, 0, 59))
00376                         fail();
00377                     set_hash("min", n);
00378                     goto matched;
00379                 }
00380 
00381               case 'm':
00382                 {
00383                     VALUE n;
00384 
00385                     READ_DIGITS(n, 2);
00386                     if (!valid_range_p(n, 1, 12))
00387                         fail();
00388                     set_hash("mon", n);
00389                     goto matched;
00390                 }
00391 
00392               case 'n':
00393               case 't':
00394                 recur(" ");
00395                 goto matched;
00396 
00397               case 'P':
00398               case 'p':
00399                 {
00400                     int i;
00401 
00402                     for (i = 0; i < 4; i++) {
00403                         size_t l = strlen(merid_names[i]);
00404                         if (strncasecmp(merid_names[i], &str[si], l) == 0) {
00405                             si += l;
00406                             set_hash("_merid", INT2FIX((i % 2) == 0 ? 0 : 12));
00407                             goto matched;
00408                         }
00409                     }
00410                     fail();
00411                 }
00412 
00413               case 'Q':
00414                 {
00415                     VALUE n;
00416                     int sign = 1;
00417 
00418                     if (str[si] == '-') {
00419                         sign = -1;
00420                         si++;
00421                     }
00422                     READ_DIGITS_MAX(n);
00423                     if (sign == -1)
00424                         n = f_negate(n);
00425                     set_hash("seconds",
00426                              rb_rational_new2(n,
00427                                               f_expt(INT2FIX(10),
00428                                                      INT2FIX(3))));
00429                     goto matched;
00430                 }
00431 
00432               case 'R':
00433                 recur("%H:%M");
00434                 goto matched;
00435 
00436               case 'r':
00437                 recur("%I:%M:%S %p");
00438                 goto matched;
00439 
00440               case 'S':
00441                 {
00442                     VALUE n;
00443 
00444                     READ_DIGITS(n, 2);
00445                     if (!valid_range_p(n, 0, 60))
00446                         fail();
00447                     set_hash("sec", n);
00448                     goto matched;
00449                 }
00450 
00451               case 's':
00452                 {
00453                     VALUE n;
00454                     int sign = 1;
00455 
00456                     if (str[si] == '-') {
00457                         sign = -1;
00458                         si++;
00459                     }
00460                     READ_DIGITS_MAX(n);
00461                     if (sign == -1)
00462                         n = f_negate(n);
00463                     set_hash("seconds", n);
00464                     goto matched;
00465                 }
00466 
00467               case 'T':
00468                 recur("%H:%M:%S");
00469                 goto matched;
00470 
00471               case 'U':
00472               case 'W':
00473                 {
00474                     VALUE n;
00475 
00476                     READ_DIGITS(n, 2);
00477                     if (!valid_range_p(n, 0, 53))
00478                         fail();
00479                     set_hash(c == 'U' ? "wnum0" : "wnum1", n);
00480                     goto matched;
00481                 }
00482 
00483               case 'u':
00484                 {
00485                     VALUE n;
00486 
00487                     READ_DIGITS(n, 1);
00488                     if (!valid_range_p(n, 1, 7))
00489                         fail();
00490                     set_hash("cwday", n);
00491                     goto matched;
00492                 }
00493 
00494               case 'V':
00495                 {
00496                     VALUE n;
00497 
00498                     READ_DIGITS(n, 2);
00499                     if (!valid_range_p(n, 1, 53))
00500                         fail();
00501                     set_hash("cweek", n);
00502                     goto matched;
00503                 }
00504 
00505               case 'v':
00506                 recur("%e-%b-%Y");
00507                 goto matched;
00508 
00509               case 'w':
00510                 {
00511                     VALUE n;
00512 
00513                     READ_DIGITS(n, 1);
00514                     if (!valid_range_p(n, 0, 6))
00515                         fail();
00516                     set_hash("wday", n);
00517                     goto matched;
00518                 }
00519 
00520               case 'X':
00521                 recur("%H:%M:%S");
00522                 goto matched;
00523 
00524               case 'x':
00525                 recur("%m/%d/%y");
00526                 goto matched;
00527 
00528               case 'Y':
00529                   {
00530                       VALUE n;
00531                       int sign = 1;
00532 
00533                       if (issign(str[si])) {
00534                           if (str[si] == '-')
00535                               sign = -1;
00536                           si++;
00537                       }
00538                       if (NUM_PATTERN_P())
00539                           READ_DIGITS(n, 4)
00540                       else
00541                           READ_DIGITS_MAX(n)
00542                     if (sign == -1)
00543                         n = f_negate(n);
00544                       set_hash("year", n);
00545                       goto matched;
00546                   }
00547 
00548               case 'y':
00549                 {
00550                     VALUE n;
00551                     int sign = 1;
00552 
00553                     READ_DIGITS(n, 2);
00554                     if (!valid_range_p(n, 0, 99))
00555                         fail();
00556                     if (sign == -1)
00557                         n = f_negate(n);
00558                     set_hash("year", n);
00559                     set_hash("_cent",
00560                              INT2FIX(f_ge_p(n, INT2FIX(69)) ? 19 : 20));
00561                     goto matched;
00562                 }
00563 
00564               case 'Z':
00565               case 'z':
00566                 {
00567                     static const char pat_source[] =
00568                         "\\A("
00569                         "(?:gmt|utc?)?[-+]\\d+(?:[,.:]\\d+(?::\\d+)?)?"
00570                         "|[[:alpha:].\\s]+(?:standard|daylight)\\s+time\\b"
00571                         "|[[:alpha:]]+(?:\\s+dst)?\\b"
00572                         ")";
00573                     static VALUE pat = Qnil;
00574                     VALUE m, b;
00575 
00576                     if (NIL_P(pat)) {
00577                         pat = rb_reg_new(pat_source, sizeof pat_source - 1,
00578                                          ONIG_OPTION_IGNORECASE);
00579                         rb_gc_register_mark_object(pat);
00580                     }
00581 
00582                     b = rb_backref_get();
00583                     rb_match_busy(b);
00584                     m = f_match(pat, rb_usascii_str_new2(&str[si]));
00585 
00586                     if (!NIL_P(m)) {
00587                         VALUE s, l, o;
00588 
00589                         s = rb_reg_nth_match(1, m);
00590                         l = f_end(m, INT2FIX(0));
00591                         o = date_zone_to_diff(s);
00592                         si += NUM2LONG(l);
00593                         set_hash("zone", s);
00594                         set_hash("offset", o);
00595                         rb_backref_set(b);
00596                         goto matched;
00597                     }
00598                     rb_backref_set(b);
00599                     fail();
00600                 }
00601 
00602               case '%':
00603                 if (str[si] != '%')
00604                     fail();
00605                 si++;
00606                 goto matched;
00607 
00608               case '+':
00609                 recur("%a %b %e %H:%M:%S %Z %Y");
00610                 goto matched;
00611 
00612               default:
00613                 if (str[si] != '%')
00614                     fail();
00615                 si++;
00616                 if (fi < flen)
00617                     if (str[si] != fmt[fi])
00618                         fail();
00619                 si++;
00620                 goto matched;
00621             }
00622           case ' ':
00623           case '\t':
00624           case '\n':
00625           case '\v':
00626           case '\f':
00627           case '\r':
00628             while (isspace((unsigned char)str[si]))
00629                 si++;
00630             fi++;
00631             break;
00632           default:
00633           ordinal:
00634             if (str[si] != fmt[fi])
00635                 fail();
00636             si++;
00637             fi++;
00638             break;
00639           matched:
00640             fi++;
00641             break;
00642         }
00643     }
00644 
00645     return si;
00646 }
00647 
00648 VALUE
00649 date__strptime(const char *str, size_t slen,
00650                const char *fmt, size_t flen, VALUE hash)
00651 {
00652     size_t si;
00653     VALUE cent, merid;
00654 
00655     si = date__strptime_internal(str, slen, fmt, flen, hash);
00656 
00657     if (slen > si) {
00658         VALUE s;
00659 
00660         s = rb_usascii_str_new(&str[si], slen - si);
00661         set_hash("leftover", s);
00662     }
00663 
00664     if (fail_p())
00665         return Qnil;
00666 
00667     cent = ref_hash("_cent");
00668     if (!NIL_P(cent)) {
00669         VALUE year;
00670 
00671         year = ref_hash("cwyear");
00672         if (!NIL_P(year))
00673             set_hash("cwyear", f_add(year, f_mul(cent, INT2FIX(100))));
00674         year = ref_hash("year");
00675         if (!NIL_P(year))
00676             set_hash("year", f_add(year, f_mul(cent, INT2FIX(100))));
00677         del_hash("_cent");
00678     }
00679 
00680     merid = ref_hash("_merid");
00681     if (!NIL_P(merid)) {
00682         VALUE hour;
00683 
00684         hour = ref_hash("hour");
00685         if (!NIL_P(hour)) {
00686             hour = f_mod(hour, INT2FIX(12));
00687             set_hash("hour", f_add(hour, merid));
00688         }
00689         del_hash("_merid");
00690     }
00691 
00692     return hash;
00693 }
00694 
00695 /*
00696 Local variables:
00697 c-file-style: "ruby"
00698 End:
00699 */
00700