Ruby
2.0.0p247(2013-06-27revision41674)
|
00001 /********************************************************************** 00002 00003 sprintf.c - 00004 00005 $Author: nobu $ 00006 created at: Fri Oct 15 10:39:26 JST 1993 00007 00008 Copyright (C) 1993-2007 Yukihiro Matsumoto 00009 Copyright (C) 2000 Network Applied Communication Laboratory, Inc. 00010 Copyright (C) 2000 Information-technology Promotion Agency, Japan 00011 00012 **********************************************************************/ 00013 00014 #include "ruby/ruby.h" 00015 #include "ruby/re.h" 00016 #include "ruby/encoding.h" 00017 #include <math.h> 00018 #include <stdarg.h> 00019 00020 #ifdef HAVE_IEEEFP_H 00021 #include <ieeefp.h> 00022 #endif 00023 00024 #define BIT_DIGITS(N) (((N)*146)/485 + 1) /* log2(10) =~ 146/485 */ 00025 #define BITSPERDIG (SIZEOF_BDIGITS*CHAR_BIT) 00026 #define EXTENDSIGN(n, l) (((~0 << (n)) >> (((n)*(l)) % BITSPERDIG)) & ~(~0 << (n))) 00027 00028 static void fmt_setup(char*,size_t,int,int,int,int); 00029 00030 static char* 00031 remove_sign_bits(char *str, int base) 00032 { 00033 char *t = str; 00034 00035 if (base == 16) { 00036 while (*t == 'f') { 00037 t++; 00038 } 00039 } 00040 else if (base == 8) { 00041 *t |= EXTENDSIGN(3, strlen(t)); 00042 while (*t == '7') { 00043 t++; 00044 } 00045 } 00046 else if (base == 2) { 00047 while (*t == '1') { 00048 t++; 00049 } 00050 } 00051 00052 return t; 00053 } 00054 00055 static char 00056 sign_bits(int base, const char *p) 00057 { 00058 char c = '.'; 00059 00060 switch (base) { 00061 case 16: 00062 if (*p == 'X') c = 'F'; 00063 else c = 'f'; 00064 break; 00065 case 8: 00066 c = '7'; break; 00067 case 2: 00068 c = '1'; break; 00069 } 00070 return c; 00071 } 00072 00073 #define FNONE 0 00074 #define FSHARP 1 00075 #define FMINUS 2 00076 #define FPLUS 4 00077 #define FZERO 8 00078 #define FSPACE 16 00079 #define FWIDTH 32 00080 #define FPREC 64 00081 #define FPREC0 128 00082 00083 #define CHECK(l) do {\ 00084 int cr = ENC_CODERANGE(result);\ 00085 while (blen + (l) >= bsiz) 
{\ 00086 bsiz*=2;\ 00087 }\ 00088 rb_str_resize(result, bsiz);\ 00089 ENC_CODERANGE_SET(result, cr);\ 00090 buf = RSTRING_PTR(result);\ 00091 } while (0) 00092 00093 #define PUSH(s, l) do { \ 00094 CHECK(l);\ 00095 memcpy(&buf[blen], (s), (l));\ 00096 blen += (l);\ 00097 } while (0) 00098 00099 #define FILL(c, l) do { \ 00100 CHECK(l);\ 00101 memset(&buf[blen], (c), (l));\ 00102 blen += (l);\ 00103 } while (0) 00104 00105 #define GETARG() (nextvalue != Qundef ? nextvalue : \ 00106 posarg == -1 ? \ 00107 (rb_raise(rb_eArgError, "unnumbered(%d) mixed with numbered", nextarg), 0) : \ 00108 posarg == -2 ? \ 00109 (rb_raise(rb_eArgError, "unnumbered(%d) mixed with named", nextarg), 0) : \ 00110 (posarg = nextarg++, GETNTHARG(posarg))) 00111 00112 #define GETPOSARG(n) (posarg > 0 ? \ 00113 (rb_raise(rb_eArgError, "numbered(%d) after unnumbered(%d)", (n), posarg), 0) : \ 00114 posarg == -2 ? \ 00115 (rb_raise(rb_eArgError, "numbered(%d) after named", (n)), 0) : \ 00116 (((n) < 1) ? (rb_raise(rb_eArgError, "invalid index - %d$", (n)), 0) : \ 00117 (posarg = -1, GETNTHARG(n)))) 00118 00119 #define GETNTHARG(nth) \ 00120 (((nth) >= argc) ? (rb_raise(rb_eArgError, "too few arguments"), 0) : argv[(nth)]) 00121 00122 #define GETNAMEARG(id, name, len, enc) ( \ 00123 posarg > 0 ? \ 00124 (rb_enc_raise((enc), rb_eArgError, "named%.*s after unnumbered(%d)", (len), (name), posarg), 0) : \ 00125 posarg == -1 ? 
\ 00126 (rb_enc_raise((enc), rb_eArgError, "named%.*s after numbered", (len), (name)), 0) : \ 00127 (posarg = -2, rb_hash_lookup2(get_hash(&hash, argc, argv), (id), Qundef))) 00128 00129 #define GETNUM(n, val) \ 00130 for (; p < end && rb_enc_isdigit(*p, enc); p++) { \ 00131 int next_n = 10 * (n) + (*p - '0'); \ 00132 if (next_n / 10 != (n)) {\ 00133 rb_raise(rb_eArgError, #val " too big"); \ 00134 } \ 00135 (n) = next_n; \ 00136 } \ 00137 if (p >= end) { \ 00138 rb_raise(rb_eArgError, "malformed format string - %%*[0-9]"); \ 00139 } 00140 00141 #define GETASTER(val) do { \ 00142 t = p++; \ 00143 n = 0; \ 00144 GETNUM(n, (val)); \ 00145 if (*p == '$') { \ 00146 tmp = GETPOSARG(n); \ 00147 } \ 00148 else { \ 00149 tmp = GETARG(); \ 00150 p = t; \ 00151 } \ 00152 (val) = NUM2INT(tmp); \ 00153 } while (0) 00154 00155 static VALUE 00156 get_hash(volatile VALUE *hash, int argc, const VALUE *argv) 00157 { 00158 VALUE tmp; 00159 00160 if (*hash != Qundef) return *hash; 00161 if (argc != 2) { 00162 rb_raise(rb_eArgError, "one hash required"); 00163 } 00164 tmp = rb_check_hash_type(argv[1]); 00165 if (NIL_P(tmp)) { 00166 rb_raise(rb_eArgError, "one hash required"); 00167 } 00168 return (*hash = tmp); 00169 } 00170 00171 /* 00172 * call-seq: 00173 * format(format_string [, arguments...] ) -> string 00174 * sprintf(format_string [, arguments...] ) -> string 00175 * 00176 * Returns the string resulting from applying <i>format_string</i> to 00177 * any additional arguments. Within the format string, any characters 00178 * other than format sequences are copied to the result. 00179 * 00180 * The syntax of a format sequence is follows. 00181 * 00182 * %[flags][width][.precision]type 00183 * 00184 * A format 00185 * sequence consists of a percent sign, followed by optional flags, 00186 * width, and precision indicators, then terminated with a field type 00187 * character. 
The field type controls how the corresponding
 *  <code>sprintf</code> argument is to be interpreted, while the flags
 *  modify that interpretation.
 *
 *  The field type characters are:
 *
 *      Field |  Integer Format
 *      ------+--------------------------------------------------------------
 *        b   | Convert argument as a binary number.
 *            | Negative numbers will be displayed as a two's complement
 *            | prefixed with `..1'.
 *        B   | Equivalent to `b', but uses an uppercase 0B for prefix
 *            | in the alternative format by #.
 *        d   | Convert argument as a decimal number.
 *        i   | Identical to `d'.
 *        o   | Convert argument as an octal number.
 *            | Negative numbers will be displayed as a two's complement
 *            | prefixed with `..7'.
 *        u   | Identical to `d'.
 *        x   | Convert argument as a hexadecimal number.
 *            | Negative numbers will be displayed as a two's complement
 *            | prefixed with `..f' (representing an infinite string of
 *            | leading 'ff's).
 *        X   | Equivalent to `x', but uses uppercase letters.
 *
 *      Field |  Float Format
 *      ------+--------------------------------------------------------------
 *        e   | Convert floating point argument into exponential notation
 *            | with one digit before the decimal point as [-]d.dddddde[+-]dd.
 *            | The precision specifies the number of digits after the decimal
 *            | point (defaulting to six).
 *        E   | Equivalent to `e', but uses an uppercase E to indicate
 *            | the exponent.
 *        f   | Convert floating point argument as [-]ddd.dddddd,
 *            | where the precision specifies the number of digits after
 *            | the decimal point.
 *        g   | Convert a floating point number using exponential form
 *            | if the exponent is less than -4 or greater than or
 *            | equal to the precision, or in dd.dddd form otherwise.
 *            | The precision specifies the number of significant digits.
 *        G   | Equivalent to `g', but uses an uppercase `E' in exponent form.
 *        a   | Convert floating point argument as [-]0xh.hhhhp[+-]dd,
 *            | which consists of an optional sign, "0x", the fraction part
 *            | as hexadecimal, "p", and the exponent part as decimal.
 *        A   | Equivalent to `a', but uses uppercase `X' and `P'.
 *
 *      Field |  Other Format
 *      ------+--------------------------------------------------------------
 *        c   | Argument is the numeric code for a single character or
 *            | a single character string itself.
 *        p   | The value of argument.inspect.
 *        s   | Argument is a string to be substituted.  If the format
 *            | sequence contains a precision, at most that many characters
 *            | will be copied.
 *        %   | A percent sign itself will be displayed.  No argument taken.
 *
 *  The flags modify the behavior of the formats.
 *  The flag characters are:
 *
 *    Flag     | Applies to    | Meaning
 *    ---------+---------------+-----------------------------------------
 *    space    | bBdiouxX      | Leave a space at the start of
 *             | aAeEfgG       | non-negative numbers.
 *             | (numeric fmt) | For `o', `x', `X', `b' and `B', use
 *             |               | a minus sign with absolute value for
 *             |               | negative values.
 *    ---------+---------------+-----------------------------------------
 *    (digit)$ | all           | Specifies the absolute argument number
 *             |               | for this field.  Absolute and relative
 *             |               | argument numbers cannot be mixed in a
 *             |               | sprintf string.
 *    ---------+---------------+-----------------------------------------
 *     #       | bBoxX         | Use an alternative format.
 *             | aAeEfgG       | For the conversions `o', increase the precision
 *             |               | until the first digit will be `0' if
 *             |               | it is not formatted as complements.
 *             |               | For the conversions `x', `X', `b' and `B'
 *             |               | on non-zero, prefix the result with ``0x'',
 *             |               | ``0X'', ``0b'' and ``0B'', respectively.
 *             |               | For `a', `A', `e', `E', `f', `g', and 'G',
 *             |               | force a decimal point to be added,
 *             |               | even if no digits follow.
 *             |               | For `g' and 'G', do not remove trailing zeros.
 *    ---------+---------------+-----------------------------------------
 *    +        | bBdiouxX      | Add a leading plus sign to non-negative
 *             | aAeEfgG       | numbers.
 *             | (numeric fmt) | For `o', `x', `X', `b' and `B', use
 *             |               | a minus sign with absolute value for
 *             |               | negative values.
 *    ---------+---------------+-----------------------------------------
 *    -        | all           | Left-justify the result of this conversion.
 *    ---------+---------------+-----------------------------------------
 *    0 (zero) | bBdiouxX      | Pad with zeros, not spaces.
 *             | aAeEfgG       | For `o', `x', `X', `b' and `B', radix-1
 *             | (numeric fmt) | is used for negative numbers formatted as
 *             |               | complements.
 *    ---------+---------------+-----------------------------------------
 *    *        | all           | Use the next argument as the field width.
 *             |               | If negative, left-justify the result. If the
 *             |               | asterisk is followed by a number and a dollar
 *             |               | sign, use the indicated argument as the width.
 *
 *  Examples of flags:
 *
 *   # `+' and space flag specifies the sign of non-negative numbers.
 *   sprintf("%d", 123)  #=> "123"
 *   sprintf("%+d", 123) #=> "+123"
 *   sprintf("% d", 123) #=> " 123"
 *
 *   # `#' flag for `o' increases number of digits to show `0'.
 *   # `+' and space flag changes format of negative numbers.
 *   sprintf("%o", 123)   #=> "173"
 *   sprintf("%#o", 123)  #=> "0173"
 *   sprintf("%+o", -123) #=> "-173"
 *   sprintf("%o", -123)  #=> "..7605"
 *   sprintf("%#o", -123) #=> "..7605"
 *
 *   # `#' flag for `x' add a prefix `0x' for non-zero numbers.
 *   # `+' and space flag disables complements for negative numbers.
 *   sprintf("%x", 123)   #=> "7b"
 *   sprintf("%#x", 123)  #=> "0x7b"
 *   sprintf("%+x", -123) #=> "-7b"
 *   sprintf("%x", -123)  #=> "..f85"
 *   sprintf("%#x", -123) #=> "0x..f85"
 *   sprintf("%#x", 0)    #=> "0"
 *
 *   # `#' for `X' uses the prefix `0X'.
 *   sprintf("%X", 123)  #=> "7B"
 *   sprintf("%#X", 123) #=> "0X7B"
 *
 *   # `#' flag for `b' add a prefix `0b' for non-zero numbers.
 *   # `+' and space flag disables complements for negative numbers.
 *   sprintf("%b", 123)   #=> "1111011"
 *   sprintf("%#b", 123)  #=> "0b1111011"
 *   sprintf("%+b", -123) #=> "-1111011"
 *   sprintf("%b", -123)  #=> "..10000101"
 *   sprintf("%#b", -123) #=> "0b..10000101"
 *   sprintf("%#b", 0)    #=> "0"
 *
 *   # `#' for `B' uses the prefix `0B'.
 *   sprintf("%B", 123)  #=> "1111011"
 *   sprintf("%#B", 123) #=> "0B1111011"
 *
 *   # `#' for `e' forces to show the decimal point.
 *   sprintf("%.0e", 1)  #=> "1e+00"
 *   sprintf("%#.0e", 1) #=> "1.e+00"
 *
 *   # `#' for `f' forces to show the decimal point.
 *   sprintf("%.0f", 1234)  #=> "1234"
 *   sprintf("%#.0f", 1234) #=> "1234."
 *
 *   # `#' for `g' forces to show the decimal point.
 *   # It also disables stripping lowest zeros.
 *   sprintf("%g", 123.4)   #=> "123.4"
 *   sprintf("%#g", 123.4)  #=> "123.400"
 *   sprintf("%g", 123456)  #=> "123456"
 *   sprintf("%#g", 123456) #=> "123456."
 *
 *  The field width is an optional integer, followed optionally by a
 *  period and a precision.
The width specifies the minimum number of
 *  characters that will be written to the result for this field.
 *
 *  Examples of width:
 *
 *   # padding is done by spaces,       width=20
 *   # 0 or radix-1.             <------------------>
 *   sprintf("%20d", 123)   #=> "                 123"
 *   sprintf("%+20d", 123)  #=> "                +123"
 *   sprintf("%020d", 123)  #=> "00000000000000000123"
 *   sprintf("%+020d", 123) #=> "+0000000000000000123"
 *   sprintf("% 020d", 123) #=> " 0000000000000000123"
 *   sprintf("%-20d", 123)  #=> "123                 "
 *   sprintf("%-+20d", 123) #=> "+123                "
 *   sprintf("%- 20d", 123) #=> " 123                "
 *   sprintf("%020x", -123) #=> "..ffffffffffffffff85"
 *
 *  For
 *  numeric fields, the precision controls the number of decimal places
 *  displayed.  For string fields, the precision determines the maximum
 *  number of characters to be copied from the string.  (Thus, the format
 *  sequence <code>%10.10s</code> will always contribute exactly ten
 *  characters to the result.)
 *
 *  Examples of precisions:
 *
 *   # precision for `d', 'o', 'x' and 'b' is
 *   # minimum number of digits               <------>
 *   sprintf("%20.8d", 123)  #=> "            00000123"
 *   sprintf("%20.8o", 123)  #=> "            00000173"
 *   sprintf("%20.8x", 123)  #=> "            0000007b"
 *   sprintf("%20.8b", 123)  #=> "            01111011"
 *   sprintf("%20.8d", -123) #=> "           -00000123"
 *   sprintf("%20.8o", -123) #=> "            ..777605"
 *   sprintf("%20.8x", -123) #=> "            ..ffff85"
 *   sprintf("%20.8b", -11)  #=> "            ..110101"
 *
 *   # "0x" and "0b" for `#x' and `#b' is not counted for
 *   # precision but "0" for `#o' is counted.  <------>
 *   sprintf("%#20.8d", 123)  #=> "            00000123"
 *   sprintf("%#20.8o", 123)  #=> "            00000173"
 *   sprintf("%#20.8x", 123)  #=> "          0x0000007b"
 *   sprintf("%#20.8b", 123)  #=> "          0b01111011"
 *   sprintf("%#20.8d", -123) #=> "           -00000123"
 *   sprintf("%#20.8o", -123) #=> "            ..777605"
 *   sprintf("%#20.8x", -123) #=> "          0x..ffff85"
 *   sprintf("%#20.8b", -11)  #=> "          0b..110101"
 *
 *   # precision for `e' is number of
 *   # digits after the decimal point           <------>
 *   sprintf("%20.8e", 1234.56789) #=> "      1.23456789e+03"
 *
 *   # precision for `f' is number of
 *   # digits after the decimal point               <------>
 *   sprintf("%20.8f", 1234.56789) #=> "       1234.56789000"
 *
 *   # precision for `g' is number of
 *   # significant digits                          <------->
 *   sprintf("%20.8g", 1234.56789) #=> "           1234.5679"
 *
 *   #                                         <------->
 *   sprintf("%20.8g", 123456789)  #=> "       1.2345679e+08"
 *
 *   # precision for `s' is
 *   # maximum number of characters                    <------>
 *   sprintf("%20.8s", "string test") #=> "            string t"
 *
 *  Examples:
 *
 *     sprintf("%d %04x", 123, 123)               #=> "123 007b"
 *     sprintf("%08b '%4s'", 123, 123)            #=> "01111011 ' 123'"
 *     sprintf("%1$*2$s %2$d %1$s", "hello", 8)   #=> "   hello 8 hello"
 *     sprintf("%1$*2$s %2$d", "hello", -8)       #=> "hello    -8"
 *     sprintf("%+g:% g:%-g", 1.23, 1.23, 1.23)   #=> "+1.23: 1.23:1.23"
 *     sprintf("%u", -123)                        #=> "-123"
 *
 *  For more complex formatting, Ruby supports a reference by name.
 *  %<name>s style uses format style, but %{name} style doesn't.
00424 * 00425 * Exapmles: 00426 * sprintf("%<foo>d : %<bar>f", { :foo => 1, :bar => 2 }) 00427 * #=> 1 : 2.000000 00428 * sprintf("%{foo}f", { :foo => 1 }) 00429 * # => "1f" 00430 */ 00431 00432 VALUE 00433 rb_f_sprintf(int argc, const VALUE *argv) 00434 { 00435 return rb_str_format(argc - 1, argv + 1, GETNTHARG(0)); 00436 } 00437 00438 VALUE 00439 rb_str_format(int argc, const VALUE *argv, VALUE fmt) 00440 { 00441 rb_encoding *enc; 00442 const char *p, *end; 00443 char *buf; 00444 long blen, bsiz; 00445 VALUE result; 00446 00447 long scanned = 0; 00448 int coderange = ENC_CODERANGE_7BIT; 00449 int width, prec, flags = FNONE; 00450 int nextarg = 1; 00451 int posarg = 0; 00452 int tainted = 0; 00453 VALUE nextvalue; 00454 VALUE tmp; 00455 VALUE str; 00456 volatile VALUE hash = Qundef; 00457 00458 #define CHECK_FOR_WIDTH(f) \ 00459 if ((f) & FWIDTH) { \ 00460 rb_raise(rb_eArgError, "width given twice"); \ 00461 } \ 00462 if ((f) & FPREC0) { \ 00463 rb_raise(rb_eArgError, "width after precision"); \ 00464 } 00465 #define CHECK_FOR_FLAGS(f) \ 00466 if ((f) & FWIDTH) { \ 00467 rb_raise(rb_eArgError, "flag after width"); \ 00468 } \ 00469 if ((f) & FPREC0) { \ 00470 rb_raise(rb_eArgError, "flag after precision"); \ 00471 } 00472 00473 ++argc; 00474 --argv; 00475 if (OBJ_TAINTED(fmt)) tainted = 1; 00476 StringValue(fmt); 00477 enc = rb_enc_get(fmt); 00478 fmt = rb_str_new4(fmt); 00479 p = RSTRING_PTR(fmt); 00480 end = p + RSTRING_LEN(fmt); 00481 blen = 0; 00482 bsiz = 120; 00483 result = rb_str_buf_new(bsiz); 00484 rb_enc_copy(result, fmt); 00485 buf = RSTRING_PTR(result); 00486 memset(buf, 0, bsiz); 00487 ENC_CODERANGE_SET(result, coderange); 00488 00489 for (; p < end; p++) { 00490 const char *t; 00491 int n; 00492 ID id = 0; 00493 00494 for (t = p; t < end && *t != '%'; t++) ; 00495 PUSH(p, t - p); 00496 if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) { 00497 scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &coderange); 00498 
ENC_CODERANGE_SET(result, coderange); 00499 } 00500 if (t >= end) { 00501 /* end of fmt string */ 00502 goto sprint_exit; 00503 } 00504 p = t + 1; /* skip `%' */ 00505 00506 width = prec = -1; 00507 nextvalue = Qundef; 00508 retry: 00509 switch (*p) { 00510 default: 00511 if (rb_enc_isprint(*p, enc)) 00512 rb_raise(rb_eArgError, "malformed format string - %%%c", *p); 00513 else 00514 rb_raise(rb_eArgError, "malformed format string"); 00515 break; 00516 00517 case ' ': 00518 CHECK_FOR_FLAGS(flags); 00519 flags |= FSPACE; 00520 p++; 00521 goto retry; 00522 00523 case '#': 00524 CHECK_FOR_FLAGS(flags); 00525 flags |= FSHARP; 00526 p++; 00527 goto retry; 00528 00529 case '+': 00530 CHECK_FOR_FLAGS(flags); 00531 flags |= FPLUS; 00532 p++; 00533 goto retry; 00534 00535 case '-': 00536 CHECK_FOR_FLAGS(flags); 00537 flags |= FMINUS; 00538 p++; 00539 goto retry; 00540 00541 case '0': 00542 CHECK_FOR_FLAGS(flags); 00543 flags |= FZERO; 00544 p++; 00545 goto retry; 00546 00547 case '1': case '2': case '3': case '4': 00548 case '5': case '6': case '7': case '8': case '9': 00549 n = 0; 00550 GETNUM(n, width); 00551 if (*p == '$') { 00552 if (nextvalue != Qundef) { 00553 rb_raise(rb_eArgError, "value given twice - %d$", n); 00554 } 00555 nextvalue = GETPOSARG(n); 00556 p++; 00557 goto retry; 00558 } 00559 CHECK_FOR_WIDTH(flags); 00560 width = n; 00561 flags |= FWIDTH; 00562 goto retry; 00563 00564 case '<': 00565 case '{': 00566 { 00567 const char *start = p; 00568 char term = (*p == '<') ? 
'>' : '}'; 00569 int len; 00570 00571 for (; p < end && *p != term; ) { 00572 p += rb_enc_mbclen(p, end, enc); 00573 } 00574 if (p >= end) { 00575 rb_raise(rb_eArgError, "malformed name - unmatched parenthesis"); 00576 } 00577 #if SIZEOF_INT < SIZEOF_SIZE_T 00578 if ((size_t)(p - start) >= INT_MAX) { 00579 const int message_limit = 20; 00580 len = (int)(rb_enc_right_char_head(start, start + message_limit, p, enc) - start); 00581 rb_enc_raise(enc, rb_eArgError, 00582 "too long name (%"PRIdSIZE" bytes) - %.*s...%c", 00583 (size_t)(p - start - 2), len, start, term); 00584 } 00585 #endif 00586 len = (int)(p - start + 1); /* including parenthesis */ 00587 if (id) { 00588 rb_enc_raise(enc, rb_eArgError, "named%.*s after <%s>", 00589 len, start, rb_id2name(id)); 00590 } 00591 nextvalue = GETNAMEARG((id = rb_check_id_cstr(start + 1, 00592 len - 2 /* without parenthesis */, 00593 enc), 00594 ID2SYM(id)), 00595 start, len, enc); 00596 if (nextvalue == Qundef) { 00597 rb_enc_raise(enc, rb_eKeyError, "key%.*s not found", len, start); 00598 } 00599 if (term == '}') goto format_s; 00600 p++; 00601 goto retry; 00602 } 00603 00604 case '*': 00605 CHECK_FOR_WIDTH(flags); 00606 flags |= FWIDTH; 00607 GETASTER(width); 00608 if (width < 0) { 00609 flags |= FMINUS; 00610 width = -width; 00611 } 00612 p++; 00613 goto retry; 00614 00615 case '.': 00616 if (flags & FPREC0) { 00617 rb_raise(rb_eArgError, "precision given twice"); 00618 } 00619 flags |= FPREC|FPREC0; 00620 00621 prec = 0; 00622 p++; 00623 if (*p == '*') { 00624 GETASTER(prec); 00625 if (prec < 0) { /* ignore negative precision */ 00626 flags &= ~FPREC; 00627 } 00628 p++; 00629 goto retry; 00630 } 00631 00632 GETNUM(prec, precision); 00633 goto retry; 00634 00635 case '\n': 00636 case '\0': 00637 p--; 00638 case '%': 00639 if (flags != FNONE) { 00640 rb_raise(rb_eArgError, "invalid format character - %%"); 00641 } 00642 PUSH("%", 1); 00643 break; 00644 00645 case 'c': 00646 { 00647 VALUE val = GETARG(); 00648 VALUE tmp; 
00649 unsigned int c; 00650 int n; 00651 00652 tmp = rb_check_string_type(val); 00653 if (!NIL_P(tmp)) { 00654 if (rb_enc_strlen(RSTRING_PTR(tmp),RSTRING_END(tmp),enc) != 1) { 00655 rb_raise(rb_eArgError, "%%c requires a character"); 00656 } 00657 c = rb_enc_codepoint_len(RSTRING_PTR(tmp), RSTRING_END(tmp), &n, enc); 00658 RB_GC_GUARD(tmp); 00659 } 00660 else { 00661 c = NUM2INT(val); 00662 n = rb_enc_codelen(c, enc); 00663 } 00664 if (n <= 0) { 00665 rb_raise(rb_eArgError, "invalid character"); 00666 } 00667 if (!(flags & FWIDTH)) { 00668 CHECK(n); 00669 rb_enc_mbcput(c, &buf[blen], enc); 00670 blen += n; 00671 } 00672 else if ((flags & FMINUS)) { 00673 CHECK(n); 00674 rb_enc_mbcput(c, &buf[blen], enc); 00675 blen += n; 00676 FILL(' ', width-1); 00677 } 00678 else { 00679 FILL(' ', width-1); 00680 CHECK(n); 00681 rb_enc_mbcput(c, &buf[blen], enc); 00682 blen += n; 00683 } 00684 } 00685 break; 00686 00687 case 's': 00688 case 'p': 00689 format_s: 00690 { 00691 VALUE arg = GETARG(); 00692 long len, slen; 00693 00694 if (*p == 'p') arg = rb_inspect(arg); 00695 str = rb_obj_as_string(arg); 00696 if (OBJ_TAINTED(str)) tainted = 1; 00697 len = RSTRING_LEN(str); 00698 rb_str_set_len(result, blen); 00699 if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) { 00700 int cr = coderange; 00701 scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &cr); 00702 ENC_CODERANGE_SET(result, 00703 (cr == ENC_CODERANGE_UNKNOWN ? 
00704 ENC_CODERANGE_BROKEN : (coderange = cr))); 00705 } 00706 enc = rb_enc_check(result, str); 00707 if (flags&(FPREC|FWIDTH)) { 00708 slen = rb_enc_strlen(RSTRING_PTR(str),RSTRING_END(str),enc); 00709 if (slen < 0) { 00710 rb_raise(rb_eArgError, "invalid mbstring sequence"); 00711 } 00712 if ((flags&FPREC) && (prec < slen)) { 00713 char *p = rb_enc_nth(RSTRING_PTR(str), RSTRING_END(str), 00714 prec, enc); 00715 slen = prec; 00716 len = p - RSTRING_PTR(str); 00717 } 00718 /* need to adjust multi-byte string pos */ 00719 if ((flags&FWIDTH) && (width > slen)) { 00720 width -= (int)slen; 00721 if (!(flags&FMINUS)) { 00722 CHECK(width); 00723 while (width--) { 00724 buf[blen++] = ' '; 00725 } 00726 } 00727 CHECK(len); 00728 memcpy(&buf[blen], RSTRING_PTR(str), len); 00729 RB_GC_GUARD(str); 00730 blen += len; 00731 if (flags&FMINUS) { 00732 CHECK(width); 00733 while (width--) { 00734 buf[blen++] = ' '; 00735 } 00736 } 00737 rb_enc_associate(result, enc); 00738 break; 00739 } 00740 } 00741 PUSH(RSTRING_PTR(str), len); 00742 RB_GC_GUARD(str); 00743 rb_enc_associate(result, enc); 00744 } 00745 break; 00746 00747 case 'd': 00748 case 'i': 00749 case 'o': 00750 case 'x': 00751 case 'X': 00752 case 'b': 00753 case 'B': 00754 case 'u': 00755 { 00756 volatile VALUE val = GETARG(); 00757 char fbuf[32], nbuf[64], *s; 00758 const char *prefix = 0; 00759 int sign = 0, dots = 0; 00760 char sc = 0; 00761 long v = 0; 00762 int base, bignum = 0; 00763 int len; 00764 00765 switch (*p) { 00766 case 'd': 00767 case 'i': 00768 case 'u': 00769 sign = 1; break; 00770 case 'o': 00771 case 'x': 00772 case 'X': 00773 case 'b': 00774 case 'B': 00775 if (flags&(FPLUS|FSPACE)) sign = 1; 00776 break; 00777 } 00778 if (flags & FSHARP) { 00779 switch (*p) { 00780 case 'o': 00781 prefix = "0"; break; 00782 case 'x': 00783 prefix = "0x"; break; 00784 case 'X': 00785 prefix = "0X"; break; 00786 case 'b': 00787 prefix = "0b"; break; 00788 case 'B': 00789 prefix = "0B"; break; 00790 } 00791 } 00792 00793 
bin_retry: 00794 switch (TYPE(val)) { 00795 case T_FLOAT: 00796 if (FIXABLE(RFLOAT_VALUE(val))) { 00797 val = LONG2FIX((long)RFLOAT_VALUE(val)); 00798 goto bin_retry; 00799 } 00800 val = rb_dbl2big(RFLOAT_VALUE(val)); 00801 if (FIXNUM_P(val)) goto bin_retry; 00802 bignum = 1; 00803 break; 00804 case T_STRING: 00805 val = rb_str_to_inum(val, 0, TRUE); 00806 goto bin_retry; 00807 case T_BIGNUM: 00808 bignum = 1; 00809 break; 00810 case T_FIXNUM: 00811 v = FIX2LONG(val); 00812 break; 00813 default: 00814 val = rb_Integer(val); 00815 goto bin_retry; 00816 } 00817 00818 switch (*p) { 00819 case 'o': 00820 base = 8; break; 00821 case 'x': 00822 case 'X': 00823 base = 16; break; 00824 case 'b': 00825 case 'B': 00826 base = 2; break; 00827 case 'u': 00828 case 'd': 00829 case 'i': 00830 default: 00831 base = 10; break; 00832 } 00833 00834 if (!bignum) { 00835 if (base == 2) { 00836 val = rb_int2big(v); 00837 goto bin_retry; 00838 } 00839 if (sign) { 00840 char c = *p; 00841 if (c == 'i') c = 'd'; /* %d and %i are identical */ 00842 if (v < 0) { 00843 v = -v; 00844 sc = '-'; 00845 width--; 00846 } 00847 else if (flags & FPLUS) { 00848 sc = '+'; 00849 width--; 00850 } 00851 else if (flags & FSPACE) { 00852 sc = ' '; 00853 width--; 00854 } 00855 snprintf(fbuf, sizeof(fbuf), "%%l%c", c); 00856 snprintf(nbuf, sizeof(nbuf), fbuf, v); 00857 s = nbuf; 00858 } 00859 else { 00860 s = nbuf; 00861 if (v < 0) { 00862 dots = 1; 00863 } 00864 snprintf(fbuf, sizeof(fbuf), "%%l%c", *p == 'X' ? 
'x' : *p); 00865 snprintf(++s, sizeof(nbuf) - 1, fbuf, v); 00866 if (v < 0) { 00867 char d = 0; 00868 00869 s = remove_sign_bits(s, base); 00870 switch (base) { 00871 case 16: 00872 d = 'f'; break; 00873 case 8: 00874 d = '7'; break; 00875 } 00876 if (d && *s != d) { 00877 *--s = d; 00878 } 00879 } 00880 } 00881 len = (int)strlen(s); 00882 } 00883 else { 00884 if (sign) { 00885 tmp = rb_big2str(val, base); 00886 s = RSTRING_PTR(tmp); 00887 if (s[0] == '-') { 00888 s++; 00889 sc = '-'; 00890 width--; 00891 } 00892 else if (flags & FPLUS) { 00893 sc = '+'; 00894 width--; 00895 } 00896 else if (flags & FSPACE) { 00897 sc = ' '; 00898 width--; 00899 } 00900 } 00901 else { 00902 if (!RBIGNUM_SIGN(val)) { 00903 val = rb_big_clone(val); 00904 rb_big_2comp(val); 00905 } 00906 tmp = rb_big2str0(val, base, RBIGNUM_SIGN(val)); 00907 s = RSTRING_PTR(tmp); 00908 if (*s == '-') { 00909 dots = 1; 00910 if (base == 10) { 00911 rb_warning("negative number for %%u specifier"); 00912 } 00913 s = remove_sign_bits(++s, base); 00914 switch (base) { 00915 case 16: 00916 if (s[0] != 'f') *--s = 'f'; break; 00917 case 8: 00918 if (s[0] != '7') *--s = '7'; break; 00919 case 2: 00920 if (s[0] != '1') *--s = '1'; break; 00921 } 00922 } 00923 } 00924 len = rb_long2int(RSTRING_END(tmp) - s); 00925 } 00926 00927 if (dots) { 00928 prec -= 2; 00929 width -= 2; 00930 } 00931 00932 if (*p == 'X') { 00933 char *pp = s; 00934 int c; 00935 while ((c = (int)(unsigned char)*pp) != 0) { 00936 *pp = rb_enc_toupper(c, enc); 00937 pp++; 00938 } 00939 } 00940 if (prefix && !prefix[1]) { /* octal */ 00941 if (dots) { 00942 prefix = 0; 00943 } 00944 else if (len == 1 && *s == '0') { 00945 len = 0; 00946 if (flags & FPREC) prec--; 00947 } 00948 else if ((flags & FPREC) && (prec > len)) { 00949 prefix = 0; 00950 } 00951 } 00952 else if (len == 1 && *s == '0') { 00953 prefix = 0; 00954 } 00955 if (prefix) { 00956 width -= (int)strlen(prefix); 00957 } 00958 if ((flags & (FZERO|FMINUS|FPREC)) == FZERO) { 00959 prec 
= width; 00960 width = 0; 00961 } 00962 else { 00963 if (prec < len) { 00964 if (!prefix && prec == 0 && len == 1 && *s == '0') len = 0; 00965 prec = len; 00966 } 00967 width -= prec; 00968 } 00969 if (!(flags&FMINUS)) { 00970 CHECK(width); 00971 while (width-- > 0) { 00972 buf[blen++] = ' '; 00973 } 00974 } 00975 if (sc) PUSH(&sc, 1); 00976 if (prefix) { 00977 int plen = (int)strlen(prefix); 00978 PUSH(prefix, plen); 00979 } 00980 CHECK(prec - len); 00981 if (dots) PUSH("..", 2); 00982 if (!bignum && v < 0) { 00983 char c = sign_bits(base, p); 00984 while (len < prec--) { 00985 buf[blen++] = c; 00986 } 00987 } 00988 else if ((flags & (FMINUS|FPREC)) != FMINUS) { 00989 char c; 00990 00991 if (!sign && bignum && !RBIGNUM_SIGN(val)) 00992 c = sign_bits(base, p); 00993 else 00994 c = '0'; 00995 while (len < prec--) { 00996 buf[blen++] = c; 00997 } 00998 } 00999 PUSH(s, len); 01000 RB_GC_GUARD(tmp); 01001 CHECK(width); 01002 while (width-- > 0) { 01003 buf[blen++] = ' '; 01004 } 01005 } 01006 break; 01007 01008 case 'f': 01009 case 'g': 01010 case 'G': 01011 case 'e': 01012 case 'E': 01013 case 'a': 01014 case 'A': 01015 { 01016 VALUE val = GETARG(); 01017 double fval; 01018 int i, need = 6; 01019 char fbuf[32]; 01020 01021 fval = RFLOAT_VALUE(rb_Float(val)); 01022 if (isnan(fval) || isinf(fval)) { 01023 const char *expr; 01024 01025 if (isnan(fval)) { 01026 expr = "NaN"; 01027 } 01028 else { 01029 expr = "Inf"; 01030 } 01031 need = (int)strlen(expr); 01032 if ((!isnan(fval) && fval < 0.0) || (flags & FPLUS)) 01033 need++; 01034 if ((flags & FWIDTH) && need < width) 01035 need = width; 01036 01037 CHECK(need + 1); 01038 snprintf(&buf[blen], need + 1, "%*s", need, ""); 01039 if (flags & FMINUS) { 01040 if (!isnan(fval) && fval < 0.0) 01041 buf[blen++] = '-'; 01042 else if (flags & FPLUS) 01043 buf[blen++] = '+'; 01044 else if (flags & FSPACE) 01045 blen++; 01046 memcpy(&buf[blen], expr, strlen(expr)); 01047 } 01048 else { 01049 if (!isnan(fval) && fval < 0.0) 01050 
buf[blen + need - strlen(expr) - 1] = '-'; 01051 else if (flags & FPLUS) 01052 buf[blen + need - strlen(expr) - 1] = '+'; 01053 else if ((flags & FSPACE) && need > width) 01054 blen++; 01055 memcpy(&buf[blen + need - strlen(expr)], expr, 01056 strlen(expr)); 01057 } 01058 blen += strlen(&buf[blen]); 01059 break; 01060 } 01061 01062 fmt_setup(fbuf, sizeof(fbuf), *p, flags, width, prec); 01063 need = 0; 01064 if (*p != 'e' && *p != 'E') { 01065 i = INT_MIN; 01066 frexp(fval, &i); 01067 if (i > 0) 01068 need = BIT_DIGITS(i); 01069 } 01070 need += (flags&FPREC) ? prec : 6; 01071 if ((flags&FWIDTH) && need < width) 01072 need = width; 01073 need += 20; 01074 01075 CHECK(need); 01076 snprintf(&buf[blen], need, fbuf, fval); 01077 blen += strlen(&buf[blen]); 01078 } 01079 break; 01080 } 01081 flags = FNONE; 01082 } 01083 01084 sprint_exit: 01085 RB_GC_GUARD(fmt); 01086 /* XXX - We cannot validate the number of arguments if (digit)$ style used. 01087 */ 01088 if (posarg >= 0 && nextarg < argc) { 01089 const char *mesg = "too many arguments for format string"; 01090 if (RTEST(ruby_debug)) rb_raise(rb_eArgError, "%s", mesg); 01091 if (RTEST(ruby_verbose)) rb_warn("%s", mesg); 01092 } 01093 rb_str_resize(result, blen); 01094 01095 if (tainted) OBJ_TAINT(result); 01096 return result; 01097 } 01098 01099 static void 01100 fmt_setup(char *buf, size_t size, int c, int flags, int width, int prec) 01101 { 01102 char *end = buf + size; 01103 *buf++ = '%'; 01104 if (flags & FSHARP) *buf++ = '#'; 01105 if (flags & FPLUS) *buf++ = '+'; 01106 if (flags & FMINUS) *buf++ = '-'; 01107 if (flags & FZERO) *buf++ = '0'; 01108 if (flags & FSPACE) *buf++ = ' '; 01109 01110 if (flags & FWIDTH) { 01111 snprintf(buf, end - buf, "%d", width); 01112 buf += strlen(buf); 01113 } 01114 01115 if (flags & FPREC) { 01116 snprintf(buf, end - buf, ".%d", prec); 01117 buf += strlen(buf); 01118 } 01119 01120 *buf++ = c; 01121 *buf = '\0'; 01122 } 01123 01124 #undef FILE 01125 #define FILE rb_printf_buffer 
01126 #define __sbuf rb_printf_sbuf 01127 #define __sFILE rb_printf_sfile 01128 #undef feof 01129 #undef ferror 01130 #undef clearerr 01131 #undef fileno 01132 #if SIZEOF_LONG < SIZEOF_VOIDP 01133 # if SIZEOF_LONG_LONG == SIZEOF_VOIDP 01134 # define _HAVE_SANE_QUAD_ 01135 # define _HAVE_LLP64_ 01136 # define quad_t LONG_LONG 01137 # define u_quad_t unsigned LONG_LONG 01138 # endif 01139 #elif SIZEOF_LONG != SIZEOF_LONG_LONG && SIZEOF_LONG_LONG == 8 01140 # define _HAVE_SANE_QUAD_ 01141 # define quad_t LONG_LONG 01142 # define u_quad_t unsigned LONG_LONG 01143 #endif 01144 #define FLOATING_POINT 1 01145 #define BSD__dtoa ruby_dtoa 01146 #define BSD__hdtoa ruby_hdtoa 01147 #include "vsnprintf.c" 01148 01149 typedef struct { 01150 rb_printf_buffer base; 01151 volatile VALUE value; 01152 } rb_printf_buffer_extra; 01153 01154 static int 01155 ruby__sfvwrite(register rb_printf_buffer *fp, register struct __suio *uio) 01156 { 01157 struct __siov *iov; 01158 VALUE result = (VALUE)fp->_bf._base; 01159 char *buf = (char*)fp->_p; 01160 size_t len, n; 01161 size_t blen = buf - RSTRING_PTR(result), bsiz = fp->_w; 01162 01163 if (RBASIC(result)->klass) { 01164 rb_raise(rb_eRuntimeError, "rb_vsprintf reentered"); 01165 } 01166 if ((len = uio->uio_resid) == 0) 01167 return 0; 01168 CHECK(len); 01169 buf += blen; 01170 fp->_w = bsiz; 01171 for (iov = uio->uio_iov; len > 0; ++iov) { 01172 MEMCPY(buf, iov->iov_base, char, n = iov->iov_len); 01173 buf += n; 01174 len -= n; 01175 } 01176 fp->_p = (unsigned char *)buf; 01177 rb_str_set_len(result, buf - RSTRING_PTR(result)); 01178 return 0; 01179 } 01180 01181 static char * 01182 ruby__sfvextra(rb_printf_buffer *fp, size_t valsize, void *valp, long *sz, int sign) 01183 { 01184 VALUE value, result = (VALUE)fp->_bf._base; 01185 rb_encoding *enc; 01186 char *cp; 01187 01188 if (valsize != sizeof(VALUE)) return 0; 01189 value = *(VALUE *)valp; 01190 if (RBASIC(result)->klass) { 01191 rb_raise(rb_eRuntimeError, "rb_vsprintf reentered"); 
01192 } 01193 if (sign == '+') { 01194 value = rb_inspect(value); 01195 } 01196 else { 01197 value = rb_obj_as_string(value); 01198 } 01199 enc = rb_enc_compatible(result, value); 01200 if (enc) { 01201 rb_enc_associate(result, enc); 01202 } 01203 else { 01204 enc = rb_enc_get(result); 01205 value = rb_str_conv_enc_opts(value, rb_enc_get(value), enc, 01206 ECONV_UNDEF_REPLACE|ECONV_INVALID_REPLACE, 01207 Qnil); 01208 *(volatile VALUE *)valp = value; 01209 } 01210 StringValueCStr(value); 01211 RSTRING_GETMEM(value, cp, *sz); 01212 ((rb_printf_buffer_extra *)fp)->value = value; 01213 OBJ_INFECT(result, value); 01214 return cp; 01215 } 01216 01217 VALUE 01218 rb_enc_vsprintf(rb_encoding *enc, const char *fmt, va_list ap) 01219 { 01220 rb_printf_buffer_extra buffer; 01221 #define f buffer.base 01222 VALUE result; 01223 01224 f._flags = __SWR | __SSTR; 01225 f._bf._size = 0; 01226 f._w = 120; 01227 result = rb_str_buf_new(f._w); 01228 if (enc) { 01229 if (rb_enc_mbminlen(enc) > 1) { 01230 /* the implementation deeply depends on plain char */ 01231 rb_raise(rb_eArgError, "cannot construct wchar_t based encoding string: %s", 01232 rb_enc_name(enc)); 01233 } 01234 rb_enc_associate(result, enc); 01235 } 01236 f._bf._base = (unsigned char *)result; 01237 f._p = (unsigned char *)RSTRING_PTR(result); 01238 RBASIC(result)->klass = 0; 01239 f.vwrite = ruby__sfvwrite; 01240 f.vextra = ruby__sfvextra; 01241 buffer.value = 0; 01242 BSD_vfprintf(&f, fmt, ap); 01243 RBASIC(result)->klass = rb_cString; 01244 rb_str_resize(result, (char *)f._p - RSTRING_PTR(result)); 01245 #undef f 01246 01247 return result; 01248 } 01249 01250 VALUE 01251 rb_enc_sprintf(rb_encoding *enc, const char *format, ...) 
01252 { 01253 VALUE result; 01254 va_list ap; 01255 01256 va_start(ap, format); 01257 result = rb_enc_vsprintf(enc, format, ap); 01258 va_end(ap); 01259 01260 return result; 01261 } 01262 01263 VALUE 01264 rb_vsprintf(const char *fmt, va_list ap) 01265 { 01266 return rb_enc_vsprintf(NULL, fmt, ap); 01267 } 01268 01269 VALUE 01270 rb_sprintf(const char *format, ...) 01271 { 01272 VALUE result; 01273 va_list ap; 01274 01275 va_start(ap, format); 01276 result = rb_vsprintf(format, ap); 01277 va_end(ap); 01278 01279 return result; 01280 } 01281 01282 VALUE 01283 rb_str_vcatf(VALUE str, const char *fmt, va_list ap) 01284 { 01285 rb_printf_buffer_extra buffer; 01286 #define f buffer.base 01287 VALUE klass; 01288 01289 StringValue(str); 01290 rb_str_modify(str); 01291 f._flags = __SWR | __SSTR; 01292 f._bf._size = 0; 01293 f._w = rb_str_capacity(str); 01294 f._bf._base = (unsigned char *)str; 01295 f._p = (unsigned char *)RSTRING_END(str); 01296 klass = RBASIC(str)->klass; 01297 RBASIC(str)->klass = 0; 01298 f.vwrite = ruby__sfvwrite; 01299 f.vextra = ruby__sfvextra; 01300 buffer.value = 0; 01301 BSD_vfprintf(&f, fmt, ap); 01302 RBASIC(str)->klass = klass; 01303 rb_str_resize(str, (char *)f._p - RSTRING_PTR(str)); 01304 #undef f 01305 01306 return str; 01307 } 01308 01309 VALUE 01310 rb_str_catf(VALUE str, const char *format, ...) 01311 { 01312 va_list ap; 01313 01314 va_start(ap, format); 01315 str = rb_str_vcatf(str, format, ap); 01316 va_end(ap); 01317 01318 return str; 01319 } 01320