Ruby
2.0.0p247 (2013-06-27 revision 41674)
|
00001 /********************************************************************** 00002 00003 pack.c - 00004 00005 $Author: eregon $ 00006 created at: Thu Feb 10 15:17:05 JST 1994 00007 00008 Copyright (C) 1993-2007 Yukihiro Matsumoto 00009 00010 **********************************************************************/ 00011 00012 #include "ruby/ruby.h" 00013 #include "ruby/encoding.h" 00014 #include <sys/types.h> 00015 #include <ctype.h> 00016 #include <errno.h> 00017 00018 #define GCC_VERSION_SINCE(major, minor, patchlevel) \ 00019 (defined(__GNUC__) && !defined(__INTEL_COMPILER) && \ 00020 ((__GNUC__ > (major)) || \ 00021 (__GNUC__ == (major) && __GNUC_MINOR__ > (minor)) || \ 00022 (__GNUC__ == (major) && __GNUC_MINOR__ == (minor) && __GNUC_PATCHLEVEL__ >= (patchlevel)))) 00023 #if SIZEOF_SHORT != 2 || SIZEOF_LONG != 4 00024 # define NATINT_PACK 00025 #endif 00026 00027 #ifdef DYNAMIC_ENDIAN 00028 /* for universal binary of NEXTSTEP and MacOS X */ 00029 /* useless since autoconf 2.63? */ 00030 static int 00031 is_bigendian(void) 00032 { 00033 static int init = 0; 00034 static int endian_value; 00035 char *p; 00036 00037 if (init) return endian_value; 00038 init = 1; 00039 p = (char*)&init; 00040 return endian_value = p[0]?0:1; 00041 } 00042 # define BIGENDIAN_P() (is_bigendian()) 00043 #elif defined(WORDS_BIGENDIAN) 00044 # define BIGENDIAN_P() 1 00045 #else 00046 # define BIGENDIAN_P() 0 00047 #endif 00048 00049 #ifdef NATINT_PACK 00050 # define NATINT_LEN(type,len) (natint?(int)sizeof(type):(int)(len)) 00051 #else 00052 # define NATINT_LEN(type,len) ((int)sizeof(type)) 00053 #endif 00054 00055 #if SIZEOF_LONG == 8 00056 # define INT64toNUM(x) LONG2NUM(x) 00057 # define UINT64toNUM(x) ULONG2NUM(x) 00058 #elif defined(HAVE_LONG_LONG) && SIZEOF_LONG_LONG == 8 00059 # define INT64toNUM(x) LL2NUM(x) 00060 # define UINT64toNUM(x) ULL2NUM(x) 00061 #endif 00062 00063 #define define_swapx(x, xtype) \ 00064 static xtype \ 00065 TOKEN_PASTE(swap,x)(xtype z) \ 00066 { \ 00067 
xtype r; \ 00068 xtype *zp; \ 00069 unsigned char *s, *t; \ 00070 int i; \ 00071 \ 00072 zp = xmalloc(sizeof(xtype)); \ 00073 *zp = z; \ 00074 s = (unsigned char*)zp; \ 00075 t = xmalloc(sizeof(xtype)); \ 00076 for (i=0; i<sizeof(xtype); i++) { \ 00077 t[sizeof(xtype)-i-1] = s[i]; \ 00078 } \ 00079 r = *(xtype *)t; \ 00080 xfree(t); \ 00081 xfree(zp); \ 00082 return r; \ 00083 } 00084 00085 #if GCC_VERSION_SINCE(4,3,0) 00086 # define swap32(x) __builtin_bswap32(x) 00087 # define swap64(x) __builtin_bswap64(x) 00088 #endif 00089 00090 #ifndef swap16 00091 # define swap16(x) ((uint16_t)((((x)&0xFF)<<8) | (((x)>>8)&0xFF))) 00092 #endif 00093 00094 #ifndef swap32 00095 # define swap32(x) ((uint32_t)((((x)&0xFF)<<24) \ 00096 |(((x)>>24)&0xFF) \ 00097 |(((x)&0x0000FF00)<<8) \ 00098 |(((x)&0x00FF0000)>>8) )) 00099 #endif 00100 00101 #ifndef swap64 00102 # ifdef HAVE_INT64_T 00103 # define byte_in_64bit(n) ((uint64_t)0xff << (n)) 00104 # define swap64(x) ((uint64_t)((((x)&byte_in_64bit(0))<<56) \ 00105 |(((x)>>56)&0xFF) \ 00106 |(((x)&byte_in_64bit(8))<<40) \ 00107 |(((x)&byte_in_64bit(48))>>40) \ 00108 |(((x)&byte_in_64bit(16))<<24) \ 00109 |(((x)&byte_in_64bit(40))>>24) \ 00110 |(((x)&byte_in_64bit(24))<<8) \ 00111 |(((x)&byte_in_64bit(32))>>8))) 00112 # endif 00113 #endif 00114 00115 #if SIZEOF_SHORT == 2 00116 # define swaps(x) swap16(x) 00117 #elif SIZEOF_SHORT == 4 00118 # define swaps(x) swap32(x) 00119 #else 00120 define_swapx(s,short) 00121 #endif 00122 00123 #if SIZEOF_INT == 2 00124 # define swapi(x) swap16(x) 00125 #elif SIZEOF_INT == 4 00126 # define swapi(x) swap32(x) 00127 #else 00128 define_swapx(i,int) 00129 #endif 00130 00131 #if SIZEOF_LONG == 4 00132 # define swapl(x) swap32(x) 00133 #elif SIZEOF_LONG == 8 00134 # define swapl(x) swap64(x) 00135 #else 00136 define_swapx(l,long) 00137 #endif 00138 00139 #ifdef HAVE_LONG_LONG 00140 # if SIZEOF_LONG_LONG == 8 00141 # define swapll(x) swap64(x) 00142 # else 00143 define_swapx(ll,LONG_LONG) 00144 # endif 
00145 #endif 00146 00147 #if SIZEOF_FLOAT == 4 && defined(HAVE_INT32_T) 00148 # define swapf(x) swap32(x) 00149 # define FLOAT_SWAPPER uint32_t 00150 #else 00151 define_swapx(f,float) 00152 #endif 00153 00154 #if SIZEOF_DOUBLE == 8 && defined(HAVE_INT64_T) 00155 # define swapd(x) swap64(x) 00156 # define DOUBLE_SWAPPER uint64_t 00157 #elif SIZEOF_DOUBLE == 8 && defined(HAVE_INT32_T) 00158 static double 00159 swapd(const double d) 00160 { 00161 double dtmp = d; 00162 uint32_t utmp[2]; 00163 uint32_t utmp0; 00164 00165 utmp[0] = 0; utmp[1] = 0; 00166 memcpy(utmp,&dtmp,sizeof(double)); 00167 utmp0 = utmp[0]; 00168 utmp[0] = swap32(utmp[1]); 00169 utmp[1] = swap32(utmp0); 00170 memcpy(&dtmp,utmp,sizeof(double)); 00171 return dtmp; 00172 } 00173 #else 00174 define_swapx(d, double) 00175 #endif 00176 00177 #undef define_swapx 00178 00179 #define rb_ntohf(x) (BIGENDIAN_P()?(x):swapf(x)) 00180 #define rb_ntohd(x) (BIGENDIAN_P()?(x):swapd(x)) 00181 #define rb_htonf(x) (BIGENDIAN_P()?(x):swapf(x)) 00182 #define rb_htond(x) (BIGENDIAN_P()?(x):swapd(x)) 00183 #define rb_htovf(x) (BIGENDIAN_P()?swapf(x):(x)) 00184 #define rb_htovd(x) (BIGENDIAN_P()?swapd(x):(x)) 00185 #define rb_vtohf(x) (BIGENDIAN_P()?swapf(x):(x)) 00186 #define rb_vtohd(x) (BIGENDIAN_P()?swapd(x):(x)) 00187 00188 #ifdef FLOAT_SWAPPER 00189 # define FLOAT_CONVWITH(y) FLOAT_SWAPPER y; 00190 # define HTONF(x,y) (memcpy(&(y),&(x),sizeof(float)), \ 00191 (y) = rb_htonf((FLOAT_SWAPPER)(y)), \ 00192 memcpy(&(x),&(y),sizeof(float)), \ 00193 (x)) 00194 # define HTOVF(x,y) (memcpy(&(y),&(x),sizeof(float)), \ 00195 (y) = rb_htovf((FLOAT_SWAPPER)(y)), \ 00196 memcpy(&(x),&(y),sizeof(float)), \ 00197 (x)) 00198 # define NTOHF(x,y) (memcpy(&(y),&(x),sizeof(float)), \ 00199 (y) = rb_ntohf((FLOAT_SWAPPER)(y)), \ 00200 memcpy(&(x),&(y),sizeof(float)), \ 00201 (x)) 00202 # define VTOHF(x,y) (memcpy(&(y),&(x),sizeof(float)), \ 00203 (y) = rb_vtohf((FLOAT_SWAPPER)(y)), \ 00204 memcpy(&(x),&(y),sizeof(float)), \ 00205 (x)) 00206 
#else 00207 # define FLOAT_CONVWITH(y) 00208 # define HTONF(x,y) rb_htonf(x) 00209 # define HTOVF(x,y) rb_htovf(x) 00210 # define NTOHF(x,y) rb_ntohf(x) 00211 # define VTOHF(x,y) rb_vtohf(x) 00212 #endif 00213 00214 #ifdef DOUBLE_SWAPPER 00215 # define DOUBLE_CONVWITH(y) DOUBLE_SWAPPER y; 00216 # define HTOND(x,y) (memcpy(&(y),&(x),sizeof(double)), \ 00217 (y) = rb_htond((DOUBLE_SWAPPER)(y)), \ 00218 memcpy(&(x),&(y),sizeof(double)), \ 00219 (x)) 00220 # define HTOVD(x,y) (memcpy(&(y),&(x),sizeof(double)), \ 00221 (y) = rb_htovd((DOUBLE_SWAPPER)(y)), \ 00222 memcpy(&(x),&(y),sizeof(double)), \ 00223 (x)) 00224 # define NTOHD(x,y) (memcpy(&(y),&(x),sizeof(double)), \ 00225 (y) = rb_ntohd((DOUBLE_SWAPPER)(y)), \ 00226 memcpy(&(x),&(y),sizeof(double)), \ 00227 (x)) 00228 # define VTOHD(x,y) (memcpy(&(y),&(x),sizeof(double)), \ 00229 (y) = rb_vtohd((DOUBLE_SWAPPER)(y)), \ 00230 memcpy(&(x),&(y),sizeof(double)), \ 00231 (x)) 00232 #else 00233 # define DOUBLE_CONVWITH(y) 00234 # define HTOND(x,y) rb_htond(x) 00235 # define HTOVD(x,y) rb_htovd(x) 00236 # define NTOHD(x,y) rb_ntohd(x) 00237 # define VTOHD(x,y) rb_vtohd(x) 00238 #endif 00239 00240 static unsigned long 00241 num2i32(VALUE x) 00242 { 00243 x = rb_to_int(x); /* is nil OK? 
(should not) */ 00244 00245 if (FIXNUM_P(x)) return FIX2LONG(x); 00246 if (RB_TYPE_P(x, T_BIGNUM)) { 00247 return rb_big2ulong_pack(x); 00248 } 00249 rb_raise(rb_eTypeError, "can't convert %s to `integer'", rb_obj_classname(x)); 00250 00251 UNREACHABLE; 00252 } 00253 00254 #define MAX_INTEGER_PACK_SIZE 8 00255 /* #define FORCE_BIG_PACK */ 00256 00257 static const char toofew[] = "too few arguments"; 00258 00259 static void encodes(VALUE,const char*,long,int,int); 00260 static void qpencode(VALUE,VALUE,long); 00261 00262 static unsigned long utf8_to_uv(const char*,long*); 00263 00264 /* 00265 * call-seq: 00266 * arr.pack ( aTemplateString ) -> aBinaryString 00267 * 00268 * Packs the contents of <i>arr</i> into a binary sequence according to 00269 * the directives in <i>aTemplateString</i> (see the table below) 00270 * Directives ``A,'' ``a,'' and ``Z'' may be followed by a count, 00271 * which gives the width of the resulting field. The remaining 00272 * directives also may take a count, indicating the number of array 00273 * elements to convert. If the count is an asterisk 00274 * (``<code>*</code>''), all remaining array elements will be 00275 * converted. Any of the directives ``<code>sSiIlL</code>'' may be 00276 * followed by an underscore (``<code>_</code>'') or 00277 * exclamation mark (``<code>!</code>'') to use the underlying 00278 * platform's native size for the specified type; otherwise, they use a 00279 * platform-independent size. Spaces are ignored in the template 00280 * string. See also <code>String#unpack</code>. 00281 * 00282 * a = [ "a", "b", "c" ] 00283 * n = [ 65, 66, 67 ] 00284 * a.pack("A3A3A3") #=> "a b c " 00285 * a.pack("a3a3a3") #=> "a\000\000b\000\000c\000\000" 00286 * n.pack("ccc") #=> "ABC" 00287 * 00288 * Directives for +pack+. 
00289 * 00290 * Integer | Array | 00291 * Directive | Element | Meaning 00292 * --------------------------------------------------------------------------- 00293 * C | Integer | 8-bit unsigned (unsigned char) 00294 * S | Integer | 16-bit unsigned, native endian (uint16_t) 00295 * L | Integer | 32-bit unsigned, native endian (uint32_t) 00296 * Q | Integer | 64-bit unsigned, native endian (uint64_t) 00297 * | | 00298 * c | Integer | 8-bit signed (signed char) 00299 * s | Integer | 16-bit signed, native endian (int16_t) 00300 * l | Integer | 32-bit signed, native endian (int32_t) 00301 * q | Integer | 64-bit signed, native endian (int64_t) 00302 * | | 00303 * S_, S! | Integer | unsigned short, native endian 00304 * I, I_, I! | Integer | unsigned int, native endian 00305 * L_, L! | Integer | unsigned long, native endian 00306 * | | 00307 * s_, s! | Integer | signed short, native endian 00308 * i, i_, i! | Integer | signed int, native endian 00309 * l_, l! | Integer | signed long, native endian 00310 * | | 00311 * S> L> Q> | Integer | same as the directives without ">" except 00312 * s> l> q> | | big endian 00313 * S!> I!> | | (available since Ruby 1.9.3) 00314 * L!> | | "S>" is same as "n" 00315 * s!> i!> | | "L>" is same as "N" 00316 * l!> | | 00317 * | | 00318 * S< L< Q< | Integer | same as the directives without "<" except 00319 * s< l< q< | | little endian 00320 * S!< I!< | | (available since Ruby 1.9.3) 00321 * L!< | | "S<" is same as "v" 00322 * s!< i!< | | "L<" is same as "V" 00323 * l!< | | 00324 * | | 00325 * n | Integer | 16-bit unsigned, network (big-endian) byte order 00326 * N | Integer | 32-bit unsigned, network (big-endian) byte order 00327 * v | Integer | 16-bit unsigned, VAX (little-endian) byte order 00328 * V | Integer | 32-bit unsigned, VAX (little-endian) byte order 00329 * | | 00330 * U | Integer | UTF-8 character 00331 * w | Integer | BER-compressed integer 00332 * 00333 * Float | | 00334 * Directive | | Meaning 00335 * 
--------------------------------------------------------------------------- 00336 * D, d | Float | double-precision, native format 00337 * F, f | Float | single-precision, native format 00338 * E | Float | double-precision, little-endian byte order 00339 * e | Float | single-precision, little-endian byte order 00340 * G | Float | double-precision, network (big-endian) byte order 00341 * g | Float | single-precision, network (big-endian) byte order 00342 * 00343 * String | | 00344 * Directive | | Meaning 00345 * --------------------------------------------------------------------------- 00346 * A | String | arbitrary binary string (space padded, count is width) 00347 * a | String | arbitrary binary string (null padded, count is width) 00348 * Z | String | same as ``a'', except that null is added with * 00349 * B | String | bit string (MSB first) 00350 * b | String | bit string (LSB first) 00351 * H | String | hex string (high nibble first) 00352 * h | String | hex string (low nibble first) 00353 * u | String | UU-encoded string 00354 * M | String | quoted printable, MIME encoding (see RFC2045) 00355 * m | String | base64 encoded string (see RFC 2045, count is width) 00356 * | | (if count is 0, no line feed are added, see RFC 4648) 00357 * P | String | pointer to a structure (fixed-length string) 00358 * p | String | pointer to a null-terminated string 00359 * 00360 * Misc. 
| | 00361 * Directive | | Meaning 00362 * --------------------------------------------------------------------------- 00363 * @ | --- | moves to absolute position 00364 * X | --- | back up a byte 00365 * x | --- | null byte 00366 */ 00367 00368 static VALUE 00369 pack_pack(VALUE ary, VALUE fmt) 00370 { 00371 static const char nul10[] = "\0\0\0\0\0\0\0\0\0\0"; 00372 static const char spc10[] = " "; 00373 const char *p, *pend; 00374 VALUE res, from, associates = 0; 00375 char type; 00376 long items, len, idx, plen; 00377 const char *ptr; 00378 int enc_info = 1; /* 0 - BINARY, 1 - US-ASCII, 2 - UTF-8 */ 00379 #ifdef NATINT_PACK 00380 int natint; /* native integer */ 00381 #endif 00382 int integer_size, bigendian_p; 00383 00384 StringValue(fmt); 00385 p = RSTRING_PTR(fmt); 00386 pend = p + RSTRING_LEN(fmt); 00387 res = rb_str_buf_new(0); 00388 00389 items = RARRAY_LEN(ary); 00390 idx = 0; 00391 00392 #define TOO_FEW (rb_raise(rb_eArgError, toofew), 0) 00393 #define THISFROM (items > 0 ? RARRAY_PTR(ary)[idx] : TOO_FEW) 00394 #define NEXTFROM (items-- > 0 ? 
RARRAY_PTR(ary)[idx++] : TOO_FEW) 00395 00396 while (p < pend) { 00397 int explicit_endian = 0; 00398 if (RSTRING_PTR(fmt) + RSTRING_LEN(fmt) != pend) { 00399 rb_raise(rb_eRuntimeError, "format string modified"); 00400 } 00401 type = *p++; /* get data type */ 00402 #ifdef NATINT_PACK 00403 natint = 0; 00404 #endif 00405 00406 if (ISSPACE(type)) continue; 00407 if (type == '#') { 00408 while ((p < pend) && (*p != '\n')) { 00409 p++; 00410 } 00411 continue; 00412 } 00413 00414 { 00415 static const char natstr[] = "sSiIlL"; 00416 static const char endstr[] = "sSiIlLqQ"; 00417 00418 modifiers: 00419 switch (*p) { 00420 case '_': 00421 case '!': 00422 if (strchr(natstr, type)) { 00423 #ifdef NATINT_PACK 00424 natint = 1; 00425 #endif 00426 p++; 00427 } 00428 else { 00429 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr); 00430 } 00431 goto modifiers; 00432 00433 case '<': 00434 case '>': 00435 if (!strchr(endstr, type)) { 00436 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr); 00437 } 00438 if (explicit_endian) { 00439 rb_raise(rb_eRangeError, "Can't use both '<' and '>'"); 00440 } 00441 explicit_endian = *p++; 00442 goto modifiers; 00443 } 00444 } 00445 00446 if (*p == '*') { /* set data length */ 00447 len = strchr("@Xxu", type) ? 0 00448 : strchr("PMm", type) ? 
1 00449 : items; 00450 p++; 00451 } 00452 else if (ISDIGIT(*p)) { 00453 errno = 0; 00454 len = STRTOUL(p, (char**)&p, 10); 00455 if (errno) { 00456 rb_raise(rb_eRangeError, "pack length too big"); 00457 } 00458 } 00459 else { 00460 len = 1; 00461 } 00462 00463 switch (type) { 00464 case 'U': 00465 /* if encoding is US-ASCII, upgrade to UTF-8 */ 00466 if (enc_info == 1) enc_info = 2; 00467 break; 00468 case 'm': case 'M': case 'u': 00469 /* keep US-ASCII (do nothing) */ 00470 break; 00471 default: 00472 /* fall back to BINARY */ 00473 enc_info = 0; 00474 break; 00475 } 00476 switch (type) { 00477 case 'A': case 'a': case 'Z': 00478 case 'B': case 'b': 00479 case 'H': case 'h': 00480 from = NEXTFROM; 00481 if (NIL_P(from)) { 00482 ptr = ""; 00483 plen = 0; 00484 } 00485 else { 00486 StringValue(from); 00487 ptr = RSTRING_PTR(from); 00488 plen = RSTRING_LEN(from); 00489 OBJ_INFECT(res, from); 00490 } 00491 00492 if (p[-1] == '*') 00493 len = plen; 00494 00495 switch (type) { 00496 case 'a': /* arbitrary binary string (null padded) */ 00497 case 'A': /* arbitrary binary string (ASCII space padded) */ 00498 case 'Z': /* null terminated string */ 00499 if (plen >= len) { 00500 rb_str_buf_cat(res, ptr, len); 00501 if (p[-1] == '*' && type == 'Z') 00502 rb_str_buf_cat(res, nul10, 1); 00503 } 00504 else { 00505 rb_str_buf_cat(res, ptr, plen); 00506 len -= plen; 00507 while (len >= 10) { 00508 rb_str_buf_cat(res, (type == 'A')?spc10:nul10, 10); 00509 len -= 10; 00510 } 00511 rb_str_buf_cat(res, (type == 'A')?spc10:nul10, len); 00512 } 00513 break; 00514 00515 #define castchar(from) (char)((from) & 0xff) 00516 00517 case 'b': /* bit string (ascending) */ 00518 { 00519 int byte = 0; 00520 long i, j = 0; 00521 00522 if (len > plen) { 00523 j = (len - plen + 1)/2; 00524 len = plen; 00525 } 00526 for (i=0; i++ < len; ptr++) { 00527 if (*ptr & 1) 00528 byte |= 128; 00529 if (i & 7) 00530 byte >>= 1; 00531 else { 00532 char c = castchar(byte); 00533 rb_str_buf_cat(res, &c, 1); 
00534 byte = 0; 00535 } 00536 } 00537 if (len & 7) { 00538 char c; 00539 byte >>= 7 - (len & 7); 00540 c = castchar(byte); 00541 rb_str_buf_cat(res, &c, 1); 00542 } 00543 len = j; 00544 goto grow; 00545 } 00546 break; 00547 00548 case 'B': /* bit string (descending) */ 00549 { 00550 int byte = 0; 00551 long i, j = 0; 00552 00553 if (len > plen) { 00554 j = (len - plen + 1)/2; 00555 len = plen; 00556 } 00557 for (i=0; i++ < len; ptr++) { 00558 byte |= *ptr & 1; 00559 if (i & 7) 00560 byte <<= 1; 00561 else { 00562 char c = castchar(byte); 00563 rb_str_buf_cat(res, &c, 1); 00564 byte = 0; 00565 } 00566 } 00567 if (len & 7) { 00568 char c; 00569 byte <<= 7 - (len & 7); 00570 c = castchar(byte); 00571 rb_str_buf_cat(res, &c, 1); 00572 } 00573 len = j; 00574 goto grow; 00575 } 00576 break; 00577 00578 case 'h': /* hex string (low nibble first) */ 00579 { 00580 int byte = 0; 00581 long i, j = 0; 00582 00583 if (len > plen) { 00584 j = (len + 1) / 2 - (plen + 1) / 2; 00585 len = plen; 00586 } 00587 for (i=0; i++ < len; ptr++) { 00588 if (ISALPHA(*ptr)) 00589 byte |= (((*ptr & 15) + 9) & 15) << 4; 00590 else 00591 byte |= (*ptr & 15) << 4; 00592 if (i & 1) 00593 byte >>= 4; 00594 else { 00595 char c = castchar(byte); 00596 rb_str_buf_cat(res, &c, 1); 00597 byte = 0; 00598 } 00599 } 00600 if (len & 1) { 00601 char c = castchar(byte); 00602 rb_str_buf_cat(res, &c, 1); 00603 } 00604 len = j; 00605 goto grow; 00606 } 00607 break; 00608 00609 case 'H': /* hex string (high nibble first) */ 00610 { 00611 int byte = 0; 00612 long i, j = 0; 00613 00614 if (len > plen) { 00615 j = (len + 1) / 2 - (plen + 1) / 2; 00616 len = plen; 00617 } 00618 for (i=0; i++ < len; ptr++) { 00619 if (ISALPHA(*ptr)) 00620 byte |= ((*ptr & 15) + 9) & 15; 00621 else 00622 byte |= *ptr & 15; 00623 if (i & 1) 00624 byte <<= 4; 00625 else { 00626 char c = castchar(byte); 00627 rb_str_buf_cat(res, &c, 1); 00628 byte = 0; 00629 } 00630 } 00631 if (len & 1) { 00632 char c = castchar(byte); 00633 
rb_str_buf_cat(res, &c, 1); 00634 } 00635 len = j; 00636 goto grow; 00637 } 00638 break; 00639 } 00640 break; 00641 00642 case 'c': /* signed char */ 00643 case 'C': /* unsigned char */ 00644 while (len-- > 0) { 00645 char c; 00646 00647 from = NEXTFROM; 00648 c = (char)num2i32(from); 00649 rb_str_buf_cat(res, &c, sizeof(char)); 00650 } 00651 break; 00652 00653 case 's': /* signed short */ 00654 integer_size = NATINT_LEN(short, 2); 00655 bigendian_p = BIGENDIAN_P(); 00656 goto pack_integer; 00657 00658 case 'S': /* unsigned short */ 00659 integer_size = NATINT_LEN(short, 2); 00660 bigendian_p = BIGENDIAN_P(); 00661 goto pack_integer; 00662 00663 case 'i': /* signed int */ 00664 integer_size = (int)sizeof(int); 00665 bigendian_p = BIGENDIAN_P(); 00666 goto pack_integer; 00667 00668 case 'I': /* unsigned int */ 00669 integer_size = (int)sizeof(int); 00670 bigendian_p = BIGENDIAN_P(); 00671 goto pack_integer; 00672 00673 case 'l': /* signed long */ 00674 integer_size = NATINT_LEN(long, 4); 00675 bigendian_p = BIGENDIAN_P(); 00676 goto pack_integer; 00677 00678 case 'L': /* unsigned long */ 00679 integer_size = NATINT_LEN(long, 4); 00680 bigendian_p = BIGENDIAN_P(); 00681 goto pack_integer; 00682 00683 case 'q': /* signed quad (64bit) int */ 00684 integer_size = 8; 00685 bigendian_p = BIGENDIAN_P(); 00686 goto pack_integer; 00687 00688 case 'Q': /* unsigned quad (64bit) int */ 00689 integer_size = 8; 00690 bigendian_p = BIGENDIAN_P(); 00691 goto pack_integer; 00692 00693 case 'n': /* unsigned short (network byte-order) */ 00694 integer_size = 2; 00695 bigendian_p = 1; 00696 goto pack_integer; 00697 00698 case 'N': /* unsigned long (network byte-order) */ 00699 integer_size = 4; 00700 bigendian_p = 1; 00701 goto pack_integer; 00702 00703 case 'v': /* unsigned short (VAX byte-order) */ 00704 integer_size = 2; 00705 bigendian_p = 0; 00706 goto pack_integer; 00707 00708 case 'V': /* unsigned long (VAX byte-order) */ 00709 integer_size = 4; 00710 bigendian_p = 0; 00711 goto 
pack_integer; 00712 00713 pack_integer: 00714 if (explicit_endian) { 00715 bigendian_p = explicit_endian == '>'; 00716 } 00717 00718 switch (integer_size) { 00719 #if defined(HAVE_INT16_T) && !defined(FORCE_BIG_PACK) 00720 case SIZEOF_INT16_T: 00721 while (len-- > 0) { 00722 union { 00723 int16_t i; 00724 char a[sizeof(int16_t)]; 00725 } v; 00726 00727 from = NEXTFROM; 00728 v.i = (int16_t)num2i32(from); 00729 if (bigendian_p != BIGENDIAN_P()) v.i = swap16(v.i); 00730 rb_str_buf_cat(res, v.a, sizeof(int16_t)); 00731 } 00732 break; 00733 #endif 00734 00735 #if defined(HAVE_INT32_T) && !defined(FORCE_BIG_PACK) 00736 case SIZEOF_INT32_T: 00737 while (len-- > 0) { 00738 union { 00739 int32_t i; 00740 char a[sizeof(int32_t)]; 00741 } v; 00742 00743 from = NEXTFROM; 00744 v.i = (int32_t)num2i32(from); 00745 if (bigendian_p != BIGENDIAN_P()) v.i = swap32(v.i); 00746 rb_str_buf_cat(res, v.a, sizeof(int32_t)); 00747 } 00748 break; 00749 #endif 00750 00751 #if defined(HAVE_INT64_T) && SIZEOF_LONG == SIZEOF_INT64_T && !defined(FORCE_BIG_PACK) 00752 case SIZEOF_INT64_T: 00753 while (len-- > 0) { 00754 union { 00755 int64_t i; 00756 char a[sizeof(int64_t)]; 00757 } v; 00758 00759 from = NEXTFROM; 00760 v.i = num2i32(from); /* can return 64bit value if SIZEOF_LONG == SIZEOF_INT64_T */ 00761 if (bigendian_p != BIGENDIAN_P()) v.i = swap64(v.i); 00762 rb_str_buf_cat(res, v.a, sizeof(int64_t)); 00763 } 00764 break; 00765 #endif 00766 00767 default: 00768 if (integer_size > MAX_INTEGER_PACK_SIZE) 00769 rb_bug("unexpected intger size for pack: %d", integer_size); 00770 while (len-- > 0) { 00771 union { 00772 unsigned long i[(MAX_INTEGER_PACK_SIZE+SIZEOF_LONG-1)/SIZEOF_LONG]; 00773 char a[(MAX_INTEGER_PACK_SIZE+SIZEOF_LONG-1)/SIZEOF_LONG*SIZEOF_LONG]; 00774 } v; 00775 int num_longs = (integer_size+SIZEOF_LONG-1)/SIZEOF_LONG; 00776 int i; 00777 00778 from = NEXTFROM; 00779 rb_big_pack(from, v.i, num_longs); 00780 if (bigendian_p) { 00781 for (i = 0; i < num_longs/2; i++) { 00782 
unsigned long t = v.i[i]; 00783 v.i[i] = v.i[num_longs-1-i]; 00784 v.i[num_longs-1-i] = t; 00785 } 00786 } 00787 if (bigendian_p != BIGENDIAN_P()) { 00788 for (i = 0; i < num_longs; i++) 00789 v.i[i] = swapl(v.i[i]); 00790 } 00791 rb_str_buf_cat(res, 00792 bigendian_p ? 00793 v.a + sizeof(long)*num_longs - integer_size : 00794 v.a, 00795 integer_size); 00796 } 00797 break; 00798 } 00799 break; 00800 00801 case 'f': /* single precision float in native format */ 00802 case 'F': /* ditto */ 00803 while (len-- > 0) { 00804 float f; 00805 00806 from = NEXTFROM; 00807 f = (float)RFLOAT_VALUE(rb_to_float(from)); 00808 rb_str_buf_cat(res, (char*)&f, sizeof(float)); 00809 } 00810 break; 00811 00812 case 'e': /* single precision float in VAX byte-order */ 00813 while (len-- > 0) { 00814 float f; 00815 FLOAT_CONVWITH(ftmp); 00816 00817 from = NEXTFROM; 00818 f = (float)RFLOAT_VALUE(rb_to_float(from)); 00819 f = HTOVF(f,ftmp); 00820 rb_str_buf_cat(res, (char*)&f, sizeof(float)); 00821 } 00822 break; 00823 00824 case 'E': /* double precision float in VAX byte-order */ 00825 while (len-- > 0) { 00826 double d; 00827 DOUBLE_CONVWITH(dtmp); 00828 00829 from = NEXTFROM; 00830 d = RFLOAT_VALUE(rb_to_float(from)); 00831 d = HTOVD(d,dtmp); 00832 rb_str_buf_cat(res, (char*)&d, sizeof(double)); 00833 } 00834 break; 00835 00836 case 'd': /* double precision float in native format */ 00837 case 'D': /* ditto */ 00838 while (len-- > 0) { 00839 double d; 00840 00841 from = NEXTFROM; 00842 d = RFLOAT_VALUE(rb_to_float(from)); 00843 rb_str_buf_cat(res, (char*)&d, sizeof(double)); 00844 } 00845 break; 00846 00847 case 'g': /* single precision float in network byte-order */ 00848 while (len-- > 0) { 00849 float f; 00850 FLOAT_CONVWITH(ftmp); 00851 00852 from = NEXTFROM; 00853 f = (float)RFLOAT_VALUE(rb_to_float(from)); 00854 f = HTONF(f,ftmp); 00855 rb_str_buf_cat(res, (char*)&f, sizeof(float)); 00856 } 00857 break; 00858 00859 case 'G': /* double precision float in network byte-order */ 00860 
while (len-- > 0) { 00861 double d; 00862 DOUBLE_CONVWITH(dtmp); 00863 00864 from = NEXTFROM; 00865 d = RFLOAT_VALUE(rb_to_float(from)); 00866 d = HTOND(d,dtmp); 00867 rb_str_buf_cat(res, (char*)&d, sizeof(double)); 00868 } 00869 break; 00870 00871 case 'x': /* null byte */ 00872 grow: 00873 while (len >= 10) { 00874 rb_str_buf_cat(res, nul10, 10); 00875 len -= 10; 00876 } 00877 rb_str_buf_cat(res, nul10, len); 00878 break; 00879 00880 case 'X': /* back up byte */ 00881 shrink: 00882 plen = RSTRING_LEN(res); 00883 if (plen < len) 00884 rb_raise(rb_eArgError, "X outside of string"); 00885 rb_str_set_len(res, plen - len); 00886 break; 00887 00888 case '@': /* null fill to absolute position */ 00889 len -= RSTRING_LEN(res); 00890 if (len > 0) goto grow; 00891 len = -len; 00892 if (len > 0) goto shrink; 00893 break; 00894 00895 case '%': 00896 rb_raise(rb_eArgError, "%% is not supported"); 00897 break; 00898 00899 case 'U': /* Unicode character */ 00900 while (len-- > 0) { 00901 SIGNED_VALUE l; 00902 char buf[8]; 00903 int le; 00904 00905 from = NEXTFROM; 00906 from = rb_to_int(from); 00907 l = NUM2LONG(from); 00908 if (l < 0) { 00909 rb_raise(rb_eRangeError, "pack(U): value out of range"); 00910 } 00911 le = rb_uv_to_utf8(buf, l); 00912 rb_str_buf_cat(res, (char*)buf, le); 00913 } 00914 break; 00915 00916 case 'u': /* uuencoded string */ 00917 case 'm': /* base64 encoded string */ 00918 from = NEXTFROM; 00919 StringValue(from); 00920 ptr = RSTRING_PTR(from); 00921 plen = RSTRING_LEN(from); 00922 00923 if (len == 0 && type == 'm') { 00924 encodes(res, ptr, plen, type, 0); 00925 ptr += plen; 00926 break; 00927 } 00928 if (len <= 2) 00929 len = 45; 00930 else if (len > 63 && type == 'u') 00931 len = 63; 00932 else 00933 len = len / 3 * 3; 00934 while (plen > 0) { 00935 long todo; 00936 00937 if (plen > len) 00938 todo = len; 00939 else 00940 todo = plen; 00941 encodes(res, ptr, todo, type, 1); 00942 plen -= todo; 00943 ptr += todo; 00944 } 00945 break; 00946 00947 case 
'M': /* quoted-printable encoded string */ 00948 from = rb_obj_as_string(NEXTFROM); 00949 if (len <= 1) 00950 len = 72; 00951 qpencode(res, from, len); 00952 break; 00953 00954 case 'P': /* pointer to packed byte string */ 00955 from = THISFROM; 00956 if (!NIL_P(from)) { 00957 StringValue(from); 00958 if (RSTRING_LEN(from) < len) { 00959 rb_raise(rb_eArgError, "too short buffer for P(%ld for %ld)", 00960 RSTRING_LEN(from), len); 00961 } 00962 } 00963 len = 1; 00964 /* FALL THROUGH */ 00965 case 'p': /* pointer to string */ 00966 while (len-- > 0) { 00967 char *t; 00968 from = NEXTFROM; 00969 if (NIL_P(from)) { 00970 t = 0; 00971 } 00972 else { 00973 t = StringValuePtr(from); 00974 } 00975 if (!associates) { 00976 associates = rb_ary_new(); 00977 } 00978 rb_ary_push(associates, from); 00979 rb_obj_taint(from); 00980 rb_str_buf_cat(res, (char*)&t, sizeof(char*)); 00981 } 00982 break; 00983 00984 case 'w': /* BER compressed integer */ 00985 while (len-- > 0) { 00986 unsigned long ul; 00987 VALUE buf = rb_str_new(0, 0); 00988 char c, *bufs, *bufe; 00989 00990 from = NEXTFROM; 00991 if (RB_TYPE_P(from, T_BIGNUM)) { 00992 VALUE big128 = rb_uint2big(128); 00993 while (RB_TYPE_P(from, T_BIGNUM)) { 00994 from = rb_big_divmod(from, big128); 00995 c = castchar(NUM2INT(RARRAY_PTR(from)[1]) | 0x80); /* mod */ 00996 rb_str_buf_cat(buf, &c, sizeof(char)); 00997 from = RARRAY_PTR(from)[0]; /* div */ 00998 } 00999 } 01000 01001 { 01002 long l = NUM2LONG(from); 01003 if (l < 0) { 01004 rb_raise(rb_eArgError, "can't compress negative numbers"); 01005 } 01006 ul = l; 01007 } 01008 01009 while (ul) { 01010 c = castchar((ul & 0x7f) | 0x80); 01011 rb_str_buf_cat(buf, &c, sizeof(char)); 01012 ul >>= 7; 01013 } 01014 01015 if (RSTRING_LEN(buf)) { 01016 bufs = RSTRING_PTR(buf); 01017 bufe = bufs + RSTRING_LEN(buf) - 1; 01018 *bufs &= 0x7f; /* clear continue bit */ 01019 while (bufs < bufe) { /* reverse */ 01020 c = *bufs; 01021 *bufs++ = *bufe; 01022 *bufe-- = c; 01023 } 01024 
rb_str_buf_cat(res, RSTRING_PTR(buf), RSTRING_LEN(buf)); 01025 } 01026 else { 01027 c = 0; 01028 rb_str_buf_cat(res, &c, sizeof(char)); 01029 } 01030 } 01031 break; 01032 01033 default: 01034 rb_warning("unknown pack directive '%c' in '%s'", 01035 type, RSTRING_PTR(fmt)); 01036 break; 01037 } 01038 } 01039 01040 if (associates) { 01041 rb_str_associate(res, associates); 01042 } 01043 OBJ_INFECT(res, fmt); 01044 switch (enc_info) { 01045 case 1: 01046 ENCODING_CODERANGE_SET(res, rb_usascii_encindex(), ENC_CODERANGE_7BIT); 01047 break; 01048 case 2: 01049 rb_enc_set_index(res, rb_utf8_encindex()); 01050 break; 01051 default: 01052 /* do nothing, keep ASCII-8BIT */ 01053 break; 01054 } 01055 return res; 01056 } 01057 01058 static const char uu_table[] = 01059 "`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_"; 01060 static const char b64_table[] = 01061 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; 01062 01063 static void 01064 encodes(VALUE str, const char *s, long len, int type, int tail_lf) 01065 { 01066 char buff[4096]; 01067 long i = 0; 01068 const char *trans = type == 'u' ? 
uu_table : b64_table; 01069 char padding; 01070 01071 if (type == 'u') { 01072 buff[i++] = (char)len + ' '; 01073 padding = '`'; 01074 } 01075 else { 01076 padding = '='; 01077 } 01078 while (len >= 3) { 01079 while (len >= 3 && sizeof(buff)-i >= 4) { 01080 buff[i++] = trans[077 & (*s >> 2)]; 01081 buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))]; 01082 buff[i++] = trans[077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03))]; 01083 buff[i++] = trans[077 & s[2]]; 01084 s += 3; 01085 len -= 3; 01086 } 01087 if (sizeof(buff)-i < 4) { 01088 rb_str_buf_cat(str, buff, i); 01089 i = 0; 01090 } 01091 } 01092 01093 if (len == 2) { 01094 buff[i++] = trans[077 & (*s >> 2)]; 01095 buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))]; 01096 buff[i++] = trans[077 & (((s[1] << 2) & 074) | (('\0' >> 6) & 03))]; 01097 buff[i++] = padding; 01098 } 01099 else if (len == 1) { 01100 buff[i++] = trans[077 & (*s >> 2)]; 01101 buff[i++] = trans[077 & (((*s << 4) & 060) | (('\0' >> 4) & 017))]; 01102 buff[i++] = padding; 01103 buff[i++] = padding; 01104 } 01105 if (tail_lf) buff[i++] = '\n'; 01106 rb_str_buf_cat(str, buff, i); 01107 } 01108 01109 static const char hex_table[] = "0123456789ABCDEF"; 01110 01111 static void 01112 qpencode(VALUE str, VALUE from, long len) 01113 { 01114 char buff[1024]; 01115 long i = 0, n = 0, prev = EOF; 01116 unsigned char *s = (unsigned char*)RSTRING_PTR(from); 01117 unsigned char *send = s + RSTRING_LEN(from); 01118 01119 while (s < send) { 01120 if ((*s > 126) || 01121 (*s < 32 && *s != '\n' && *s != '\t') || 01122 (*s == '=')) { 01123 buff[i++] = '='; 01124 buff[i++] = hex_table[*s >> 4]; 01125 buff[i++] = hex_table[*s & 0x0f]; 01126 n += 3; 01127 prev = EOF; 01128 } 01129 else if (*s == '\n') { 01130 if (prev == ' ' || prev == '\t') { 01131 buff[i++] = '='; 01132 buff[i++] = *s; 01133 } 01134 buff[i++] = *s; 01135 n = 0; 01136 prev = *s; 01137 } 01138 else { 01139 buff[i++] = *s; 01140 n++; 01141 prev = *s; 01142 } 01143 if (n 
> len) { 01144 buff[i++] = '='; 01145 buff[i++] = '\n'; 01146 n = 0; 01147 prev = '\n'; 01148 } 01149 if (i > 1024 - 5) { 01150 rb_str_buf_cat(str, buff, i); 01151 i = 0; 01152 } 01153 s++; 01154 } 01155 if (n > 0) { 01156 buff[i++] = '='; 01157 buff[i++] = '\n'; 01158 } 01159 if (i > 0) { 01160 rb_str_buf_cat(str, buff, i); 01161 } 01162 } 01163 01164 static inline int 01165 hex2num(char c) 01166 { 01167 switch (c) { 01168 case '0': case '1': case '2': case '3': case '4': 01169 case '5': case '6': case '7': case '8': case '9': 01170 return c - '0'; 01171 case 'a': case 'b': case 'c': 01172 case 'd': case 'e': case 'f': 01173 return c - 'a' + 10; 01174 case 'A': case 'B': case 'C': 01175 case 'D': case 'E': case 'F': 01176 return c - 'A' + 10; 01177 default: 01178 return -1; 01179 } 01180 } 01181 01182 #define PACK_LENGTH_ADJUST_SIZE(sz) do { \ 01183 tmp_len = 0; \ 01184 if (len > (long)((send-s)/(sz))) { \ 01185 if (!star) { \ 01186 tmp_len = len-(send-s)/(sz); \ 01187 } \ 01188 len = (send-s)/(sz); \ 01189 } \ 01190 } while (0) 01191 01192 #define PACK_ITEM_ADJUST() do { \ 01193 if (tmp_len > 0 && !block_p) \ 01194 rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \ 01195 } while (0) 01196 01197 static VALUE 01198 infected_str_new(const char *ptr, long len, VALUE str) 01199 { 01200 VALUE s = rb_str_new(ptr, len); 01201 01202 OBJ_INFECT(s, str); 01203 return s; 01204 } 01205 01206 /* 01207 * call-seq: 01208 * str.unpack(format) -> anArray 01209 * 01210 * Decodes <i>str</i> (which may contain binary data) according to the 01211 * format string, returning an array of each value extracted. The 01212 * format string consists of a sequence of single-character directives, 01213 * summarized in the table at the end of this entry. 01214 * Each directive may be followed 01215 * by a number, indicating the number of times to repeat with this 01216 * directive. An asterisk (``<code>*</code>'') will use up all 01217 * remaining elements. 
The directives <code>sSiIlL</code> may each be 01218 * followed by an underscore (``<code>_</code>'') or 01219 * exclamation mark (``<code>!</code>'') to use the underlying 01220 * platform's native size for the specified type; otherwise, it uses a 01221 * platform-independent consistent size. Spaces are ignored in the 01222 * format string. See also <code>Array#pack</code>. 01223 * 01224 * "abc \0\0abc \0\0".unpack('A6Z6') #=> ["abc", "abc "] 01225 * "abc \0\0".unpack('a3a3') #=> ["abc", " \000\000"] 01226 * "abc \0abc \0".unpack('Z*Z*') #=> ["abc ", "abc "] 01227 * "aa".unpack('b8B8') #=> ["10000110", "01100001"] 01228 * "aaa".unpack('h2H2c') #=> ["16", "61", 97] 01229 * "\xfe\xff\xfe\xff".unpack('sS') #=> [-2, 65534] 01230 * "now=20is".unpack('M*') #=> ["now is"] 01231 * "whole".unpack('xax2aX2aX1aX2a') #=> ["h", "e", "l", "l", "o"] 01232 * 01233 * This table summarizes the various formats and the Ruby classes 01234 * returned by each. 01235 * 01236 * Integer | | 01237 * Directive | Returns | Meaning 01238 * ----------------------------------------------------------------- 01239 * C | Integer | 8-bit unsigned (unsigned char) 01240 * S | Integer | 16-bit unsigned, native endian (uint16_t) 01241 * L | Integer | 32-bit unsigned, native endian (uint32_t) 01242 * Q | Integer | 64-bit unsigned, native endian (uint64_t) 01243 * | | 01244 * c | Integer | 8-bit signed (signed char) 01245 * s | Integer | 16-bit signed, native endian (int16_t) 01246 * l | Integer | 32-bit signed, native endian (int32_t) 01247 * q | Integer | 64-bit signed, native endian (int64_t) 01248 * | | 01249 * S_, S! | Integer | unsigned short, native endian 01250 * I, I_, I! | Integer | unsigned int, native endian 01251 * L_, L! | Integer | unsigned long, native endian 01252 * | | 01253 * s_, s! | Integer | signed short, native endian 01254 * i, i_, i! | Integer | signed int, native endian 01255 * l_, l! 
| Integer | signed long, native endian 01256 * | | 01257 * S> L> Q> | Integer | same as the directives without ">" except 01258 * s> l> q> | | big endian 01259 * S!> I!> | | (available since Ruby 1.9.3) 01260 * L!> Q!> | | "S>" is same as "n" 01261 * s!> i!> | | "L>" is same as "N" 01262 * l!> q!> | | 01263 * | | 01264 * S< L< Q< | Integer | same as the directives without "<" except 01265 * s< l< q< | | little endian 01266 * S!< I!< | | (available since Ruby 1.9.3) 01267 * L!< Q!< | | "S<" is same as "v" 01268 * s!< i!< | | "L<" is same as "V" 01269 * l!< q!< | | 01270 * | | 01271 * n | Integer | 16-bit unsigned, network (big-endian) byte order 01272 * N | Integer | 32-bit unsigned, network (big-endian) byte order 01273 * v | Integer | 16-bit unsigned, VAX (little-endian) byte order 01274 * V | Integer | 32-bit unsigned, VAX (little-endian) byte order 01275 * | | 01276 * U | Integer | UTF-8 character 01277 * w | Integer | BER-compressed integer (see Array.pack) 01278 * 01279 * Float | | 01280 * Directive | Returns | Meaning 01281 * ----------------------------------------------------------------- 01282 * D, d | Float | double-precision, native format 01283 * F, f | Float | single-precision, native format 01284 * E | Float | double-precision, little-endian byte order 01285 * e | Float | single-precision, little-endian byte order 01286 * G | Float | double-precision, network (big-endian) byte order 01287 * g | Float | single-precision, network (big-endian) byte order 01288 * 01289 * String | | 01290 * Directive | Returns | Meaning 01291 * ----------------------------------------------------------------- 01292 * A | String | arbitrary binary string (remove trailing nulls and ASCII spaces) 01293 * a | String | arbitrary binary string 01294 * Z | String | null-terminated string 01295 * B | String | bit string (MSB first) 01296 * b | String | bit string (LSB first) 01297 * H | String | hex string (high nibble first) 01298 * h | String | hex string (low nibble first) 
01299 * u | String | UU-encoded string 01300 * M | String | quoted-printable, MIME encoding (see RFC2045) 01301 * m | String | base64 encoded string (RFC 2045) (default) 01302 * | | base64 encoded string (RFC 4648) if followed by 0 01303 * P | String | pointer to a structure (fixed-length string) 01304 * p | String | pointer to a null-terminated string 01305 * 01306 * Misc. | | 01307 * Directive | Returns | Meaning 01308 * ----------------------------------------------------------------- 01309 * @ | --- | skip to the offset given by the length argument 01310 * X | --- | skip backward one byte 01311 * x | --- | skip forward one byte 01312 */ 01313 01314 static VALUE 01315 pack_unpack(VALUE str, VALUE fmt) 01316 { 01317 static const char hexdigits[] = "0123456789abcdef"; 01318 char *s, *send; 01319 char *p, *pend; 01320 VALUE ary; 01321 char type; 01322 long len, tmp_len; 01323 int star; 01324 #ifdef NATINT_PACK 01325 int natint; /* native integer */ 01326 #endif 01327 int block_p = rb_block_given_p(); 01328 int signed_p, integer_size, bigendian_p; 01329 #define UNPACK_PUSH(item) do {\ 01330 VALUE item_val = (item);\ 01331 if (block_p) {\ 01332 rb_yield(item_val);\ 01333 }\ 01334 else {\ 01335 rb_ary_push(ary, item_val);\ 01336 }\ 01337 } while (0) 01338 01339 StringValue(str); 01340 StringValue(fmt); 01341 s = RSTRING_PTR(str); 01342 send = s + RSTRING_LEN(str); 01343 p = RSTRING_PTR(fmt); 01344 pend = p + RSTRING_LEN(fmt); 01345 01346 ary = block_p ? 
Qnil : rb_ary_new(); 01347 while (p < pend) { 01348 int explicit_endian = 0; 01349 type = *p++; 01350 #ifdef NATINT_PACK 01351 natint = 0; 01352 #endif 01353 01354 if (ISSPACE(type)) continue; 01355 if (type == '#') { 01356 while ((p < pend) && (*p != '\n')) { 01357 p++; 01358 } 01359 continue; 01360 } 01361 01362 star = 0; 01363 { 01364 static const char natstr[] = "sSiIlL"; 01365 static const char endstr[] = "sSiIlLqQ"; 01366 01367 modifiers: 01368 switch (*p) { 01369 case '_': 01370 case '!': 01371 01372 if (strchr(natstr, type)) { 01373 #ifdef NATINT_PACK 01374 natint = 1; 01375 #endif 01376 p++; 01377 } 01378 else { 01379 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr); 01380 } 01381 goto modifiers; 01382 01383 case '<': 01384 case '>': 01385 if (!strchr(endstr, type)) { 01386 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr); 01387 } 01388 if (explicit_endian) { 01389 rb_raise(rb_eRangeError, "Can't use both '<' and '>'"); 01390 } 01391 explicit_endian = *p++; 01392 goto modifiers; 01393 } 01394 } 01395 01396 if (p >= pend) 01397 len = 1; 01398 else if (*p == '*') { 01399 star = 1; 01400 len = send - s; 01401 p++; 01402 } 01403 else if (ISDIGIT(*p)) { 01404 errno = 0; 01405 len = STRTOUL(p, (char**)&p, 10); 01406 if (errno) { 01407 rb_raise(rb_eRangeError, "pack length too big"); 01408 } 01409 } 01410 else { 01411 len = (type != '@'); 01412 } 01413 01414 switch (type) { 01415 case '%': 01416 rb_raise(rb_eArgError, "%% is not supported"); 01417 break; 01418 01419 case 'A': 01420 if (len > send - s) len = send - s; 01421 { 01422 long end = len; 01423 char *t = s + len - 1; 01424 01425 while (t >= s) { 01426 if (*t != ' ' && *t != '\0') break; 01427 t--; len--; 01428 } 01429 UNPACK_PUSH(infected_str_new(s, len, str)); 01430 s += end; 01431 } 01432 break; 01433 01434 case 'Z': 01435 { 01436 char *t = s; 01437 01438 if (len > send-s) len = send-s; 01439 while (t < s+len && *t) t++; 01440 UNPACK_PUSH(infected_str_new(s, 
t-s, str)); 01441 if (t < send) t++; 01442 s = star ? t : s+len; 01443 } 01444 break; 01445 01446 case 'a': 01447 if (len > send - s) len = send - s; 01448 UNPACK_PUSH(infected_str_new(s, len, str)); 01449 s += len; 01450 break; 01451 01452 case 'b': 01453 { 01454 VALUE bitstr; 01455 char *t; 01456 int bits; 01457 long i; 01458 01459 if (p[-1] == '*' || len > (send - s) * 8) 01460 len = (send - s) * 8; 01461 bits = 0; 01462 UNPACK_PUSH(bitstr = rb_usascii_str_new(0, len)); 01463 t = RSTRING_PTR(bitstr); 01464 for (i=0; i<len; i++) { 01465 if (i & 7) bits >>= 1; 01466 else bits = *s++; 01467 *t++ = (bits & 1) ? '1' : '0'; 01468 } 01469 } 01470 break; 01471 01472 case 'B': 01473 { 01474 VALUE bitstr; 01475 char *t; 01476 int bits; 01477 long i; 01478 01479 if (p[-1] == '*' || len > (send - s) * 8) 01480 len = (send - s) * 8; 01481 bits = 0; 01482 UNPACK_PUSH(bitstr = rb_usascii_str_new(0, len)); 01483 t = RSTRING_PTR(bitstr); 01484 for (i=0; i<len; i++) { 01485 if (i & 7) bits <<= 1; 01486 else bits = *s++; 01487 *t++ = (bits & 128) ? 
'1' : '0'; 01488 } 01489 } 01490 break; 01491 01492 case 'h': 01493 { 01494 VALUE bitstr; 01495 char *t; 01496 int bits; 01497 long i; 01498 01499 if (p[-1] == '*' || len > (send - s) * 2) 01500 len = (send - s) * 2; 01501 bits = 0; 01502 UNPACK_PUSH(bitstr = rb_usascii_str_new(0, len)); 01503 t = RSTRING_PTR(bitstr); 01504 for (i=0; i<len; i++) { 01505 if (i & 1) 01506 bits >>= 4; 01507 else 01508 bits = *s++; 01509 *t++ = hexdigits[bits & 15]; 01510 } 01511 } 01512 break; 01513 01514 case 'H': 01515 { 01516 VALUE bitstr; 01517 char *t; 01518 int bits; 01519 long i; 01520 01521 if (p[-1] == '*' || len > (send - s) * 2) 01522 len = (send - s) * 2; 01523 bits = 0; 01524 UNPACK_PUSH(bitstr = rb_usascii_str_new(0, len)); 01525 t = RSTRING_PTR(bitstr); 01526 for (i=0; i<len; i++) { 01527 if (i & 1) 01528 bits <<= 4; 01529 else 01530 bits = *s++; 01531 *t++ = hexdigits[(bits >> 4) & 15]; 01532 } 01533 } 01534 break; 01535 01536 case 'c': 01537 PACK_LENGTH_ADJUST_SIZE(sizeof(char)); 01538 while (len-- > 0) { 01539 int c = *s++; 01540 if (c > (char)127) c-=256; 01541 UNPACK_PUSH(INT2FIX(c)); 01542 } 01543 PACK_ITEM_ADJUST(); 01544 break; 01545 01546 case 'C': 01547 PACK_LENGTH_ADJUST_SIZE(sizeof(unsigned char)); 01548 while (len-- > 0) { 01549 unsigned char c = *s++; 01550 UNPACK_PUSH(INT2FIX(c)); 01551 } 01552 PACK_ITEM_ADJUST(); 01553 break; 01554 01555 case 's': 01556 signed_p = 1; 01557 integer_size = NATINT_LEN(short, 2); 01558 bigendian_p = BIGENDIAN_P(); 01559 goto unpack_integer; 01560 01561 case 'S': 01562 signed_p = 0; 01563 integer_size = NATINT_LEN(short, 2); 01564 bigendian_p = BIGENDIAN_P(); 01565 goto unpack_integer; 01566 01567 case 'i': 01568 signed_p = 1; 01569 integer_size = (int)sizeof(int); 01570 bigendian_p = BIGENDIAN_P(); 01571 goto unpack_integer; 01572 01573 case 'I': 01574 signed_p = 0; 01575 integer_size = (int)sizeof(int); 01576 bigendian_p = BIGENDIAN_P(); 01577 goto unpack_integer; 01578 01579 case 'l': 01580 signed_p = 1; 01581 integer_size 
= NATINT_LEN(long, 4); 01582 bigendian_p = BIGENDIAN_P(); 01583 goto unpack_integer; 01584 01585 case 'L': 01586 signed_p = 0; 01587 integer_size = NATINT_LEN(long, 4); 01588 bigendian_p = BIGENDIAN_P(); 01589 goto unpack_integer; 01590 01591 case 'q': 01592 signed_p = 1; 01593 integer_size = 8; 01594 bigendian_p = BIGENDIAN_P(); 01595 goto unpack_integer; 01596 01597 case 'Q': 01598 signed_p = 0; 01599 integer_size = 8; 01600 bigendian_p = BIGENDIAN_P(); 01601 goto unpack_integer; 01602 01603 case 'n': 01604 signed_p = 0; 01605 integer_size = 2; 01606 bigendian_p = 1; 01607 goto unpack_integer; 01608 01609 case 'N': 01610 signed_p = 0; 01611 integer_size = 4; 01612 bigendian_p = 1; 01613 goto unpack_integer; 01614 01615 case 'v': 01616 signed_p = 0; 01617 integer_size = 2; 01618 bigendian_p = 0; 01619 goto unpack_integer; 01620 01621 case 'V': 01622 signed_p = 0; 01623 integer_size = 4; 01624 bigendian_p = 0; 01625 goto unpack_integer; 01626 01627 unpack_integer: 01628 if (explicit_endian) { 01629 bigendian_p = explicit_endian == '>'; 01630 } 01631 01632 switch (integer_size) { 01633 #if defined(HAVE_INT16_T) && !defined(FORCE_BIG_PACK) 01634 case SIZEOF_INT16_T: 01635 if (signed_p) { 01636 PACK_LENGTH_ADJUST_SIZE(sizeof(int16_t)); 01637 while (len-- > 0) { 01638 union { 01639 int16_t i; 01640 char a[sizeof(int16_t)]; 01641 } v; 01642 memcpy(v.a, s, sizeof(int16_t)); 01643 if (bigendian_p != BIGENDIAN_P()) v.i = swap16(v.i); 01644 s += sizeof(int16_t); 01645 UNPACK_PUSH(INT2FIX(v.i)); 01646 } 01647 PACK_ITEM_ADJUST(); 01648 } 01649 else { 01650 PACK_LENGTH_ADJUST_SIZE(sizeof(uint16_t)); 01651 while (len-- > 0) { 01652 union { 01653 uint16_t i; 01654 char a[sizeof(uint16_t)]; 01655 } v; 01656 memcpy(v.a, s, sizeof(uint16_t)); 01657 if (bigendian_p != BIGENDIAN_P()) v.i = swap16(v.i); 01658 s += sizeof(uint16_t); 01659 UNPACK_PUSH(INT2FIX(v.i)); 01660 } 01661 PACK_ITEM_ADJUST(); 01662 } 01663 break; 01664 #endif 01665 01666 #if defined(HAVE_INT32_T) && 
!defined(FORCE_BIG_PACK) 01667 case SIZEOF_INT32_T: 01668 if (signed_p) { 01669 PACK_LENGTH_ADJUST_SIZE(sizeof(int32_t)); 01670 while (len-- > 0) { 01671 union { 01672 int32_t i; 01673 char a[sizeof(int32_t)]; 01674 } v; 01675 memcpy(v.a, s, sizeof(int32_t)); 01676 if (bigendian_p != BIGENDIAN_P()) v.i = swap32(v.i); 01677 s += sizeof(int32_t); 01678 UNPACK_PUSH(INT2NUM(v.i)); 01679 } 01680 PACK_ITEM_ADJUST(); 01681 } 01682 else { 01683 PACK_LENGTH_ADJUST_SIZE(sizeof(uint32_t)); 01684 while (len-- > 0) { 01685 union { 01686 uint32_t i; 01687 char a[sizeof(uint32_t)]; 01688 } v; 01689 memcpy(v.a, s, sizeof(uint32_t)); 01690 if (bigendian_p != BIGENDIAN_P()) v.i = swap32(v.i); 01691 s += sizeof(uint32_t); 01692 UNPACK_PUSH(UINT2NUM(v.i)); 01693 } 01694 PACK_ITEM_ADJUST(); 01695 } 01696 break; 01697 #endif 01698 01699 #if defined(HAVE_INT64_T) && !defined(FORCE_BIG_PACK) 01700 case SIZEOF_INT64_T: 01701 if (signed_p) { 01702 PACK_LENGTH_ADJUST_SIZE(sizeof(int64_t)); 01703 while (len-- > 0) { 01704 union { 01705 int64_t i; 01706 char a[sizeof(int64_t)]; 01707 } v; 01708 memcpy(v.a, s, sizeof(int64_t)); 01709 if (bigendian_p != BIGENDIAN_P()) v.i = swap64(v.i); 01710 s += sizeof(int64_t); 01711 UNPACK_PUSH(INT64toNUM(v.i)); 01712 } 01713 PACK_ITEM_ADJUST(); 01714 } 01715 else { 01716 PACK_LENGTH_ADJUST_SIZE(sizeof(uint64_t)); 01717 while (len-- > 0) { 01718 union { 01719 uint64_t i; 01720 char a[sizeof(uint64_t)]; 01721 } v; 01722 memcpy(v.a, s, sizeof(uint64_t)); 01723 if (bigendian_p != BIGENDIAN_P()) v.i = swap64(v.i); 01724 s += sizeof(uint64_t); 01725 UNPACK_PUSH(UINT64toNUM(v.i)); 01726 } 01727 PACK_ITEM_ADJUST(); 01728 } 01729 break; 01730 #endif 01731 01732 default: 01733 if (integer_size > MAX_INTEGER_PACK_SIZE) 01734 rb_bug("unexpected integer size for pack: %d", integer_size); 01735 PACK_LENGTH_ADJUST_SIZE(integer_size); 01736 while (len-- > 0) { 01737 union { 01738 unsigned long i[(MAX_INTEGER_PACK_SIZE+SIZEOF_LONG)/SIZEOF_LONG]; 01739 char 
a[(MAX_INTEGER_PACK_SIZE+SIZEOF_LONG)/SIZEOF_LONG*SIZEOF_LONG]; 01740 } v; 01741 int num_longs = (integer_size+SIZEOF_LONG)/SIZEOF_LONG; 01742 int i; 01743 01744 if (signed_p && (signed char)s[bigendian_p ? 0 : (integer_size-1)] < 0) 01745 memset(v.a, 0xff, sizeof(long)*num_longs); 01746 else 01747 memset(v.a, 0, sizeof(long)*num_longs); 01748 if (bigendian_p) 01749 memcpy(v.a + sizeof(long)*num_longs - integer_size, s, integer_size); 01750 else 01751 memcpy(v.a, s, integer_size); 01752 if (bigendian_p) { 01753 for (i = 0; i < num_longs/2; i++) { 01754 unsigned long t = v.i[i]; 01755 v.i[i] = v.i[num_longs-1-i]; 01756 v.i[num_longs-1-i] = t; 01757 } 01758 } 01759 if (bigendian_p != BIGENDIAN_P()) { 01760 for (i = 0; i < num_longs; i++) 01761 v.i[i] = swapl(v.i[i]); 01762 } 01763 s += integer_size; 01764 UNPACK_PUSH(rb_big_unpack(v.i, num_longs)); 01765 } 01766 PACK_ITEM_ADJUST(); 01767 break; 01768 } 01769 break; 01770 01771 case 'f': 01772 case 'F': 01773 PACK_LENGTH_ADJUST_SIZE(sizeof(float)); 01774 while (len-- > 0) { 01775 float tmp; 01776 memcpy(&tmp, s, sizeof(float)); 01777 s += sizeof(float); 01778 UNPACK_PUSH(DBL2NUM((double)tmp)); 01779 } 01780 PACK_ITEM_ADJUST(); 01781 break; 01782 01783 case 'e': 01784 PACK_LENGTH_ADJUST_SIZE(sizeof(float)); 01785 while (len-- > 0) { 01786 float tmp; 01787 FLOAT_CONVWITH(ftmp); 01788 01789 memcpy(&tmp, s, sizeof(float)); 01790 s += sizeof(float); 01791 tmp = VTOHF(tmp,ftmp); 01792 UNPACK_PUSH(DBL2NUM((double)tmp)); 01793 } 01794 PACK_ITEM_ADJUST(); 01795 break; 01796 01797 case 'E': 01798 PACK_LENGTH_ADJUST_SIZE(sizeof(double)); 01799 while (len-- > 0) { 01800 double tmp; 01801 DOUBLE_CONVWITH(dtmp); 01802 01803 memcpy(&tmp, s, sizeof(double)); 01804 s += sizeof(double); 01805 tmp = VTOHD(tmp,dtmp); 01806 UNPACK_PUSH(DBL2NUM(tmp)); 01807 } 01808 PACK_ITEM_ADJUST(); 01809 break; 01810 01811 case 'D': 01812 case 'd': 01813 PACK_LENGTH_ADJUST_SIZE(sizeof(double)); 01814 while (len-- > 0) { 01815 double tmp; 01816 
memcpy(&tmp, s, sizeof(double)); 01817 s += sizeof(double); 01818 UNPACK_PUSH(DBL2NUM(tmp)); 01819 } 01820 PACK_ITEM_ADJUST(); 01821 break; 01822 01823 case 'g': 01824 PACK_LENGTH_ADJUST_SIZE(sizeof(float)); 01825 while (len-- > 0) { 01826 float tmp; 01827 FLOAT_CONVWITH(ftmp); 01828 01829 memcpy(&tmp, s, sizeof(float)); 01830 s += sizeof(float); 01831 tmp = NTOHF(tmp,ftmp); 01832 UNPACK_PUSH(DBL2NUM((double)tmp)); 01833 } 01834 PACK_ITEM_ADJUST(); 01835 break; 01836 01837 case 'G': 01838 PACK_LENGTH_ADJUST_SIZE(sizeof(double)); 01839 while (len-- > 0) { 01840 double tmp; 01841 DOUBLE_CONVWITH(dtmp); 01842 01843 memcpy(&tmp, s, sizeof(double)); 01844 s += sizeof(double); 01845 tmp = NTOHD(tmp,dtmp); 01846 UNPACK_PUSH(DBL2NUM(tmp)); 01847 } 01848 PACK_ITEM_ADJUST(); 01849 break; 01850 01851 case 'U': 01852 if (len > send - s) len = send - s; 01853 while (len > 0 && s < send) { 01854 long alen = send - s; 01855 unsigned long l; 01856 01857 l = utf8_to_uv(s, &alen); 01858 s += alen; len--; 01859 UNPACK_PUSH(ULONG2NUM(l)); 01860 } 01861 break; 01862 01863 case 'u': 01864 { 01865 VALUE buf = infected_str_new(0, (send - s)*3/4, str); 01866 char *ptr = RSTRING_PTR(buf); 01867 long total = 0; 01868 01869 while (s < send && *s > ' ' && *s < 'a') { 01870 long a,b,c,d; 01871 char hunk[4]; 01872 01873 hunk[3] = '\0'; 01874 len = (*s++ - ' ') & 077; 01875 total += len; 01876 if (total > RSTRING_LEN(buf)) { 01877 len -= total - RSTRING_LEN(buf); 01878 total = RSTRING_LEN(buf); 01879 } 01880 01881 while (len > 0) { 01882 long mlen = len > 3 ? 
3 : len; 01883 01884 if (s < send && *s >= ' ') 01885 a = (*s++ - ' ') & 077; 01886 else 01887 a = 0; 01888 if (s < send && *s >= ' ') 01889 b = (*s++ - ' ') & 077; 01890 else 01891 b = 0; 01892 if (s < send && *s >= ' ') 01893 c = (*s++ - ' ') & 077; 01894 else 01895 c = 0; 01896 if (s < send && *s >= ' ') 01897 d = (*s++ - ' ') & 077; 01898 else 01899 d = 0; 01900 hunk[0] = (char)(a << 2 | b >> 4); 01901 hunk[1] = (char)(b << 4 | c >> 2); 01902 hunk[2] = (char)(c << 6 | d); 01903 memcpy(ptr, hunk, mlen); 01904 ptr += mlen; 01905 len -= mlen; 01906 } 01907 if (*s == '\r') s++; 01908 if (*s == '\n') s++; 01909 else if (s < send && (s+1 == send || s[1] == '\n')) 01910 s += 2; /* possible checksum byte */ 01911 } 01912 01913 rb_str_set_len(buf, total); 01914 UNPACK_PUSH(buf); 01915 } 01916 break; 01917 01918 case 'm': 01919 { 01920 VALUE buf = infected_str_new(0, (send - s)*3/4, str); 01921 char *ptr = RSTRING_PTR(buf); 01922 int a = -1,b = -1,c = 0,d = 0; 01923 static signed char b64_xtable[256]; 01924 01925 if (b64_xtable['/'] <= 0) { 01926 int i; 01927 01928 for (i = 0; i < 256; i++) { 01929 b64_xtable[i] = -1; 01930 } 01931 for (i = 0; i < 64; i++) { 01932 b64_xtable[(unsigned char)b64_table[i]] = (char)i; 01933 } 01934 } 01935 if (len == 0) { 01936 while (s < send) { 01937 a = b = c = d = -1; 01938 a = b64_xtable[(unsigned char)*s++]; 01939 if (s >= send || a == -1) rb_raise(rb_eArgError, "invalid base64"); 01940 b = b64_xtable[(unsigned char)*s++]; 01941 if (s >= send || b == -1) rb_raise(rb_eArgError, "invalid base64"); 01942 if (*s == '=') { 01943 if (s + 2 == send && *(s + 1) == '=') break; 01944 rb_raise(rb_eArgError, "invalid base64"); 01945 } 01946 c = b64_xtable[(unsigned char)*s++]; 01947 if (s >= send || c == -1) rb_raise(rb_eArgError, "invalid base64"); 01948 if (s + 1 == send && *s == '=') break; 01949 d = b64_xtable[(unsigned char)*s++]; 01950 if (d == -1) rb_raise(rb_eArgError, "invalid base64"); 01951 *ptr++ = castchar(a << 2 | b >> 4); 01952 
*ptr++ = castchar(b << 4 | c >> 2); 01953 *ptr++ = castchar(c << 6 | d); 01954 } 01955 if (c == -1) { 01956 *ptr++ = castchar(a << 2 | b >> 4); 01957 if (b & 0xf) rb_raise(rb_eArgError, "invalid base64"); 01958 } 01959 else if (d == -1) { 01960 *ptr++ = castchar(a << 2 | b >> 4); 01961 *ptr++ = castchar(b << 4 | c >> 2); 01962 if (c & 0x3) rb_raise(rb_eArgError, "invalid base64"); 01963 } 01964 } 01965 else { 01966 while (s < send) { 01967 a = b = c = d = -1; 01968 while ((a = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;} 01969 if (s >= send) break; 01970 s++; 01971 while ((b = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;} 01972 if (s >= send) break; 01973 s++; 01974 while ((c = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;} 01975 if (*s == '=' || s >= send) break; 01976 s++; 01977 while ((d = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;} 01978 if (*s == '=' || s >= send) break; 01979 s++; 01980 *ptr++ = castchar(a << 2 | b >> 4); 01981 *ptr++ = castchar(b << 4 | c >> 2); 01982 *ptr++ = castchar(c << 6 | d); 01983 } 01984 if (a != -1 && b != -1) { 01985 if (c == -1 && *s == '=') 01986 *ptr++ = castchar(a << 2 | b >> 4); 01987 else if (c != -1 && *s == '=') { 01988 *ptr++ = castchar(a << 2 | b >> 4); 01989 *ptr++ = castchar(b << 4 | c >> 2); 01990 } 01991 } 01992 } 01993 rb_str_set_len(buf, ptr - RSTRING_PTR(buf)); 01994 UNPACK_PUSH(buf); 01995 } 01996 break; 01997 01998 case 'M': 01999 { 02000 VALUE buf = infected_str_new(0, send - s, str); 02001 char *ptr = RSTRING_PTR(buf), *ss = s; 02002 int c1, c2; 02003 02004 while (s < send) { 02005 if (*s == '=') { 02006 if (++s == send) break; 02007 if (s+1 < send && *s == '\r' && *(s+1) == '\n') 02008 s++; 02009 if (*s != '\n') { 02010 if ((c1 = hex2num(*s)) == -1) break; 02011 if (++s == send) break; 02012 if ((c2 = hex2num(*s)) == -1) break; 02013 *ptr++ = castchar(c1 << 4 | c2); 02014 } 02015 } 02016 else { 02017 *ptr++ = *s; 02018 } 
02019 s++; 02020 ss = s; 02021 } 02022 rb_str_set_len(buf, ptr - RSTRING_PTR(buf)); 02023 rb_str_buf_cat(buf, ss, send-ss); 02024 ENCODING_CODERANGE_SET(buf, rb_ascii8bit_encindex(), ENC_CODERANGE_VALID); 02025 UNPACK_PUSH(buf); 02026 } 02027 break; 02028 02029 case '@': 02030 if (len > RSTRING_LEN(str)) 02031 rb_raise(rb_eArgError, "@ outside of string"); 02032 s = RSTRING_PTR(str) + len; 02033 break; 02034 02035 case 'X': 02036 if (len > s - RSTRING_PTR(str)) 02037 rb_raise(rb_eArgError, "X outside of string"); 02038 s -= len; 02039 break; 02040 02041 case 'x': 02042 if (len > send - s) 02043 rb_raise(rb_eArgError, "x outside of string"); 02044 s += len; 02045 break; 02046 02047 case 'P': 02048 if (sizeof(char *) <= (size_t)(send - s)) { 02049 VALUE tmp = Qnil; 02050 char *t; 02051 02052 memcpy(&t, s, sizeof(char *)); 02053 s += sizeof(char *); 02054 02055 if (t) { 02056 VALUE a, *p, *pend; 02057 02058 if (!(a = rb_str_associated(str))) { 02059 rb_raise(rb_eArgError, "no associated pointer"); 02060 } 02061 p = RARRAY_PTR(a); 02062 pend = p + RARRAY_LEN(a); 02063 while (p < pend) { 02064 if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) { 02065 if (len < RSTRING_LEN(*p)) { 02066 tmp = rb_tainted_str_new(t, len); 02067 rb_str_associate(tmp, a); 02068 } 02069 else { 02070 tmp = *p; 02071 } 02072 break; 02073 } 02074 p++; 02075 } 02076 if (p == pend) { 02077 rb_raise(rb_eArgError, "non associated pointer"); 02078 } 02079 } 02080 UNPACK_PUSH(tmp); 02081 } 02082 break; 02083 02084 case 'p': 02085 if (len > (long)((send - s) / sizeof(char *))) 02086 len = (send - s) / sizeof(char *); 02087 while (len-- > 0) { 02088 if ((size_t)(send - s) < sizeof(char *)) 02089 break; 02090 else { 02091 VALUE tmp = Qnil; 02092 char *t; 02093 02094 memcpy(&t, s, sizeof(char *)); 02095 s += sizeof(char *); 02096 02097 if (t) { 02098 VALUE a, *p, *pend; 02099 02100 if (!(a = rb_str_associated(str))) { 02101 rb_raise(rb_eArgError, "no associated pointer"); 02102 } 02103 p = 
RARRAY_PTR(a); 02104 pend = p + RARRAY_LEN(a); 02105 while (p < pend) { 02106 if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) { 02107 tmp = *p; 02108 break; 02109 } 02110 p++; 02111 } 02112 if (p == pend) { 02113 rb_raise(rb_eArgError, "non associated pointer"); 02114 } 02115 } 02116 UNPACK_PUSH(tmp); 02117 } 02118 } 02119 break; 02120 02121 case 'w': 02122 { 02123 unsigned long ul = 0; 02124 unsigned long ulmask = 0xfeUL << ((sizeof(unsigned long) - 1) * 8); 02125 02126 while (len > 0 && s < send) { 02127 ul <<= 7; 02128 ul |= (*s & 0x7f); 02129 if (!(*s++ & 0x80)) { 02130 UNPACK_PUSH(ULONG2NUM(ul)); 02131 len--; 02132 ul = 0; 02133 } 02134 else if (ul & ulmask) { 02135 VALUE big = rb_uint2big(ul); 02136 VALUE big128 = rb_uint2big(128); 02137 while (s < send) { 02138 big = rb_big_mul(big, big128); 02139 big = rb_big_plus(big, rb_uint2big(*s & 0x7f)); 02140 if (!(*s++ & 0x80)) { 02141 UNPACK_PUSH(big); 02142 len--; 02143 ul = 0; 02144 break; 02145 } 02146 } 02147 } 02148 } 02149 } 02150 break; 02151 02152 default: 02153 rb_warning("unknown unpack directive '%c' in '%s'", 02154 type, RSTRING_PTR(fmt)); 02155 break; 02156 } 02157 } 02158 02159 return ary; 02160 } 02161 02162 #define BYTEWIDTH 8 02163 02164 int 02165 rb_uv_to_utf8(char buf[6], unsigned long uv) 02166 { 02167 if (uv <= 0x7f) { 02168 buf[0] = (char)uv; 02169 return 1; 02170 } 02171 if (uv <= 0x7ff) { 02172 buf[0] = castchar(((uv>>6)&0xff)|0xc0); 02173 buf[1] = castchar((uv&0x3f)|0x80); 02174 return 2; 02175 } 02176 if (uv <= 0xffff) { 02177 buf[0] = castchar(((uv>>12)&0xff)|0xe0); 02178 buf[1] = castchar(((uv>>6)&0x3f)|0x80); 02179 buf[2] = castchar((uv&0x3f)|0x80); 02180 return 3; 02181 } 02182 if (uv <= 0x1fffff) { 02183 buf[0] = castchar(((uv>>18)&0xff)|0xf0); 02184 buf[1] = castchar(((uv>>12)&0x3f)|0x80); 02185 buf[2] = castchar(((uv>>6)&0x3f)|0x80); 02186 buf[3] = castchar((uv&0x3f)|0x80); 02187 return 4; 02188 } 02189 if (uv <= 0x3ffffff) { 02190 buf[0] = castchar(((uv>>24)&0xff)|0xf8); 02191 
buf[1] = castchar(((uv>>18)&0x3f)|0x80); 02192 buf[2] = castchar(((uv>>12)&0x3f)|0x80); 02193 buf[3] = castchar(((uv>>6)&0x3f)|0x80); 02194 buf[4] = castchar((uv&0x3f)|0x80); 02195 return 5; 02196 } 02197 if (uv <= 0x7fffffff) { 02198 buf[0] = castchar(((uv>>30)&0xff)|0xfc); 02199 buf[1] = castchar(((uv>>24)&0x3f)|0x80); 02200 buf[2] = castchar(((uv>>18)&0x3f)|0x80); 02201 buf[3] = castchar(((uv>>12)&0x3f)|0x80); 02202 buf[4] = castchar(((uv>>6)&0x3f)|0x80); 02203 buf[5] = castchar((uv&0x3f)|0x80); 02204 return 6; 02205 } 02206 rb_raise(rb_eRangeError, "pack(U): value out of range"); 02207 02208 UNREACHABLE; 02209 } 02210 02211 static const unsigned long utf8_limits[] = { 02212 0x0, /* 1 */ 02213 0x80, /* 2 */ 02214 0x800, /* 3 */ 02215 0x10000, /* 4 */ 02216 0x200000, /* 5 */ 02217 0x4000000, /* 6 */ 02218 0x80000000, /* 7 */ 02219 }; 02220 02221 static unsigned long 02222 utf8_to_uv(const char *p, long *lenp) 02223 { 02224 int c = *p++ & 0xff; 02225 unsigned long uv = c; 02226 long n; 02227 02228 if (!(uv & 0x80)) { 02229 *lenp = 1; 02230 return uv; 02231 } 02232 if (!(uv & 0x40)) { 02233 *lenp = 1; 02234 rb_raise(rb_eArgError, "malformed UTF-8 character"); 02235 } 02236 02237 if (!(uv & 0x20)) { n = 2; uv &= 0x1f; } 02238 else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; } 02239 else if (!(uv & 0x08)) { n = 4; uv &= 0x07; } 02240 else if (!(uv & 0x04)) { n = 5; uv &= 0x03; } 02241 else if (!(uv & 0x02)) { n = 6; uv &= 0x01; } 02242 else { 02243 *lenp = 1; 02244 rb_raise(rb_eArgError, "malformed UTF-8 character"); 02245 } 02246 if (n > *lenp) { 02247 rb_raise(rb_eArgError, "malformed UTF-8 character (expected %ld bytes, given %ld bytes)", 02248 n, *lenp); 02249 } 02250 *lenp = n--; 02251 if (n != 0) { 02252 while (n--) { 02253 c = *p++ & 0xff; 02254 if ((c & 0xc0) != 0x80) { 02255 *lenp -= n + 1; 02256 rb_raise(rb_eArgError, "malformed UTF-8 character"); 02257 } 02258 else { 02259 c &= 0x3f; 02260 uv = uv << 6 | c; 02261 } 02262 } 02263 } 02264 n = *lenp - 1; 02265 if 
(uv < utf8_limits[n]) { 02266 rb_raise(rb_eArgError, "redundant UTF-8 sequence"); 02267 } 02268 return uv; 02269 } 02270 02271 void 02272 Init_pack(void) 02273 { 02274 rb_define_method(rb_cArray, "pack", pack_pack, 1); 02275 rb_define_method(rb_cString, "unpack", pack_unpack, 1); 02276 } 02277