diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..35dfaf0 --- /dev/null +++ b/Makefile @@ -0,0 +1,7 @@ + + +test: + phpunit --coverage-text --whitelist=msgpack.php msgpackTest.php + +clean: + rm -f *~ diff --git a/README.md b/README.md index b5f521e..3dd968c 100644 --- a/README.md +++ b/README.md @@ -1,18 +1,26 @@ MessagePack PHP functions ============= -The purpose of this project is to implement [MessagePack](http://msgpack.org/) serialization using only PHP. This might be useful for someone unable to install php-modules, or using [HipHop](https://github.com/facebook/hiphop-php) to compile PHP as C++. +The purpose of this project is to implement [MessagePack](http://msgpack.org/) serialization using only PHP. It aims to work even on very old versions of PHP, such as found in many long-term support server distros, while supporting the modern str8/bin8/bin16/bin32 additions to msgpack. + +Additions +----- + + - Supports decoding and encoding str8 and bin8/16/32 types. + - By analogy to the python implementation, msgpack_unpackb accepts $raw parameter determining handling of str types (default: $raw=True). + - By analogy to the python implementation, msgpack_packb accepts $use_bin_type parameter determining use of bin types (default: $use_bin_type=False). + - PHP has no concept of bytes vs unicode strings, so msgpack_packb also accepts $force_str_as_bin parameter forcing the use of bin types even for + strings that are valid utf-8. Caveats ----- - - Only msgpack_pack() and msgpack_unpack() are defined. - - It's only tested on [little endian](http://en.wikipedia.org/wiki/Endianness) architecture, but should work on big endian as well, please test it if able. - - The uint64 and int64 types probably requires 64-bit systems to work + - Only msgpack_packb() and msgpack_unpackb() are defined. + - It's only tested on [little endian](http://en.wikipedia.org/wiki/Endianness) architecture, but should work on big endian as well. 
- It uses is_x() to select the type, do your casts before using the functions - - Unlike the official lib you can't pack objects. If you know how to do this please fork. + - Unlike the official lib you can't pack objects. Feel free to submit a pull request to add this functionality. - It will always pack integers into the least amount of bits possible, and will prefer unsigned. - + - It does not support ext/fixext types. Benchmark ----- @@ -55,4 +63,4 @@ status : OK OK OK OK OK serialize : 0.0199 (100%) 0.0481 (242%) 0.0306 (153%) 0.0148 ( 74%) 0.8513 (4285%) 0.0763 ( 383%) unserialize: 0.0231 (100%) 0.0583 (252%) 0.0202 ( 87%) 0.0248 (107%) 1.4168 (6134%) 0.3156 (1366%) size : 406 (100%) 351 ( 86%) 346 ( 85%) 351 ( 86%) 351 ( 86%) 351 ( 86%) -``` \ No newline at end of file +``` diff --git a/msgpack.php b/msgpack.php index 39b765a..6341f85 100644 --- a/msgpack.php +++ b/msgpack.php @@ -1,302 +1,425 @@ =A0 + * 7. If first byte is F4, second byte must be <84 + * 8. If first byte is ED, second byte must be = 0xF5) return true; + return false; +} +function is_utf8($input) +{ + $pos = 0; + while ($pos < strlen($input)) { + $byte = ord(substr($input,$pos++,1)); + if (is_disallowed_utf8($byte)) return false; // not allowed character + if ($byte >= 0xC0 && $byte <= 0xDF) { + // two-byte sequence + if ($pos+1 > strlen($input)) return false; // too short + $byte2 = ord(substr($input,$pos++,1)); + if ($byte2 < 0x80 || $byte2 > 0xBF || is_disallowed_utf8($byte2)) return false; // not continuation byte + } else if ($byte >= 0xE0 && $byte <= 0xEF) { + // three-byte sequence + if ($pos+2 > strlen($input)) return false; // too short + $byte2 = ord(substr($input,$pos++,1)); + $byte3 = ord(substr($input,$pos++,1)); + if ($byte == 0xE0 && $byte2 < 0xA0) return false; // overlong (should be only two bytes) + if ($byte == 0xED && $byte2 >= 0xA0) return false; // surrogate halves reserved for UTF-16 + if ($byte2 < 0x80 || $byte2 > 0xBF || is_disallowed_utf8($byte2)) return false; // 
not continuation byte + if ($byte3 < 0x80 || $byte3 > 0xBF || is_disallowed_utf8($byte3)) return false; // not continuation byte + } else if ($byte >= 0xF0 && $byte <0xFF) { + // four-byte sequence + if ($pos+3 > strlen($input)) return false; // too short + $byte2 = ord(substr($input,$pos++,1)); + $byte3 = ord(substr($input,$pos++,1)); + $byte4 = ord(substr($input,$pos++,1)); + if ($byte == 0xF0 && $byte2 < 0xA0) return false; // overlong (should be only three bytes) + if ($byte2 < 0x80 || $byte2 > 0xBF || is_disallowed_utf8($byte2)) return false; // not continuation byte + if ($byte3 < 0x80 || $byte3 > 0xBF || is_disallowed_utf8($byte3)) return false; // not continuation byte + if ($byte4 < 0x80 || $byte4 > 0xBF || is_disallowed_utf8($byte4)) return false; // not continuation byte + } + // otherwise, 0x00-0x7F, valid UTF-8 bytes + } + return true; +} + + /** * Pack some input into msgpack format. - * Format specs: http://wiki.msgpack.org/display/MSGPACK/Format+specification - * + * Format specs: https://github.com/msgpack/msgpack/blob/master/spec.md + * * @param mixed $input + * @param boolean $use_bin_type=False + * @param boolean $force_str_as_bin=False * @return string * @throws \InvalidArgumentException */ -function msgpack_pack($input) +function msgpack_packb($input, $use_bin_type=False, $force_str_as_bin=False) { - static $bigendian; - if (!isset($bigendian)) $bigendian = (pack('S',1)==pack('n',1)); - - // null - if (is_null($input)) { - return pack('C',0xC0); - } - - // booleans - if (is_bool($input)) { - return pack('C',$input ? 
0xC3 : 0xC2); - } - - // Integers - if (is_int($input)) { - // positive fixnum - if (($input|0x7F) == 0x7F) return pack('C',$input&0x7F); - // negative fixnum - if ($input < 0 && $input>=-32) return pack('c',$input); - // uint8 - if ($input > 0 && $input <= 0xFF) return pack('CC',0xCC,$input); - // uint16 - if ($input > 0 && $input <= 0xFFFF) return pack('Cn',0xCD,$input); - // uint32 - if ($input > 0 && $input <= 0xFFFFFFFF) return pack('CN',0xCE,$input); - // uint64 - if ($input > 0 && $input <= 0xFFFFFFFFFFFFFFFF) { - // pack() does not support 64-bit ints, so pack into two 32-bits - $h = ($input&0xFFFFFFFF00000000)>>32; - $l = $input&0xFFFFFFFF; - return $bigendian ? pack('CNN',0xCF,$l,$h) : pack('CNN',0xCF,$h,$l); - } - // int8 - if ($input < 0 && $input >= -0x80) return pack('Cc',0xD0,$input); - // int16 - if ($input < 0 && $input >= -0x8000) { - $p = pack('s',$input); - return pack('Ca2',0xD1,$bigendian ? $p : strrev($p)); - } - // int32 - if ($input < 0 && $input >= -0x80000000) { - $p = pack('l',$input); - return pack('Ca4',0xD2,$bigendian ? $p : strrev($p)); - } - // int64 - if ($input < 0 && $input >= -0x8000000000000000) { - // pack() does not support 64-bit ints either so pack into two 32-bits - $p1 = pack('l',$input&0xFFFFFFFF); - $p2 = pack('l',($input>>32)&0xFFFFFFFF); - return $bigendian ? pack('Ca4a4',0xD3,$p1,$p2) : pack('Ca4a4',0xD3,strrev($p2),strrev($p1)); - } - throw new \InvalidArgumentException('Invalid integer: '.$input); - } - - // Floats - if (is_float($input)) { - // Just pack into a double, don't take any chances with single precision - return pack('C',0xCB).($bigendian ? 
pack('d',$input) : strrev(pack('d',$input))); - } - - // Strings/Raw - if (is_string($input)) { - $len = strlen($input); - if ($len<32) { - return pack('Ca*',0xA0|$len,$input); - } else if ($len<=0xFFFF) { - return pack('Cna*',0xDA,$len,$input); - } else if ($len<=0xFFFFFFFF) { - return pack('CNa*',0xDB,$len,$input); - } else { - throw new \InvalidArgumentException('Input overflows (2^32)-1 byte max'); - } - } - - // Arrays & Maps - if (is_array($input)) { - $keys = array_keys($input); - $len = count($input); - - // Is this an associative array? - $isMap = false; - foreach ($keys as $key) { - if (!is_int($key)) { - $isMap = true; - break; - } - } - - $buf = ''; - if ($len<16) { - $buf .= pack('C',($isMap?0x80:0x90)|$len); - } else if ($len<=0xFFFF) { - $buf .= pack('Cn',($isMap?0xDE:0xDC),$len); - } else if ($len<=0xFFFFFFFF) { - $buf .= pack('CN',($isMap?0xDF:0xDD),$len); - } else { - throw new \InvalidArgumentException('Input overflows (2^32)-1 max elements'); - } - - foreach ($input as $key => $elm) { - if ($isMap) $buf .= msgpack_pack($key); - $buf .= msgpack_pack($elm); - } - return $buf; - - } - - throw new \InvalidArgumentException('Not able to pack/serialize input type: '.gettype($input)); + static $bigendian; + if (!isset($bigendian)) $bigendian = (pack('S',1)==pack('n',1)); + + // null + if (is_null($input)) { + return pack('C',0xC0); + } + + // booleans + if (is_bool($input)) { + return pack('C',$input ? 
0xC3 : 0xC2); + } + + // Integers + if (is_int($input)) { + // positive fixnum + if (($input|0x7F) == 0x7F) return pack('C',$input&0x7F); + // negative fixnum + if ($input < 0 && $input>=-32) return pack('c',$input); + // uint8 + if ($input > 0 && $input <= 0xFF) return pack('CC',0xCC,$input); + // uint16 + if ($input > 0 && $input <= 0xFFFF) return pack('Cn',0xCD,$input); + // uint32 + if ($input > 0 && $input <= 0xFFFFFFFF) return pack('CN',0xCE,$input); + // uint64 + if ($input > 0 && $input <= 0xFFFFFFFFFFFFFFFF) { + // pack() does not support 64-bit ints, so pack into two 32-bits + $h = ($input&0xFFFFFFFF00000000)>>32; + $l = $input&0xFFFFFFFF; + return $bigendian ? pack('CNN',0xCF,$l,$h) : pack('CNN',0xCF,$h,$l); + } + // int8 + if ($input < 0 && $input >= -0x80) return pack('Cc',0xD0,$input); + // int16 + if ($input < 0 && $input >= -0x8000) { + $p = pack('s',$input); + return pack('Ca2',0xD1,$bigendian ? $p : strrev($p)); + } + // int32 + if ($input < 0 && $input >= -0x80000000) { + $p = pack('l',$input); + return pack('Ca4',0xD2,$bigendian ? $p : strrev($p)); + } + // int64 + if ($input < 0 && $input >= -0x8000000000000000) { + // pack() does not support 64-bit ints either so pack into two 32-bits + $p1 = pack('l',$input&0xFFFFFFFF); + $p2 = pack('l',($input>>32)&0xFFFFFFFF); + return $bigendian ? pack('Ca4a4',0xD3,$p1,$p2) : pack('Ca4a4',0xD3,strrev($p2),strrev($p1)); + } + throw new \InvalidArgumentException('Invalid integer: '.$input); + } + + // Floats + if (is_float($input)) { + // Just pack into a double, don't take any chances with single precision + return pack('C',0xCB).($bigendian ? 
pack('d',$input) : strrev(pack('d',$input))); + } + + // Strings / Binary + if (is_string($input) && (!$use_bin_type || (!$force_str_as_bin && is_utf8($input)))) { + $len = strlen($input); + if ($len<32) { //fixstr + return pack('Ca*',0xA0|$len,$input); + } else if ($len<=0xFF && $use_bin_type) { //str8 only if bin types are available + return pack('CCa*',0xD9,$len,$input); + } else if ($len<=0xFFFF) { //str16 + return pack('Cna*',0xDA,$len,$input); + } else if ($len<=0xFFFFFFFF) { //str32 + return pack('CNa*',0xDB,$len,$input); + } else { + throw new \InvalidArgumentException('Input overflows (2^32)-1 byte max'); + } + } + if (is_string($input) && ($use_bin_type && ($force_str_as_bin || !is_utf8($input)))) { + $len = strlen($input); + if ($len<=0xFF) { //bin8 + return pack('CCa*',0xC4,$len,$input); + } else if ($len<=0xFFFF) { //bin16 + return pack('Cna*',0xC5,$len,$input); + } else if ($len<=0xFFFFFFFF) { //bin32 + return pack('CNa*',0xC6,$len,$input); + } else { + throw new \InvalidArgumentException('Input overflows (2^32)-1 byte max'); + } + } + + + // Arrays & Maps + if (is_array($input)) { + $keys = array_keys($input); + $len = count($input); + + // Is this an associative array? 
+ $isMap = false; + foreach ($keys as $key) { + if (!is_int($key)) { + $isMap = true; + break; + } + } + + $buf = ''; + if ($len<16) { + $buf .= pack('C',($isMap?0x80:0x90)|$len); + } else if ($len<=0xFFFF) { + $buf .= pack('Cn',($isMap?0xDE:0xDC),$len); + } else if ($len<=0xFFFFFFFF) { + $buf .= pack('CN',($isMap?0xDF:0xDD),$len); + } else { + throw new \InvalidArgumentException('Input overflows (2^32)-1 max elements'); + } + + foreach ($input as $key => $elm) { + if ($isMap) $buf .= msgpack_packb($key, $use_bin_type, $force_str_as_bin); + $buf .= msgpack_packb($elm, $use_bin_type, $force_str_as_bin); + } + return $buf; + + } + + throw new \InvalidArgumentException('Not able to pack/serialize input type: '.gettype($input)); } /** * Unpack data from a msgpack'ed string - * + * * @param string $input + * @param boolean $raw=True * @return mixed */ -function msgpack_unpack($input) +function msgpack_unpackb($input, $raw=True) { - static $bigendian; - if (!isset($bigendian)) $bigendian = (pack('S',1)==pack('n',1)); - - // Store input into a memory buffer so we can operate on it with filepointers - static $buffer; - static $pos; - if (!isset($buffer)) { - $buffer = $input; - $pos = 0; - } - - if ($pos==strlen($buffer)) { - $buffer = $input; - $pos = 0; - } - - // Read a single byte - $byte = substr($buffer,$pos++,1); - - - // null - if ($byte == "\xC0") return null; - - // booleans - if ($byte == "\xC2") return false; - if ($byte == "\xC3") return true; - - // positive fixnum - if (($byte & "\x80") == "\x00") { - return current(unpack('C',$byte&"\x7F")); - } - - // negative fixnum - if (($byte & "\xE0") == "\xE0") { - return current(unpack('c',$byte&"\xFF")); - } - - // fixed raw - if ((($byte ^ "\xA0") & "\xE0") == "\x00") { - $len = current(unpack('c',($byte ^ "\xA0"))); - if ($len == 0) return ""; - $d = substr($buffer,$pos,$len); - $pos+=$len; - return current(unpack('a'.$len,$d)); - } - - // Arrays - if ((($byte ^ "\x90") & "\xF0") == "\x00") { - // fixed array - 
$len = current(unpack('c',($byte ^ "\x90"))); - $data = array(); - for($i=0;$i<$len;$i++) { - $data[] = msgpack_unpack($input); - } - return $data; - } else if ($byte == "\xDC" || $byte == "\xDD") { - if ($byte == "\xDC") { - $d = substr($buffer,$pos,2); - $pos+=2; - $len = current(unpack('n',$d)); - } - if ($byte == "\xDD") { - $d = substr($buffer,$pos,4); - $pos+=4; - $len = current(unpack('N',$d)); - } - $data = array(); - for($i=0;$i<$len;$i++) { - $data[] = msgpack_unpack($input); - } - return $data; - } - - // Maps - if ((($byte ^ "\x80") & "\xF0") == "\x00") { - // fixed map - $len = current(unpack('c',($byte ^ "\x80"))); - $data = array(); - for($i=0;$i<$len;$i++) { - $key = msgpack_unpack($input); - $value = msgpack_unpack($input); - $data[$key] = $value; - } - return $data; - } else if ($byte == "\xDE" || $byte == "\xDF") { - if ($byte == "\xDE") { - $d = substr($buffer,$pos,2); - $pos+=2; - $len = current(unpack('n',$d)); - } - if ($byte == "\xDF") { - $d = substr($buffer,$pos,4); - $pos+=4; - $len = current(unpack('N',$d)); - } - $data = array(); - for($i=0;$i<$len;$i++) { - $key = msgpack_unpack($input); - $value = msgpack_unpack($input); - $data[$key] = $value; - } - return $data; - } - - switch ($byte) { - // Unsigned integers - case "\xCC": // uint 8 - return current(unpack('C',substr($buffer,$pos++,1))); - case "\xCD": // uint 16 - $d = substr($buffer,$pos,2); - $pos+=2; - return current(unpack('n',$d)); - case "\xCE": // uint 32 - $d = substr($buffer,$pos,4); - $pos+=4; - return current(unpack('N',$d)); - case "\xCF": // uint 64 - $d = substr($buffer,$pos,8); - $pos+=8; - // Unpack into two uint32 and re-assemble - $dat = unpack('Np1/Np2',$d); - $dat['p1'] = $dat['p1'] << 32; - return $dat['p1']|$dat['p2']; - - // Signed integers - case "\xD0": // int 8 - return current(unpack('c',substr($buffer,$pos++,1))); - case "\xD1": // int 16 - $d = substr($buffer,$pos,2); - $pos+=2; - return (current(unpack('n',~$d))+1)*-1; - case "\xD2": // int 32 - $d = 
substr($buffer,$pos,4); - $pos+=4; - return (current(unpack('N',~$d))+1)*-1; - case "\xD3": // int 64 - $d = substr($buffer,$pos,8); - $pos+=8; - $dat = unpack('Np1/Np2',~$d); - $dat['p1'] = $dat['p1'] << 32; - return (($dat['p1']|$dat['p2'])+1)*-1; - - // String / Raw - case "\xDA": // raw 16 - $d = substr($buffer,$pos,2); - $pos+=2; - $len = current(unpack('n',$d)); - $d = substr($buffer,$pos,$len); - $pos+=$len; - return current(unpack('a'.$len,$d)); - case "\xDB": // raw 32 - $d = substr($buffer,$pos,4); - $pos+=4; - $len = current(unpack('N',$d)); - $d = substr($buffer,$pos,$len); - $pos+=$len; - return current(unpack('a'.$len,$d)); - - // Floats - case "\xCA": // single-precision - $d = substr($buffer,$pos,4); - $pos+=4; - return current(unpack('f',$bigendian ? $d : strrev($d))); - case "\xCB": // double-precision - $d = substr($buffer,$pos,8); - $pos+=8; - return current(unpack('d',$bigendian ? $d : strrev($d))); - - } - - throw new \InvalidArgumentException('Can\'t unpack data with byte-header: '.$byte); + static $bigendian; + if (!isset($bigendian)) $bigendian = (pack('S',1)==pack('n',1)); + + // Use static variables so we can more easily handle recursive decoding + static $buffer; + static $pos; + if (!isset($buffer) || ($buffer!=$input) || $pos==strlen($buffer)) { + $buffer = $input; + $pos = 0; + } + + // Read a single byte + $byte = substr($buffer,$pos++,1); + + + // null + if ($byte == "\xC0") return null; + + // booleans + if ($byte == "\xC2") return false; + if ($byte == "\xC3") return true; + + // positive fixnum + if (($byte & "\x80") == "\x00") { + return current(unpack('C',$byte&"\x7F")); + } + + // negative fixnum + if (($byte & "\xE0") == "\xE0") { + return current(unpack('c',$byte&"\xFF")); + } + + // fixstr + if ((($byte ^ "\xA0") & "\xE0") == "\x00") { + $len = current(unpack('c',($byte ^ "\xA0"))); + if ($len == 0) return ""; + $d = substr($buffer,$pos,$len); + $pos+=$len; + $toret = current(unpack('a'.$len,$d)); + if ($raw || 
is_utf8($toret)) return $toret; + throw new \InvalidArgumentException('Can\'t unpack fixstr data that is not valid utf8: '.$toret); + } + + // fixarray, array16/32 + if ((($byte ^ "\x90") & "\xF0") == "\x00") { + // fixed array + $len = current(unpack('c',($byte ^ "\x90"))); + $data = array(); + for($i=0;$i<$len;$i++) { + $data[] = msgpack_unpackb($input, $raw); + } + return $data; + } else if ($byte == "\xDC" || $byte == "\xDD") { + if ($byte == "\xDC") { + $d = substr($buffer,$pos,2); + $pos+=2; + $len = current(unpack('n',$d)); + } + if ($byte == "\xDD") { + $d = substr($buffer,$pos,4); + $pos+=4; + $len = current(unpack('N',$d)); + } + $data = array(); + for($i=0;$i<$len;$i++) { + $data[] = msgpack_unpackb($input, $raw); + } + return $data; + } + + // fixmap, map16/32 + if ((($byte ^ "\x80") & "\xF0") == "\x00") { + // fixed map + $len = current(unpack('c',($byte ^ "\x80"))); + $data = array(); + for($i=0;$i<$len;$i++) { + $key = msgpack_unpackb($input, $raw); + $value = msgpack_unpackb($input, $raw); + $data[$key] = $value; + } + return $data; + } else if ($byte == "\xDE" || $byte == "\xDF") { + if ($byte == "\xDE") { + $d = substr($buffer,$pos,2); + $pos+=2; + $len = current(unpack('n',$d)); + } + if ($byte == "\xDF") { + $d = substr($buffer,$pos,4); + $pos+=4; + $len = current(unpack('N',$d)); + } + $data = array(); + for($i=0;$i<$len;$i++) { + $key = msgpack_unpackb($input, $raw); + $value = msgpack_unpackb($input, $raw); + $data[$key] = $value; + } + return $data; + } + + switch ($byte) { + // Unsigned integers + case "\xCC": // uint 8 + return current(unpack('C',substr($buffer,$pos++,1))); + case "\xCD": // uint 16 + $d = substr($buffer,$pos,2); + $pos+=2; + return current(unpack('n',$d)); + case "\xCE": // uint 32 + $d = substr($buffer,$pos,4); + $pos+=4; + return current(unpack('N',$d)); + case "\xCF": // uint 64 + $d = substr($buffer,$pos,8); + $pos+=8; + // Unpack into two uint32 and re-assemble + $dat = unpack('Np1/Np2',$d); + $dat['p1'] = $dat['p1'] 
<< 32; + return $dat['p1']|$dat['p2']; + + // Signed integers + case "\xD0": // int 8 + return current(unpack('c',substr($buffer,$pos++,1))); + case "\xD1": // int 16 + $d = substr($buffer,$pos,2); + $pos+=2; + // PHP does not have a "signed short, big-endian" unpacker + // Get unsigned version and convert to negative if needed + $unsigned = current(unpack('n',$d)); + return ($unsigned < 0x8000) ? $unsigned : ($unsigned & 0x7FFF) - 0x8000; + case "\xD2": // int 32 + $d = substr($buffer,$pos,4); + $pos+=4; + // again, there is no "int32, big-endian" unpacker + // the following might work on 32-bit machines, but fails on 64-bit + //return (current(unpack('N',~$d))+1)*-1; + $unsigned = current(unpack('N', $d)); + return ($unsigned < 0x80000000) ? $unsigned : ($unsigned & 0x7FFFFFFF) - 0x80000000; + case "\xD3": // int 64 + $d = substr($buffer,$pos,8); + $pos+=8; + $dat = unpack('Np1/Np2',~$d); + // this next line will cause p1 to be negative if + // high bit is set, on 64-bit machines + $dat['p1'] = $dat['p1'] << 32; + return (($dat['p1']|$dat['p2'])+1)*-1; + + // str8/16/32 + case "\xD9": // str8 + $d = substr($buffer,$pos,1); + $pos+=1; + $len = current(unpack('C',$d)); + $d = substr($buffer,$pos,$len); + $pos+=$len; + $toret = current(unpack('a'.$len,$d)); + if ($raw || is_utf8($toret)) return $toret; + throw new \InvalidArgumentException('Can\'t unpack str8 data that is not valid utf8: '.$toret); + case "\xDA": // str16 + $d = substr($buffer,$pos,2); + $pos+=2; + $len = current(unpack('n',$d)); + $d = substr($buffer,$pos,$len); + $pos+=$len; + $toret = current(unpack('a'.$len,$d)); + if ($raw || is_utf8($toret)) return $toret; + throw new \InvalidArgumentException('Can\'t unpack str16 data that is not valid utf8: '.$toret); + case "\xDB": // str32 + $d = substr($buffer,$pos,4); + $pos+=4; + $len = current(unpack('N',$d)); + $d = substr($buffer,$pos,$len); + $pos+=$len; + $toret = current(unpack('a'.$len,$d)); + if ($raw || is_utf8($toret)) return $toret; + throw 
new \InvalidArgumentException('Can\'t unpack str32 data that is not valid utf8: '.$toret); + + // bin8/16/32 + case "\xC4": // bin8 + $d = substr($buffer,$pos,1); + $pos+=1; + $len = current(unpack('C',$d)); + $d = substr($buffer,$pos,$len); + $pos+=$len; + return current(unpack('a'.$len,$d)); + case "\xC5": // bin16 + $d = substr($buffer,$pos,2); + $pos+=2; + $len = current(unpack('n',$d)); + $d = substr($buffer,$pos,$len); + $pos+=$len; + return current(unpack('a'.$len,$d)); + case "\xC6": // bin32 + $d = substr($buffer,$pos,4); + $pos+=4; + $len = current(unpack('N',$d)); + $d = substr($buffer,$pos,$len); + $pos+=$len; + return current(unpack('a'.$len,$d)); + + // Floats + case "\xCA": // single-precision + $d = substr($buffer,$pos,4); + $pos+=4; + return current(unpack('f',$bigendian ? $d : strrev($d))); + case "\xCB": // double-precision + $d = substr($buffer,$pos,8); + $pos+=8; + return current(unpack('d',$bigendian ? $d : strrev($d))); + + } + + // Not handled: ext8/16/32, fixext1/2/4/8/16, (never used byte) + throw new \InvalidArgumentException('Can\'t unpack data with byte-header: '.$byte); } diff --git a/msgpackTest.php b/msgpackTest.php new file mode 100644 index 0000000..d7cc5df --- /dev/null +++ b/msgpackTest.php @@ -0,0 +1,131 @@ +assertEquals($val, msgpack_unpackb(msgpack_packb($val))); + } + + public function roundTripProvider() + { + return array( + 'zero: 0' => array(0), + 'small: 1' => array(1), + 'small: 5' => array(5), + 'small: -1' => array(-1), + 'small: -2' => array(-2), + 'small: 35' => array(35), + 'small: -35' => array(-35), + 'boundry: 127' => array(127), + 'boundry: -127' => array(-127), + 'boundry: 0x7F' => array(0x7f), + 'boundry: 0x80' => array(0x80), + 'boundry: -0x7F' => array(-0x7f), + 'boundry: -0x80' => array(-0x80), + 'boundry: 0xFF' => array(0xff), + 'boundry: 0x7FFF' => array(0x7FFF), + 'boundry: -0x7FFF' => array(-0x7FFF), + 'boundry: 0x8000' => array(0x8000), + 'boundry: -0x8000' => array(-0x8000), + 'boundry: 0xFFFF' => 
array(0xFFFF), + 'boundry: -0xFFFF' => array(-0xFFFF), + 'boundry: 0x7FFFFFFF' => array(0x7fFFFFFF), + 'boundry: 0x80000000' => array(0x80000000), + 'boundry: 0xFFFFFFFF' => array(0xFFFFFFFF), + 'small: 128' => array(128), + 'small: -128' => array(-128), + 'medium: 1000' => array(1000), + 'medium: -1000' => array(-1000), + 'large: 100000' => array(100000), + 'large: -100000' => array(-100000), + 'huge: 10000000000' => array(10000000000), + 'huge: -10000000000' => array(-10000000000), + 'gigant: -223372036854775807' => array(-223372036854775807), + 'gigant: -9223372036854775807' => array(-9223372036854775807), + 'null' => array(null), + 'true' => array(true), + 'false' => array(false), + 'double: 0.1' => array(0.1), + 'double: 1.1' => array(1.1), + 'double: 123.456' => array(123.456), + 'double: -123456789.123456789' => array(-123456789.123456789), + 'double: 1e128' => array(1e128), + 'empty: ""' => array(""), + 'string: "foobar"' => array("foobar"), + 'string: "Lorem ipsum dolor sit amet amet."' => array("Lorem ipsum dolor sit amet amet."), + 'string: ""' => array(""), + 'array("foo", "foo", "foo")' => array(array("foo", "foo", "foo")), + 'array("one" => 1, "two" => 2)' => array(array("one" => 1, "two" => 2)), + 'array("kek" => "lol", "lol" => "kek")' => array(array("kek" => "lol", "lol" => "kek")), + 'array("")' => array( array() ), + 'array(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16)' => array(array(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16)), + 'associative array with more than 15 entries' => array(array("f1"=>1,"f2"=>2,"f3"=>3,"f4"=>4,"f5"=>5,"f6"=>6,"f7"=>7,"f8"=>8,"f9"=>9,"f10"=>10,"f11"=>11,"f12"=>12,"f13"=>13,"f14"=>14,"f15"=>15,"f16"=>16)), + ); + } + + // PHP has poor binary support for 16-bit integers, so just iterate of all of them + public function testShortIntTrip() + { + for ($i = -0x10000; $i <= 0x10000; $i += 29) { + $this->assertEquals($i, msgpack_unpackb(msgpack_packb($i))); + } + } + + /** + * test to make unpack(pack(val)) is identical + * + * 
@dataProvider negativesProvider + */ + public function testNegatives($hex, $val) + { + $this->assertEquals($val, msgpack_unpackb(hex2bin($hex))); + } + + public function negativesProvider() + { + return array( + // 8-bit signed integers + array("d000", 0), + array("d001", 1), + array("d0ff", -1), + array("d0fe", -2), + + // 16-bit signed integers + // –32,768 to 32,767 + array("d10000", 0), + array("d10001", 1), + array("d17fff", 0x7FFF), + array("d18000", -0x8000), + array("d1fffe", -2), + array("d1ffff", -1), + + // 32-bit signed integers + array("d200000000", 0), + array("d200000001", 1), + array("d27fffFFFF", 0x7FFFFFFF), + array("d280000000", -0x80000000), + array("d2ffFFFFfe", -2), + array("d2ffffFFFF", -1), + + // 64-bit signed integers + array("d30000000000000000", 0), + array("d30000000000000001", 1), + array("d3ffffffffffffffff", -1), + array("d30000000100000000", 0x100000000), + array("d300000001ffffffff", 0x1ffffffff), + array("d30fffffffffffffff", 0x0fffffffffffffff), + array("d31fffffffffffffff", 0x1fffffffffffffff) + ); + } +} +?> diff --git a/test.php b/test.php index 11368af..f910d6b 100644 --- a/test.php +++ b/test.php @@ -9,8 +9,8 @@ function test($type, $var) { echo "================\n".$type."\n"; - $e = msgpack_pack($var); - $d = msgpack_unpack($e); + $e = msgpack_packb($var); + $d = msgpack_unpackb($e); echo "\t".bin2hex($e)."\t".$e."\n\t"; echo str_replace("\n","\n\t",var_export($d,true))."\n"; @@ -53,4 +53,4 @@ function test($type, $var) test('array("foo", "foo", "foo")', array("foo", "foo", "foo")); test('array("one" => 1, "two" => 2))', array("one" => 1, "two" => 2)); test('array("kek" => "lol", "lol" => "kek")', array("kek" => "lol", "lol" => "kek")); -test('array("" => "empty")', array("" => "empty")); \ No newline at end of file +test('array("" => "empty")', array("" => "empty"));