Add class implementing MessagePack serialization
author: Ori Livneh <ori@wikimedia.org>
Fri, 3 Jan 2014 01:07:10 +0000 (17:07 -0800)
committer: Ori Livneh <ori@wikimedia.org>
Fri, 3 Jan 2014 02:04:24 +0000 (18:04 -0800)
MessagePack is a space-efficient binary data interchange format. I am going to
use it to encode profiling data in ProfilerSimpleUDP. The official PHP
implementation is provided as a C extension, so using it would further
encumber migration to HHVM. This patch adds MWMessagePack, a class implementing
a pack() method for encoding native PHP values as MessagePack byte strings. The
implementation is based on <https://github.com/onlinecity/msgpack-php>, but
revised for clarity and conformity with MediaWiki coding conventions.

Change-Id: Id2833c5a9da659cb13ec1330de9dd57138ada9c8

includes/AutoLoader.php
includes/libs/MWMessagePack.php [new file with mode: 0644]
tests/phpunit/includes/libs/MWMessagePackTest.php [new file with mode: 0644]

index 1f81249..5b99c8d 100644 (file)
@@ -185,7 +185,6 @@ $wgAutoloadLocalClasses = array(
        'Revision' => 'includes/Revision.php',
        'RevisionList' => 'includes/RevisionList.php',
        'RSSFeed' => 'includes/Feed.php',
-       'RunningStat' => 'includes/profiler/RunningStat.php',
        'Sanitizer' => 'includes/Sanitizer.php',
        'SiteConfiguration' => 'includes/SiteConfiguration.php',
        'SiteStats' => 'includes/SiteStats.php',
@@ -688,6 +687,7 @@ $wgAutoloadLocalClasses = array(
        'JSParser' => 'includes/libs/jsminplus.php',
        'JSToken' => 'includes/libs/jsminplus.php',
        'JSTokenizer' => 'includes/libs/jsminplus.php',
+       'MWMessagePack' => 'includes/libs/MWMessagePack.php',
        'ScopedCallback' => 'includes/libs/ScopedCallback.php',
        'ScopedPHPTimeout' => 'includes/libs/ScopedPHPTimeout.php',
        'XmlTypeCheck' => 'includes/libs/XmlTypeCheck.php',
@@ -831,6 +831,7 @@ $wgAutoloadLocalClasses = array(
        'ProfilerSimpleUDP' => 'includes/profiler/ProfilerSimpleUDP.php',
        'ProfilerStub' => 'includes/profiler/ProfilerStub.php',
        'ProfileSection' => 'includes/profiler/Profiler.php',
+       'RunningStat' => 'includes/profiler/RunningStat.php',
 
        # includes/rcfeed
        'RCFeedEngine' => 'includes/rcfeed/RCFeedEngine.php',
diff --git a/includes/libs/MWMessagePack.php b/includes/libs/MWMessagePack.php
new file mode 100644 (file)
index 0000000..b44635d
--- /dev/null
@@ -0,0 +1,179 @@
+<?php
+/**
+ * MessagePack serializer
+ *
+ * MessagePack is a space-efficient binary data interchange format. This
+ * class provides a pack() method that encodes native PHP values as MessagePack
+ * binary strings. The implementation is derived from msgpack-php.
+ *
+ * Copyright (c) 2013 Ori Livneh <ori@wikimedia.org>
+ * Copyright (c) 2011 OnlineCity <https://github.com/onlinecity/msgpack-php>.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @see <http://msgpack.org/>
+ * @see <http://wiki.msgpack.org/display/MSGPACK/Format+specification>
+ *
+ * @since 1.23
+ * @file
+ */
+class MWMessagePack {
+
+       /**
+        * @var boolean|null Whether current system is bigendian. Detected lazily
+        *   by pack() on its first call; null until then.
+        */
+       public static $bigendian;
+
+       /**
+        * Encode a value using MessagePack
+        *
+        * This method supports null, boolean, integer, float, string and array
+        * (both indexed and associative) types. Object serialization is not
+        * supported.
+        *
+        * Arrays whose keys are exactly 0..n-1 in order are encoded as MessagePack
+        * arrays; any other array is encoded as a map, with each key packed like
+        * a regular value.
+        *
+        * Integers, lengths and counts are written with pack()'s fixed big-endian
+        * codes ('n', 'N'), so only the float branch depends on host byte order.
+        *
+        * @param mixed $value
+        * @return string MessagePack-encoded byte string
+        * @throws LengthException If a string or array exceeds 4294967295
+        *   bytes/elements, or if the value's type is not supported.
+        */
+       public static function pack( $value ) {
+               if ( self::$bigendian === null ) {
+                       // 'S' is host-order, 'n' is big-endian: equal only on big-endian hosts.
+                       self::$bigendian = pack( 'S', 1 ) === pack( 'n', 1 );
+               }
+
+               switch ( gettype( $value ) ) {
+               case 'NULL':
+                       return "\xC0";
+
+               case 'boolean':
+                       return $value ? "\xC3" : "\xC2";
+
+               case 'double':
+               case 'float':
+                       // pack( 'd' ) emits host byte order; MessagePack wants big-endian.
+                       return self::$bigendian
+                               ? "\xCB" . pack( 'd', $value )
+                               : "\xCB" . strrev( pack( 'd', $value ) );
+
+               case 'string':
+                       // fix raw (< 32 bytes), raw 16 or raw 32, chosen by byte length
+                       $length = strlen( $value );
+                       if ( $length < 32 ) {
+                               return pack( 'Ca*', 0xA0 | $length, $value );
+                       } elseif ( $length <= 0xFFFF ) {
+                               return pack( 'Cna*', 0xDA, $length, $value );
+                       } elseif ( $length <= 0xFFFFFFFF ) {
+                               return pack( 'CNa*', 0xDB, $length, $value );
+                       }
+                       throw new LengthException( "String too long: $length (max: 4294967295)." );
+
+               case 'integer':
+                       // 'n' and 'N' write the low 16/32 bits of their argument in
+                       // big-endian order on every host, so negative values come out
+                       // as two's complement and no byte-order fix-up is required.
+                       if ( $value >= 0 ) {
+                               if ( $value <= 0x7F ) {
+                                       // positive fixnum
+                                       return chr( $value );
+                               }
+                               if ( $value <= 0xFF ) {
+                                       // uint8
+                                       return pack( 'CC', 0xCC, $value );
+                               }
+                               if ( $value <= 0xFFFF ) {
+                                       // uint16
+                                       return pack( 'Cn', 0xCD, $value );
+                               }
+                               if ( $value <= 0xFFFFFFFF ) {
+                                       // uint32
+                                       return pack( 'CN', 0xCE, $value );
+                               }
+                               // uint64 (64-bit PHP only): high word first, then low word
+                               return pack( 'CNN', 0xCF, $value >> 32, $value & 0xFFFFFFFF );
+                       }
+                       if ( $value >= -32 ) {
+                               // negative fixnum
+                               return pack( 'c', $value );
+                       }
+                       if ( $value >= -0x80 ) {
+                               // int8
+                               return pack( 'Cc', 0xD0, $value );
+                       }
+                       if ( $value >= -0x8000 ) {
+                               // int16
+                               return pack( 'Cn', 0xD1, $value );
+                       }
+                       if ( $value >= -0x80000000 ) {
+                               // int32
+                               return pack( 'CN', 0xD2, $value );
+                       }
+                       // int64 (64-bit PHP only): high word first, then low word
+                       return pack( 'CNN', 0xD3, ( $value >> 32 ) & 0xFFFFFFFF, $value & 0xFFFFFFFF );
+
+               case 'array':
+                       // A PHP array is a list only if its keys are exactly 0..n-1 in order.
+                       $associative = array_values( $value ) !== $value;
+                       $length = count( $value );
+                       $buffer = '';
+
+                       if ( $length > 0xFFFFFFFF ) {
+                               throw new LengthException( "Array too long: $length (max: 4294967295)." );
+                       }
+
+                       if ( $associative ) {
+                               // map header (fix map, map 16 or map 32), then key/value pairs
+                               if ( $length < 16 ) {
+                                       $buffer .= pack( 'C', 0x80 | $length );
+                               } elseif ( $length <= 0xFFFF ) {
+                                       $buffer .= pack( 'Cn', 0xDE, $length );
+                               } else {
+                                       $buffer .= pack( 'CN', 0xDF, $length );
+                               }
+                               foreach ( $value as $k => $v ) {
+                                       $buffer .= self::pack( $k );
+                                       $buffer .= self::pack( $v );
+                               }
+                       } else {
+                               // array header (fix array, array 16 or array 32), then elements
+                               if ( $length < 16 ) {
+                                       $buffer .= pack( 'C', 0x90 | $length );
+                               } elseif ( $length <= 0xFFFF ) {
+                                       $buffer .= pack( 'Cn', 0xDC, $length );
+                               } else {
+                                       $buffer .= pack( 'CN', 0xDD, $length );
+                               }
+                               foreach ( $value as $v ) {
+                                       $buffer .= self::pack( $v );
+                               }
+                       }
+                       return $buffer;
+
+               default:
+                       throw new LengthException( 'Unsupported type: ' . gettype( $value ) );
+               }
+       }
+}
diff --git a/tests/phpunit/includes/libs/MWMessagePackTest.php b/tests/phpunit/includes/libs/MWMessagePackTest.php
new file mode 100644 (file)
index 0000000..de5848d
--- /dev/null
@@ -0,0 +1,72 @@
+<?php
+/**
+ * PHP Unit tests for MWMessagePack
+ * @covers MWMessagePack
+ */
+class MWMessagePackTest extends MediaWikiTestCase {
+
+       /** @var array Array of test cases, keyed by type. Each type is an array of
+        * (value, expected encoding as hex string). The expected values were
+        * generated using <https://github.com/onlinecity/msgpack-php>, which
+        * includes a serialization function.
+        */
+       public $data = array(
+               'integer' => array(
+                       array(  0, '00' ),
+                       array(  1, '01' ),
+                       array(  5, '05' ),
+                       array(  -1, 'ff' ),
+                       array(  -2, 'fe' ),
+                       array(  35, '23' ),
+                       array(  -35, 'd0dd' ),
+                       array(  128, 'cc80' ),
+                       array(  -128, 'd080' ),
+                       array(  1000, 'cd03e8' ),
+                       array(  -1000, 'd1fc18' ),
+                       array(  100000, 'ce000186a0' ),
+                       array(  -100000, 'd2fffe7960' ),
+                       array(  10000000000, 'cf00000002540be400' ),
+                       array(  -10000000000, 'd3fffffffdabf41c00' ),
+                       array(  -223372036854775807, 'd3fce66c50e2840001' ),
+                       array(  -9223372036854775807, 'd38000000000000001' ),
+               ),
+               'NULL' => array(
+                       array( null, 'c0' ),
+               ),
+               'boolean' => array(
+                       array( true, 'c3' ),
+                       array( false, 'c2' ),
+               ),
+               'double' => array(
+                       array(  0.1, 'cb3fb999999999999a' ),
+                       array(  1.1, 'cb3ff199999999999a' ),
+                       array(  123.456, 'cb405edd2f1a9fbe77' ),
+               ),
+               'string' => array(
+                       array(  '', 'a0' ),
+                       array( 'foobar', 'a6666f6f626172' ),
+                       array(
+                               'Lorem ipsum dolor sit amet amet.',
+                               'da00204c6f72656d20697073756d20646f6c6f722073697420616d657420616d65742e'
+                       ),
+               ),
+               'array' => array(
+                       array( array( 'abc', 'def', 'ghi' ), '93a3616263a3646566a3676869' ),
+                       array( array( 'one' => 1, 'two' => 2 ), '82a36f6e6501a374776f02' ),
+               ),
+       );
+
+       /**
+        * Verify that every fixture value packs to its expected hex encoding.
+        * @covers MWMessagePack::pack
+        */
+       public function testMessagePack() {
+               foreach ( $this->data as $type => $cases ) {
+                       foreach ( $cases as $case ) {
+                               list( $value, $expected ) = $case;
+                               $actual = bin2hex( MWMessagePack::pack( $value ) );
+                               $this->assertEquals( $expected, $actual, $type );
+                       }
+               }
+       }
+}