From: Ori Livneh Date: Fri, 23 Oct 2015 18:10:10 +0000 (-0700) Subject: Improve normalization and sanitization of memcached keys X-Git-Tag: 1.31.0-rc.0~9275^2 X-Git-Url: https://git.heureux-cyclage.org/?p=lhc%2Fweb%2Fwiklou.git;a=commitdiff_plain;h=bf11cee6c5841188ef96f525626ff3c2d6ef82a4;hp=-c Improve normalization and sanitization of memcached keys The motivation for this patch came from seeing the following error in the memcached log: memcached ERROR: Memcached error for key "flowdb:flow_ref:wiki:by-source:\ v3:0:tawikinews:பூமிக்கு_அச்சுறுத்தலான_சிறுகோள்களைக்_கண்டுபிடிக்கும்_முயற்சியில்_தனியார்_நிறுவன0jம்:4.7" \ on server ":": A BAD KEY WAS PROVIDED/CHARACTERS OUT OF RANGE I submitted a fix to Flow in I26e531f6, but I noticed that AbuseFilter had a similar issue (fixed by Aaron in I27b51a4b), so I started thinking about how to solve this more generally: * The regular expression we current use to sanitize keys does not cover characters outside the ASCII range, but such characters can be illegal if one of their constituent bytes (when taken by itself) is an ASCII control character. So change the regular expression to cover any and all characters that fall outside the range \x22-\x7e (and '#' -- see below). * Enforce a key length limit of 255 bytes, which is the maximum length permitted by the memcached protocol. The Tamil segment in the key above is 84 characters, but 233 bytes in UTF-8, which become 684 characters when URL-encoded. To fix this, try to shrink any segment that would push the total key length over the limit by md5()ing it. If the end result is *still* over the limit (this would happen if, for example, $args consists of many short strings), then concatenate all args together and MD5 them. * MD5'd arguments are prefixed with '#'. Any "organic" '#'s in the key segments are URL-encoded. Change-Id: Ia46987d3b0a09bb6b1952abd936d4c72ea7c56a0 --- bf11cee6c5841188ef96f525626ff3c2d6ef82a4 diff --git a/includes/objectcache/MemcachedBagOStuff.php b/includes/objectcache/MemcachedBagOStuff.php index 95f5c8d55f..12ba83e7c7 100644 --- a/includes/objectcache/MemcachedBagOStuff.php +++ b/includes/objectcache/MemcachedBagOStuff.php @@ -104,6 +104,46 @@ class MemcachedBagOStuff extends BagOStuff { return $this->client; } + /** + * Construct a cache key. + * + * @since 1.27 + * @param string $keyspace + * @param array $args + * @return string + */ + public function makeKeyInternal( $keyspace, $args ) { + // Memcached keys have a maximum length of 255 characters. From that, + // subtract the number of characters we need for the keyspace and for + // the separator character needed for each argument. + $charsLeft = 255 - strlen( $keyspace ) - count( $args ); + + $that = $this; + $args = array_map( + function ( $arg ) use ( $that, &$charsLeft ) { + // Because MemcachedBagOStuff::encodeKey() will be called again + // with this input once the key is actually used, we have to + // encode pound signs here rather than in encodeKey(). + $arg = $that->encodeKey( str_replace( '#', '%23', $arg ) ); + + // 33 = 32 characters for the MD5 + 1 for the '#' prefix. + if ( $charsLeft > 33 && strlen( $arg ) > $charsLeft ) { + $arg = '#' . md5( $arg ); + } + + $charsLeft -= strlen( $arg ); + return $arg; + }, + $args + ); + + if ( $charsLeft < 0 ) { + $args = array( '##' . md5( implode( ':', $args ) ) ); + } + + return parent::makeKeyInternal( $keyspace, $args ); + } + /** * Encode a key for use on the wire inside the memcached protocol. * @@ -115,7 +155,7 @@ class MemcachedBagOStuff extends BagOStuff { * @return string */ public function encodeKey( $key ) { - return preg_replace_callback( '/[\x00-\x20\x25\x7f]+/', + return preg_replace_callback( '/[^\x21-\x7e]+/', array( $this, 'encodeKeyCallback' ), $key ); } diff --git a/tests/phpunit/includes/objectcache/MemcachedBagOStuffTest.php b/tests/phpunit/includes/objectcache/MemcachedBagOStuffTest.php new file mode 100644 index 0000000000..64478a2968 --- /dev/null +++ b/tests/phpunit/includes/objectcache/MemcachedBagOStuffTest.php @@ -0,0 +1,55 @@ +cache = new MemcachedBagOStuff( array( 'keyspace' => 'test' ) ); + } + + /** + * @covers MemcachedBagOStuff::makeKeyInternal + */ + public function testKeyNormalization() { + $this->assertEquals( + $this->cache->makeKey( 'vanilla' ), + 'test:vanilla' + ); + + $this->assertEquals( + $this->cache->makeKey( 'punctuation_marks_are_ok', '!@$%^&*()' ), + 'test:punctuation_marks_are_ok:!@$%^&*()' + ); + + $this->assertEquals( + $this->cache->makeKey( 'but spaces', 'hashes#', "and\nnewlines", 'are_not' ), + 'test:but%20spaces:hashes%23:and%0Anewlines:are_not' + ); + + $this->assertEquals( + $this->cache->makeKey( 'this', 'key', 'contains', '𝕞𝕦𝕝𝕥𝕚𝕓𝕪𝕥𝕖', 'characters' ), + 'test:this:key:contains:%F0%9D%95%9E%F0%9D%95%A6%F0%9D%95%9D%F0%9D%95%A5%F0%9' . + 'D%95%9A%F0%9D%95%93%F0%9D%95%AA%F0%9D%95%A5%F0%9D%95%96:characters' + ); + + $this->assertEquals( + $this->cache->makeKey( 'this', 'key', 'contains', '𝕥𝕠𝕠 𝕞𝕒𝕟𝕪 𝕞𝕦𝕝𝕥𝕚𝕓𝕪𝕥𝕖 𝕔𝕙𝕒𝕣𝕒𝕔𝕥𝕖𝕣𝕤' ), + 'test:this:key:contains:#60190c8f5a63ba5438b124f5c10b91d0' + ); + + $this->assertEquals( + $this->cache->makeKey( '𝕖𝕧𝕖𝕟', '𝕚𝕗', '𝕨𝕖', '𝕄𝔻𝟝', '𝕖𝕒𝕔𝕙', + '𝕒𝕣𝕘𝕦𝕞𝕖𝕟𝕥', '𝕥𝕙𝕚𝕤', '𝕜𝕖𝕪', '𝕨𝕠𝕦𝕝𝕕', '𝕤𝕥𝕚𝕝𝕝', '𝕓𝕖', '𝕥𝕠𝕠', '𝕝𝕠𝕟𝕘' ), + 'test:##5820ad1d105aa4dc698585c39df73e19' + ); + + $this->assertEquals( + $this->cache->makeKey( '##5820ad1d105aa4dc698585c39df73e19' ), + 'test:%23%235820ad1d105aa4dc698585c39df73e19' + ); + } +}