Fix message cache expiry semantics
authorTim Starling <tstarling@wikimedia.org>
Wed, 3 Apr 2013 10:54:34 +0000 (21:54 +1100)
committerAaron Schulz <aschulz@wikimedia.org>
Wed, 10 Apr 2013 03:40:20 +0000 (20:40 -0700)
* Use the stale message cache while the new one is being generated
* Revert I811755d4 (make message cache load failure fatal). This
  escalated several very plausible temporary site issues from barely
  noticeable to complete downtime -- for example, memcached being down
  on a site with only one memcached server.
* Remove $wgLocalMessageCacheSerialized, it's always been pointless
* Clarify a couple of comments.
* Increased lock wait timeout to 30s
* Make lock() fail immediately on memcached connection refused

Tests done:
* With local cache enabled: normal cold refill; refill local from
  global cache; use stale local cache during remote refill; use stale
  global cache during remote refill; cold cache wait for remote refill;
  saveToCaches() failure; memcached connection refused.
* With local cache disabled: saveToCaches() failure; cache disabled due
  to "error" status key; memcached connection refused.

Setting a 1-day expiry in memcached, with a ~10s CPU cost to replace, is
not the best idea since it inevitably leads to a cache stampede. Dealing
with the stampede by waiting for a lock is not ideal, even if it were
implemented properly, since it's not necessary to deliver perfectly
fresh message cache data to all clients.

This is especially obvious when you note that barring bugs, expiry and
regeneration always gives you back the exact same data, because we have
incremental updates (MessageCache::replace()). Keeping all clients
waiting for 10s just to give them the data they have already is pretty
pointless.

So, continue to serve the site from the stale message cache while the
new one is being generated.

One caveat: if local caching enabled, when the message cache becomes
stale, a sudden spike in network bandwidth may result due to the full
array (also typically stale) being fetched from the shared cache.

Bug: 43516
Change-Id: Ia145fd90da33956d8aac127634606aaecfaa176b

RELEASE-NOTES-1.22
includes/DefaultSettings.php
includes/cache/MessageCache.php

index 116c58e..a77daec 100644 (file)
@@ -10,6 +10,7 @@ production.
 
 === Configuration changes in 1.22 ===
 * $wgRedirectScript was removed. It was unused.
+* Removed $wgLocalMessageCacheSerialized, it is now always true.
 
 === New features in 1.22 ===
 * (bug 44525) mediawiki.jqueryMsg can now parse (whitelisted) HTML elements and attributes.
index d660f50..53df457 100644 (file)
@@ -1892,13 +1892,6 @@ $wgMemCachedTimeout = 500000;
  */
 $wgUseLocalMessageCache = false;
 
-/**
- * Defines format of local cache.
- *  - true: Serialized object
- *  - false: PHP source file (Warning - security risk)
- */
-$wgLocalMessageCacheSerialized = true;
-
 /**
  * Instead of caching everything, only cache those messages which have
  * been customised in the site content language. This means that
index 6cd167c..8c1a068 100644 (file)
@@ -25,8 +25,8 @@
  *
  */
 define( 'MSG_LOAD_TIMEOUT', 60 );
-define( 'MSG_LOCK_TIMEOUT', 10 );
-define( 'MSG_WAIT_TIMEOUT', 10 );
+define( 'MSG_LOCK_TIMEOUT', 30 );
+define( 'MSG_WAIT_TIMEOUT', 30 );
 define( 'MSG_CACHE_VERSION', 1 );
 
 /**
@@ -119,15 +119,13 @@ class MessageCache {
 
        /**
         * Try to load the cache from a local file.
-        * Actual format of the file depends on the $wgLocalMessageCacheSerialized
-        * setting.
         *
         * @param string $hash the hash of contents, to check validity.
         * @param $code Mixed: Optional language code, see documenation of load().
-        * @return bool on failure.
+        * @return The cache array
         */
-       function loadFromLocal( $hash, $code ) {
-               global $wgCacheDirectory, $wgLocalMessageCacheSerialized;
+       function getLocalCache( $hash, $code ) {
+               global $wgCacheDirectory;
 
                $filename = "$wgCacheDirectory/messages-" . wfWikiID() . "-$code";
 
@@ -139,31 +137,19 @@ class MessageCache {
                        return false; // No cache file
                }
 
-               if ( $wgLocalMessageCacheSerialized ) {
-                       // Check to see if the file has the hash specified
-                       $localHash = fread( $file, 32 );
-                       if ( $hash === $localHash ) {
-                               // All good, get the rest of it
-                               $serialized = '';
-                               while ( !feof( $file ) ) {
-                                       $serialized .= fread( $file, 100000 );
-                               }
-                               fclose( $file );
-                               return $this->setCache( unserialize( $serialized ), $code );
-                       } else {
-                               fclose( $file );
-                               return false; // Wrong hash
+               // Check to see if the file has the hash specified
+               $localHash = fread( $file, 32 );
+               if ( $hash === $localHash ) {
+                       // All good, get the rest of it
+                       $serialized = '';
+                       while ( !feof( $file ) ) {
+                               $serialized .= fread( $file, 100000 );
                        }
+                       fclose( $file );
+                       return unserialize( $serialized );
                } else {
-                       $localHash = substr( fread( $file, 40 ), 8 );
                        fclose( $file );
-                       if ( $hash != $localHash ) {
-                               return false; // Wrong hash
-                       }
-
-                       # Require overwrites the member variable or just shadows it?
-                       require( $filename );
-                       return $this->setCache( $this->mCache, $code );
+                       return false; // Wrong hash
                }
        }
 
@@ -192,55 +178,6 @@ class MessageCache {
                wfRestoreWarnings();
        }
 
-       function saveToScript( $array, $hash, $code ) {
-               global $wgCacheDirectory;
-
-               $filename = "$wgCacheDirectory/messages-" . wfWikiID() . "-$code";
-               $tempFilename = $filename . '.tmp';
-               wfMkdirParents( $wgCacheDirectory, null, __METHOD__ ); // might fail
-
-               wfSuppressWarnings();
-               $file = fopen( $tempFilename, 'w' );
-               wfRestoreWarnings();
-
-               if ( !$file ) {
-                       wfDebug( "Unable to open local cache file for writing\n" );
-                       return;
-               }
-
-               fwrite( $file, "<?php\n//$hash\n\n \$this->mCache = array(" );
-
-               foreach ( $array as $key => $message ) {
-                       $key = $this->escapeForScript( $key );
-                       $message = $this->escapeForScript( $message );
-                       fwrite( $file, "'$key' => '$message',\n" );
-               }
-
-               fwrite( $file, ");\n?>" );
-               fclose( $file);
-               rename( $tempFilename, $filename );
-       }
-
-       function escapeForScript( $string ) {
-               $string = str_replace( '\\', '\\\\', $string );
-               $string = str_replace( '\'', '\\\'', $string );
-               return $string;
-       }
-
-       /**
-        * Set the cache to $cache, if it is valid. Otherwise set the cache to false.
-        *
-        * @return bool
-        */
-       function setCache( $cache, $code ) {
-               if ( isset( $cache['VERSION'] ) && $cache['VERSION'] == MSG_CACHE_VERSION ) {
-                       $this->mCache[$code] = $cache;
-                       return true;
-               } else {
-                       return false;
-               }
-       }
-
        /**
         * Loads messages from caches or from database in this order:
         * (1) local message cache (if $wgUseLocalMessageCache is enabled)
@@ -264,8 +201,6 @@ class MessageCache {
        function load( $code = false ) {
                global $wgUseLocalMessageCache;
 
-               $exception = null; // deferred error
-
                if( !is_string( $code ) ) {
                        # This isn't really nice, so at least make a note about it and try to
                        # fall back
@@ -291,96 +226,161 @@ class MessageCache {
                # Loading code starts
                wfProfileIn( __METHOD__ );
                $success = false; # Keep track of success
+               $staleCache = false; # a cache array with expired data, or false if none has been loaded
                $where = array(); # Debug info, delayed to avoid spamming debug log too much
                $cacheKey = wfMemcKey( 'messages', $code ); # Key in memc for messages
 
-               # (1) local cache
+               # Local cache
                # Hash of the contents is stored in memcache, to detect if local cache goes
-               # out of date (due to update in other thread?)
+               # out of date (e.g. due to replace() on some other server)
                if ( $wgUseLocalMessageCache ) {
                        wfProfileIn( __METHOD__ . '-fromlocal' );
 
                        $hash = $this->mMemc->get( wfMemcKey( 'messages', $code, 'hash' ) );
                        if ( $hash ) {
-                               $success = $this->loadFromLocal( $hash, $code );
-                               if ( $success ) $where[] = 'got from local cache';
+                               $cache = $this->getLocalCache( $hash, $code );
+                               if ( !$cache ) {
+                                       $where[] = 'local cache is empty or has the wrong hash';
+                               } elseif ( $this->isCacheExpired( $cache ) ) {
+                                       $where[] = 'local cache is expired';
+                                       $staleCache = $cache;
+                               } else {
+                                       $where[] = 'got from local cache';
+                                       $success = true;
+                                       $this->mCache[$code] = $cache;
+                               }
                        }
                        wfProfileOut( __METHOD__ . '-fromlocal' );
                }
 
-               # (2) memcache
-               # Fails if nothing in cache, or in the wrong version.
                if ( !$success ) {
-                       wfProfileIn( __METHOD__ . '-fromcache' );
-                       $cache = $this->mMemc->get( $cacheKey );
-                       $success = $this->setCache( $cache, $code );
-                       if ( $success ) {
-                               $where[] = 'got from global cache';
-                               $this->saveToCaches( $cache, false, $code );
-                       }
-                       wfProfileOut( __METHOD__ . '-fromcache' );
-               }
+                       # Try the global cache. If it is empty, try to acquire a lock. If
+                       # the lock can't be acquired, wait for the other thread to finish
+                       # and then try the global cache a second time.
+                       for ( $failedAttempts = 0; $failedAttempts < 2; $failedAttempts++ ) {
+                               wfProfileIn( __METHOD__ . '-fromcache' );
+                               $cache = $this->mMemc->get( $cacheKey );
+                               if ( !$cache ) {
+                                       $where[] = 'global cache is empty';
+                               } elseif ( $this->isCacheExpired( $cache ) ) {
+                                       $where[] = 'global cache is expired';
+                                       $staleCache = $cache;
+                               } else {
+                                       $where[] = 'got from global cache';
+                                       $this->mCache[$code] = $cache;
+                                       $this->saveToCaches( $cache, 'local-only', $code );
+                                       $success = true;
+                               }
 
-               # (3)
-               # Nothing in caches... so we need create one and store it in caches
-               if ( !$success ) {
-                       $where[] = 'cache is empty';
-                       $where[] = 'loading from database';
-
-                       if ( $this->lock( $cacheKey ) ) {
-                               $that = $this;
-                               $osc = new ScopedCallback( function() use ( $that, $cacheKey ) {
-                                       $that->unlock( $cacheKey );
-                               } );
-                       }
-                       # Limit the concurrency of loadFromDB to a single process
-                       # This prevents the site from going down when the cache expires
-                       $statusKey = wfMemcKey( 'messages', $code, 'status' );
-                       $success = $this->mMemc->add( $statusKey, 'loading', MSG_LOAD_TIMEOUT );
-                       if ( $success ) { // acquired lock
-                               $cache = $this->mMemc;
-                               $isc = new ScopedCallback( function() use ( $cache, $statusKey ) {
-                                       $cache->delete( $statusKey );
-                               } );
-                               $cache = $this->loadFromDB( $code );
-                               $success = $this->setCache( $cache, $code );
-                               if ( $success ) { // messages loaded
-                                       $success = $this->saveToCaches( $cache, true, $code );
-                                       $isc = null; // unlock
-                                       if ( !$success ) {
-                                               $this->mMemc->set( $statusKey, 'error', 60 * 5 );
-                                               wfDebug( __METHOD__ . ": set() error: restart memcached server!\n" );
-                                               $exception = new MWException( "Could not save cache for '$code'." );
+                               wfProfileOut( __METHOD__ . '-fromcache' );
+
+                               if ( $success ) {
+                                       # Done, no need to retry
+                                       break;
+                               }
+
+                               # We need to call loadFromDB. Limit the concurrency to a single
+                               # process. This prevents the site from going down when the cache
+                               # expires.
+                               $statusKey = wfMemcKey( 'messages', $code, 'status' );
+                               $acquired = $this->mMemc->add( $statusKey, 'loading', MSG_LOAD_TIMEOUT );
+                               if ( $acquired ) {
+                                       # Unlock the status key if there is an exception
+                                       $that = $this;
+                                       $statusUnlocker = new ScopedCallback( function() use ( $that, $statusKey ) {
+                                               $that->mMemc->delete( $statusKey );
+                                       } );
+
+                                       # Now let's regenerate
+                                       $where[] = 'loading from database';
+
+                                       # Lock the cache to prevent conflicting writes
+                                       # If this lock fails, it doesn't really matter, it just means the
+                                       # write is potentially non-atomic, e.g. the results of a replace()
+                                       # may be discarded.
+                                       if ( $this->lock( $cacheKey ) ) {
+                                               $mainUnlocker = new ScopedCallback( function() use ( $that, $cacheKey ) {
+                                                       $that->unlock( $cacheKey );
+                                               } );
+                                       } else {
+                                               $mainUnlocker = null;
+                                               $where[] = 'could not acquire main lock';
                                        }
+
+                                       $cache = $this->loadFromDB( $code );
+                                       $this->mCache[$code] = $cache;
+                                       $success = true;
+                                       $saveSuccess = $this->saveToCaches( $cache, 'all', $code );
+
+                                       # Unlock
+                                       ScopedCallback::consume( $mainUnlocker );
+                                       ScopedCallback::consume( $statusUnlocker );
+
+                                       if ( !$saveSuccess ) {
+                                               # Cache save has failed.
+                                               # There are two main scenarios where this could be a problem:
+                                               #
+                                               #   - The cache is more than the maximum size (typically
+                                               #     1MB compressed).
+                                               #
+                                               #   - Memcached has no space remaining in the relevant slab
+                                               #     class. This is unlikely with recent versions of
+                                               #     memcached.
+                                               #
+                                               # Either way, if there is a local cache, nothing bad will
+                                               # happen. If there is no local cache, disabling the message
+                                               # cache for all requests avoids incurring a loadFromDB()
+                                               # overhead on every request, and thus saves the wiki from
+                                               # complete downtime under moderate traffic conditions.
+                                               if ( !$wgUseLocalMessageCache ) {
+                                                       $this->mMemc->set( $statusKey, 'error', 60 * 5 );
+                                                       $where[] = 'could not save cache, disabled globally for 5 minutes';
+                                               } else {
+                                                       $where[] = "could not save global cache";
+                                               }
+                                       }
+
+                                       # Load from DB complete, no need to retry
+                                       break;
+                               } elseif ( $staleCache ) {
+                                       # Use the stale cache while some other thread constructs the new one
+                                       $where[] = 'using stale cache';
+                                       $this->mCache[$code] = $staleCache;
+                                       $success = true;
+                                       break;
+                               } elseif ( $failedAttempts > 0 ) {
+                                       # Already retried once, still failed, so don't do another lock/unlock cycle
+                                       # This case will typically be hit if memcached is down, or if
+                                       # loadFromDB() takes longer than MSG_WAIT_TIMEOUT
+                                       $where[] = "could not acquire status key.";
+                                       break;
                                } else {
-                                       $isc = null; // unlock
-                                       $exception = new MWException( "Could not load cache from DB for '$code'." );
+                                       $status = $this->mMemc->get( $statusKey );
+                                       if ( $status === 'error' ) {
+                                               # Disable cache
+                                               break;
+                                       } else {
+                                               # Wait for the other thread to finish, then retry
+                                               $where[] = 'waited for other thread to complete';
+                                               $this->lock( $cacheKey );
+                                               $this->unlock( $cacheKey );
+                                       }
                                }
-                       } else {
-                               $exception = new MWException( "Could not acquire '$statusKey' lock." );
                        }
-                       $osc = null; // unlock
                }
 
                if ( !$success ) {
+                       $where[] = 'loading FAILED - cache is disabled';
                        $this->mDisable = true;
                        $this->mCache = false;
-                       // This used to go on, but that led to lots of nasty side
-                       // effects like gadgets and sidebar getting cached with their
-                       // default content
-                       if ( $exception instanceof Exception ) {
-                               wfProfileOut( __METHOD__ );
-                               throw $exception;
-                       } else {
-                               wfProfileOut( __METHOD__ );
-                               throw new MWException( "MessageCache failed to load messages" );
-                       }
+                       # This used to throw an exception, but that led to nasty side effects like
+                       # the whole wiki being instantly down if the memcached server died
                } else {
                        # All good, just record the success
-                       $info = implode( ', ', $where );
-                       wfDebug( __METHOD__ . ": Loading $code... $info\n" );
                        $this->mLoadedLanguages[$code] = true;
                }
+               $info = implode( ', ', $where );
+               wfDebug( __METHOD__ . ": Loading $code... $info\n" );
                wfProfileOut( __METHOD__ );
                return $success;
        }
@@ -463,6 +463,7 @@ class MessageCache {
                }
 
                $cache['VERSION'] = MSG_CACHE_VERSION;
+               $cache['EXPIRY'] = wfTimestamp( TS_MW, time() + $this->mExpiry );
                wfProfileOut( __METHOD__ );
                return $cache;
        }
@@ -504,7 +505,7 @@ class MessageCache {
                }
 
                # Update caches
-               $this->saveToCaches( $this->mCache[$code], true, $code );
+               $this->saveToCaches( $this->mCache[$code], 'all', $code );
                $this->unlock( $cacheKey );
 
                // Also delete cached sidebar... just in case it is affected
@@ -530,22 +531,40 @@ class MessageCache {
                wfProfileOut( __METHOD__ );
        }
 
+       /**
+        * Is the given cache array expired due to time passing or a version change?
+        */
+       protected function isCacheExpired( $cache ) {
+               if ( !isset( $cache['VERSION'] ) || !isset( $cache['EXPIRY'] ) ) {
+                       return true;
+               }
+               if ( $cache['VERSION'] != MSG_CACHE_VERSION ) {
+                       return true;
+               }
+               if ( wfTimestampNow() >= $cache['EXPIRY'] ) {
+                       return true;
+               }
+               return false;
+       }
+
+
        /**
         * Shortcut to update caches.
         *
-        * @param array $cache cached messages with a version.
-        * @param bool $memc Wether to update or not memcache.
-        * @param string $code Language code.
+        * @param $cache array: cached messages with a version.
+        * @param $dest string: Either "local-only" to save to local caches only
+        *   or "all" to save to all caches.
+        * @param $code string: Language code.
         * @return bool on somekind of error.
         */
-       protected function saveToCaches( $cache, $memc = true, $code = false ) {
+       protected function saveToCaches( $cache, $dest, $code = false ) {
                wfProfileIn( __METHOD__ );
-               global $wgUseLocalMessageCache, $wgLocalMessageCacheSerialized;
+               global $wgUseLocalMessageCache;
 
                $cacheKey = wfMemcKey( 'messages', $code );
 
-               if ( $memc ) {
-                       $success = $this->mMemc->set( $cacheKey, $cache, $this->mExpiry );
+               if ( $dest === 'all' ) {
+                       $success = $this->mMemc->set( $cacheKey, $cache );
                } else {
                        $success = true;
                }
@@ -554,12 +573,8 @@ class MessageCache {
                if ( $wgUseLocalMessageCache ) {
                        $serialized = serialize( $cache );
                        $hash = md5( $serialized );
-                       $this->mMemc->set( wfMemcKey( 'messages', $code, 'hash' ), $hash, $this->mExpiry );
-                       if ( $wgLocalMessageCacheSerialized ) {
-                               $this->saveToLocal( $serialized, $hash, $code );
-                       } else {
-                               $this->saveToScript( $cache, $hash, $code );
-                       }
+                       $this->mMemc->set( wfMemcKey( 'messages', $code, 'hash' ), $hash );
+                       $this->saveToLocal( $serialized, $hash, $code );
                }
 
                wfProfileOut( __METHOD__ );
@@ -575,11 +590,25 @@ class MessageCache {
         */
        function lock( $key ) {
                $lockKey = $key . ':lock';
-               for ( $i = 0; $i < MSG_WAIT_TIMEOUT && !$this->mMemc->add( $lockKey, 1, MSG_LOCK_TIMEOUT ); $i++ ) {
+               $acquired = false;
+               $testDone = false;
+               for ( $i = 0; $i < MSG_WAIT_TIMEOUT && !$acquired; $i++ ) {
+                       $acquired = $this->mMemc->add( $lockKey, 1, MSG_LOCK_TIMEOUT );
+                       if ( $acquired ) {
+                               break;
+                       }
+
+                       # Fail fast if memcached is totally down
+                       if ( !$testDone ) {
+                               $testDone = true;
+                               if ( !$this->mMemc->set( wfMemcKey( 'test' ), 'test', 1 ) ) {
+                                       break;
+                               }
+                       }
                        sleep( 1 );
                }
 
-               return $i >= MSG_WAIT_TIMEOUT;
+               return $acquired;
        }
 
        function unlock( $key ) {
@@ -935,9 +964,12 @@ class MessageCache {
                        // Apparently load() failed
                        return null;
                }
-               $cache = $this->mCache[$code]; // Copy the cache
-               unset( $cache['VERSION'] ); // Remove the VERSION key
-               $cache = array_diff( $cache, array( '!NONEXISTENT' ) ); // Remove any !NONEXISTENT keys
+               // Remove administrative keys
+               $cache = $this->mCache[$code];
+               unset( $cache['VERSION'] );
+               unset( $cache['EXPIRY'] );
+               // Remove any !NONEXISTENT keys
+               $cache = array_diff( $cache, array( '!NONEXISTENT' ) );
                // Keys may appear with a capital first letter. lcfirst them.
                return array_map( array( $wgContLang, 'lcfirst' ), array_keys( $cache ) );
        }