Merge "API: Correct 'from_namespace' logic in ApiQueryBacklinksprop"

[lhc/web/wiklou.git] / includes / libs / objectcache / WANObjectCache.php
diff --git a/includes/libs/objectcache/WANObjectCache.php b/includes/libs/objectcache/WANObjectCache.php

index dcd413d..c78b299 100644 (file)
--- a/includes/libs/objectcache/WANObjectCache.php
+++ b/includes/libs/objectcache/WANObjectCache.php
@@ -23,17 +23,18 @@
  /**
   * Multi-datacenter aware caching interface
   *
- * All operations go to the local cache, except the delete()
- * and touchCheckKey(), which broadcast to all clusters.
+ * All operations go to the local datacenter cache, except for delete(),
+ * touchCheckKey(), and resetCheckKey(), which broadcast to all clusters.
+ *
   * This class is intended for caching data from primary stores.
   * If the get() method does not return a value, then the caller
   * should query the new value and backfill the cache using set().
- * When the source data changes, the delete() method should be called.
- * Since delete() is expensive, it should be avoided. One can do so if:
+ * When the source data changes, a purge method should be called.
+ * Since purges are expensive, they should be avoided. One can do so if:
   *   - a) The object cached is immutable; or
   *   - b) Validity is checked against the source after get(); or
   *   - c) Using a modest TTL is reasonably correct and performant
- * Consider using getWithSetCallback() instead of the get()/set() cycle.
+ * The simplest purge method is delete().
   *
   * Instances of this class must be configured to point to a valid
   * PubSub endpoint, and there must be listeners on the cache servers
@@ -73,11 +74,17 @@ class WANObjectCache {
         const CHECK_KEY_TTL = 31536000; // 1 year
         /** Seconds to keep lock keys around */
         const LOCK_TTL = 5;
+       /** Default remaining TTL at which to consider pre-emptive regeneration */
+       const LOW_TTL = 10;
+       /** Default time-since-expiry on a miss that makes a key "hot" */
+       const LOCK_TSE = 1;
  
         /** Idiom for set()/getWithSetCallback() TTL */
         const TTL_NONE = 0;
         /** Idiom for getWithSetCallback() callbacks to avoid calling set() */
         const TTL_UNCACHEABLE = -1;
+       /** Idiom for the getWithSetCallback() 'lockTSE' option to disable that logic */
+       const TSE_NONE = -1;
  
         /** Cache format version number */
         const VERSION = 1;
@@ -146,7 +153,15 @@ class WANObjectCache {
          *   - c) When the source data that "check" keys represent changes,
          *        the touchCheckKey() method is called on them
          *
-        * For keys that are hot/expensive, consider using getWithSetCallback() instead.
+        * Source data entities might exist in a DB that uses snapshot isolation
+        * (e.g. the default REPEATABLE-READ in InnoDB). Even for mutable data, that
+        * isolation can largely be maintained by doing the following:
+        *   - a) Calling delete() on entity change *and* creation, before DB commit
+        *   - b) Keeping transaction duration shorter than delete() hold-off TTL
+        * However, pre-snapshot values might still be seen due to delete() relay lag.
+        *
+        * Consider using getWithSetCallback() instead of get()/set() cycles.
+        * That method has cache slam avoiding features for hot/expensive keys.
          *
          * @param string $key Cache key
          * @param mixed $curTTL Approximate TTL left on the key if present [returned]
@@ -256,23 +271,50 @@ class WANObjectCache {
         /**
          * Purge a key from all clusters
          *
-        * This deletes the key and instantiates a hold-off period where the key
-        * cannot be written to for the next few seconds (HOLDOFF_TTL). This is to
-        * avoid the following race condition:
-        *   a) Some DB data changes and delete() is called on a corresponding key
-        *   b) A request refills the key with a stale value from a lagged DB
-        *   c) The stale value is stuck there until the key is expired/evicted
+        * This should only be called when the underlying data (being cached)
+        * changes in a significant way. This deletes the key and starts a hold-off
+        * period where the key cannot be written to for a few seconds (HOLDOFF_TTL).
+        * This is done to avoid the following race condition:
+        *   - a) Some DB data changes and delete() is called on a corresponding key
+        *   - b) A request refills the key with a stale value from a lagged DB
+        *   - c) The stale value is stuck there until the key is expired/evicted
          *
          * This is implemented by storing a special "tombstone" value at the cache
          * key that this class recognizes; get() calls will return false for the key
          * and any set() calls will refuse to replace tombstone values at the key.
          * For this to always avoid writing stale values, the following must hold:
-        *   a) Replication lag is bounded to being less than HOLDOFF_TTL; or
-        *   b) If lag is higher, the DB will have gone into read-only mode already
+        *   - a) Replication lag is bounded to being less than HOLDOFF_TTL; or
+        *   - b) If lag is higher, the DB will have gone into read-only mode already
+        *
+        * When using potentially long-running ACID transactions, a good pattern is
+        * to use a pre-commit hook to issue the delete. This means that immediately
+        * after commit, callers will see the tombstone in cache in the local datacenter
+        * and in the others upon relay. It also avoids the following race condition:
+        *   - a) T1 begins, changes a row, and calls delete()
+        *   - b) The HOLDOFF_TTL passes, expiring the delete() tombstone
+        *   - c) T2 starts, reads the row and calls set() due to a cache miss
+        *   - d) T1 finally commits
+        *   - e) Stale value is stuck in cache
          *
-        * This should only be called when the underlying data (being cached)
-        * changes in a significant way. If called twice on the same key, then
-        * the last TTL takes precedence.
+        * Example usage:
+        * @code
+        *     $dbw->begin(); // start of request
+        *     ... <execute some stuff> ...
+        *     // Update the row in the DB
+        *     $dbw->update( ... );
+        *     $key = wfMemcKey( 'homes', $homeId );
+        *     // Purge the corresponding cache entry just before committing
+        *     $dbw->onTransactionPreCommitOrIdle( function() use ( $cache, $key ) {
+        *         $cache->delete( $key );
+        *     } );
+        *     ... <execute some stuff> ...
+        *     $dbw->commit(); // end of request
+        * @endcode
+        *
+        * If called twice on the same key, then the last hold-off TTL takes
+        * precedence. For idempotence, the $ttl should not vary for different
+        * delete() calls on the same key. Also note that lowering $ttl reduces
+        * the effective range of the 'lockTSE' parameter to getWithSetCallback().
          *
          * @param string $key Cache key
          * @param integer $ttl How long to block writes to the key [seconds]
@@ -362,11 +404,11 @@ class WANObjectCache {
          *
          * This is similar to touchCheckKey() in that keys using it via
          * getWithSetCallback() will be invalidated. The differences are:
-        *   a) The timestamp will be deleted from all caches and lazily
-        *      re-initialized when accessed (rather than set everywhere)
-        *   b) Thus, dependent keys will be known to be invalid, but not
-        *      for how long (they are treated as "just" purged), which
-        *      effects any lockTSE logic in getWithSetCallback()
+        *   - a) The timestamp will be deleted from all caches and lazily
+        *        re-initialized when accessed (rather than set everywhere)
+        *   - b) Thus, dependent keys will be known to be invalid, but not
+        *        for how long (they are treated as "just" purged), which
+        *        affects any lockTSE logic in getWithSetCallback()
          * The advantage is that this does not place high TTL keys on every cache
          * server, making it better for code that will cache many different keys
          * and either does not use lockTSE or uses a low enough TTL anyway.
@@ -405,15 +447,15 @@ class WANObjectCache {
          * to maintain "most recent X" values that come from time or sequence
          * based source data, provided that the "as of" id/time is tracked.
          *
-        * Usage of $checkKeys is similar to get()/getMulti(). However,
+        * Usage of $checkKeys is similar to get() and getMulti(). However,
          * rather than the caller having to inspect a "current time left"
          * variable (e.g. $curTTL, $curTTLs), a cache regeneration will be
          * triggered using the callback.
          *
          * The simplest way to avoid stampedes for hot keys is to use
          * the 'lockTSE' option in $opts. If cache purges are needed, also:
-        *   a) Pass $key into $checkKeys
-        *   b) Use touchCheckKey( $key ) instead of delete( $key )
+        *   - a) Pass $key into $checkKeys
+        *   - b) Use touchCheckKey( $key ) instead of delete( $key )
          * Following this pattern lets the old cache be used until a
          * single thread updates it as needed. Also consider tweaking
          * the 'lowTTL' parameter.
@@ -459,23 +501,24 @@ class WANObjectCache {
          *   - lowTTL  : consider pre-emptive updates when the current TTL (sec)
          *               of the key is less than this. It becomes more likely
          *               over time, becoming a certainty once the key is expired.
+        *               [Default: WANObjectCache::LOW_TTL seconds]
          *   - lockTSE : if the key is tombstoned or expired (by $checkKeys) less
          *               than this many seconds ago, then try to have a single
          *               thread handle cache regeneration at any given time.
          *               Other threads will try to use stale values if possible.
          *               If, on miss, the time since expiration is low, the assumption
          *               is that the key is hot and that a stampede is worth avoiding.
-        *   - tempTTL : TTL of the temp key used to cache values while a key is tombstoned.
-        *               This avoids excessive regeneration of hot keys on delete() but may
-        *               result in stale values.
+        *               Setting this above WANObjectCache::HOLDOFF_TTL makes no difference.
+        *               The higher this is set, the higher the worst-case staleness can be.
+        *               Use WANObjectCache::TSE_NONE to disable this logic.
+        *               [Default: WANObjectCache::TSE_NONE]
          * @return mixed Value to use for the key
          */
         final public function getWithSetCallback(
                 $key, $callback, $ttl, array $checkKeys = array(), array $opts = array()
         ) {
-               $lowTTL = isset( $opts['lowTTL'] ) ? $opts['lowTTL'] : min( 10, $ttl );
-               $lockTSE = isset( $opts['lockTSE'] ) ? $opts['lockTSE'] : -1;
-               $tempTTL = isset( $opts['tempTTL'] ) ? $opts['tempTTL'] : 5;
+               $lowTTL = isset( $opts['lowTTL'] ) ? $opts['lowTTL'] : min( self::LOW_TTL, $ttl );
+               $lockTSE = isset( $opts['lockTSE'] ) ? $opts['lockTSE'] : self::TSE_NONE;
  
                 // Get the current key value
                 $curTTL = null;
@@ -487,29 +530,33 @@ class WANObjectCache {
                         return $value;
                 }
  
+               // A deleted key with a negative TTL left must be tombstoned
                 $isTombstone = ( $curTTL !== null && $value === false );
                 // Assume a key is hot if requested soon after invalidation
                 $isHot = ( $curTTL !== null && $curTTL <= 0 && abs( $curTTL ) <= $lockTSE );
-
-               $locked = false;
-               if ( $isHot ) {
+               // Decide whether a single thread should handle regenerations.
+               // This avoids stampedes when $checkKeys are bumped and when preemptive
+               // regenerations take too long. It also reduces regenerations while $key
+               // is tombstoned. This balances cache freshness with avoiding DB load.
+               $useMutex = ( $isHot || ( $isTombstone && $lockTSE > 0 ) );
+
+               $lockAcquired = false;
+               if ( $useMutex ) {
                         // Acquire a cluster-local non-blocking lock
                         if ( $this->cache->lock( $key, 0, self::LOCK_TTL ) ) {
                                 // Lock acquired; this thread should update the key
-                               $locked = true;
+                               $lockAcquired = true;
                         } elseif ( $value !== false ) {
                                 // If it cannot be acquired; then the stale value can be used
                                 return $value;
-                       }
-               }
-
-               if ( !$locked && ( $isTombstone || $isHot ) ) {
-                       // Use the stash value for tombstoned keys to reduce regeneration load.
-                       // For hot keys, either another thread has the lock or the lock failed;
-                       // use the stash value from the last thread that regenerated it.
-                       $value = $this->cache->get( self::STASH_KEY_PREFIX . $key );
-                       if ( $value !== false ) {
-                               return $value;
+                       } else {
+                               // Use the stash value for tombstoned keys to reduce regeneration load.
+                               // For hot keys, either another thread has the lock or the lock failed;
+                               // use the stash value from the last thread that regenerated it.
+                               $value = $this->cache->get( self::STASH_KEY_PREFIX . $key );
+                               if ( $value !== false ) {
+                                       return $value;
+                               }
                         }
                 }
  
@@ -521,11 +568,12 @@ class WANObjectCache {
                 $value = call_user_func_array( $callback, array( $cValue, &$ttl ) );
                 // When delete() is called, writes are write-holed by the tombstone,
                 // so use a special stash key to pass the new value around threads.
-               if ( $value !== false && ( $isHot || $isTombstone ) && $ttl >= 0 ) {
+               if ( $useMutex && $value !== false && $ttl >= 0 ) {
+                       $tempTTL = max( 1, (int)$lockTSE ); // set() expects seconds
                         $this->cache->set( self::STASH_KEY_PREFIX . $key, $value, $tempTTL );
                 }
  
-               if ( $locked ) {
+               if ( $lockAcquired ) {
                         $this->cache->unlock( $key );
                 }