/**
* Multi-datacenter aware caching interface
*
- * All operations go to the local cache, except the delete()
- * and touchCheckKey(), which broadcast to all clusters.
+ * All operations go to the local datacenter cache, except for delete(),
+ * touchCheckKey(), and resetCheckKey(), which broadcast to all clusters.
+ *
* This class is intended for caching data from primary stores.
* If the get() method does not return a value, then the caller
* should query the new value and backfill the cache using set().
- * When the source data changes, the delete() method should be called.
- * Since delete() is expensive, it should be avoided. One can do so if:
+ * When the source data changes, a purge method should be called.
+ * Since purges are expensive, they should be avoided. One can do so if:
* - a) The object cached is immutable; or
* - b) Validity is checked against the source after get(); or
* - c) Using a modest TTL is reasonably correct and performant
- * Consider using getWithSetCallback() instead of the get()/set() cycle.
+ * The simplest purge method is delete().
*
* Instances of this class must be configured to point to a valid
* PubSub endpoint, and there must be listeners on the cache servers
const CHECK_KEY_TTL = 31536000; // 1 year
/** Seconds to keep lock keys around */
const LOCK_TTL = 5;
+ /** Default remaining TTL at which to consider pre-emptive regeneration */
+ const LOW_TTL = 10;
+ /** Default time-since-expiry on a miss that makes a key "hot" */
+ const LOCK_TSE = 1;
/** Idiom for set()/getWithSetCallback() TTL */
const TTL_NONE = 0;
/** Idiom for getWithSetCallback() callbacks to avoid calling set() */
const TTL_UNCACHEABLE = -1;
+ /** Idiom for getWithSetCallback() options to disable 'lockTSE' logic */
+ const TSE_NONE = -1;
/** Cache format version number */
const VERSION = 1;
* - c) When the source data that "check" keys represent changes,
* the touchCheckKey() method is called on them
*
- * For keys that are hot/expensive, consider using getWithSetCallback() instead.
+ * Source data entities might exist in a DB that uses snapshot isolation
+ * (e.g. the default REPEATABLE-READ in InnoDB). Even for mutable data, that
+ * isolation can largely be maintained by doing the following:
+ * - a) Calling delete() on entity change *and* creation, before DB commit
+ * - b) Keeping transaction duration shorter than delete() hold-off TTL
+ * However, pre-snapshot values might still be seen due to delete() relay lag.
+ *
+ * Consider using getWithSetCallback() instead of get()/set() cycles.
+ * That method has cache slam avoiding features for hot/expensive keys.
*
* @param string $key Cache key
* @param mixed $curTTL Approximate TTL left on the key if present [returned]
/**
* Purge a key from all clusters
*
- * This deletes the key and instantiates a hold-off period where the key
- * cannot be written to for the next few seconds (HOLDOFF_TTL). This is to
- * avoid the following race condition:
- * a) Some DB data changes and delete() is called on a corresponding key
- * b) A request refills the key with a stale value from a lagged DB
- * c) The stale value is stuck there until the key is expired/evicted
+ * This should only be called when the underlying data (being cached)
+ * changes in a significant way. This deletes the key and starts a hold-off
+ * period where the key cannot be written to for a few seconds (HOLDOFF_TTL).
+ * This is done to avoid the following race condition:
+ * - a) Some DB data changes and delete() is called on a corresponding key
+ * - b) A request refills the key with a stale value from a lagged DB
+ * - c) The stale value is stuck there until the key is expired/evicted
*
* This is implemented by storing a special "tombstone" value at the cache
* key that this class recognizes; get() calls will return false for the key
* and any set() calls will refuse to replace tombstone values at the key.
* For this to always avoid writing stale values, the following must hold:
- * a) Replication lag is bounded to being less than HOLDOFF_TTL; or
- * b) If lag is higher, the DB will have gone into read-only mode already
+ * - a) Replication lag is bounded to being less than HOLDOFF_TTL; or
+ * - b) If lag is higher, the DB will have gone into read-only mode already
+ *
+ * When using potentially long-running ACID transactions, a good pattern is
+ * to use a pre-commit hook to issue the delete. This means that immediately
+ * after commit, callers will see the tombstone in cache in the local datacenter
+ * and in the others upon relay. It also avoids the following race condition:
+ * - a) T1 begins, changes a row, and calls delete()
+ * - b) The HOLDOFF_TTL passes, expiring the delete() tombstone
+ * - c) T2 starts, reads the row and calls set() due to a cache miss
+ * - d) T1 finally commits
+ * - e) Stale value is stuck in cache
*
- * This should only be called when the underlying data (being cached)
- * changes in a significant way. If called twice on the same key, then
- * the last TTL takes precedence.
+ * Example usage:
+ * @code
+ * $dbw->begin(); // start of request
+ * ... <execute some stuff> ...
+ * // Update the row in the DB
+ * $dbw->update( ... );
+ * $key = wfMemcKey( 'homes', $homeId );
+ * // Purge the corresponding cache entry just before committing
+ * $dbw->onTransactionPreCommitOrIdle( function() use ( $cache, $key ) {
+ * $cache->delete( $key );
+ * } );
+ * ... <execute some stuff> ...
+ * $dbw->commit(); // end of request
+ * @endcode
+ *
+ * If called twice on the same key, then the last hold-off TTL takes
+ * precedence. For idempotence, the $ttl should not vary for different
+ * delete() calls on the same key. Also note that lowering $ttl reduces
+ * the effective range of the 'lockTSE' parameter to getWithSetCallback().
*
* @param string $key Cache key
* @param integer $ttl How long to block writes to the key [seconds]
*
* This is similar to touchCheckKey() in that keys using it via
* getWithSetCallback() will be invalidated. The differences are:
- * a) The timestamp will be deleted from all caches and lazily
- * re-initialized when accessed (rather than set everywhere)
- * b) Thus, dependent keys will be known to be invalid, but not
- * for how long (they are treated as "just" purged), which
- * effects any lockTSE logic in getWithSetCallback()
+ * - a) The timestamp will be deleted from all caches and lazily
+ * re-initialized when accessed (rather than set everywhere)
+ * - b) Thus, dependent keys will be known to be invalid, but not
+ * for how long (they are treated as "just" purged), which
+ * affects any lockTSE logic in getWithSetCallback()
* The advantage is that this does not place high TTL keys on every cache
* server, making it better for code that will cache many different keys
* and either does not use lockTSE or uses a low enough TTL anyway.
* to maintain "most recent X" values that come from time or sequence
* based source data, provided that the "as of" id/time is tracked.
*
- * Usage of $checkKeys is similar to get()/getMulti(). However,
+ * Usage of $checkKeys is similar to get() and getMulti(). However,
* rather than the caller having to inspect a "current time left"
* variable (e.g. $curTTL, $curTTLs), a cache regeneration will be
* triggered using the callback.
*
* The simplest way to avoid stampedes for hot keys is to use
* the 'lockTSE' option in $opts. If cache purges are needed, also:
- * a) Pass $key into $checkKeys
- * b) Use touchCheckKey( $key ) instead of delete( $key )
+ * - a) Pass $key into $checkKeys
+ * - b) Use touchCheckKey( $key ) instead of delete( $key )
* Following this pattern lets the old cache be used until a
* single thread updates it as needed. Also consider tweaking
* the 'lowTTL' parameter.
* - lowTTL : consider pre-emptive updates when the current TTL (sec)
* of the key is less than this. It becomes more likely
* over time, becoming a certainty once the key is expired.
+ * [Default: WANObjectCache::LOW_TTL seconds]
* - lockTSE : if the key is tombstoned or expired (by $checkKeys) less
* than this many seconds ago, then try to have a single
* thread handle cache regeneration at any given time.
* Other threads will try to use stale values if possible.
* If, on miss, the time since expiration is low, the assumption
* is that the key is hot and that a stampede is worth avoiding.
- * - tempTTL : TTL of the temp key used to cache values while a key is tombstoned.
- * This avoids excessive regeneration of hot keys on delete() but may
- * result in stale values.
+ * Setting this above WANObjectCache::HOLDOFF_TTL makes no difference.
+ * The higher this is set, the higher the worst-case staleness can be.
+ * Use WANObjectCache::TSE_NONE to disable this logic.
+ * [Default: WANObjectCache::TSE_NONE]
* @return mixed Value to use for the key
*/
final public function getWithSetCallback(
$key, $callback, $ttl, array $checkKeys = array(), array $opts = array()
) {
- $lowTTL = isset( $opts['lowTTL'] ) ? $opts['lowTTL'] : min( 10, $ttl );
- $lockTSE = isset( $opts['lockTSE'] ) ? $opts['lockTSE'] : -1;
- $tempTTL = isset( $opts['tempTTL'] ) ? $opts['tempTTL'] : 5;
+ $lowTTL = isset( $opts['lowTTL'] ) ? $opts['lowTTL'] : min( self::LOW_TTL, $ttl );
+ $lockTSE = isset( $opts['lockTSE'] ) ? $opts['lockTSE'] : self::TSE_NONE;
// Get the current key value
$curTTL = null;
return $value;
}
+ // A deleted key with a negative TTL left must be tombstoned
$isTombstone = ( $curTTL !== null && $value === false );
// Assume a key is hot if requested soon after invalidation
$isHot = ( $curTTL !== null && $curTTL <= 0 && abs( $curTTL ) <= $lockTSE );
-
- $locked = false;
- if ( $isHot ) {
+ // Decide whether a single thread should handle regenerations.
+ // This avoids stampedes when $checkKeys are bumped and when preemptive
+ // regenerations take too long. It also reduces regenerations while $key
+ // is tombstoned. This balances cache freshness with avoiding DB load.
+ $useMutex = ( $isHot || ( $isTombstone && $lockTSE > 0 ) );
+
+ $lockAcquired = false;
+ if ( $useMutex ) {
// Acquire a cluster-local non-blocking lock
if ( $this->cache->lock( $key, 0, self::LOCK_TTL ) ) {
// Lock acquired; this thread should update the key
- $locked = true;
+ $lockAcquired = true;
} elseif ( $value !== false ) {
// If it cannot be acquired; then the stale value can be used
return $value;
- }
- }
-
- if ( !$locked && ( $isTombstone || $isHot ) ) {
- // Use the stash value for tombstoned keys to reduce regeneration load.
- // For hot keys, either another thread has the lock or the lock failed;
- // use the stash value from the last thread that regenerated it.
- $value = $this->cache->get( self::STASH_KEY_PREFIX . $key );
- if ( $value !== false ) {
- return $value;
+ } else {
+ // Use the stash value for tombstoned keys to reduce regeneration load.
+ // For hot keys, either another thread has the lock or the lock failed;
+ // use the stash value from the last thread that regenerated it.
+ $value = $this->cache->get( self::STASH_KEY_PREFIX . $key );
+ if ( $value !== false ) {
+ return $value;
+ }
}
}
$value = call_user_func_array( $callback, array( $cValue, &$ttl ) );
// When delete() is called, writes are write-holed by the tombstone,
// so use a special stash key to pass the new value around threads.
- if ( $value !== false && ( $isHot || $isTombstone ) && $ttl >= 0 ) {
+ if ( $useMutex && $value !== false && $ttl >= 0 ) {
+ $tempTTL = max( 1, (int)$lockTSE ); // set() expects seconds
$this->cache->set( self::STASH_KEY_PREFIX . $key, $value, $tempTTL );
}
- if ( $locked ) {
+ if ( $lockAcquired ) {
$this->cache->unlock( $key );
}