<?php

namespace Flow\Data\Index;

use Flow\Container;
use Flow\Data\BufferedCache;
use Flow\Data\Compactor;
use Flow\Data\Compactor\FeatureCompactor;
use Flow\Data\Compactor\ShallowCompactor;
use Flow\Data\Index;
use Flow\Data\ObjectManager;
use Flow\Data\ObjectMapper;
use Flow\Data\ObjectStorage;
use Flow\Model\UUID;
use FormatJson;
use Flow\Exception\DataModelException;

/**
 * Index objects with equal features($indexedColumns) into the same buckets.
 */
abstract class FeatureIndex implements Index {

	/**
	 * Cache holding the feature buckets. This class reads it with getMulti(),
	 * drops whole buckets with delete() and, when the method exists, probes
	 * it with has().
	 *
	 * @var BufferedCache
	 */
	protected $cache;

	/**
	 * Backing store queried through findMulti() for buckets that were not
	 * found in self::$cache.
	 *
	 * @var ObjectStorage
	 */
	protected $storage;

	/**
	 * Mapper received by the constructor. The code in this class only stores
	 * it; presumably subclasses make use of it - TODO confirm.
	 *
	 * @var ObjectMapper
	 */
	protected $mapper;

	/**
	 * Prefix that becomes part of every cache key built by self::cacheKey().
	 *
	 * @var string
	 */
	protected $prefix;

	/**
	 * Compactor used to shrink rows before they are handed to the index and
	 * to expand cached results again. The constructor creates a
	 * FeatureCompactor for the indexed columns.
	 *
	 * @var Compactor
	 */
	protected $rowCompactor;

	/**
	 * The indexed columns, in the order they were given to the constructor.
	 *
	 * @var string[]
	 */
	protected $indexed;

	/**
	 * @var string[] The indexed columns in alphabetical order. This is
	 *  ordered so that cache keys can be generated in a stable manner.
	 */
	protected $indexedOrdered;

	/**
	 * Index options. self::getSort() reads the 'sort' key and self::getOrder()
	 * reads the 'order' key. Nothing in this class assigns the property;
	 * presumably subclasses populate it - TODO confirm.
	 *
	 * @var array
	 */
	protected $options;

	/**
	 * {@inheritDoc}
	 *
	 * Within this class the returned value is the upper bound for
	 * limit + offset in self::canAnswer() and the default slice length in
	 * self::foundMulti().
	 */
	abstract public function getLimit();

	/**
	 * The returned options are passed as-is to self::$storage->findMulti()
	 * by self::backingStoreFindMulti().
	 *
	 * @return array The options used for querying self::$storage
	 */
	abstract public function queryOptions();

	/**
	 * Trim the contents of a feature bucket down to the size of the index.
	 *
	 * @todo this doesn't need to be abstract
	 * @param array $values The current contents of a single feature bucket
	 * @return array $values trimmed to respect self::getLimit()
	 */
	abstract public function limitIndexSize( array $values );

	/**
	 * Add a single (compacted) row to the feature bucket it belongs to.
	 *
	 * @todo Could the cache key be passed in instead of $indexed?
	 * @param array $indexed The portion of $row that makes up the cache key
	 * @param array $row A single row of data to add to its related feature bucket
	 */
	abstract protected function addToIndex( array $indexed, array $row );

	/**
	 * Remove a single (compacted) row from the feature bucket it belongs to.
	 *
	 * @todo Similarly, could the cache key be passed in instead of $indexed?
	 * @param array $indexed The portion of $row that makes up the cache key
	 * @param array $row A single row of data to remove from its related feature bucket
	 */
	abstract protected function removeFromIndex( array $indexed, array $row );

	/**
	 * Wire up the collaborators and remember the indexed columns, both in
	 * their original order and in alphabetical order.
	 *
	 * @param BufferedCache $cache Cache the feature buckets live in
	 * @param ObjectStorage $storage Backing store used on cache misses
	 * @param ObjectMapper $mapper
	 * @param string $prefix Prefix to utilize for all cache keys
	 * @param array $indexedColumns List of columns to index
	 */
	public function __construct( BufferedCache $cache, ObjectStorage $storage, ObjectMapper $mapper, $prefix, array $indexedColumns ) {
		// collaborators
		$this->cache = $cache;
		$this->storage = $storage;
		$this->mapper = $mapper;
		$this->prefix = $prefix;
		$this->rowCompactor = new FeatureCompactor( $indexedColumns );

		// columns exactly as the caller listed them
		$this->indexed = $indexedColumns;

		// Alphabetical copy: together with the ksort() in self::cacheKey()
		// this keeps the cache key fields in one stable order.
		$alphabetical = $indexedColumns;
		sort( $alphabetical );
		$this->indexedOrdered = $alphabetical;
	}

	/**
	 * List the columns database rows are bucketed by.
	 *
	 * @return string[] The indexed columns, in the same order as they were
	 *  provided to the constructor.
	 */
	public function getPrimaryKeyColumns() {
		$columns = $this->indexed;
		return $columns;
	}

	/**
	 * {@inheritDoc}
	 */
	public function canAnswer( array $featureColumns, array $options ) {
		// Column order is irrelevant: compare against the alphabetical list.
		sort( $featureColumns );
		$sameColumns = ( $featureColumns === $this->indexedOrdered );
		if ( !$sameColumns ) {
			return false;
		}

		// Without a limit there is no size restriction to verify.
		// (This check can probably be moved to TopKIndex if it's not used
		// by anything else.)
		if ( !isset( $options['limit'] ) ) {
			return true;
		}

		// The furthest row requested is limit (+ offset, when given); it
		// must still fall inside what the index holds.
		$furthest = $options['limit'];
		if ( isset( $options['offset'] ) ) {
			$furthest += $options['offset'];
		}

		return !( $furthest > $this->getLimit() );
	}

	/**
	 * The sort definition taken from the 'sort' index option.
	 *
	 * Rows are first sorted based on the first term of the result, then ties
	 * are broken by evaluating the second term and so on.
	 *
	 * @return string[]|false The columns to sort by, or false if no sorting is defined
	 */
	public function getSort() {
		if ( !isset( $this->options['sort'] ) ) {
			return false;
		}

		return $this->options['sort'];
	}

	/**
	 * {@inheritDoc}
	 */
	public function getOrder() {
		// Only an explicit (case-insensitive) 'ASC' option yields ascending
		// order; anything else, including no option at all, means 'DESC'.
		$ascending = isset( $this->options['order'] ) && strtoupper( $this->options['order'] ) === 'ASC';

		return $ascending ? 'ASC' : 'DESC';
	}

	/**
	 * Delete from the cache the feature bucket that $row belongs to.
	 *
	 * @param object $object
	 * @param array $row Row whose indexed columns identify the bucket
	 * @throws DataModelException When the indexed columns can't be split from $row
	 */
	public function cachePurge( $object, array $row ) {
		$bucketFeatures = ObjectManager::splitFromRow( $row, $this->indexed );

		if ( $bucketFeatures ) {
			// Removing just this object would leave an incorrect index
			// behind, so the entire bucket holding it is deleted instead.
			$bucketKey = $this->cacheKey( $bucketFeatures );
			$this->cache->delete( $bucketKey );
			return;
		}

		throw new DataModelException( 'Un-indexable row: ' . FormatJson::encode( $row ), 'process-data' );
	}

	/**
	 * {@inheritDoc}
	 */
	public function onAfterInsert( $object, array $new, array $metadata ) {
		$bucketFeatures = ObjectManager::splitFromRow( $new , $this->indexed );
		if ( !$bucketFeatures ) {
			// Probably not strictly bail-worthy, but failing loudly makes
			// debugging easier.
			throw new DataModelException( 'Un-indexable row: ' . FormatJson::encode( $new ), 'process-data' );
		}

		$alphadecimalRow = UUID::convertUUIDs( $new, 'alphadecimal' );
		$compacted = $this->rowCompactor->compactRow( $alphadecimalRow );

		// The implementing index gets the first chance to create a brand new
		// bucket for this row.
		$created = $this->maybeCreateIndex( $bucketFeatures, $new, $compacted );
		if ( $created ) {
			return;
		}

		// Otherwise fall back to appending to the existing bucket.
		$this->addToIndex( $bucketFeatures, $compacted );
	}

	/**
	 * {@inheritDoc}
	 */
	public function onAfterUpdate( $object, array $old, array $new, array $metadata ) {
		$oldIndexed = ObjectManager::splitFromRow( $old, $this->indexed );
		$newIndexed = ObjectManager::splitFromRow( $new, $this->indexed );

		// Report the offending row itself. The split result is empty whenever
		// one of these branches is taken, so encoding it would produce a
		// message without any useful information.
		if ( !$oldIndexed ) {
			throw new DataModelException( 'Un-indexable row: ' . FormatJson::encode( $old ), 'process-data' );
		}
		if ( !$newIndexed ) {
			throw new DataModelException( 'Un-indexable row: ' . FormatJson::encode( $new ), 'process-data' );
		}

		// Compacted rows are what is actually kept inside the feature buckets
		$oldCompacted = $this->rowCompactor->compactRow( UUID::convertUUIDs( $old, 'alphadecimal' ) );
		$newCompacted = $this->rowCompactor->compactRow( UUID::convertUUIDs( $new, 'alphadecimal' ) );

		if ( ObjectManager::arrayEquals( $oldIndexed, $newIndexed ) ) {
			if ( ObjectManager::arrayEquals( $oldCompacted, $newCompacted ) ) {
				// Nothing changed in the index
				return;
			}
			// object representation in feature bucket has changed
			$this->replaceInIndex( $oldIndexed, $oldCompacted, $newCompacted );
		} else {
			// object has moved from one feature bucket to another
			$this->removeFromIndex( $oldIndexed, $oldCompacted );
			$this->addToIndex( $newIndexed, $newCompacted );
		}
	}

	/**
	 * {@inheritDoc}
	 */
	public function onAfterRemove( $object, array $old, array $metadata ) {
		$bucketFeatures = ObjectManager::splitFromRow( $old, $this->indexed );
		if ( !$bucketFeatures ) {
			throw new DataModelException( 'Unindexable row: ' . FormatJson::encode( $old ), 'process-data' );
		}

		// The bucket stores compacted rows with alphadecimal UUIDs, so build
		// that same representation of the removed row before taking it out.
		$alphadecimalRow = UUID::convertUUIDs( $old, 'alphadecimal' );
		$this->removeFromIndex( $bucketFeatures, $this->rowCompactor->compactRow( $alphadecimalRow ) );
	}

	/**
	 * {@inheritDoc}
	 */
	public function onAfterLoad( $object, array $old ) {
		// Intentionally empty: loading an object does not affect the index.
	}

	/**
	 * {@inheritDoc}
	 */
	public function onAfterClear() {
		// Intentionally empty: this index performs no work on clear.
	}

	/**
	 * {@inheritDoc}
	 */
	public function find( array $attributes, array $options = array() ) {
		// Delegate to the batch lookup with a one-element batch.
		$batch = array( $attributes );
		$perQuery = $this->findMulti( $batch, $options );

		// reset() hands back the first (only) result set, or false when the
		// batch lookup came back empty.
		return reset( $perQuery );
	}

	/**
	 * {@inheritDoc}
	 */
	public function findMulti( array $queries, array $options = array() ) {
		// Nothing was asked, so there is nothing to answer.
		if ( !$queries ) {
			return array();
		}

		// get cache keys for all queries: [query index => cache key]
		// (throws DataModelException for a query this index can't answer)
		$cacheKeys = $this->getCacheKeys( $queries );

		// retrieve from cache (only query duplicate queries once)
		// $fromCache will be an array containing compacted results as value and
		// cache keys as key
		$fromCache = $this->cache->getMulti( array_unique( $cacheKeys ) );

		// figure out what queries were resolved in cache
		// $keysFromCache will be an array where values are cache keys and keys
		// are the same index as their corresponding $queries
		// (intersect with $cacheKeys to guarantee order)
		$keysFromCache = array_intersect( $cacheKeys, array_keys( $fromCache ) );

		// filter out all queries that have been resolved from cache and fetch
		// them from storage
		// $fromStorage will be an array containing (expanded) results as value
		// and indexes matching $query as key
		$storageQueries = array_diff_key( $queries, $keysFromCache );
		$fromStorage = array();
		if ( $storageQueries ) {
			$fromStorage = $this->backingStoreFindMulti( $storageQueries );
		}

		// start from the storage results; cached results are merged in below
		$results = $fromStorage;

		// $queries may have had duplicates that we've ignored to minimize
		// cache requests - now re-duplicate values from cache & match the
		// results against their respective original keys in $queries
		foreach ( $keysFromCache as $index => $cacheKey ) {
			$results[$index] = $fromCache[$cacheKey];
		}

		// now that we have all data, both from cache & backing storage, filter
		// out all data we don't need
		$results = $this->filterResults( $results, $options );

		// if nothing came from cache, there is nothing left to expand or
		// reorder - quit early
		if ( !$fromCache ) {
			return $results;
		}

		// because we may have combined data from 2 different sources, chances
		// are the order of the data is no longer in sync with the order
		// $queries were in - fix that by replacing $queries values with
		// the corresponding $results value
		// note that there may be missing results, hence the intersect ;)
		$order = array_intersect_key( $queries, $results );
		$results = array_replace( $order, $results );

		$keyToQuery = array();
		foreach ( $keysFromCache as $index => $key ) {
			// all redundant data has been stripped, now expand all cache values
			// (we're only doing this now to avoid expanding redundant data)
			// NOTE(review): this reads $results[$index] for every cached
			// index, so it assumes filterResults() keeps an entry for each
			// of them - confirm against the overriding implementations.
			$fromCache[$key] = $results[$index];

			// to expand rows, we'll need the $query info mapped to the cache
			// key instead of the $query index (first query per key wins)
			if ( !isset( $keyToQuery[$key] ) ) {
				$keyToQuery[$key] = $queries[$index];
				$keyToQuery[$key] = UUID::convertUUIDs( $keyToQuery[$key], 'alphadecimal' );
			}
		}

		// expand and replace the stubs in $results with complete data
		$fromCache = $this->rowCompactor->expandCacheResult( $fromCache, $keyToQuery );
		foreach ( $keysFromCache as $index => $cacheKey ) {
			$results[$index] = $fromCache[$cacheKey];
		}

		return $results;
	}

	/**
	 * Strip the entries that are not needed according to the given $options.
	 *
	 * self::findMulti() calls this before expanding the cached entries, so
	 * that no more data than required has to be expanded afterwards.
	 *
	 * This base implementation does not strip anything.
	 *
	 * @param array $results
	 * @param array $options (optional)
	 * @return array
	 */
	protected function filterResults( array $results, array $options = array() ) {
		// No-op here; overridden in TopKIndex
		$unfiltered = $results;
		return $unfiltered;
	}

	/**
	 * Tell whether the find()-operation for the given attributes has already
	 * been resolved and would not need to query any outside cache/database.
	 *
	 * Knowing that a find() has not yet been resolved may be useful so that
	 * additional data can be loaded at once.
	 *
	 * @param array $attributes Attributes to find()
	 * @param array $options (optional) Options to find()
	 * @return bool
	 */
	public function found( array $attributes, array $options = array() ) {
		// A single lookup is simply a batch of one.
		$batch = array( $attributes );

		return $this->foundMulti( $batch, $options );
	}

	/**
	 * Returns a boolean true/false if the findMulti()-operation for the given
	 * queries has already been resolved and doesn't need to query any
	 * outside cache/database.
	 * Determining if a find() has not yet been resolved may be useful so that
	 * additional data may be loaded at once.
	 *
	 * @param array $queries Queries to findMulti()
	 * @param array $options (optional) Options to findMulti(); only 'limit'
	 *  is looked at here
	 * @return bool
	 * @throws DataModelException When a query can't be answered by this index
	 */
	public function foundMulti( array $queries, array $options = array() ) {
		if ( !$queries ) {
			return true;
		}

		// get cache keys for all queries
		$cacheKeys = $this->getCacheKeys( $queries );

		// check if cache has a way of identifying what's stored locally
		if ( !method_exists( $this->cache, 'has' ) ) {
			return false;
		}

		// Duplicate queries share a cache key; like findMulti(), look every
		// key up only once. array_unique() keeps the index of the first
		// query that produced each key.
		$uniqueKeys = array_unique( $cacheKeys );

		// check if keys matching given queries are already known in local cache
		foreach ( $uniqueKeys as $key ) {
			if ( !$this->cache->has( $key ) ) {
				return false;
			}
		}

		// These results will be merged into the query results, and as such need binary
		// uuid's as would be received from storage
		$keyToQuery = array();
		foreach ( $uniqueKeys as $i => $key ) {
			$keyToQuery[$key] = $queries[$i];
		}

		// the slice length is the same for every bucket: compute it once
		$limit = isset( $options['limit'] ) ? $options['limit'] : $this->getLimit();

		// retrieve from cache - this is cheap, it's local storage
		$cached = $this->cache->getMulti( $uniqueKeys );
		foreach ( $cached as $key => $result ) {
			// only the first $limit entries of a bucket would be returned
			$cached[$key] = array_slice( $result, 0, $limit );
		}

		// if we have a shallow compactor, the returned data are PKs of objects
		// that need to be fetched too
		if ( $this->rowCompactor instanceof ShallowCompactor ) {
			// test if the keys to be expanded are already in local cache
			$duplicator = $this->rowCompactor->getResultDuplicator( $cached, $keyToQuery );
			$shallowQueries = $duplicator->getUniqueQueries();
			if ( !$this->rowCompactor->getShallow()->foundMulti( $shallowQueries ) ) {
				return false;
			}
		}

		return true;
	}

	/**
	 * Build a map from each query's index in $queries to its cache key.
	 *
	 * Every query must consist of exactly the indexed columns; the order in
	 * which a query lists them does not matter.
	 *
	 * @param array $queries
	 * @return array Array of [query index => cache key]
	 * @throws DataModelException When a query is not made up of exactly the
	 *  indexed columns
	 */
	protected function getCacheKeys( $queries ) {
		$keysByPosition = array();

		foreach ( $queries as $position => $attributes ) {
			// compare on a key-sorted copy; the error message below reports
			// the columns in the order the caller supplied them
			$sorted = $attributes;
			ksort( $sorted );

			$answerable = ( array_keys( $sorted ) === $this->indexedOrdered );
			if ( !$answerable ) {
				throw new DataModelException(
					'Cannot answer query for columns: ' . implode( ', ', array_keys( $attributes ) ), 'process-data'
				);
			}

			$keysByPosition[$position] = $this->cacheKey( $sorted );
		}

		return $keysByPosition;
	}

	/**
	 * Query persistent storage for data not found in cache.  Note that this
	 * does not use the query options because an individual bucket's contents
	 * are based on constructor options, and not query options.  Query options
	 * merely change what part of the bucket is returned (or if the query has
	 * to fail over to direct from storage due to being beyond the set of
	 * cached values).
	 *
	 * @param array $queries Queries to run, keyed by their query index
	 * @return array Non-empty result sets only, keyed by the index of the
	 *  query they answer; queries without results have no entry
	 */
	protected function backingStoreFindMulti( array $queries ) {
		// query backing store
		$options = $this->queryOptions();
		$stored = $this->storage->findMulti( $queries, $options );
		$results = array();

		// keep the result sets that actually contain rows
		foreach ( $stored as $idx => $rows ) {
			if ( !$rows ) {
				// Nothing found,  should we cache failures as well as success?
				// @todo Log something about not finding everything?
				continue;
			}
			$results[$idx] = $rows;
		}

		return $results;
	}

	/**
	 * Hook invoked by self::onAfterInsert before it falls back to
	 * self::addToIndex. It lets an index create a bucket, rather than append
	 * to one, when the inserted object signifies a new feature list.
	 *
	 * This base implementation never creates anything.
	 *
	 * @todo again, could just pass cache key instead of $indexed?
	 * @param array $indexed The values that make up the cache key
	 * @param array $sourceRow The input database row
	 * @param array $compacted The database row reduced in size for storage within the index
	 * @return boolean True if an index was created, or false if $sourceRow should be merged
	 *  into the index via self::addToIndex
	 */
	protected function maybeCreateIndex( array $indexed, array $sourceRow, array $compacted ) {
		// Always defer to self::addToIndex
		$created = false;
		return $created;
	}

	/**
	 * Swap the old representation of a row inside a feature bucket for its
	 * new one.
	 *
	 * This naive version removes and then adds, which costs two round trips;
	 * an implementing class can likely do it in a single one.
	 *
	 * @todo again, could just pass cache key instead of $indexed?
	 * @param array $indexed The values that make up the cache key
	 * @param array $old The database row that was previously retrieved from cache
	 * @param array $new The new version of that replacement row
	 */
	protected function replaceInIndex( array $indexed, array $old, array $new ) {
		// round trip 1: take the stale row out of the bucket
		$this->removeFromIndex( $indexed, $old );

		// round trip 2: put the fresh row into that same bucket
		$this->addToIndex( $indexed, $new );
	}

	/**
	 * Generate the cache key representing the attributes.
	 *
	 * UUID values are replaced by their alphadecimal form first, and the
	 * attributes are sorted by column name so that the same set of values
	 * always produces the same key.
	 *
	 * @param array $attributes
	 * @return string
	 */
	protected function cacheKey( array $attributes ) {
		foreach ( array_keys( $attributes ) as $column ) {
			$value = $attributes[$column];

			if ( $value instanceof UUID ) {
				$attributes[$column] = $value->getAlphadecimal();
				continue;
			}

			// a value of UUID::BIN_LEN bytes in a "*_id" column is run
			// through UUID::create() as well
			if ( strlen( $value ) === UUID::BIN_LEN && substr( $column, -3 ) === '_id' ) {
				$attributes[$column] = UUID::create( $value )->getAlphadecimal();
			}
		}

		// values in $attributes may not always be in the exact same order,
		// which would lead to differences in cache key if we don't force that
		ksort( $attributes );

		$dbId = self::cachedDbId();
		$digest = md5( implode( ':', $attributes ) );

		return wfForeignMemcKey( $dbId, '', $this->prefix, $digest, Container::get( 'cache.version' ) );
	}

	/**
	 * The configured $wgFlowDefaultWikiDb, or the current wiki's id when that
	 * setting is exactly false.
	 *
	 * @return string The id of the database being cached
	 */
	static public function cachedDbId() {
		global $wgFlowDefaultWikiDb;

		return $wgFlowDefaultWikiDb === false ? wfWikiID() : $wgFlowDefaultWikiDb;
	}
}