[Customisation Database Commits] r1783 [25/63] - in /branches/stable: language/en/acp/ language/en/mods/ language/pt_br/ language/pt_br/acp/ language/pt_br/mods/ titania/ titania/authors/ titania/contributions/ titania/docs/ titania/download/ titania/download/modx/ titania/files/contrib_temp/ titania/files/modx_files/ titania/images/ titania/includes/ titania/includes/core/ titania/includes/hooks/ titania/includes/library/Zend/ titania/includes/library/Zend/Search/ titania/includes/library/Zend/Search/Lucene/ titania/includes/library/Zend/Search/Lucene/Analysis/ titania/includes/library/Zend/Search/Lucene/Analysis/Analyzer/ titania/includes/library/Zend/Search/Lucene/Analysis/Analyzer/Common/ titania/includes/library/Zend/Search/Lucene/Analysis/Analyzer/Common/Text/ titania/includes/library/Zend/Search/Lucene/Analysis/Analyzer/Common/TextNum/ titania/includes/library/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8/ titania/includes/library/Zend/Search/Lucene/Analysis/Analyzer/Common/Utf8Num/ titania/includes/library/Zend/Search/Lucene/Analysis/TokenFilter/ titania/includes/library/Zend/Search/Lucene/Document/ titania/includes/library/Zend/Search/Lucene/Index/ titania/includes/library/Zend/Search/Lucene/Index/SegmentWriter/ titania/includes/library/Zend/Search/Lucene/Index/TermsStream/ titania/includes/library/Zend/Search/Lucene/Search/ titania/includes/library/Zend/Search/Lucene/Search/Highlighter/ titania/includes/library/Zend/Search/Lucene/Search/Query/ titania/includes/library/Zend/Search/Lucene/Search/Query/Preprocessing/ titania/includes/library/Zend/Search/Lucene/Search/QueryEntry/ titania/includes/library/Zend/Search/Lucene/Search/Similarity/ titania/includes/library/Zend/Search/Lucene/Search/Weight/ titania/includes/library/Zend/Search/Lucene/Storage/ titania/includes/library/Zend/Search/Lucene/Storage/Directory/ titania/includes/library/Zend/Search/Lucene/Storage/File/ titania/includes/library/automod/ titania/includes/library/ezcomponents/ 
titania/includes/library/ezcomponents/Base/ titania/includes/library/ezcomponents/Base/interfaces/ titania/includes/library/translations/ titania/includes/manage_tools/ titania/includes/objects/ titania/includes/overlords/ titania/includes/tools/ titania/includes/types/ titania/js/ titania/language/ titania/language/en/ titania/language/en/email/ titania/language/en/manage_tools/ titania/language/en/types/ titania/language/pt_br/ titania/language/pt_br/email/ titania/language/pt_br/manage_tools/ titania/language/pt_br/types/ titania/manage/ titania/store/ titania/store/phpbb_packages/ titania/store/phpbb_packages/extracted/ titania/store/search/ titania/styles/default/template/ titania/styles/default/template/authors/ titania/styles/default/template/common/ titania/styles/default/template/contributions/ titania/styles/default/template/manage/ titania/styles/default/template/posting/ titania/styles/default/template/posting/attachments/ titania/styles/default/template/posting/panels/ titania/styles/default/theme/ titania/styles/default/theme/en-gb/ titania/styles/prosilver/template/ titania/styles/prosilver/template/authors/ titania/styles/prosilver/template/common/ titania/styles/prosilver/theme/ titania/styles/prosilver/theme/en-gb/ umil/ umil/error_files/ umil/language/en/ umil/style/
Nathan Guse
exreaction at phpbb.com
Sat Nov 20 18:23:23 GMT 2010
Modified: branches/stable/titania/includes/library/Zend/Search/Lucene/Search/Query/Fuzzy.php
==============================================================================
*** branches/stable/titania/includes/library/Zend/Search/Lucene/Search/Query/Fuzzy.php (original)
--- branches/stable/titania/includes/library/Zend/Search/Lucene/Search/Query/Fuzzy.php Sat Nov 20 18:23:22 2010
***************
*** 1,493 ****
! <?php
! /**
! * Zend Framework
! *
! * LICENSE
! *
! * This source file is subject to the new BSD license that is bundled
! * with this package in the file LICENSE.txt.
! * It is also available through the world-wide-web at this URL:
! * http://framework.zend.com/license/new-bsd
! * If you did not receive a copy of the license and are unable to
! * obtain it through the world-wide-web, please send an email
! * to license at zend.com so we can send you a copy immediately.
! *
! * @category Zend
! * @package Zend_Search_Lucene
! * @subpackage Search
! * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
! * @license http://framework.zend.com/license/new-bsd New BSD License
! * @version $Id$
! */
!
!
! /** Zend_Search_Lucene_Search_Query */
! require_once 'Zend/Search/Lucene/Search/Query.php';
!
!
! /**
! * @category Zend
! * @package Zend_Search_Lucene
! * @subpackage Search
! * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
! * @license http://framework.zend.com/license/new-bsd New BSD License
! */
! class Zend_Search_Lucene_Search_Query_Fuzzy extends Zend_Search_Lucene_Search_Query
! {
! /** Default minimum similarity */
! const DEFAULT_MIN_SIMILARITY = 0.5;
!
! /**
! * Maximum number of matched terms.
! * Apache Lucene defines this limitation as boolean query maximum number of clauses:
! * org.apache.lucene.search.BooleanQuery.getMaxClauseCount()
! */
! const MAX_CLAUSE_COUNT = 1024;
!
! /**
! * Array of precalculated max distances
! *
! * keys are integers representing a word size
! */
! private $_maxDistances = array();
!
! /**
! * Base searching term.
! *
! * @var Zend_Search_Lucene_Index_Term
! */
! private $_term;
!
! /**
! * A value between 0 and 1 to set the required similarity
! * between the query term and the matching terms. For example, for a
! * _minimumSimilarity of 0.5 a term of the same length
! * as the query term is considered similar to the query term if the edit distance
! * between both terms is less than length(term)*0.5
! *
! * @var float
! */
! private $_minimumSimilarity;
!
! /**
! * The length of common (non-fuzzy) prefix
! *
! * @var integer
! */
! private $_prefixLength;
!
! /**
! * Matched terms.
! *
! * Matched terms list.
! * It's filled during the search (rewrite operation) and may be used for search result
! * post-processing
! *
! * Array of Zend_Search_Lucene_Index_Term objects
! *
! * @var array
! */
! private $_matches = null;
!
! /**
! * Matched terms scores
! *
! * @var array
! */
! private $_scores = null;
!
! /**
! * Array of the term keys.
! * Used to sort terms in alphabetical order if terms have the same scores
! *
! * @var array
! */
! private $_termKeys = null;
!
! /**
! * Default non-fuzzy prefix length
! *
! * @var integer
! */
! private static $_defaultPrefixLength = 3;
!
! /**
! * Zend_Search_Lucene_Search_Query_Fuzzy constructor.
! *
! * @param Zend_Search_Lucene_Index_Term $term
! * @param float $minimumSimilarity
! * @param integer $prefixLength
! * @throws Zend_Search_Lucene_Exception
! */
! public function __construct(Zend_Search_Lucene_Index_Term $term, $minimumSimilarity = self::DEFAULT_MIN_SIMILARITY, $prefixLength = null)
! {
!     // Validate the arguments in a fixed order; the first violated
!     // constraint decides which message the exception carries.
!     $problem = null;
!     if ($minimumSimilarity < 0) {
!         $problem = 'minimumSimilarity cannot be less than 0';
!     } else if ($minimumSimilarity >= 1) {
!         $problem = 'minimumSimilarity cannot be greater than or equal to 1';
!     } else if ($prefixLength < 0) {
!         $problem = 'prefixLength cannot be less than 0';
!     }
!
!     if ($problem !== null) {
!         // The exception class is only loaded on the failure path.
!         require_once 'Zend/Search/Lucene/Exception.php';
!         throw new Zend_Search_Lucene_Exception($problem);
!     }
!
!     $this->_term              = $term;
!     $this->_minimumSimilarity = $minimumSimilarity;
!
!     // A null prefix length means "use the class-wide default".
!     if ($prefixLength === null) {
!         $this->_prefixLength = self::$_defaultPrefixLength;
!     } else {
!         $this->_prefixLength = $prefixLength;
!     }
! }
!
! /**
! * Get default non-fuzzy prefix length
! *
! * @return integer
! */
! public static function getDefaultPrefixLength()
! {
!     // Class-wide default, used by the constructor when no prefix length is passed.
!     $defaultLength = self::$_defaultPrefixLength;
!
!     return $defaultLength;
! }
!
! /**
! * Set default non-fuzzy prefix length
! *
! * @param integer $defaultPrefixLength
! */
! public static function setDefaultPrefixLength($defaultPrefixLength)
! {
! // Stored as given (no validation here); the constructor reads this value
! // whenever it is called without an explicit prefix length.
! self::$_defaultPrefixLength = $defaultPrefixLength;
! }
!
! /**
! * Calculate maximum distance for specified word length
! *
! * @param integer $prefixLength
! * @param integer $termLength
! * @param integer $length
! * @return integer
! */
! private function _calculateMaxDistance($prefixLength, $termLength, $length)
! {
!     // The edit budget is a fraction of the compared length: the shorter of
!     // the two remainders plus the shared prefix.
!     $comparedLength = min($termLength, $length) + $prefixLength;
!     $allowedEdits   = (int) ((1 - $this->_minimumSimilarity)*$comparedLength);
!
!     // Memoize by candidate length so callers can look the value up next time.
!     $this->_maxDistances[$length] = $allowedEdits;
!
!     return $allowedEdits;
! }
!
! /**
! * Re-write query into primitive queries in the context of specified index
! *
! * @param Zend_Search_Lucene_Interface $index
! * @return Zend_Search_Lucene_Search_Query
! * @throws Zend_Search_Lucene_Exception
! */
! public function rewrite(Zend_Search_Lucene_Interface $index)
! {
!     // Scans the index terms of the relevant field(s), keeps every term whose
!     // similarity to the query term exceeds _minimumSimilarity and returns an
!     // Empty, Term or Boolean query built from the matches.
!     $this->_matches  = array();
!     $this->_scores   = array();
!     $this->_termKeys = array();
!
!     if ($this->_term->field === null) {
!         // Search through all fields
!         $fields = $index->getFieldNames(true /* indexed fields list */);
!     } else {
!         $fields = array($this->_term->field);
!     }
!
!     require_once 'Zend/Search/Lucene/Index/Term.php';
!     $prefix           = Zend_Search_Lucene_Index_Term::getPrefix($this->_term->text, $this->_prefixLength);
!     $prefixByteLength = strlen($prefix);
!     $prefixUtf8Length = Zend_Search_Lucene_Index_Term::getLength($prefix);
!
!     $termRest = substr($this->_term->text, $prefixByteLength);
!     // we calculate length of the rest in bytes since levenshtein() is not UTF-8 compatible
!     $termRestLength = strlen($termRest);
!
!     // Maps similarities from (minimumSimilarity, 1] onto boosts in (0, 1].
!     $scaleFactor = 1/(1 - $this->_minimumSimilarity);
!
!     require_once 'Zend/Search/Lucene.php';
!     $maxTerms = Zend_Search_Lucene::getTermsPerQueryLimit();
!     foreach ($fields as $field) {
!         $index->resetTermsStream();
!
!         if ($prefix != '') {
!             $index->skipTo(new Zend_Search_Lucene_Index_Term($prefix, $field));
!
!             // strncmp() compares bytes. The previous substr(...) == $prefix test
!             // compared numeric-looking strings numerically, so e.g. "1e2" was
!             // treated as equal to the prefix "100".
!             while (($currentTerm = $index->currentTerm()) !== null &&
!                    $currentTerm->field == $field &&
!                    strncmp($currentTerm->text, $prefix, $prefixByteLength) === 0) {
!                 // Calculate similarity
!                 $target       = substr($currentTerm->text, $prefixByteLength);
!                 $targetLength = strlen($target);
!
!                 $maxDistance = isset($this->_maxDistances[$targetLength])?
!                     $this->_maxDistances[$targetLength] :
!                     $this->_calculateMaxDistance($prefixUtf8Length, $termRestLength, $targetLength);
!
!                 if ($termRestLength == 0) {
!                     // we don't have anything to compare. That means if we just add
!                     // the letters for current term we get the new word
!                     $similarity = (($prefixUtf8Length == 0)? 0 : 1 - $targetLength/$prefixUtf8Length);
!                 } else if ($targetLength == 0) {
!                     $similarity = (($prefixUtf8Length == 0)? 0 : 1 - $termRestLength/$prefixUtf8Length);
!                 } else if ($maxDistance < abs($termRestLength - $targetLength)) {
!                     // just adding the characters of term to target or vice-versa results in too many edits
!                     // for example "pre" length is 3 and "prefixes" length is 8. We can see that
!                     // given this optimal circumstance, the edit distance cannot be less than 5,
!                     // which is 8-3 or more precisely abs(3-8).
!                     // if our maximum edit distance is 4, then we can discard this word
!                     // without looking at it.
!                     $similarity = 0;
!                 } else {
!                     $distance = levenshtein($termRest, $target);
!                     // Before PHP 8.0 levenshtein() returns -1 when an argument is longer
!                     // than 255 characters; that must not be scored as a perfect match.
!                     $similarity = ($distance < 0)? 0 :
!                         1 - $distance/($prefixUtf8Length + min($termRestLength, $targetLength));
!                 }
!
!                 if ($similarity > $this->_minimumSimilarity) {
!                     $this->_matches[]  = $currentTerm;
!                     $this->_termKeys[] = $currentTerm->key();
!                     $this->_scores[]   = ($similarity - $this->_minimumSimilarity)*$scaleFactor;
!
!                     if ($maxTerms != 0 && count($this->_matches) > $maxTerms) {
!                         require_once 'Zend/Search/Lucene/Exception.php';
!                         throw new Zend_Search_Lucene_Exception('Terms per query limit is reached.');
!                     }
!                 }
!
!                 $index->nextTerm();
!             }
!         } else {
!             $index->skipTo(new Zend_Search_Lucene_Index_Term('', $field));
!
!             while (($currentTerm = $index->currentTerm()) !== null && $currentTerm->field == $field) {
!                 // Calculate similarity
!                 $target        = $currentTerm->text;
!                 $targetLength  = strlen($target);
!                 $shorterLength = min($termRestLength, $targetLength);
!
!                 $maxDistance = isset($this->_maxDistances[$targetLength])?
!                     $this->_maxDistances[$targetLength] :
!                     $this->_calculateMaxDistance(0, $termRestLength, $targetLength);
!
!                 if ($shorterLength == 0) {
!                     // One side is empty, so there is nothing to compare. This also
!                     // prevents the division by zero that occurred when both the
!                     // query rest and the index term were empty.
!                     $similarity = 0;
!                 } else if ($maxDistance < abs($termRestLength - $targetLength)) {
!                     // The length difference alone already exceeds the edit budget
!                     // (see the "pre"/"prefixes" example in the prefixed branch).
!                     $similarity = 0;
!                 } else {
!                     $distance = levenshtein($termRest, $target);
!                     // -1 signals an over-long argument on PHP < 8.0; treat it as no match.
!                     $similarity = ($distance < 0)? 0 : 1 - $distance/$shorterLength;
!                 }
!
!                 if ($similarity > $this->_minimumSimilarity) {
!                     $this->_matches[]  = $currentTerm;
!                     $this->_termKeys[] = $currentTerm->key();
!                     $this->_scores[]   = ($similarity - $this->_minimumSimilarity)*$scaleFactor;
!
!                     if ($maxTerms != 0 && count($this->_matches) > $maxTerms) {
!                         require_once 'Zend/Search/Lucene/Exception.php';
!                         throw new Zend_Search_Lucene_Exception('Terms per query limit is reached.');
!                     }
!                 }
!
!                 $index->nextTerm();
!             }
!         }
!
!         $index->closeTermsStream();
!     }
!
!     if (count($this->_matches) == 0) {
!         require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
!         return new Zend_Search_Lucene_Search_Query_Empty();
!     } else if (count($this->_matches) == 1) {
!         require_once 'Zend/Search/Lucene/Search/Query/Term.php';
!         return new Zend_Search_Lucene_Search_Query_Term(reset($this->_matches));
!     } else {
!         require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
!         $rewrittenQuery = new Zend_Search_Lucene_Search_Query_Boolean();
!
!         // Best score first; equal scores are ordered by term key. All three
!         // arrays are reordered together so their indexes stay aligned.
!         array_multisort($this->_scores,   SORT_DESC, SORT_NUMERIC,
!                         $this->_termKeys, SORT_ASC,  SORT_STRING,
!                         $this->_matches);
!
!         $termCount = 0;
!         require_once 'Zend/Search/Lucene/Search/Query/Term.php';
!         foreach ($this->_matches as $id => $matchedTerm) {
!             $subquery = new Zend_Search_Lucene_Search_Query_Term($matchedTerm);
!             $subquery->setBoost($this->_scores[$id]);
!
!             $rewrittenQuery->addSubquery($subquery);
!
!             // Only the MAX_CLAUSE_COUNT best-scoring terms become clauses.
!             $termCount++;
!             if ($termCount >= self::MAX_CLAUSE_COUNT) {
!                 break;
!             }
!         }
!
!         return $rewrittenQuery;
!     }
! }
!
! /**
! * Optimize query in the context of specified index
! *
! * @param Zend_Search_Lucene_Interface $index
! * @return Zend_Search_Lucene_Search_Query
! */
! public function optimize(Zend_Search_Lucene_Interface $index)
! {
!     // Unsupported on the fuzzy query itself: the message directs callers
!     // to rewrite() first, so this method always throws.
!     $message = 'Fuzzy query should not be directly used for search. Use $query->rewrite($index)';
!
!     require_once 'Zend/Search/Lucene/Exception.php';
!     throw new Zend_Search_Lucene_Exception($message);
! }
!
! /**
! * Return query terms
! *
! * @return array
! * @throws Zend_Search_Lucene_Exception
! */
! public function getQueryTerms()
! {
!     // _matches is null until rewrite() has replaced it with an array.
!     if ($this->_matches !== null) {
!         return $this->_matches;
!     }
!
!     require_once 'Zend/Search/Lucene/Exception.php';
!     throw new Zend_Search_Lucene_Exception('Search or rewrite operations have to be performed before.');
! }
!
! /**
! * Constructs an appropriate Weight implementation for this query.
! *
! * @param Zend_Search_Lucene_Interface $reader
! * @return Zend_Search_Lucene_Search_Weight
! * @throws Zend_Search_Lucene_Exception
! */
! public function createWeight(Zend_Search_Lucene_Interface $reader)
! {
!     // Unsupported on the fuzzy query itself: the message directs callers
!     // to rewrite() first, so this method always throws.
!     $message = 'Fuzzy query should not be directly used for search. Use $query->rewrite($index)';
!
!     require_once 'Zend/Search/Lucene/Exception.php';
!     throw new Zend_Search_Lucene_Exception($message);
! }
!
!
! /**
! * Execute query in context of index reader
! * It also initializes necessary internal structures
! *
! * @param Zend_Search_Lucene_Interface $reader
! * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
! * @throws Zend_Search_Lucene_Exception
! */
! public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
! {
!     // Unsupported on the fuzzy query itself: the message directs callers
!     // to rewrite() first, so this method always throws.
!     $message = 'Fuzzy query should not be directly used for search. Use $query->rewrite($index)';
!
!     require_once 'Zend/Search/Lucene/Exception.php';
!     throw new Zend_Search_Lucene_Exception($message);
! }
!
! /**
! * Get document ids likely matching the query
! *
! * It's an array with document ids as keys (performance considerations)
! *
! * @return array
! * @throws Zend_Search_Lucene_Exception
! */
! public function matchedDocs()
! {
!     // Unsupported on the fuzzy query itself: the message directs callers
!     // to rewrite() first, so this method always throws.
!     $message = 'Fuzzy query should not be directly used for search. Use $query->rewrite($index)';
!
!     require_once 'Zend/Search/Lucene/Exception.php';
!     throw new Zend_Search_Lucene_Exception($message);
! }
!
! /**
! * Score specified document
! *
! * @param integer $docId
! * @param Zend_Search_Lucene_Interface $reader
! * @return float
! * @throws Zend_Search_Lucene_Exception
! */
! public function score($docId, Zend_Search_Lucene_Interface $reader)
! {
!     // Unsupported on the fuzzy query itself: the message directs callers
!     // to rewrite() first, so this method always throws.
!     $message = 'Fuzzy query should not be directly used for search. Use $query->rewrite($index)';
!
!     require_once 'Zend/Search/Lucene/Exception.php';
!     throw new Zend_Search_Lucene_Exception($message);
! }
!
! /**
! * Query specific matches highlighting
! *
! * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter Highlighter object (also contains doc for highlighting)
! */
! protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
! {
!     // Tokenizes the document's 'body' field and passes to the highlighter every
!     // token whose similarity to the query term exceeds _minimumSimilarity.
!     $words = array();
!
!     require_once 'Zend/Search/Lucene/Index/Term.php';
!     $prefix           = Zend_Search_Lucene_Index_Term::getPrefix($this->_term->text, $this->_prefixLength);
!     $prefixByteLength = strlen($prefix);
!     $prefixUtf8Length = Zend_Search_Lucene_Index_Term::getLength($prefix);
!
!     $termRest = substr($this->_term->text, $prefixByteLength);
!     // we calculate length of the rest in bytes since levenshtein() is not UTF-8 compatible
!     $termRestLength = strlen($termRest);
!
!     $docBody = $highlighter->getDocument()->getFieldUtf8Value('body');
!     require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
!     $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($docBody, 'UTF-8');
!     foreach ($tokens as $token) {
!         $termText = $token->getTermText();
!
!         // Byte-wise prefix test. The previous substr(...) == $prefix comparison
!         // compared numeric-looking strings numerically, so a token such as "1e2"
!         // was accepted for the prefix "100".
!         if (strncmp($termText, $prefix, $prefixByteLength) !== 0) {
!             continue;
!         }
!
!         // Calculate similarity
!         $target       = substr($termText, $prefixByteLength);
!         $targetLength = strlen($target);
!
!         $maxDistance = isset($this->_maxDistances[$targetLength])?
!             $this->_maxDistances[$targetLength] :
!             $this->_calculateMaxDistance($prefixUtf8Length, $termRestLength, $targetLength);
!
!         if ($termRestLength == 0) {
!             // we don't have anything to compare. That means if we just add
!             // the letters for current term we get the new word
!             $similarity = (($prefixUtf8Length == 0)? 0 : 1 - $targetLength/$prefixUtf8Length);
!         } else if ($targetLength == 0) {
!             $similarity = (($prefixUtf8Length == 0)? 0 : 1 - $termRestLength/$prefixUtf8Length);
!         } else if ($maxDistance < abs($termRestLength - $targetLength)) {
!             // just adding the characters of term to target or vice-versa results in too many edits
!             // for example "pre" length is 3 and "prefixes" length is 8. We can see that
!             // given this optimal circumstance, the edit distance cannot be less than 5,
!             // which is 8-3 or more precisely abs(3-8).
!             // if our maximum edit distance is 4, then we can discard this word
!             // without looking at it.
!             $similarity = 0;
!         } else {
!             $distance = levenshtein($termRest, $target);
!             // Before PHP 8.0 levenshtein() returns -1 when an argument is longer
!             // than 255 characters; that must not be scored as a perfect match.
!             $similarity = ($distance < 0)? 0 :
!                 1 - $distance/($prefixUtf8Length + min($termRestLength, $targetLength));
!         }
!
!         if ($similarity > $this->_minimumSimilarity) {
!             $words[] = $termText;
!         }
!     }
!
!     $highlighter->highlight($words);
! }
!
! /**
! * Print a query
! *
! * @return string
! */
! public function __toString()
! {
!     // It's used only for query visualisation, so we don't care about characters escaping
!     $rendered = '';
!
!     // Optional "field:" qualifier.
!     if ($this->_term->field !== null) {
!         $rendered .= $this->_term->field . ':';
!     }
!
!     $rendered .= $this->_term->text . '~';
!
!     // The similarity is only printed when it differs from the default.
!     if ($this->_minimumSimilarity != self::DEFAULT_MIN_SIMILARITY) {
!         $rendered .= round($this->_minimumSimilarity, 4);
!     }
!
!     // The boost is only printed when it differs from 1.
!     if ($this->getBoost() != 1) {
!         $rendered .= '^' . round($this->getBoost(), 4);
!     }
!
!     return $rendered;
! }
! }
!
--- 1,493 ----
! <?php
! /**
! * Zend Framework
! *
! * LICENSE
! *
! * This source file is subject to the new BSD license that is bundled
! * with this package in the file LICENSE.txt.
! * It is also available through the world-wide-web at this URL:
! * http://framework.zend.com/license/new-bsd
! * If you did not receive a copy of the license and are unable to
! * obtain it through the world-wide-web, please send an email
! * to license at zend.com so we can send you a copy immediately.
! *
! * @category Zend
! * @package Zend_Search_Lucene
! * @subpackage Search
! * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
! * @license http://framework.zend.com/license/new-bsd New BSD License
! * @version $Id$
! */
!
!
! /** Zend_Search_Lucene_Search_Query */
! require_once 'Zend/Search/Lucene/Search/Query.php';
!
!
! /**
! * @category Zend
! * @package Zend_Search_Lucene
! * @subpackage Search
! * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
! * @license http://framework.zend.com/license/new-bsd New BSD License
! */
! class Zend_Search_Lucene_Search_Query_Fuzzy extends Zend_Search_Lucene_Search_Query
! {
! /** Default minimum similarity */
! const DEFAULT_MIN_SIMILARITY = 0.5;
!
! /**
! * Maximum number of matched terms.
! * Apache Lucene defines this limitation as boolean query maximum number of clauses:
! * org.apache.lucene.search.BooleanQuery.getMaxClauseCount()
! */
! const MAX_CLAUSE_COUNT = 1024;
!
! /**
! * Array of precalculated max distances
! *
! * keys are integers representing a word size
! */
! private $_maxDistances = array();
!
! /**
! * Base searching term.
! *
! * @var Zend_Search_Lucene_Index_Term
! */
! private $_term;
!
! /**
! * A value between 0 and 1 to set the required similarity
! * between the query term and the matching terms. For example, for a
! * _minimumSimilarity of 0.5 a term of the same length
! * as the query term is considered similar to the query term if the edit distance
! * between both terms is less than length(term)*0.5
! *
! * @var float
! */
! private $_minimumSimilarity;
!
! /**
! * The length of common (non-fuzzy) prefix
! *
! * @var integer
! */
! private $_prefixLength;
!
! /**
! * Matched terms.
! *
! * Matched terms list.
! * It's filled during the search (rewrite operation) and may be used for search result
! * post-processing
! *
! * Array of Zend_Search_Lucene_Index_Term objects
! *
! * @var array
! */
! private $_matches = null;
!
! /**
! * Matched terms scores
! *
! * @var array
! */
! private $_scores = null;
!
! /**
! * Array of the term keys.
! * Used to sort terms in alphabetical order if terms have the same scores
! *
! * @var array
! */
! private $_termKeys = null;
!
! /**
! * Default non-fuzzy prefix length
! *
! * @var integer
! */
! private static $_defaultPrefixLength = 3;
!
! /**
! * Zend_Search_Lucene_Search_Query_Fuzzy constructor.
! *
! * @param Zend_Search_Lucene_Index_Term $term
! * @param float $minimumSimilarity
! * @param integer $prefixLength
! * @throws Zend_Search_Lucene_Exception
! */
! public function __construct(Zend_Search_Lucene_Index_Term $term, $minimumSimilarity = self::DEFAULT_MIN_SIMILARITY, $prefixLength = null)
! {
!     // Validate the arguments in a fixed order; the first violated
!     // constraint decides which message the exception carries.
!     $problem = null;
!     if ($minimumSimilarity < 0) {
!         $problem = 'minimumSimilarity cannot be less than 0';
!     } else if ($minimumSimilarity >= 1) {
!         $problem = 'minimumSimilarity cannot be greater than or equal to 1';
!     } else if ($prefixLength < 0) {
!         $problem = 'prefixLength cannot be less than 0';
!     }
!
!     if ($problem !== null) {
!         // The exception class is only loaded on the failure path.
!         require_once 'Zend/Search/Lucene/Exception.php';
!         throw new Zend_Search_Lucene_Exception($problem);
!     }
!
!     $this->_term              = $term;
!     $this->_minimumSimilarity = $minimumSimilarity;
!
!     // A null prefix length means "use the class-wide default".
!     if ($prefixLength === null) {
!         $this->_prefixLength = self::$_defaultPrefixLength;
!     } else {
!         $this->_prefixLength = $prefixLength;
!     }
! }
!
! /**
! * Get default non-fuzzy prefix length
! *
! * @return integer
! */
! public static function getDefaultPrefixLength()
! {
!     // Class-wide default, used by the constructor when no prefix length is passed.
!     $defaultLength = self::$_defaultPrefixLength;
!
!     return $defaultLength;
! }
!
! /**
! * Set default non-fuzzy prefix length
! *
! * @param integer $defaultPrefixLength
! */
! public static function setDefaultPrefixLength($defaultPrefixLength)
! {
! // Stored as given (no validation here); the constructor reads this value
! // whenever it is called without an explicit prefix length.
! self::$_defaultPrefixLength = $defaultPrefixLength;
! }
!
! /**
! * Calculate maximum distance for specified word length
! *
! * @param integer $prefixLength
! * @param integer $termLength
! * @param integer $length
! * @return integer
! */
! private function _calculateMaxDistance($prefixLength, $termLength, $length)
! {
!     // The edit budget is a fraction of the compared length: the shorter of
!     // the two remainders plus the shared prefix.
!     $comparedLength = min($termLength, $length) + $prefixLength;
!     $allowedEdits   = (int) ((1 - $this->_minimumSimilarity)*$comparedLength);
!
!     // Memoize by candidate length so callers can look the value up next time.
!     $this->_maxDistances[$length] = $allowedEdits;
!
!     return $allowedEdits;
! }
!
! /**
! * Re-write query into primitive queries in the context of specified index
! *
! * @param Zend_Search_Lucene_Interface $index
! * @return Zend_Search_Lucene_Search_Query
! * @throws Zend_Search_Lucene_Exception
! */
! public function rewrite(Zend_Search_Lucene_Interface $index)
! {
!     // Scans the index terms of the relevant field(s), keeps every term whose
!     // similarity to the query term exceeds _minimumSimilarity and returns an
!     // Empty, Term or Boolean query built from the matches.
!     $this->_matches  = array();
!     $this->_scores   = array();
!     $this->_termKeys = array();
!
!     if ($this->_term->field === null) {
!         // Search through all fields
!         $fields = $index->getFieldNames(true /* indexed fields list */);
!     } else {
!         $fields = array($this->_term->field);
!     }
!
!     require_once 'Zend/Search/Lucene/Index/Term.php';
!     $prefix           = Zend_Search_Lucene_Index_Term::getPrefix($this->_term->text, $this->_prefixLength);
!     $prefixByteLength = strlen($prefix);
!     $prefixUtf8Length = Zend_Search_Lucene_Index_Term::getLength($prefix);
!
!     $termRest = substr($this->_term->text, $prefixByteLength);
!     // we calculate length of the rest in bytes since levenshtein() is not UTF-8 compatible
!     $termRestLength = strlen($termRest);
!
!     // Maps similarities from (minimumSimilarity, 1] onto boosts in (0, 1].
!     $scaleFactor = 1/(1 - $this->_minimumSimilarity);
!
!     require_once 'Zend/Search/Lucene.php';
!     $maxTerms = Zend_Search_Lucene::getTermsPerQueryLimit();
!     foreach ($fields as $field) {
!         $index->resetTermsStream();
!
!         if ($prefix != '') {
!             $index->skipTo(new Zend_Search_Lucene_Index_Term($prefix, $field));
!
!             // strncmp() compares bytes. The previous substr(...) == $prefix test
!             // compared numeric-looking strings numerically, so e.g. "1e2" was
!             // treated as equal to the prefix "100".
!             while (($currentTerm = $index->currentTerm()) !== null &&
!                    $currentTerm->field == $field &&
!                    strncmp($currentTerm->text, $prefix, $prefixByteLength) === 0) {
!                 // Calculate similarity
!                 $target       = substr($currentTerm->text, $prefixByteLength);
!                 $targetLength = strlen($target);
!
!                 $maxDistance = isset($this->_maxDistances[$targetLength])?
!                     $this->_maxDistances[$targetLength] :
!                     $this->_calculateMaxDistance($prefixUtf8Length, $termRestLength, $targetLength);
!
!                 if ($termRestLength == 0) {
!                     // we don't have anything to compare. That means if we just add
!                     // the letters for current term we get the new word
!                     $similarity = (($prefixUtf8Length == 0)? 0 : 1 - $targetLength/$prefixUtf8Length);
!                 } else if ($targetLength == 0) {
!                     $similarity = (($prefixUtf8Length == 0)? 0 : 1 - $termRestLength/$prefixUtf8Length);
!                 } else if ($maxDistance < abs($termRestLength - $targetLength)) {
!                     // just adding the characters of term to target or vice-versa results in too many edits
!                     // for example "pre" length is 3 and "prefixes" length is 8. We can see that
!                     // given this optimal circumstance, the edit distance cannot be less than 5,
!                     // which is 8-3 or more precisely abs(3-8).
!                     // if our maximum edit distance is 4, then we can discard this word
!                     // without looking at it.
!                     $similarity = 0;
!                 } else {
!                     $distance = levenshtein($termRest, $target);
!                     // Before PHP 8.0 levenshtein() returns -1 when an argument is longer
!                     // than 255 characters; that must not be scored as a perfect match.
!                     $similarity = ($distance < 0)? 0 :
!                         1 - $distance/($prefixUtf8Length + min($termRestLength, $targetLength));
!                 }
!
!                 if ($similarity > $this->_minimumSimilarity) {
!                     $this->_matches[]  = $currentTerm;
!                     $this->_termKeys[] = $currentTerm->key();
!                     $this->_scores[]   = ($similarity - $this->_minimumSimilarity)*$scaleFactor;
!
!                     if ($maxTerms != 0 && count($this->_matches) > $maxTerms) {
!                         require_once 'Zend/Search/Lucene/Exception.php';
!                         throw new Zend_Search_Lucene_Exception('Terms per query limit is reached.');
!                     }
!                 }
!
!                 $index->nextTerm();
!             }
!         } else {
!             $index->skipTo(new Zend_Search_Lucene_Index_Term('', $field));
!
!             while (($currentTerm = $index->currentTerm()) !== null && $currentTerm->field == $field) {
!                 // Calculate similarity
!                 $target        = $currentTerm->text;
!                 $targetLength  = strlen($target);
!                 $shorterLength = min($termRestLength, $targetLength);
!
!                 $maxDistance = isset($this->_maxDistances[$targetLength])?
!                     $this->_maxDistances[$targetLength] :
!                     $this->_calculateMaxDistance(0, $termRestLength, $targetLength);
!
!                 if ($shorterLength == 0) {
!                     // One side is empty, so there is nothing to compare. This also
!                     // prevents the division by zero that occurred when both the
!                     // query rest and the index term were empty.
!                     $similarity = 0;
!                 } else if ($maxDistance < abs($termRestLength - $targetLength)) {
!                     // The length difference alone already exceeds the edit budget
!                     // (see the "pre"/"prefixes" example in the prefixed branch).
!                     $similarity = 0;
!                 } else {
!                     $distance = levenshtein($termRest, $target);
!                     // -1 signals an over-long argument on PHP < 8.0; treat it as no match.
!                     $similarity = ($distance < 0)? 0 : 1 - $distance/$shorterLength;
!                 }
!
!                 if ($similarity > $this->_minimumSimilarity) {
!                     $this->_matches[]  = $currentTerm;
!                     $this->_termKeys[] = $currentTerm->key();
!                     $this->_scores[]   = ($similarity - $this->_minimumSimilarity)*$scaleFactor;
!
!                     if ($maxTerms != 0 && count($this->_matches) > $maxTerms) {
!                         require_once 'Zend/Search/Lucene/Exception.php';
!                         throw new Zend_Search_Lucene_Exception('Terms per query limit is reached.');
!                     }
!                 }
!
!                 $index->nextTerm();
!             }
!         }
!
!         $index->closeTermsStream();
!     }
!
!     if (count($this->_matches) == 0) {
!         require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
!         return new Zend_Search_Lucene_Search_Query_Empty();
!     } else if (count($this->_matches) == 1) {
!         require_once 'Zend/Search/Lucene/Search/Query/Term.php';
!         return new Zend_Search_Lucene_Search_Query_Term(reset($this->_matches));
!     } else {
!         require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
!         $rewrittenQuery = new Zend_Search_Lucene_Search_Query_Boolean();
!
!         // Best score first; equal scores are ordered by term key. All three
!         // arrays are reordered together so their indexes stay aligned.
!         array_multisort($this->_scores,   SORT_DESC, SORT_NUMERIC,
!                         $this->_termKeys, SORT_ASC,  SORT_STRING,
!                         $this->_matches);
!
!         $termCount = 0;
!         require_once 'Zend/Search/Lucene/Search/Query/Term.php';
!         foreach ($this->_matches as $id => $matchedTerm) {
!             $subquery = new Zend_Search_Lucene_Search_Query_Term($matchedTerm);
!             $subquery->setBoost($this->_scores[$id]);
!
!             $rewrittenQuery->addSubquery($subquery);
!
!             // Only the MAX_CLAUSE_COUNT best-scoring terms become clauses.
!             $termCount++;
!             if ($termCount >= self::MAX_CLAUSE_COUNT) {
!                 break;
!             }
!         }
!
!         return $rewrittenQuery;
!     }
! }
!
! /**
! * Optimize query in the context of specified index
! *
! * @param Zend_Search_Lucene_Interface $index
! * @return Zend_Search_Lucene_Search_Query
! */
! public function optimize(Zend_Search_Lucene_Interface $index)
! {
!     // Unsupported on the fuzzy query itself: the message directs callers
!     // to rewrite() first, so this method always throws.
!     $message = 'Fuzzy query should not be directly used for search. Use $query->rewrite($index)';
!
!     require_once 'Zend/Search/Lucene/Exception.php';
!     throw new Zend_Search_Lucene_Exception($message);
! }
!
! /**
! * Return query terms
! *
! * @return array
! * @throws Zend_Search_Lucene_Exception
! */
! public function getQueryTerms()
! {
!     // _matches is null until rewrite() has replaced it with an array.
!     if ($this->_matches !== null) {
!         return $this->_matches;
!     }
!
!     require_once 'Zend/Search/Lucene/Exception.php';
!     throw new Zend_Search_Lucene_Exception('Search or rewrite operations have to be performed before.');
! }
!
! /**
! * Constructs an appropriate Weight implementation for this query.
! *
! * @param Zend_Search_Lucene_Interface $reader
! * @return Zend_Search_Lucene_Search_Weight
! * @throws Zend_Search_Lucene_Exception
! */
! public function createWeight(Zend_Search_Lucene_Interface $reader)
! {
!     // Unsupported on the fuzzy query itself: the message directs callers
!     // to rewrite() first, so this method always throws.
!     $message = 'Fuzzy query should not be directly used for search. Use $query->rewrite($index)';
!
!     require_once 'Zend/Search/Lucene/Exception.php';
!     throw new Zend_Search_Lucene_Exception($message);
! }
!
!
! /**
!  * Execution entry point; not supported for a fuzzy query that has not been rewritten.
!  *
!  * @param Zend_Search_Lucene_Interface $reader
!  * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
!  * @throws Zend_Search_Lucene_Exception on every call
!  */
! public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
! {
!     require_once 'Zend/Search/Lucene/Exception.php';
!
!     $message = 'Fuzzy query should not be directly used for search. Use $query->rewrite($index)';
!     throw new Zend_Search_Lucene_Exception($message);
! }
!
! /**
!  * Matched-documents accessor; not supported for a fuzzy query that has not been rewritten.
!  *
!  * @return array
!  * @throws Zend_Search_Lucene_Exception on every call
!  */
! public function matchedDocs()
! {
!     require_once 'Zend/Search/Lucene/Exception.php';
!
!     $message = 'Fuzzy query should not be directly used for search. Use $query->rewrite($index)';
!     throw new Zend_Search_Lucene_Exception($message);
! }
!
! /**
!  * Scoring entry point; not supported for a fuzzy query that has not been rewritten.
!  *
!  * @param integer $docId
!  * @param Zend_Search_Lucene_Interface $reader
!  * @return float
!  * @throws Zend_Search_Lucene_Exception on every call
!  */
! public function score($docId, Zend_Search_Lucene_Interface $reader)
! {
!     require_once 'Zend/Search/Lucene/Exception.php';
!
!     $message = 'Fuzzy query should not be directly used for search. Use $query->rewrite($index)';
!     throw new Zend_Search_Lucene_Exception($message);
! }
!
! /**
!  * Query specific matches highlighting
!  *
!  * Tokenizes the 'body' field of the highlighter's document with the default
!  * analyzer and highlights every token that starts with the query term prefix
!  * and whose similarity to the query term is above the minimum similarity.
!  *
!  * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter Highlighter object (also contains doc for highlighting)
!  */
! protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
! {
!     $words = array();
!
!     require_once 'Zend/Search/Lucene/Index/Term.php';
!     $prefix           = Zend_Search_Lucene_Index_Term::getPrefix($this->_term->text, $this->_prefixLength);
!     $prefixByteLength = strlen($prefix);
!     $prefixUtf8Length = Zend_Search_Lucene_Index_Term::getLength($prefix);
!
!     $termRest = substr($this->_term->text, $prefixByteLength);
!     // we calculate length of the rest in bytes since levenshtein() is not UTF-8 compatible
!     $termRestLength = strlen($termRest);
!
!     $docBody = $highlighter->getDocument()->getFieldUtf8Value('body');
!     require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
!     $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($docBody, 'UTF-8');
!     foreach ($tokens as $token) {
!         $termText = $token->getTermText();
!
!         // Byte-wise prefix test. A loose '==' on the substr() result would treat
!         // numeric-looking strings such as "001" and "1e0" as equal.
!         if (strncmp($termText, $prefix, $prefixByteLength) !== 0) {
!             continue;
!         }
!
!         // Calculate similarity
!         $target       = substr($termText, $prefixByteLength);
!         $targetLength = strlen($target);
!
!         $maxDistance = isset($this->_maxDistances[$targetLength])?
!                            $this->_maxDistances[$targetLength] :
!                            $this->_calculateMaxDistance($prefixUtf8Length, $termRestLength, $targetLength);
!
!         if ($termRestLength == 0) {
!             // we don't have anything to compare. That means if we just add
!             // the letters for current term we get the new word
!             $similarity = (($prefixUtf8Length == 0)? 0 : 1 - $targetLength/$prefixUtf8Length);
!         } else if ($targetLength == 0) {
!             $similarity = (($prefixUtf8Length == 0)? 0 : 1 - $termRestLength/$prefixUtf8Length);
!         } else if ($maxDistance < abs($termRestLength - $targetLength)) {
!             // just adding the characters of term to target or vice-versa results in too many edits
!             // for example "pre" length is 3 and "prefixes" length is 8. We can see that
!             // given this optimal circumstance, the edit distance cannot be less than 5,
!             // which is 8-3 or more precisely abs(3-8).
!             // if our maximum edit distance is 4, then we can discard this word
!             // without looking at it.
!             $similarity = 0;
!         } else {
!             $similarity = 1 - levenshtein($termRest, $target)/($prefixUtf8Length + min($termRestLength, $targetLength));
!         }
!
!         if ($similarity > $this->_minimumSimilarity) {
!             $words[] = $termText;
!         }
!     }
!
!     $highlighter->highlight($words);
! }
!
! /**
!  * Print a query
!  *
!  * Output shape: optional "field:" prefix, the term text followed by '~', then the
!  * minimum similarity (only when it differs from the default) and "^boost"
!  * (only when the boost differs from 1).
!  *
!  * @return string
!  */
! public function __toString()
! {
!     // It's used only for query visualisation, so we don't care about characters escaping
!     $rendered = '';
!
!     if ($this->_term->field !== null) {
!         $rendered .= $this->_term->field . ':';
!     }
!
!     $rendered .= $this->_term->text . '~';
!
!     if ($this->_minimumSimilarity != self::DEFAULT_MIN_SIMILARITY) {
!         $rendered .= round($this->_minimumSimilarity, 4);
!     }
!
!     if ($this->getBoost() != 1) {
!         $rendered .= '^' . round($this->getBoost(), 4);
!     }
!
!     return $rendered;
! }
! }
!
Modified: branches/stable/titania/includes/library/Zend/Search/Lucene/Search/Query/Insignificant.php
==============================================================================
*** branches/stable/titania/includes/library/Zend/Search/Lucene/Search/Query/Insignificant.php (original)
--- branches/stable/titania/includes/library/Zend/Search/Lucene/Search/Query/Insignificant.php Sat Nov 20 18:23:22 2010
***************
*** 1,139 ****
! <?php
! /**
! * Zend Framework
! *
! * LICENSE
! *
! * This source file is subject to the new BSD license that is bundled
! * with this package in the file LICENSE.txt.
! * It is also available through the world-wide-web at this URL:
! * http://framework.zend.com/license/new-bsd
! * If you did not receive a copy of the license and are unable to
! * obtain it through the world-wide-web, please send an email
! * to license at zend.com so we can send you a copy immediately.
! *
! * @category Zend
! * @package Zend_Search_Lucene
! * @subpackage Search
! * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
! * @license http://framework.zend.com/license/new-bsd New BSD License
! * @version $Id$
! */
!
!
! /** Zend_Search_Lucene_Search_Query */
! require_once 'Zend/Search/Lucene/Search/Query.php';
!
!
! /**
! * The insignificant query returns empty result, but doesn't limit result set as a part of other queries
! *
! * @category Zend
! * @package Zend_Search_Lucene
! * @subpackage Search
! * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
! * @license http://framework.zend.com/license/new-bsd New BSD License
! */
! class Zend_Search_Lucene_Search_Query_Insignificant extends Zend_Search_Lucene_Search_Query
! {
!     /**
!      * Rewriting leaves an insignificant query unchanged
!      *
!      * @param Zend_Search_Lucene_Interface $index
!      * @return Zend_Search_Lucene_Search_Query this same object
!      */
!     public function rewrite(Zend_Search_Lucene_Interface $index)
!     {
!         return $this;
!     }
!
!     /**
!      * Optimizing leaves an insignificant query unchanged
!      *
!      * @param Zend_Search_Lucene_Interface $index
!      * @return Zend_Search_Lucene_Search_Query this same object
!      */
!     public function optimize(Zend_Search_Lucene_Interface $index)
!     {
!         return $this;
!     }
!
!     /**
!      * Build the weight object for this query (always the "empty" weight)
!      *
!      * @param Zend_Search_Lucene_Interface $reader
!      * @return Zend_Search_Lucene_Search_Weight
!      */
!     public function createWeight(Zend_Search_Lucene_Interface $reader)
!     {
!         require_once 'Zend/Search/Lucene/Search/Weight/Empty.php';
!
!         $emptyWeight = new Zend_Search_Lucene_Search_Weight_Empty();
!         return $emptyWeight;
!     }
!
!     /**
!      * Execute query in context of index reader (intentionally a no-op here)
!      *
!      * @param Zend_Search_Lucene_Interface $reader
!      * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
!      */
!     public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
!     {
!         // Nothing to execute
!     }
!
!     /**
!      * Get document ids likely matching the query
!      *
!      * @return array always an empty array
!      */
!     public function matchedDocs()
!     {
!         return array();
!     }
!
!     /**
!      * Score specified document
!      *
!      * @param integer $docId
!      * @param Zend_Search_Lucene_Interface $reader
!      * @return float always zero
!      */
!     public function score($docId, Zend_Search_Lucene_Interface $reader)
!     {
!         return 0;
!     }
!
!     /**
!      * Return query terms
!      *
!      * @return array always an empty array
!      */
!     public function getQueryTerms()
!     {
!         return array();
!     }
!
!     /**
!      * Query specific matches highlighting (nothing is highlighted)
!      *
!      * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter Highlighter object (also contains doc for highlighting)
!      */
!     protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
!     {
!         // Nothing to highlight
!     }
!
!     /**
!      * Print a query
!      *
!      * @return string fixed placeholder text
!      */
!     public function __toString()
!     {
!         return '<InsignificantQuery>';
!     }
! }
!
--- 1,139 ----
! <?php
! /**
! * Zend Framework
! *
! * LICENSE
! *
! * This source file is subject to the new BSD license that is bundled
! * with this package in the file LICENSE.txt.
! * It is also available through the world-wide-web at this URL:
! * http://framework.zend.com/license/new-bsd
! * If you did not receive a copy of the license and are unable to
! * obtain it through the world-wide-web, please send an email
! * to license at zend.com so we can send you a copy immediately.
! *
! * @category Zend
! * @package Zend_Search_Lucene
! * @subpackage Search
! * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
! * @license http://framework.zend.com/license/new-bsd New BSD License
! * @version $Id$
! */
!
!
! /** Zend_Search_Lucene_Search_Query */
! require_once 'Zend/Search/Lucene/Search/Query.php';
!
!
! /**
! * The insignificant query returns empty result, but doesn't limit result set as a part of other queries
! *
! * @category Zend
! * @package Zend_Search_Lucene
! * @subpackage Search
! * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
! * @license http://framework.zend.com/license/new-bsd New BSD License
! */
! class Zend_Search_Lucene_Search_Query_Insignificant extends Zend_Search_Lucene_Search_Query
! {
!     /**
!      * Rewriting leaves an insignificant query unchanged
!      *
!      * @param Zend_Search_Lucene_Interface $index
!      * @return Zend_Search_Lucene_Search_Query this same object
!      */
!     public function rewrite(Zend_Search_Lucene_Interface $index)
!     {
!         return $this;
!     }
!
!     /**
!      * Optimizing leaves an insignificant query unchanged
!      *
!      * @param Zend_Search_Lucene_Interface $index
!      * @return Zend_Search_Lucene_Search_Query this same object
!      */
!     public function optimize(Zend_Search_Lucene_Interface $index)
!     {
!         return $this;
!     }
!
!     /**
!      * Build the weight object for this query (always the "empty" weight)
!      *
!      * @param Zend_Search_Lucene_Interface $reader
!      * @return Zend_Search_Lucene_Search_Weight
!      */
!     public function createWeight(Zend_Search_Lucene_Interface $reader)
!     {
!         require_once 'Zend/Search/Lucene/Search/Weight/Empty.php';
!
!         $emptyWeight = new Zend_Search_Lucene_Search_Weight_Empty();
!         return $emptyWeight;
!     }
!
!     /**
!      * Execute query in context of index reader (intentionally a no-op here)
!      *
!      * @param Zend_Search_Lucene_Interface $reader
!      * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
!      */
!     public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
!     {
!         // Nothing to execute
!     }
!
!     /**
!      * Get document ids likely matching the query
!      *
!      * @return array always an empty array
!      */
!     public function matchedDocs()
!     {
!         return array();
!     }
!
!     /**
!      * Score specified document
!      *
!      * @param integer $docId
!      * @param Zend_Search_Lucene_Interface $reader
!      * @return float always zero
!      */
!     public function score($docId, Zend_Search_Lucene_Interface $reader)
!     {
!         return 0;
!     }
!
!     /**
!      * Return query terms
!      *
!      * @return array always an empty array
!      */
!     public function getQueryTerms()
!     {
!         return array();
!     }
!
!     /**
!      * Query specific matches highlighting (nothing is highlighted)
!      *
!      * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter Highlighter object (also contains doc for highlighting)
!      */
!     protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
!     {
!         // Nothing to highlight
!     }
!
!     /**
!      * Print a query
!      *
!      * @return string fixed placeholder text
!      */
!     public function __toString()
!     {
!         return '<InsignificantQuery>';
!     }
! }
!
Modified: branches/stable/titania/includes/library/Zend/Search/Lucene/Search/Query/MultiTerm.php
==============================================================================
*** branches/stable/titania/includes/library/Zend/Search/Lucene/Search/Query/MultiTerm.php (original)
--- branches/stable/titania/includes/library/Zend/Search/Lucene/Search/Query/MultiTerm.php Sat Nov 20 18:23:22 2010
***************
*** 1,668 ****
! <?php
! /**
! * Zend Framework
! *
! * LICENSE
! *
! * This source file is subject to the new BSD license that is bundled
! * with this package in the file LICENSE.txt.
! * It is also available through the world-wide-web at this URL:
! * http://framework.zend.com/license/new-bsd
! * If you did not receive a copy of the license and are unable to
! * obtain it through the world-wide-web, please send an email
! * to license at zend.com so we can send you a copy immediately.
! *
! * @category Zend
! * @package Zend_Search_Lucene
! * @subpackage Search
! * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
! * @license http://framework.zend.com/license/new-bsd New BSD License
! * @version $Id$
! */
!
!
! /** Zend_Search_Lucene_Search_Query */
! require_once 'Zend/Search/Lucene/Search/Query.php';
!
!
! /**
! * @category Zend
! * @package Zend_Search_Lucene
! * @subpackage Search
! * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
! * @license http://framework.zend.com/license/new-bsd New BSD License
! */
! class Zend_Search_Lucene_Search_Query_MultiTerm extends Zend_Search_Lucene_Search_Query
! {
!
! /**
! * Terms to find.
! * Array of Zend_Search_Lucene_Index_Term
! *
! * @var array
! */
! private $_terms = array();
!
! /**
! * Term signs.
! * If true then term is required.
! * If false then term is prohibited.
! * If null then term is neither prohibited, nor required
! *
! * If array is null then all terms are required
! *
! * @var array
! */
! private $_signs;
!
! /**
! * Result vector.
! *
! * @var array
! */
! private $_resVector = null;
!
! /**
! * Terms frequencies vectors.
! * Array of Arrays:
! * term1Id => (docId => freq, ...)
! * term2Id => (docId => freq, ...)
! *
! * @var array
! */
! private $_termsFreqs = array();
!
!
! /**
! * A score factor based on the fraction of all query terms
! * that a document contains.
! * float for conjunction queries
! * array of float for non conjunction queries
! *
! * @var mixed
! */
! private $_coord = null;
!
!
! /**
! * Terms weights
! * array of Zend_Search_Lucene_Search_Weight
! *
! * @var array
! */
! private $_weights = array();
!
!
! /**
!  * Class constructor. Create a new multi-term query object.
!  *
!  * If $signs is omitted, or every sign in it is true, all terms are treated as
!  * required and the signs list is kept as null.
!  * It differs from addTerm() behavior, but should never be used
!  *
!  * Nothing is initialised when $terms is not an array.
!  *
!  * @param array $terms    Array of Zend_Search_Lucene_Index_Term objects
!  * @param array $signs    Array of signs.  Sign is boolean|null.
!  * @throws Zend_Search_Lucene_Exception if count($terms) exceeds Zend_Search_Lucene::getTermsPerQueryLimit()
!  */
! public function __construct($terms = null, $signs = null)
! {
!     if (is_array($terms)) {
!         require_once 'Zend/Search/Lucene.php';
!         if (count($terms) > Zend_Search_Lucene::getTermsPerQueryLimit()) {
!             // Load the exception class explicitly, as every other throw site does
!             require_once 'Zend/Search/Lucene/Exception.php';
!             throw new Zend_Search_Lucene_Exception('Terms per query limit is reached.');
!         }
!
!         $this->_terms = $terms;
!
!         $this->_signs = null;
!         // Check if all terms are required
!         if (is_array($signs)) {
!             foreach ($signs as $sign ) {
!                 if ($sign !== true) {
!                     // At least one term is optional or prohibited: keep the full list
!                     $this->_signs = $signs;
!                     break;
!                 }
!             }
!         }
!     }
! }
!
!
! /**
!  * Append a term (Zend_Search_Lucene_Index_Term) to this query.
!  *
!  * Sign values:
!  *     TRUE  - term is required
!  *     FALSE - term is prohibited
!  *     NULL  - term is neither prohibited, nor required
!  *
!  * While every term is required the signs list stays null; it is materialised
!  * (with TRUE for each earlier term) the first time a non-TRUE sign is added.
!  *
!  * @param  Zend_Search_Lucene_Index_Term $term
!  * @param  boolean|null $sign
!  * @return void
!  */
! public function addTerm(Zend_Search_Lucene_Index_Term $term, $sign = null) {
!     $allRequiredSoFar = ($this->_signs === null);
!
!     if (!($allRequiredSoFar && $sign === true)) {
!         if ($allRequiredSoFar) {
!             // Back-fill a TRUE sign for each term added before this one
!             $this->_signs = array();
!             for ($filled = 0, $existing = count($this->_terms); $filled < $existing; $filled++) {
!                 $this->_signs[] = true;
!             }
!         }
!         $this->_signs[] = $sign;
!     }
!
!     $this->_terms[] = $term;
! }
!
!
! /**
!  * Re-write query into primitive queries in the context of specified index
!  *
!  * Returns an Empty query when there are no terms, this object when every term
!  * names a field, and otherwise a Boolean query of rewritten Term subqueries.
!  *
!  * @param Zend_Search_Lucene_Interface $index
!  * @return Zend_Search_Lucene_Search_Query
!  */
! public function rewrite(Zend_Search_Lucene_Interface $index)
! {
!     if (count($this->_terms) == 0) {
!         require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
!         return new Zend_Search_Lucene_Search_Query_Empty();
!     }
!
!     // Look for a term that is not bound to a field
!     $hasUnqualifiedTerm = false;
!     foreach ($this->_terms as $term) {
!         if ($term->field === null) {
!             $hasUnqualifiedTerm = true;
!             break;
!         }
!     }
!
!     if (!$hasUnqualifiedTerm) {
!         return $this;
!     }
!
!     /** transform multiterm query to boolean and apply rewrite() method to subqueries. */
!     require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
!     $booleanQuery = new Zend_Search_Lucene_Search_Query_Boolean();
!     $booleanQuery->setBoost($this->getBoost());
!
!     require_once 'Zend/Search/Lucene/Search/Query/Term.php';
!     foreach ($this->_terms as $termId => $term) {
!         $termQuery = new Zend_Search_Lucene_Search_Query_Term($term);
!         $rewritten = $termQuery->rewrite($index);
!
!         // A null signs list means every term is required
!         if ($this->_signs === null) {
!             $sign = true;
!         } else {
!             $sign = $this->_signs[$termId];
!         }
!
!         $booleanQuery->addSubquery($rewritten, $sign);
!     }
!
!     return $booleanQuery;
! }
!
! /**
!  * Optimize query in the context of specified index
!  *
!  * Terms missing from the index are dropped when optional or prohibited; a missing
!  * required term collapses the whole query to an Empty query. A single surviving
!  * term becomes a plain Term query.
!  *
!  * @param Zend_Search_Lucene_Interface $index
!  * @return Zend_Search_Lucene_Search_Query
!  */
! public function optimize(Zend_Search_Lucene_Interface $index)
! {
!     $terms = $this->_terms;
!     $signs = $this->_signs;
!
!     foreach ($terms as $id => $term) {
!         if ($index->hasTerm($term)) {
!             continue;
!         }
!
!         if ($signs === null  ||  $signs[$id] === true) {
!             // Term is required
!             require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
!             return new Zend_Search_Lucene_Search_Query_Empty();
!         }
!
!         // Term is optional or prohibited
!         // Remove it from terms and signs list
!         unset($terms[$id]);
!         unset($signs[$id]);
!     }
!
!     // Check if all presented terms are prohibited
!     $allProhibited = ($signs !== null);
!     if ($allProhibited) {
!         foreach ($signs as $sign) {
!             if ($sign !== false) {
!                 $allProhibited = false;
!                 break;
!             }
!         }
!     }
!     if ($allProhibited) {
!         require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
!         return new Zend_Search_Lucene_Search_Query_Empty();
!     }
!
!     /**
!      * @todo make an optimization for repeated terms
!      * (they may have different signs)
!      */
!
!     $remaining = count($terms);
!
!     if ($remaining == 1) {
!         // It's already checked, that it's not a prohibited term
!
!         // It's one term query with one required or optional element
!         require_once 'Zend/Search/Lucene/Search/Query/Term.php';
!         $singleTermQuery = new Zend_Search_Lucene_Search_Query_Term(reset($terms));
!         $singleTermQuery->setBoost($this->getBoost());
!
!         return $singleTermQuery;
!     }
!
!     if ($remaining == 0) {
!         require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
!         return new Zend_Search_Lucene_Search_Query_Empty();
!     }
!
!     $multiTermQuery = new Zend_Search_Lucene_Search_Query_MultiTerm($terms, $signs);
!     $multiTermQuery->setBoost($this->getBoost());
!     return $multiTermQuery;
! }
!
!
! /**
!  * Return the query terms
!  *
!  * @return array terms list as stored on this query
!  */
! public function getTerms()
! {
!     $terms = $this->_terms;
!
!     return $terms;
! }
!
!
! /**
!  * Return the term signs
!  *
!  * @return array|null signs list as stored on this query (null when all terms are required)
!  */
! public function getSigns()
! {
!     $signs = $this->_signs;
!
!     return $signs;
! }
!
!
! /**
!  * Store the weight object for one term
!  *
!  * @param integer $num index under which the weight is stored
!  * @param Zend_Search_Lucene_Search_Weight_Term $weight
!  */
! public function setWeight($num, $weight)
! {
!     // Later read back by the score calculators through the same index
!     $this->_weights[$num] = $weight;
! }
!
!
! /**
!  * Build the MultiTerm weight for this query, remember it and return it.
!  *
!  * @param Zend_Search_Lucene_Interface $reader
!  * @return Zend_Search_Lucene_Search_Weight
!  */
! public function createWeight(Zend_Search_Lucene_Interface $reader)
! {
!     require_once 'Zend/Search/Lucene/Search/Weight/MultiTerm.php';
!
!     $weight = new Zend_Search_Lucene_Search_Weight_MultiTerm($this, $reader);
!     $this->_weight = $weight;
!
!     return $weight;
! }
!
!
! /**
!  * Calculate result vector for Conjunction query
!  * (like '+something +another')
!  *
!  * Fills $this->_resVector (document ids as keys) and $this->_termsFreqs.
!  * As a side effect $this->_terms is re-ordered by ascending document frequency.
!  *
!  * @param Zend_Search_Lucene_Interface $reader
!  */
! private function _calculateConjunctionResult(Zend_Search_Lucene_Interface $reader)
! {
!     $this->_resVector = null;
!
!     if (count($this->_terms) == 0) {
!         // No terms: the result is empty. Return here, otherwise the code below
!         // would hand an undefined $termDocs to array_flip() and overwrite the result.
!         $this->_resVector = array();
!         return;
!     }
!
!     // Order terms by selectivity
!     $docFreqs = array();
!     $ids      = array();
!     foreach ($this->_terms as $id => $term) {
!         $docFreqs[] = $reader->docFreq($term);
!         $ids[]      = $id; // Used to keep original order for terms with the same selectivity and omit terms comparison
!     }
!     array_multisort($docFreqs, SORT_ASC, SORT_NUMERIC,
!                     $ids,      SORT_ASC, SORT_NUMERIC,
!                     $this->_terms);
!
!     require_once 'Zend/Search/Lucene/Index/DocsFilter.php';
!     $docsFilter = new Zend_Search_Lucene_Index_DocsFilter();
!     foreach ($this->_terms as $term) {
!         // The same filter object is passed for every term
!         $termDocs = $reader->termDocs($term, $docsFilter);
!     }
!     // Treat last retrieved docs vector as a result set
!     // (filter collects data for other terms)
!     $this->_resVector = array_flip($termDocs);
!
!     foreach ($this->_terms as $termId => $term) {
!         $this->_termsFreqs[$termId] = $reader->termFreqs($term, $docsFilter);
!     }
!
!     // ksort($this->_resVector, SORT_NUMERIC);
!     // Docs are returned ordered. Used algorithms doesn't change elements order.
! }
!
!
! /**
! * Calculate result vector for non Conjunction query
! * (like '+something -another')
! *
! * Fills $this->_resVector (document ids as keys, sorted by key) and
! * $this->_termsFreqs. The result is the intersection of all required terms'
! * documents, or the union of optional terms' documents when no term is
! * required, minus every document matched by a prohibited term.
! *
! * @param Zend_Search_Lucene_Interface $reader
! */
! private function _calculateNonConjunctionResult(Zend_Search_Lucene_Interface $reader)
! {
! $requiredVectors = array();
! $requiredVectorsSizes = array();
! $requiredVectorsIds = array(); // is used to prevent arrays comparison
!
! $optional = array();
! $prohibited = array();
!
! foreach ($this->_terms as $termId => $term) {
! // Flip so that document ids become array keys
! $termDocs = array_flip($reader->termDocs($term));
!
! if ($this->_signs[$termId] === true) {
! // required
! $requiredVectors[] = $termDocs;
! $requiredVectorsSizes[] = count($termDocs);
! $requiredVectorsIds[] = $termId;
! } elseif ($this->_signs[$termId] === false) {
! // prohibited
! // array union
! $prohibited += $termDocs;
! } else {
! // neither required, nor prohibited
! // array union
! $optional += $termDocs;
! }
!
! $this->_termsFreqs[$termId] = $reader->termFreqs($term);
! }
!
! // sort resvectors in order of subquery cardinality increasing
! array_multisort($requiredVectorsSizes, SORT_ASC, SORT_NUMERIC,
! $requiredVectorsIds, SORT_ASC, SORT_NUMERIC,
! $requiredVectors);
!
! // Intersect the required vectors; $required stays null when there are none
! $required = null;
! foreach ($requiredVectors as $nextResVector) {
! if($required === null) {
! $required = $nextResVector;
! } else {
! //$required = array_intersect_key($required, $nextResVector);
!
! /**
! * This code is used as workaround for array_intersect_key() slowness problem.
! */
! $updatedVector = array();
! foreach ($required as $id => $value) {
! if (isset($nextResVector[$id])) {
! $updatedVector[$id] = $value;
! }
! }
! $required = $updatedVector;
! }
!
! if (count($required) == 0) {
! // Empty result set, we don't need to check other terms
! break;
! }
! }
!
! // Optional documents are used only when no term is required
! if ($required !== null) {
! $this->_resVector = $required;
! } else {
! $this->_resVector = $optional;
! }
!
! if (count($prohibited) != 0) {
! // $this->_resVector = array_diff_key($this->_resVector, $prohibited);
!
! /**
! * This code is used as workaround for array_diff_key() slowness problem.
! */
! // Iterate over whichever of the two arrays is smaller
! if (count($this->_resVector) < count($prohibited)) {
! $updatedVector = $this->_resVector;
! foreach ($this->_resVector as $id => $value) {
! if (isset($prohibited[$id])) {
! unset($updatedVector[$id]);
! }
! }
! $this->_resVector = $updatedVector;
! } else {
! $updatedVector = $this->_resVector;
! foreach ($prohibited as $id => $value) {
! unset($updatedVector[$id]);
! }
! $this->_resVector = $updatedVector;
! }
! }
!
! ksort($this->_resVector, SORT_NUMERIC);
! }
!
!
! /**
!  * Score calculator for conjunction queries (all terms are required)
!  *
!  * Sums tf * term weight * field norm over all terms, then multiplies the sum by
!  * the coord factor (computed once and cached) and by the query boost.
!  *
!  * @param integer $docId
!  * @param Zend_Search_Lucene_Interface $reader
!  * @return float
!  */
! public function _conjunctionScore($docId, Zend_Search_Lucene_Interface $reader)
! {
!     if ($this->_coord === null) {
!         $termCount    = count($this->_terms);
!         $this->_coord = $reader->getSimilarity()->coord($termCount, $termCount);
!     }
!
!     $total = 0.0;
!
!     foreach ($this->_terms as $termId => $term) {
!         /**
!          * We don't need to check that term freq is not 0
!          * Score calculation is performed only for matched docs
!          */
!         $tf         = $reader->getSimilarity()->tf($this->_termsFreqs[$termId][$docId]);
!         $termWeight = $this->_weights[$termId]->getValue();
!         $fieldNorm  = $reader->norm($docId, $term->field);
!
!         $total += $tf * $termWeight * $fieldNorm;
!     }
!
!     return $total * $this->_coord * $this->getBoost();
! }
!
!
! /**
!  * Score calculator for non conjunction queries (not all terms are required)
!  *
!  * Sums tf * term weight * field norm over the non-prohibited terms that have a
!  * frequency entry for the document, then multiplies the sum by the coord factor
!  * for the number of such terms and by the query boost.
!  *
!  * @param integer $docId
!  * @param Zend_Search_Lucene_Interface $reader
!  * @return float
!  */
! public function _nonConjunctionScore($docId, $reader)
! {
!     if ($this->_coord === null) {
!         $this->_coord = array();
!
!         // Count the terms that are not prohibited
!         $scorableTerms = 0;
!         foreach ($this->_signs as $sign) {
!             if ($sign === false) {
!                 continue;
!             }
!             $scorableTerms++;
!         }
!
!         // Pre-compute the coord factor for every possible number of matched terms
!         $matched = 0;
!         while ($matched <= $scorableTerms) {
!             $this->_coord[$matched] = $reader->getSimilarity()->coord($matched, $scorableTerms);
!             $matched++;
!         }
!     }
!
!     $total        = 0.0;
!     $matchedTerms = 0;
!     foreach ($this->_terms as $termId => $term) {
!         // Skip terms that are prohibited or did not match this document
!         if ($this->_signs[$termId] === false || !isset($this->_termsFreqs[$termId][$docId])) {
!             continue;
!         }
!
!         $matchedTerms++;
!
!         /**
!          * We don't need to check that term freq is not 0
!          * Score calculation is performed only for matched docs
!          */
!         $tf         = $reader->getSimilarity()->tf($this->_termsFreqs[$termId][$docId]);
!         $termWeight = $this->_weights[$termId]->getValue();
!         $fieldNorm  = $reader->norm($docId, $term->field);
!
!         $total += $tf * $termWeight * $fieldNorm;
!     }
!
!     return $total * $this->_coord[$matchedTerms] * $this->getBoost();
! }
!
! /**
!  * Execute query in context of index reader
!  * It also initializes necessary internal structures
!  *
!  * The $docsFilter argument is not used by this implementation.
!  *
!  * @param Zend_Search_Lucene_Interface $reader
!  * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
!  */
! public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
! {
!     // A null signs list means every term is required
!     $allTermsRequired = ($this->_signs === null);
!
!     if ($allTermsRequired) {
!         $this->_calculateConjunctionResult($reader);
!     } else {
!         $this->_calculateNonConjunctionResult($reader);
!     }
!
!     // Initialize weight if it's not done yet
!     $this->_initWeight($reader);
! }
!
! /**
!  * Get document ids likely matching the query
!  *
!  * Document ids are the array keys (performance considerations).
!  *
!  * @return array|null the stored result vector; its initial value is null
!  */
! public function matchedDocs()
! {
!     $matched = $this->_resVector;
!
!     return $matched;
! }
!
! /**
!  * Score specified document
!  *
!  * @param integer $docId
!  * @param Zend_Search_Lucene_Interface $reader
!  * @return float 0 when the document is not in the result vector
!  */
! public function score($docId, Zend_Search_Lucene_Interface $reader)
! {
!     if (!isset($this->_resVector[$docId])) {
!         return 0;
!     }
!
!     // A null signs list means every term is required
!     if ($this->_signs === null) {
!         return $this->_conjunctionScore($docId, $reader);
!     }
!
!     return $this->_nonConjunctionScore($docId, $reader);
! }
!
! /**
!  * Return query terms, leaving out the prohibited ones
!  *
!  * @return array
!  */
! public function getQueryTerms()
! {
!     if ($this->_signs === null) {
!         // All terms are required, so all of them are returned
!         return $this->_terms;
!     }
!
!     $selected = array();
!
!     foreach ($this->_signs as $id => $sign) {
!         if ($sign === false) {
!             continue;
!         }
!         $selected[] = $this->_terms[$id];
!     }
!
!     return $selected;
! }
!
! /**
!  * Query specific matches highlighting
!  *
!  * Passes the text of every non-prohibited term to the highlighter.
!  *
!  * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter  Highlighter object (also contains doc for highlighting)
!  */
! protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
! {
!     $words = array();
!
!     if ($this->_signs !== null) {
!         foreach ($this->_signs as $id => $sign) {
!             if ($sign === false) {
!                 continue;
!             }
!             $words[] = $this->_terms[$id]->text;
!         }
!     } else {
!         // No signs list: every term is required
!         foreach ($this->_terms as $term) {
!             $words[] = $term->text;
!         }
!     }
!
!     $highlighter->highlight($words);
! }
!
! /**
!  * Print a query
!  *
!  * Terms are separated by single spaces; required terms are prefixed with '+',
!  * prohibited ones with '-'. When the boost differs from 1 the whole list is
!  * wrapped as "(...)^boost".
!  *
!  * @return string
!  */
! public function __toString()
! {
!     // It's used only for query visualisation, so we don't care about characters escaping
!
!     $query = '';
!
!     $isFirst = true;
!     foreach ($this->_terms as $id => $term) {
!         // Separate by position, not by key: the term list may carry sparse keys
!         // (optimize() unsets entries before handing the list to the constructor).
!         if (!$isFirst) {
!             $query .= ' ';
!         }
!         $isFirst = false;
!
!         if ($this->_signs === null || $this->_signs[$id] === true) {
!             $query .= '+';
!         } else if ($this->_signs[$id] === false) {
!             $query .= '-';
!         }
!
!         if ($term->field !== null) {
!             $query .= $term->field . ':';
!         }
!         $query .= $term->text;
!     }
!
!     if ($this->getBoost() != 1) {
!         $query = '(' . $query . ')^' . round($this->getBoost(), 4);
!     }
!
!     return $query;
! }
! }
!
--- 1,668 ----
! <?php
! /**
! * Zend Framework
! *
! * LICENSE
! *
! * This source file is subject to the new BSD license that is bundled
! * with this package in the file LICENSE.txt.
! * It is also available through the world-wide-web at this URL:
! * http://framework.zend.com/license/new-bsd
! * If you did not receive a copy of the license and are unable to
! * obtain it through the world-wide-web, please send an email
! * to license at zend.com so we can send you a copy immediately.
! *
! * @category Zend
! * @package Zend_Search_Lucene
! * @subpackage Search
! * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
! * @license http://framework.zend.com/license/new-bsd New BSD License
! * @version $Id$
! */
!
!
! /** Zend_Search_Lucene_Search_Query */
! require_once 'Zend/Search/Lucene/Search/Query.php';
!
!
! /**
! * @category Zend
! * @package Zend_Search_Lucene
! * @subpackage Search
! * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
! * @license http://framework.zend.com/license/new-bsd New BSD License
! */
! class Zend_Search_Lucene_Search_Query_MultiTerm extends Zend_Search_Lucene_Search_Query
! {
!
! /**
! * Terms to find.
! * Array of Zend_Search_Lucene_Index_Term
! *
! * @var array
! */
! private $_terms = array();
!
! /**
! * Term signs.
! * If true then term is required.
! * If false then term is prohibited.
! * If null then term is neither prohibited, nor required
! *
! * If array is null then all terms are required
! *
! * @var array
! */
! private $_signs;
!
! /**
! * Result vector.
! *
! * @var array
! */
! private $_resVector = null;
!
! /**
! * Terms frequencies vectors.
! * Array of Arrays:
! * term1Id => (docId => freq, ...)
! * term2Id => (docId => freq, ...)
! *
! * @var array
! */
! private $_termsFreqs = array();
!
!
! /**
! * A score factor based on the fraction of all query terms
! * that a document contains.
! * float for conjunction queries
! * array of float for non conjunction queries
! *
! * @var mixed
! */
! private $_coord = null;
!
!
! /**
! * Terms weights
! * array of Zend_Search_Lucene_Search_Weight
! *
! * @var array
! */
! private $_weights = array();
!
!
! /**
!  * Class constructor. Create a new multi-term query object.
!  *
!  * If $signs is omitted, or every sign in it is true, all terms are treated as
!  * required and the signs list is kept as null.
!  * It differs from addTerm() behavior, but should never be used
!  *
!  * Nothing is initialised when $terms is not an array.
!  *
!  * @param array $terms    Array of Zend_Search_Lucene_Index_Term objects
!  * @param array $signs    Array of signs.  Sign is boolean|null.
!  * @throws Zend_Search_Lucene_Exception if count($terms) exceeds Zend_Search_Lucene::getTermsPerQueryLimit()
!  */
! public function __construct($terms = null, $signs = null)
! {
!     if (is_array($terms)) {
!         require_once 'Zend/Search/Lucene.php';
!         if (count($terms) > Zend_Search_Lucene::getTermsPerQueryLimit()) {
!             // Load the exception class explicitly, as every other throw site does
!             require_once 'Zend/Search/Lucene/Exception.php';
!             throw new Zend_Search_Lucene_Exception('Terms per query limit is reached.');
!         }
!
!         $this->_terms = $terms;
!
!         $this->_signs = null;
!         // Check if all terms are required
!         if (is_array($signs)) {
!             foreach ($signs as $sign ) {
!                 if ($sign !== true) {
!                     // At least one term is optional or prohibited: keep the full list
!                     $this->_signs = $signs;
!                     break;
!                 }
!             }
!         }
!     }
! }
!
!
! /**
!  * Append a term (Zend_Search_Lucene_Index_Term) to this query.
!  *
!  * Sign values:
!  *     TRUE  - term is required
!  *     FALSE - term is prohibited
!  *     NULL  - term is neither prohibited, nor required
!  *
!  * While every term is required the signs list stays null; it is materialised
!  * (with TRUE for each earlier term) the first time a non-TRUE sign is added.
!  *
!  * @param  Zend_Search_Lucene_Index_Term $term
!  * @param  boolean|null $sign
!  * @return void
!  */
! public function addTerm(Zend_Search_Lucene_Index_Term $term, $sign = null) {
!     $allRequiredSoFar = ($this->_signs === null);
!
!     if (!($allRequiredSoFar && $sign === true)) {
!         if ($allRequiredSoFar) {
!             // Back-fill a TRUE sign for each term added before this one
!             $this->_signs = array();
!             for ($filled = 0, $existing = count($this->_terms); $filled < $existing; $filled++) {
!                 $this->_signs[] = true;
!             }
!         }
!         $this->_signs[] = $sign;
!     }
!
!     $this->_terms[] = $term;
! }
!
!
! /**
!  * Re-write query into primitive queries in the context of specified index
!  *
!  * An empty terms list gives an Empty query. If every term is bound to a
!  * field the query is returned unchanged. Otherwise it is transformed into
!  * a Boolean query with one rewritten Term subquery per term, keeping the
!  * boost and the term signs.
!  *
!  * @param  Zend_Search_Lucene_Interface $index
!  * @return Zend_Search_Lucene_Search_Query
!  */
! public function rewrite(Zend_Search_Lucene_Interface $index)
! {
!     if (count($this->_terms) == 0) {
!         require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
!         return new Zend_Search_Lucene_Search_Query_Empty();
!     }
!
!     // Look for a term which is not bound to a field
!     $hasUnqualifiedTerm = false;
!     foreach ($this->_terms as $candidate) {
!         if ($candidate->field === null) {
!             $hasUnqualifiedTerm = true;
!             break;
!         }
!     }
!
!     if (!$hasUnqualifiedTerm) {
!         return $this;
!     }
!
!     // Transform multiterm query to boolean and apply rewrite() method to subqueries
!     require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
!     $booleanQuery = new Zend_Search_Lucene_Search_Query_Boolean();
!     $booleanQuery->setBoost($this->getBoost());
!
!     require_once 'Zend/Search/Lucene/Search/Query/Term.php';
!     foreach ($this->_terms as $termId => $term) {
!         $termQuery = new Zend_Search_Lucene_Search_Query_Term($term);
!         $rewritten = $termQuery->rewrite($index);
!
!         // A null signs list means that each term is required
!         if ($this->_signs === null) {
!             $sign = true;
!         } else {
!             $sign = $this->_signs[$termId];
!         }
!
!         $booleanQuery->addSubquery($rewritten, $sign);
!     }
!
!     return $booleanQuery;
! }
!
! /**
!  * Optimize query in the context of specified index
!  *
!  * Terms missing from the index are handled first: a missing required term
!  * makes the whole query Empty, a missing optional or prohibited term is
!  * dropped. The remainder becomes an Empty query (nothing left, or only
!  * prohibited terms left), a single Term query, or a new MultiTerm query.
!  * The boost of this query is carried over to Term and MultiTerm results.
!  *
!  * @param  Zend_Search_Lucene_Interface $index
!  * @return Zend_Search_Lucene_Search_Query
!  */
! public function optimize(Zend_Search_Lucene_Interface $index)
! {
!     $keptTerms = $this->_terms;
!     $keptSigns = $this->_signs;
!
!     foreach ($keptTerms as $termId => $term) {
!         if ($index->hasTerm($term)) {
!             continue;
!         }
!
!         if ($keptSigns === null || $keptSigns[$termId] === true) {
!             // A required term is absent from the index
!             require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
!             return new Zend_Search_Lucene_Search_Query_Empty();
!         }
!
!         // Term is optional or prohibited: remove it from terms and signs list
!         unset($keptTerms[$termId], $keptSigns[$termId]);
!     }
!
!     // With explicit signs at least one non-prohibited term has to remain
!     if ($keptSigns !== null) {
!         $hasPositiveTerm = false;
!         foreach ($keptSigns as $sign) {
!             if ($sign !== false) {
!                 $hasPositiveTerm = true;
!                 break;
!             }
!         }
!
!         if (!$hasPositiveTerm) {
!             require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
!             return new Zend_Search_Lucene_Search_Query_Empty();
!         }
!     }
!
!     /**
!      * @todo make an optimization for repeated terms
!      *       (they may have different signs)
!      */
!
!     $termsCount = count($keptTerms);
!
!     if ($termsCount == 0) {
!         require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
!         return new Zend_Search_Lucene_Search_Query_Empty();
!     }
!
!     if ($termsCount == 1) {
!         // The check above guarantees that the only term left is required or optional
!         require_once 'Zend/Search/Lucene/Search/Query/Term.php';
!         $singleTermQuery = new Zend_Search_Lucene_Search_Query_Term(reset($keptTerms));
!         $singleTermQuery->setBoost($this->getBoost());
!
!         return $singleTermQuery;
!     }
!
!     $multiTermQuery = new Zend_Search_Lucene_Search_Query_MultiTerm($keptTerms, $keptSigns);
!     $multiTermQuery->setBoost($this->getBoost());
!
!     return $multiTermQuery;
! }
!
!
! /**
! * Returns the terms of this query
! * (the internal list of Zend_Search_Lucene_Index_Term objects,
! * including prohibited terms)
! *
! * @return array
! */
! public function getTerms()
! {
! return $this->_terms;
! }
!
!
! /**
! * Return terms signs
! *
! * Sign is TRUE for a required term, FALSE for a prohibited term and
! * NULL for a term which is neither prohibited, nor required.
! * The whole list is null if all terms are required.
! *
! * @return array|null
! */
! public function getSigns()
! {
! return $this->_signs;
! }
!
!
! /**
! * Set weight for specified term
! *
! * The weight is stored under index $num. The score calculators read
! * weights by term id, so $num is expected to be the id of the term
! * in the terms list.
! *
! * @param integer $num
! * @param Zend_Search_Lucene_Search_Weight_Term $weight
! */
! public function setWeight($num, $weight)
! {
! $this->_weights[$num] = $weight;
! }
!
!
! /**
! * Constructs an appropriate Weight implementation for this query.
! *
! * A new Zend_Search_Lucene_Search_Weight_MultiTerm is created on each call,
! * kept in $this->_weight and returned.
! *
! * @param Zend_Search_Lucene_Interface $reader
! * @return Zend_Search_Lucene_Search_Weight
! */
! public function createWeight(Zend_Search_Lucene_Interface $reader)
! {
! require_once 'Zend/Search/Lucene/Search/Weight/MultiTerm.php';
! // The query itself and the reader are handed over to the weight object
! $this->_weight = new Zend_Search_Lucene_Search_Weight_MultiTerm($this, $reader);
! return $this->_weight;
! }
!
!
! /**
!  * Calculate result vector for Conjunction query
!  * (like '+something +another')
!  *
!  * Terms are re-ordered by increasing document frequency (the terms list
!  * itself is re-sorted and re-indexed), then their documents are requested
!  * one by one through a shared docs filter. The documents returned for the
!  * last term are taken as the result set. Term frequencies are collected
!  * through the same filter.
!  *
!  * A query without terms gives an empty result vector.
!  *
!  * @param Zend_Search_Lucene_Interface $reader
!  */
! private function _calculateConjunctionResult(Zend_Search_Lucene_Interface $reader)
! {
!     $this->_resVector = null;
!
!     if (count($this->_terms) == 0) {
!         // Nothing to search for. Stop here: without any term $termDocs is never
!         // assigned, so falling through would hand an undefined variable to
!         // array_flip() and overwrite the empty result.
!         $this->_resVector = array();
!         return;
!     }
!
!     // Order terms by selectivity
!     $docFreqs = array();
!     $ids      = array();
!     foreach ($this->_terms as $id => $term) {
!         $docFreqs[] = $reader->docFreq($term);
!         $ids[]      = $id; // Used to keep original order for terms with the same selectivity and omit terms comparison
!     }
!     array_multisort($docFreqs, SORT_ASC, SORT_NUMERIC,
!                     $ids,      SORT_ASC, SORT_NUMERIC,
!                     $this->_terms);
!
!     require_once 'Zend/Search/Lucene/Index/DocsFilter.php';
!     $docsFilter = new Zend_Search_Lucene_Index_DocsFilter();
!     foreach ($this->_terms as $termId => $term) {
!         $termDocs = $reader->termDocs($term, $docsFilter);
!     }
!     // Treat last retrieved docs vector as a result set
!     // (filter collects data for other terms)
!     $this->_resVector = array_flip($termDocs);
!
!     foreach ($this->_terms as $termId => $term) {
!         $this->_termsFreqs[$termId] = $reader->termFreqs($term, $docsFilter);
!     }
!
!     // No ksort($this->_resVector, SORT_NUMERIC) here:
!     // docs are returned ordered and the used algorithms don't change elements order.
! }
!
!
! /**
!  * Calculate result vector for non Conjunction query
!  * (like '+something -another')
!  *
!  * Documents of required terms are intersected, smallest set first.
!  * If there is no required term at all, the union of the optional terms'
!  * documents is used instead. Documents of prohibited terms are then
!  * removed and the result is sorted by document id. Term frequencies are
!  * collected for every term.
!  *
!  * @param Zend_Search_Lucene_Interface $reader
!  */
! private function _calculateNonConjunctionResult(Zend_Search_Lucene_Interface $reader)
! {
!     $requiredVectors      = array();
!     $requiredVectorsSizes = array();
!     $requiredVectorsIds   = array(); // is used to prevent arrays comparison
!
!     $optionalDocs   = array();
!     $prohibitedDocs = array();
!
!     foreach ($this->_terms as $termId => $term) {
!         $docs = array_flip($reader->termDocs($term));
!         $sign = $this->_signs[$termId];
!
!         if ($sign === true) {
!             // required
!             $requiredVectors[]      = $docs;
!             $requiredVectorsSizes[] = count($docs);
!             $requiredVectorsIds[]   = $termId;
!         } elseif ($sign === false) {
!             // prohibited (array union)
!             $prohibitedDocs += $docs;
!         } else {
!             // neither required, nor prohibited (array union)
!             $optionalDocs += $docs;
!         }
!
!         $this->_termsFreqs[$termId] = $reader->termFreqs($term);
!     }
!
!     // Sort required vectors in order of cardinality increasing
!     array_multisort($requiredVectorsSizes, SORT_ASC, SORT_NUMERIC,
!                     $requiredVectorsIds,   SORT_ASC, SORT_NUMERIC,
!                     $requiredVectors);
!
!     $required = null;
!     foreach ($requiredVectors as $vector) {
!         if ($required === null) {
!             $required = $vector;
!         } else {
!             // Manual key intersection, used as workaround
!             // for array_intersect_key() slowness problem.
!             $common = array();
!             foreach ($required as $docId => $value) {
!                 if (isset($vector[$docId])) {
!                     $common[$docId] = $value;
!                 }
!             }
!             $required = $common;
!         }
!
!         if (count($required) == 0) {
!             // Empty result set, we don't need to check other terms
!             break;
!         }
!     }
!
!     $this->_resVector = ($required !== null) ? $required : $optionalDocs;
!
!     if (count($prohibitedDocs) != 0) {
!         // Manual key difference, used as workaround for array_diff_key()
!         // slowness problem. The loop runs over the smaller of the two sets.
!         if (count($this->_resVector) < count($prohibitedDocs)) {
!             $filtered = array();
!             foreach ($this->_resVector as $docId => $value) {
!                 if (!isset($prohibitedDocs[$docId])) {
!                     $filtered[$docId] = $value;
!                 }
!             }
!             $this->_resVector = $filtered;
!         } else {
!             foreach ($prohibitedDocs as $docId => $unused) {
!                 unset($this->_resVector[$docId]);
!             }
!         }
!     }
!
!     ksort($this->_resVector, SORT_NUMERIC);
! }
!
!
! /**
!  * Score calculator for conjunction queries (all terms are required)
!  *
!  * The score is the sum over all terms of tf * term weight * field norm,
!  * multiplied by the coord factor and the query boost. The coord factor is
!  * calculated on the first call and reused afterwards.
!  *
!  * @param  integer $docId
!  * @param  Zend_Search_Lucene_Interface $reader
!  * @return float
!  */
! public function _conjunctionScore($docId, Zend_Search_Lucene_Interface $reader)
! {
!     if ($this->_coord === null) {
!         $termsCount   = count($this->_terms);
!         $this->_coord = $reader->getSimilarity()->coord($termsCount, $termsCount);
!     }
!
!     $sum = 0.0;
!
!     foreach ($this->_terms as $termId => $term) {
!         // Term freq is not checked against 0 here:
!         // score calculation is performed only for matched docs
!         $tf     = $reader->getSimilarity()->tf($this->_termsFreqs[$termId][$docId]);
!         $weight = $this->_weights[$termId]->getValue();
!         $norm   = $reader->norm($docId, $term->field);
!
!         $sum += $tf * $weight * $norm;
!     }
!
!     return $sum * $this->_coord * $this->getBoost();
! }
!
!
! /**
!  * Score calculator for non conjunction queries (not all terms are required)
!  *
!  * Only non-prohibited terms which matched the document contribute
!  * tf * term weight * field norm to the score. The sum is multiplied by the
!  * coord factor for the number of matched terms and by the query boost.
!  * The table of coord factors is built on the first call.
!  *
!  * @param  integer $docId
!  * @param  Zend_Search_Lucene_Interface $reader
!  * @return float
!  */
! public function _nonConjunctionScore($docId, $reader)
! {
!     if ($this->_coord === null) {
!         $this->_coord = array();
!
!         // Number of terms which are able to match (everything but prohibited ones)
!         $maxCoord = 0;
!         foreach ($this->_signs as $sign) {
!             if ($sign !== false) {
!                 $maxCoord++;
!             }
!         }
!
!         for ($overlap = 0; $overlap <= $maxCoord; $overlap++) {
!             $this->_coord[$overlap] = $reader->getSimilarity()->coord($overlap, $maxCoord);
!         }
!     }
!
!     $sum     = 0.0;
!     $matched = 0;
!
!     foreach ($this->_terms as $termId => $term) {
!         if ($this->_signs[$termId] === false) {
!             // prohibited term
!             continue;
!         }
!         if (!isset($this->_termsFreqs[$termId][$docId])) {
!             // term is not matched by this document
!             continue;
!         }
!
!         $matched++;
!
!         // Term freq is not checked against 0 here:
!         // score calculation is performed only for matched docs
!         $tf     = $reader->getSimilarity()->tf($this->_termsFreqs[$termId][$docId]);
!         $weight = $this->_weights[$termId]->getValue();
!         $norm   = $reader->norm($docId, $term->field);
!
!         $sum += $tf * $weight * $norm;
!     }
!
!     return $sum * $this->_coord[$matched] * $this->getBoost();
! }
!
! /**
!  * Execute query in context of index reader
!  * It also initializes necessary internal structures
!  *
!  * The result vector is calculated with the conjunction algorithm when the
!  * signs list is null (all terms are required) and with the non conjunction
!  * one otherwise. $docsFilter is accepted, but is not used by this method.
!  *
!  * @param Zend_Search_Lucene_Interface $reader
!  * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
!  */
! public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
! {
!     $hasExplicitSigns = ($this->_signs !== null);
!
!     if ($hasExplicitSigns) {
!         $this->_calculateNonConjunctionResult($reader);
!     } else {
!         $this->_calculateConjunctionResult($reader);
!     }
!
!     // Weight is initialized here if it's not done yet
!     $this->_initWeight($reader);
! }
!
! /**
! * Get document ids likely matching the query
! *
! * It's an array with document ids as keys (performance considerations)
! *
! * The result vector is returned as is, so the value is null
! * as long as no result has been calculated.
! *
! * @return array|null
! */
! public function matchedDocs()
! {
! return $this->_resVector;
! }
!
! /**
!  * Score specified document
!  *
!  * Documents outside of the result vector get 0. Matched documents are
!  * scored by the conjunction calculator when all terms are required
!  * (signs list is null) and by the non conjunction one otherwise.
!  *
!  * @param  integer $docId
!  * @param  Zend_Search_Lucene_Interface $reader
!  * @return float
!  */
! public function score($docId, Zend_Search_Lucene_Interface $reader)
! {
!     if (!isset($this->_resVector[$docId])) {
!         return 0;
!     }
!
!     if ($this->_signs !== null) {
!         return $this->_nonConjunctionScore($docId, $reader);
!     }
!
!     return $this->_conjunctionScore($docId, $reader);
! }
!
! /**
!  * Return query terms
!  *
!  * Prohibited terms are left out. If all terms are required (signs list is
!  * null) the terms list is returned as is, otherwise the selected terms are
!  * returned as a new, sequentially indexed array.
!  *
!  * @return array
!  */
! public function getQueryTerms()
! {
!     $signs = $this->_signs;
!
!     if ($signs === null) {
!         return $this->_terms;
!     }
!
!     $selected = array();
!     foreach ($signs as $termId => $sign) {
!         if ($sign === false) {
!             // prohibited term
!             continue;
!         }
!         $selected[] = $this->_terms[$termId];
!     }
!
!     return $selected;
! }
!
! /**
!  * Query specific matches highlighting
!  *
!  * The texts of all non-prohibited terms are collected and passed to the
!  * highlighter in a single highlight() call.
!  *
!  * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter  Highlighter object (also contains doc for highlighting)
!  */
! protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
! {
!     // Select the terms to highlight
!     if ($this->_signs === null) {
!         // all terms are required
!         $selected = $this->_terms;
!     } else {
!         $selected = array();
!         foreach ($this->_signs as $termId => $sign) {
!             if ($sign !== false) {
!                 $selected[] = $this->_terms[$termId];
!             }
!         }
!     }
!
!     $words = array();
!     foreach ($selected as $term) {
!         $words[] = $term->text;
!     }
!
!     $highlighter->highlight($words);
! }
!
! /**
!  * Print a query
!  *
!  * Each term is rendered as [sign][field:]text, where the sign is '+' for a
!  * required term, '-' for a prohibited term and omitted for an optional one.
!  * Terms are separated by a single space. If the boost differs from 1 the
!  * whole query is wrapped as '(...)^boost' with the boost rounded to 4 digits.
!  *
!  * It's used only for query visualisation, so characters are not escaped.
!  *
!  * @return string
!  */
! public function __toString()
! {
!     $parts = array();
!
!     foreach ($this->_terms as $id => $term) {
!         $part = '';
!
!         if ($this->_signs === null || $this->_signs[$id] === true) {
!             $part .= '+';
!         } else if ($this->_signs[$id] === false) {
!             $part .= '-';
!         }
!
!         if ($term->field !== null) {
!             $part .= $term->field . ':';
!         }
!         $part .= $term->text;
!
!         $parts[] = $part;
!     }
!
!     // Separators are placed between the rendered terms, not decided by the
!     // term key: the first key is not 0 when terms were dropped from the
!     // list (as optimize() does), and a key test would then produce a leading
!     // space or a missing separator.
!     $query = implode(' ', $parts);
!
!     if ($this->getBoost() != 1) {
!         $query = '(' . $query . ')^' . round($this->getBoost(), 4);
!     }
!
!     return $query;
! }
! }
!
More information about the customisationdb-commits
mailing list